diff --git a/wally-pipelined/config/rv32ic/wally-config.vh b/wally-pipelined/config/rv32ic/wally-config.vh index 090da8d62..68765c6c4 100644 --- a/wally-pipelined/config/rv32ic/wally-config.vh +++ b/wally-pipelined/config/rv32ic/wally-config.vh @@ -67,7 +67,7 @@ `define BOOTTIM_RANGE 34'h00000FFF `define TIM_SUPPORTED 1'b1 `define TIM_BASE 34'h80000000 -`define TIM_RANGE 34'h07FFFFFF +`define TIM_RANGE 34'h7FFFFFFF `define CLINT_SUPPORTED 1'b1 `define CLINT_BASE 34'h02000000 `define CLINT_RANGE 34'h0000FFFF diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index 44a90e1c2..56bfade59 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -73,7 +73,7 @@ `define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 `define TIM_BASE 56'h80000000 -`define TIM_RANGE 56'h07FFFFFF +`define TIM_RANGE 56'h7FFFFFFF `define CLINT_SUPPORTED 1'b1 `define CLINT_BASE 56'h02000000 `define CLINT_RANGE 56'h0000FFFF diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 946e2d283..70355d3cd 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -3,36 +3,46 @@ quietly WaveActivateNextPane {} 0 add wave -noupdate /testbench/clk add wave -noupdate /testbench/reset add wave -noupdate /testbench/memfilename +add wave -noupdate /testbench/dut/hart/SATP_REGW add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM -add wave -noupdate -group HDU 
-group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/priv/trap/InstrValidM +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/PCM +add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/InstrM +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/lsu/MemAdrM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM +add wave -noupdate 
-group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/EcallFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StorePageFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InterruptM +add wave -noupdate -group HDU -group interrupts /testbench/dut/hart/priv/trap/PendingIntsM +add wave -noupdate -group HDU -group interrupts /testbench/dut/hart/priv/trap/CommittedM +add wave -noupdate -group HDU -group interrupts /testbench/dut/hart/priv/trap/InstrValidM add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/RetM add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/TrapM add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/StoreStallD add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/ICacheStallF -add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/DCacheStall +add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/LSUStall add wave -noupdate -group HDU -group hazards /testbench/dut/hart/MulDivStallD -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushD -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushE -add 
wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushM -add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushW +add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF +add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD +add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE +add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushM +add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushW add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE @@ -105,7 +115,7 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/c/RegWriteD add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/RdD add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs1D add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs2D -add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rf +add wave -noupdate -group RegFile -expand /testbench/dut/hart/ieu/dp/regf/rf add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a1 add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a2 add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a3 @@ -209,23 +219,14 @@ add wave -noupdate -group icache -expand -group memory -group {tag write} /testb add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF add wave -noupdate -group icache -expand -group pc 
/testbench/dut/hart/ifu/icache/controller/PCPreFinalF -add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATA -add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATAMasked -add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATANext add wave -noupdate -group AHB -color Gold /testbench/dut/hart/ebu/BusState -add wave -noupdate -group AHB /testbench/dut/hart/ebu/ProposedNextBusState add wave -noupdate -group AHB /testbench/dut/hart/ebu/NextBusState -add wave -noupdate -group AHB /testbench/dut/hart/ebu/DSquashBusAccessM -add wave -noupdate -group AHB /testbench/dut/hart/ebu/ISquashBusAccessF add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/AtomicMaskedM -add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemReadM -add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemWriteM add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/InstrReadF add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemSizeM add wave -noupdate -group AHB /testbench/dut/hart/ebu/HCLK add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESETn add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATA -add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATANext add wave -noupdate -group AHB /testbench/dut/hart/ebu/HREADY add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESP add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDR @@ -240,19 +241,122 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW -add wave -noupdate -expand -group lsu -color Gold /testbench/dut/hart/lsu/CurrState -add wave -noupdate -expand -group lsu 
/testbench/dut/hart/lsu/DisableTranslation -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemRWM -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAdrM -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemPAdrM -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/ReadDataW -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/WriteDataM -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/AtomicMaskedM -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DSquashBusAccessM -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/HRDATAW -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAckW -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/StallW -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/LSUStall +add wave -noupdate -group lsu -expand -group {LSU ARB} /testbench/dut/hart/lsu/arbiter/SelPTW +add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/hart/lsu/dcache/CurrState +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/WalkerPageFaultM +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/WriteDataM +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SRAMBlockWriteEnableM +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SRAMWordWriteEnableM +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SRAMWayWriteEnable +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SRAMWordEnable +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SelAdrM +add wave -noupdate -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/DCacheMemWriteData +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 
{/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/SetValid} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/SetDirty} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/Adr} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/WAdr} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -label TAG {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/CacheTagMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/DirtyBits} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/ValidBits} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group 
{Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/SRAMAdr +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadDataBlockWayM +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadDataBlockWayMaskedM +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadDataBlockM +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadDataWordM +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/FinalReadDataWordM +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM read} -expand /testbench/dut/hart/lsu/dcache/ReadTag +add wave -noupdate -group lsu -expand 
-group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/WayHit +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/Dirty +add wave -noupdate -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/Valid +add wave -noupdate -group lsu -expand -group dcache -expand -group Victim /testbench/dut/hart/lsu/dcache/VictimReadDataBLockWayMaskedM +add wave -noupdate -group lsu -expand -group dcache -expand -group Victim /testbench/dut/hart/lsu/dcache/VictimReadDataBlockM +add wave -noupdate -group lsu -expand -group dcache -expand -group Victim /testbench/dut/hart/lsu/dcache/VictimTag +add wave -noupdate -group lsu -expand -group dcache -expand -group Victim /testbench/dut/hart/lsu/dcache/VictimWay +add wave -noupdate -group lsu -expand -group dcache -expand -group Victim /testbench/dut/hart/lsu/dcache/VictimDirtyWay +add wave -noupdate -group lsu -expand -group dcache -expand -group Victim /testbench/dut/hart/lsu/dcache/VictimDirty +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/MemRWM +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/MemAdrE +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/MemPAdrM +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/pagetablewalker/DTLBMissM +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/pagetablewalker/MemAdrM +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/Funct3M +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/Funct7M +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} 
/testbench/dut/hart/lsu/dcache/AtomicM +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/CacheableM +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/WriteDataM +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/ReadDataW +add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/DCacheStall +add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/hart/lsu/dcache/WayHit +add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/hart/lsu/dcache/CacheHit +add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/hart/lsu/dcache/SRAMWordWriteEnableW +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/hart/lsu/dcache/AHBPAdr +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/hart/lsu/dcache/AHBRead +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/hart/lsu/dcache/AHBWrite +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/hart/lsu/dcache/AHBAck +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/hart/lsu/dcache/HRDATA +add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/hart/lsu/dcache/HWDATA +add wave -noupdate -group lsu -group dtlb /testbench/dut/hart/lsu/dmmu/genblk1/tlb/tlbcontrol/EffectivePrivilegeMode +add wave -noupdate -group lsu -group dtlb /testbench/dut/hart/lsu/dmmu/genblk1/tlb/tlbcontrol/Translate +add wave -noupdate -group lsu -group dtlb /testbench/dut/hart/lsu/dmmu/genblk1/tlb/tlbcontrol/DisableTranslation +add wave -noupdate -group lsu -group dtlb 
/testbench/dut/hart/lsu/dmmu/TLBMiss +add wave -noupdate -group lsu -group dtlb /testbench/dut/hart/lsu/dmmu/TLBHit +add wave -noupdate -group lsu -group dtlb /testbench/dut/hart/lsu/dmmu/PhysicalAddress +add wave -noupdate -group lsu -group dtlb -label {Virtual Address} /testbench/dut/hart/lsu/dmmu/Address +add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/hart/lsu/dmmu/TLBPageFault +add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/hart/lsu/dmmu/LoadAccessFaultM +add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/hart/lsu/dmmu/StoreAccessFaultM +add wave -noupdate -group lsu -group dtlb /testbench/dut/hart/lsu/dmmu/genblk1/tlb/TLBPAdr +add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/hart/lsu/dmmu/genblk1/tlb/Address +add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/hart/lsu/dmmu/genblk1/tlb/PTE +add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/hart/lsu/dmmu/genblk1/tlb/TLBWrite +add wave -noupdate -group lsu -group pma /testbench/dut/hart/lsu/dmmu/pmachecker/PhysicalAddress +add wave -noupdate -group lsu -group pma /testbench/dut/hart/lsu/dmmu/pmachecker/SelRegions +add wave -noupdate -group lsu -group pma /testbench/dut/hart/lsu/dmmu/Cacheable +add wave -noupdate -group lsu -group pma /testbench/dut/hart/lsu/dmmu/Idempotent +add wave -noupdate -group lsu -group pma /testbench/dut/hart/lsu/dmmu/AtomicAllowed +add wave -noupdate -group lsu -group pma /testbench/dut/hart/lsu/dmmu/pmachecker/PMAAccessFault +add wave -noupdate -group lsu -group pma /testbench/dut/hart/lsu/dmmu/PMAInstrAccessFaultF +add wave -noupdate -group lsu -group pma /testbench/dut/hart/lsu/dmmu/PMALoadAccessFaultM +add wave -noupdate -group lsu -group pma /testbench/dut/hart/lsu/dmmu/PMAStoreAccessFaultM +add wave -noupdate -group lsu -expand -group pmp /testbench/dut/hart/lsu/dmmu/PMPInstrAccessFaultF +add wave -noupdate 
-group lsu -expand -group pmp /testbench/dut/hart/lsu/dmmu/PMPLoadAccessFaultM +add wave -noupdate -group lsu -expand -group pmp /testbench/dut/hart/lsu/dmmu/PMPStoreAccessFaultM +add wave -noupdate -group lsu -expand -group ptwalker -color Gold /testbench/dut/hart/lsu/pagetablewalker/genblk1/WalkerState +add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/hart/lsu/pagetablewalker/genblk1/EndWalk +add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/hart/lsu/pagetablewalker/genblk1/PreviousWalkerState +add wave -noupdate -group lsu -expand -group ptwalker -color Salmon /testbench/dut/hart/lsu/pagetablewalker/HPTWStall +add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/hart/lsu/pagetablewalker/HPTWReadPTE +add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/hart/lsu/pagetablewalker/genblk1/CurrentPTE +add wave -noupdate -group lsu -expand -group ptwalker -expand -group miss/write /testbench/dut/hart/lsu/pagetablewalker/ITLBMissF +add wave -noupdate -group lsu -expand -group ptwalker -expand -group miss/write /testbench/dut/hart/lsu/pagetablewalker/ITLBWriteF +add wave -noupdate -group lsu -expand -group ptwalker -expand -group miss/write /testbench/dut/hart/lsu/pagetablewalker/DTLBMissM +add wave -noupdate -group lsu -expand -group ptwalker -expand -group miss/write /testbench/dut/hart/lsu/pagetablewalker/DTLBWriteM +add wave -noupdate -group lsu -expand -group ptwalker -expand -group pte /testbench/dut/hart/lsu/pagetablewalker/PageTableEntryF +add wave -noupdate -group lsu -expand -group ptwalker -expand -group pte /testbench/dut/hart/lsu/pagetablewalker/PageTableEntryM +add wave -noupdate -group lsu -expand -group ptwalker -expand -group pte /testbench/dut/hart/lsu/pagetablewalker/PageTypeF +add wave -noupdate -group lsu -expand -group ptwalker -expand -group pte /testbench/dut/hart/lsu/pagetablewalker/PageTypeM +add wave -noupdate -group lsu -expand -group ptwalker -expand -group pte 
/testbench/dut/hart/lsu/pagetablewalker/genblk1/CurrentPTE +add wave -noupdate -group lsu -expand -group ptwalker -expand -group pte /testbench/dut/hart/lsu/pagetablewalker/HPTWPAdrE +add wave -noupdate -group lsu -expand -group ptwalker -expand -group pte /testbench/dut/hart/lsu/pagetablewalker/HPTWPAdrM +add wave -noupdate -group lsu -expand -group ptwalker -expand -group pte /testbench/dut/hart/lsu/pagetablewalker/HPTWRead +add wave -noupdate -group lsu -expand -group ptwalker -divider data +add wave -noupdate -group lsu -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/lsu/pagetablewalker/ITLBWriteF +add wave -noupdate -group lsu -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/lsu/pagetablewalker/DTLBWriteM +add wave -noupdate -group lsu -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/lsu/pagetablewalker/WalkerInstrPageFaultF +add wave -noupdate -group lsu -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/lsu/pagetablewalker/WalkerLoadPageFaultM +add wave -noupdate -group lsu -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/lsu/pagetablewalker/WalkerStorePageFaultM +add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/hart/lsu/pagetablewalker/WalkerStorePageFaultM +add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/hart/lsu/pagetablewalker/WalkerLoadPageFaultM +add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/hart/lsu/pagetablewalker/WalkerInstrPageFaultF add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HCLK add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HSELPLIC add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HADDR @@ -280,45 +384,20 @@ add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsIn add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsOut add wave -noupdate 
-group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsEn add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOIntr -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/HCLK -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/HSELCLINT -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/HADDR -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/HWRITE -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/HWDATA -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/HREADY -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/HTRANS -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/HREADCLINT -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/HRESPCLINT -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/HREADYCLINT -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/MTIME -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM -add wave -noupdate -expand -group CLINT /testbench/dut/uncore/genblk1/clint/SwIntM -add wave -noupdate -expand -group ptwalker -color Gold /testbench/dut/hart/lsu/pagetablewalker/genblk1/WalkerState -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/lsu/pagetablewalker/MMUTranslate -add wave -noupdate -expand -group ptwalker -color Salmon /testbench/dut/hart/lsu/pagetablewalker/HPTWStall -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/lsu/pagetablewalker/HPTWRead -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/lsu/pagetablewalker/MMUPAdr -add wave -noupdate -expand -group ptwalker -expand -group miss/write /testbench/dut/hart/lsu/pagetablewalker/ITLBWriteF -add wave -noupdate -expand -group ptwalker 
-expand -group miss/write /testbench/dut/hart/lsu/pagetablewalker/DTLBWriteM -add wave -noupdate -expand -group ptwalker -expand -group miss/write /testbench/dut/hart/lsu/pagetablewalker/ITLBMissF -add wave -noupdate -expand -group ptwalker -expand -group miss/write /testbench/dut/hart/lsu/pagetablewalker/DTLBMissM -add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/lsu/pagetablewalker/MMUReadPTE -add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/lsu/pagetablewalker/genblk1/CurrentPTE -add wave -noupdate -expand -group ptwalker -divider data -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/lsu/pagetablewalker/ITLBWriteF -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/lsu/pagetablewalker/DTLBWriteM -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/lsu/pagetablewalker/WalkerInstrPageFaultF -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/lsu/pagetablewalker/WalkerLoadPageFaultM -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/lsu/pagetablewalker/WalkerStorePageFaultM -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/lsu/pagetablewalker/MMUPAdr -add wave -noupdate -expand -group {LSU ARB} -color Gold /testbench/dut/hart/lsu/arbiter/CurrState -add wave -noupdate -expand -group {LSU ARB} -color {Medium Orchid} /testbench/dut/hart/lsu/arbiter/SelPTW -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/lsu/arbiter/HPTWTranslate -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/lsu/arbiter/HPTWRead -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/lsu/arbiter/HPTWPAdr -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/lsu/arbiter/HPTWReadPTE -add wave -noupdate -expand -group {LSU ARB} 
-expand -group toLSU /testbench/dut/hart/lsu/arbiter/MemAdrMtoLSU +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HCLK +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HSELCLINT +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HADDR +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HWRITE +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HWDATA +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADY +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HTRANS +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADCLINT +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HRESPCLINT +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADYCLINT +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIME +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/SwIntM add wave -noupdate -group csr /testbench/dut/hart/priv/csr/MIP_REGW add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HCLK add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESETn @@ -342,18 +421,20 @@ add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/INTR add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/TXRDYb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RXRDYb -add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss -add wave -noupdate -group itlb /testbench/dut/hart/ifu/ITLBMissF -add wave -noupdate /testbench/dut/hart/lsu/pagetablewalker/MemAdrM -add wave -noupdate 
/testbench/dut/hart/lsu/pagetablewalker/DTLBMissM -add wave -noupdate /testbench/dut/hart/lsu/pagetablewalker/MemAdrM -add wave -noupdate /testbench/dut/hart/lsu/MemAdrM -add wave -noupdate /testbench/dut/hart/lsu/pagetablewalker/PCF +add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/immu/TLBWrite +add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/ITLBMissF +add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/immu/PhysicalAddress +add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/immu/Address +add wave -noupdate -group UART /testbench/dut/uncore/genblk4/uart/HCLK +add wave -noupdate -group UART /testbench/dut/uncore/genblk4/uart/HSELUART +add wave -noupdate -group UART /testbench/dut/uncore/genblk4/uart/HADDR +add wave -noupdate -group UART /testbench/dut/uncore/genblk4/uart/HWRITE +add wave -noupdate -group UART /testbench/dut/uncore/genblk4/uart/HWDATA TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 4} {16658 ns} 1} {{Cursor 4} {16655 ns} 0} +WaveRestoreCursors {{Cursor 4} {5126 ns} 0} {{Cursor 2} {40310 ns} 0} {{Cursor 3} {6427 ns} 0} quietly wave cursor active 2 configure wave -namecolwidth 250 -configure wave -valuecolwidth 189 +configure wave -valuecolwidth 297 configure wave -justifyvalue left configure wave -signalnamewidth 1 configure wave -snapdistance 10 @@ -366,4 +447,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {16565 ns} {16719 ns} +WaveRestoreZoom {0 ns} {697884 ns} diff --git a/wally-pipelined/src/cache/DCacheMem.sv b/wally-pipelined/src/cache/DCacheMem.sv new file mode 100644 index 000000000..17591abe3 --- /dev/null +++ b/wally-pipelined/src/cache/DCacheMem.sv @@ -0,0 +1,95 @@ +/////////////////////////////////////////// +// DCacheMem (Memory for the Data Cache) +// +// Written: ross1728@gmail.com July 07, 2021 +// Implements the data, tag, valid, dirty, and replacement bits. 
+// +// Purpose: Storage and read/write access to data cache data, tag valid, dirty, and replacement. +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + +module DCacheMem #(parameter NUMLINES=512, parameter BLOCKLEN = 256, TAGLEN = 26) + (input logic clk, + input logic reset, + + input logic [$clog2(NUMLINES)-1:0] Adr, + input logic [$clog2(NUMLINES)-1:0] WAdr, // write address for valid and dirty only + input logic WriteEnable, + input logic [BLOCKLEN/`XLEN-1:0] WriteWordEnable, + input logic TagWriteEnable, + input logic [BLOCKLEN-1:0] WriteData, + input logic [TAGLEN-1:0] WriteTag, + input logic SetValid, + input logic ClearValid, + input logic SetDirty, + input logic ClearDirty, + + output logic [BLOCKLEN-1:0] ReadData, + output logic [TAGLEN-1:0] ReadTag, + output logic Valid, + output logic Dirty + ); + + logic [NUMLINES-1:0] ValidBits, DirtyBits; + + + genvar words; + + generate + for(words = 0; words < BLOCKLEN/`XLEN; words++) begin : word + sram1rw #(.DEPTH(`XLEN), + .WIDTH(NUMLINES)) + CacheDataMem(.clk(clk), + .Addr(Adr), + .ReadData(ReadData[(words+1)*`XLEN-1:words*`XLEN]), + .WriteData(WriteData[(words+1)*`XLEN-1:words*`XLEN]), + .WriteEnable(WriteEnable & WriteWordEnable[words])); + end + endgenerate + + sram1rw #(.DEPTH(TAGLEN), + .WIDTH(NUMLINES)) + CacheTagMem(.clk(clk), + .Addr(Adr), + .ReadData(ReadTag), + .WriteData(WriteTag), + .WriteEnable(TagWriteEnable)); + + + always_ff @(posedge clk, posedge reset) begin + if (reset) + ValidBits <= {NUMLINES{1'b0}}; + else if (SetValid & WriteEnable) ValidBits[WAdr] <= 1'b1; + else if (ClearValid & WriteEnable) ValidBits[WAdr] <= 1'b0; + Valid <= ValidBits[Adr]; + end + + always_ff @(posedge clk, posedge reset) begin + if (reset) + DirtyBits <= {NUMLINES{1'b0}}; + else if (SetDirty & WriteEnable) DirtyBits[WAdr] <= 1'b1; + else if (ClearDirty & WriteEnable) DirtyBits[WAdr] <= 1'b0; + Dirty <= DirtyBits[Adr]; + end + + +endmodule // DCacheMemWay + + diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index e7098d755..629ec7cc5 100644 --- 
a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -196,6 +196,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) assign spill = PCPF[4:1] == 4'b1111 ? 1'b1 : 1'b0; assign hit = ICacheMemReadValid; // note ICacheMemReadValid is hit. // verilator lint_off WIDTH + // *** Bug width is wrong. assign FetchCountFlag = (FetchCount == FetchCountThreshold); // verilator lint_on WIDTH @@ -413,6 +414,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) assign NextFetchCount = FetchCount + 1'b1; // This part is confusing. + // *** Ross Thompson reduce the complexity. This is just dumb. // we need to remove the offset bits (PCPTrunkF). Because the AHB interface is XLEN wide // we need to address on that number of bits so the PC is extended to the right by AHBByteLength with zeros. // fetch count is already aligned to AHBByteLength, but we need to extend back to the full address width with diff --git a/wally-pipelined/src/cache/ICacheMem.sv b/wally-pipelined/src/cache/ICacheMem.sv index 9a5fdbe2f..ce3507ba1 100644 --- a/wally-pipelined/src/cache/ICacheMem.sv +++ b/wally-pipelined/src/cache/ICacheMem.sv @@ -8,8 +8,8 @@ module ICacheMem #(parameter NUMLINES=512, parameter BLOCKLEN = 256) // If flush is high, invalidate the entire cache input logic flush, - input logic [`PA_BITS-1:0] PCTagF, // physical address - input logic [`PA_BITS-1:0] PCNextIndexF, // virtual address + input logic [`PA_BITS-1:0] PCTagF, // physical address + input logic [`PA_BITS-1:0] PCNextIndexF, // virtual address input logic WriteEnable, input logic [BLOCKLEN-1:0] WriteLine, output logic [BLOCKLEN-1:0] ReadLineF, diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv new file mode 100644 index 000000000..66c857fd1 --- /dev/null +++ b/wally-pipelined/src/cache/dcache.sv @@ -0,0 +1,731 @@ +/////////////////////////////////////////// +// dcache (data cache) +// +// Written: ross1728@gmail.com July 07, 2021 +// Implements the L1 data 
cache +// +// Purpose: Storage for data and meta data. +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +module dcache + (input logic clk, + input logic reset, + input logic StallM, + input logic StallW, + input logic FlushM, + input logic FlushW, + + // cpu side + input logic [1:0] MemRWM, + input logic [2:0] Funct3M, + input logic [6:0] Funct7M, + input logic [1:0] AtomicM, + input logic [`XLEN-1:0] MemAdrE, // virtual address, but we only use the lower 12 bits. 
+ input logic [`PA_BITS-1:0] MemPAdrM, // physical address + + input logic [`XLEN-1:0] WriteDataM, + output logic [`XLEN-1:0] ReadDataW, + output logic [`XLEN-1:0] ReadDataM, + output logic DCacheStall, + output logic CommittedM, + + // inputs from TLB and PMA/P + input logic ExceptionM, + input logic PendingInterruptM, + input logic DTLBMissM, + input logic CacheableM, + input logic DTLBWriteM, + // from ptw + input logic SelPTW, + input logic WalkerPageFaultM, + // ahb side + output logic [`PA_BITS-1:0] AHBPAdr, // to ahb + output logic AHBRead, + output logic AHBWrite, + input logic AHBAck, // from ahb + input logic [`XLEN-1:0] HRDATA, // from ahb + output logic [`XLEN-1:0] HWDATA // to ahb + ); + + localparam integer BLOCKLEN = 256; + localparam integer NUMLINES = 64; + localparam integer NUMWAYS = 4; + localparam integer NUMREPL_BITS = 3; + + localparam integer BLOCKBYTELEN = BLOCKLEN/8; + localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN); + localparam integer INDEXLEN = $clog2(NUMLINES); + localparam integer TAGLEN = `PA_BITS - OFFSETLEN - INDEXLEN; + localparam integer WORDSPERLINE = BLOCKLEN/`XLEN; + localparam integer LOGWPL = $clog2(WORDSPERLINE); + localparam integer LOGXLENBYTES = $clog2(`XLEN/8); + + + logic SelAdrM; + logic [INDEXLEN-1:0] SRAMAdr; + logic [BLOCKLEN-1:0] SRAMWriteData; + logic [BLOCKLEN-1:0] DCacheMemWriteData; + logic SetValidM, ClearValidM; + logic SetDirtyM, ClearDirtyM; + logic [BLOCKLEN-1:0] ReadDataBlockWayM [NUMWAYS-1:0]; + logic [BLOCKLEN-1:0] ReadDataBlockWayMaskedM [NUMWAYS-1:0]; + logic [BLOCKLEN-1:0] VictimReadDataBLockWayMaskedM [NUMWAYS-1:0]; + logic [TAGLEN-1:0] ReadTag [NUMWAYS-1:0]; + logic [NUMWAYS-1:0] Valid, Dirty, WayHit; + logic CacheHit; + logic [NUMREPL_BITS-1:0] ReplacementBits [NUMLINES-1:0]; + logic [NUMREPL_BITS-1:0] NewReplacement; + logic [BLOCKLEN-1:0] ReadDataBlockM; + logic [`XLEN-1:0] ReadDataBlockSetsM [(WORDSPERLINE)-1:0]; + logic [`XLEN-1:0] VictimReadDataBlockSetsM [(WORDSPERLINE)-1:0]; + logic 
[`XLEN-1:0] ReadDataWordM, FinalReadDataWordM, ReadDataWordMuxM; + logic [`XLEN-1:0] FinalWriteDataM, FinalAMOWriteDataM; + logic [BLOCKLEN-1:0] FinalWriteDataWordsM; + logic [LOGWPL:0] FetchCount, NextFetchCount; + logic [WORDSPERLINE-1:0] SRAMWordEnable; + logic SelMemWriteDataM; + logic [2:0] Funct3W; + + logic SRAMWordWriteEnableM, SRAMWordWriteEnableW; + logic SRAMBlockWriteEnableM; + logic SRAMWriteEnable; + logic [NUMWAYS-1:0] SRAMWayWriteEnable; + + + logic SaveSRAMRead; + logic [1:0] AtomicW; + logic [NUMWAYS-1:0] VictimWay; + logic [NUMWAYS-1:0] VictimDirtyWay; + logic [BLOCKLEN-1:0] VictimReadDataBlockM; + logic VictimDirty; + logic SelAMOWrite; + logic SelUncached; + logic [6:0] Funct7W; + logic [2**LOGWPL-1:0] MemPAdrDecodedW; + + logic [`PA_BITS-1:0] BasePAdrM; + logic [OFFSETLEN-1:0] BasePAdrOffsetM; + logic [`PA_BITS-1:0] BasePAdrMaskedM; + logic [TAGLEN-1:0] VictimTagWay [NUMWAYS-1:0]; + logic [TAGLEN-1:0] VictimTag; + + logic ReadDataWEn; + + logic AnyCPUReqM; + logic FetchCountFlag; + logic PreCntEn; + logic CntEn; + logic CntReset; + logic CPUBusy, PreviousCPUBusy; + logic SelEvict; + + + typedef enum {STATE_READY, + + STATE_MISS_FETCH_WDV, + STATE_MISS_FETCH_DONE, + STATE_MISS_EVICT_DIRTY, + STATE_MISS_WRITE_BACK_EVICTED_BLOCK, + STATE_MISS_WRITE_CACHE_BLOCK, + STATE_MISS_READ_WORD, + STATE_MISS_READ_WORD_DELAY, + STATE_MISS_WRITE_WORD, + + STATE_AMO_MISS_FETCH_WDV, + STATE_AMO_MISS_FETCH_DONE, + STATE_AMO_MISS_CHECK_EVICTED_DIRTY, + STATE_AMO_MISS_WRITE_BACK_EVICTED_BLOCK, + STATE_AMO_MISS_WRITE_CACHE_BLOCK, + STATE_AMO_MISS_READ_WORD, + STATE_AMO_MISS_UPDATE_WORD, + STATE_AMO_MISS_WRITE_WORD, + STATE_AMO_UPDATE, + STATE_AMO_WRITE, + + STATE_PTW_READY, + STATE_PTW_READ_MISS_FETCH_WDV, + STATE_PTW_READ_MISS_FETCH_DONE, + STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK, + STATE_PTW_READ_MISS_READ_WORD, + STATE_PTW_READ_MISS_READ_WORD_DELAY, + STATE_PTW_ACCESS_AFTER_WALK, + STATE_PTW_UPDATE_TLB, + + STATE_UNCACHED_WRITE, + STATE_UNCACHED_WRITE_DONE, + 
STATE_UNCACHED_READ, + STATE_UNCACHED_READ_DONE, + + STATE_CPU_BUSY} statetype; + + statetype CurrState, NextState; + + + flopenr #(7) Funct7WReg(.clk(clk), + .reset(reset), + .en(~StallW), + .d(Funct7M), + .q(Funct7W)); + + + + // data path + + mux2 #(INDEXLEN) + AdrSelMux(.d0(MemAdrE[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + .d1(MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + .s(SelAdrM), + .y(SRAMAdr)); + + + oneHotDecoder #(LOGWPL) + oneHotDecoder(.bin(MemPAdrM[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), + .decoded(MemPAdrDecodedW)); + + + assign SRAMWordEnable = SRAMBlockWriteEnableM ? '1 : MemPAdrDecodedW; + + + genvar way; + generate + for(way = 0; way < NUMWAYS; way = way + 1) begin :CacheWays + DCacheMem #(.NUMLINES(NUMLINES), .BLOCKLEN(BLOCKLEN), .TAGLEN(TAGLEN)) + MemWay(.clk(clk), + .reset(reset), + .Adr(SRAMAdr), + .WAdr(MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + .WriteEnable(SRAMWayWriteEnable[way]), + .WriteWordEnable(SRAMWordEnable), + .TagWriteEnable(SRAMBlockWriteEnableM & VictimWay[way]), + .WriteData(SRAMWriteData), + .WriteTag(MemPAdrM[`PA_BITS-1:OFFSETLEN+INDEXLEN]), + .SetValid(SetValidM), + .ClearValid(ClearValidM), + .SetDirty(SetDirtyM), + .ClearDirty(ClearDirtyM), + .ReadData(ReadDataBlockWayM[way]), + .ReadTag(ReadTag[way]), + .Valid(Valid[way]), + .Dirty(Dirty[way])); + assign WayHit[way] = Valid[way] & (ReadTag[way] == MemPAdrM[`PA_BITS-1:OFFSETLEN+INDEXLEN]); + assign ReadDataBlockWayMaskedM[way] = WayHit[way] ? ReadDataBlockWayM[way] : '0; // first part of AO mux; masked by the hit way so other valid ways are not ORed in. + + // the cache block candidate for eviction; data, dirty, and tag are each gated by VictimWay. + // *** this should be sharable with the read data muxing, but for now i'm doing the simple + // thing and making them separate. + assign VictimReadDataBLockWayMaskedM[way] = VictimWay[way] ? ReadDataBlockWayM[way] : '0; + assign VictimDirtyWay[way] = VictimWay[way] & Dirty[way] & Valid[way]; + assign VictimTagWay[way] = VictimWay[way] ?
ReadTag[way] : '0; + end + endgenerate + + always_ff @(posedge clk, posedge reset) begin + if (reset) begin + for(int index = 0; index < NUMLINES; index++) // clear every line, including the last one + ReplacementBits[index] <= '0; + end + else if (SRAMWriteEnable) ReplacementBits[MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]] <= NewReplacement; + end + + // *** TODO add replacement policy + assign NewReplacement = '0; + assign VictimWay = 4'b0001; + mux2 #(NUMWAYS) WriteEnableMux(.d0(SRAMWordWriteEnableM ? WayHit : '0), + .d1(SRAMBlockWriteEnableM ? VictimWay : '0), + .s(SRAMBlockWriteEnableM), + .y(SRAMWayWriteEnable)); + + + + assign CacheHit = |WayHit; + // ReadDataBlockWayMaskedM is a 2d array of cache block len by number of ways. + // Need to OR together each way in a bitwise manner. + // Final part of the AO Mux. + genvar index; + always_comb begin + ReadDataBlockM = '0; + VictimReadDataBlockM = '0; + VictimTag = '0; + for(int index = 0; index < NUMWAYS; index++) begin + ReadDataBlockM = ReadDataBlockM | ReadDataBlockWayMaskedM[index]; + VictimReadDataBlockM = VictimReadDataBlockM | VictimReadDataBLockWayMaskedM[index]; + VictimTag = VictimTag | VictimTagWay[index]; + end + end + assign VictimDirty = | VictimDirtyWay; + + + // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can + // easily build a variable input mux. + generate + for (index = 0; index < WORDSPERLINE; index++) begin + assign ReadDataBlockSetsM[index] = ReadDataBlockM[((index+1)*`XLEN)-1: (index*`XLEN)]; + assign VictimReadDataBlockSetsM[index] = VictimReadDataBlockM[((index+1)*`XLEN)-1: (index*`XLEN)]; + end + endgenerate + + // variable input mux: word-within-block select, LOGWPL address bits above the byte offset + assign ReadDataWordM = ReadDataBlockSetsM[MemPAdrM[LOGWPL+LOGXLENBYTES-1 : LOGXLENBYTES]]; + + + // *** fix width later. + // verilator lint_off WIDTH + assign HWDATA = CacheableM ?
VictimReadDataBlockSetsM[FetchCount] : WriteDataM; + // verilator lint_on WIDTH + + mux2 #(`XLEN) UnCachedDataMux(.d0(ReadDataWordM), + .d1(DCacheMemWriteData[`XLEN-1:0]), + .s(SelUncached), + .y(ReadDataWordMuxM)); + + // finally swr + // *** BUG fix HSIZED? why was it this way? + subwordread subwordread(.HRDATA(ReadDataWordMuxM), + .HADDRD(MemPAdrM[2:0]), + .HSIZED({Funct3M[2], 1'b0, Funct3M[1:0]}), + .HRDATAMasked(FinalReadDataWordM)); + + // This is a confusing point. + // The final read data should be updated only if the CPU's StallW is low + // which means the CPU is ready to take data. Or if the CPU just became + // busy. Then when we exit CPU_BUSY we want to ensure the data is not + // updated, this is ~PreviousCPUBusy. + // also must update if cpu stalled and processing a read miss + // which occurs if in state miss read word delay. + assign CPUBusy = CurrState == STATE_CPU_BUSY; + flop #(1) CPUBusyReg(.clk, .d(CPUBusy), .q(PreviousCPUBusy)); + + assign ReadDataWEn = (~StallW & ~PreviousCPUBusy) | + (NextState == STATE_CPU_BUSY & CurrState == STATE_READY) | + (CurrState == STATE_MISS_READ_WORD_DELAY); + + flopen #(`XLEN) ReadDataWReg(.clk(clk), + .en(ReadDataWEn), + .d(FinalReadDataWordM), + .q(ReadDataW)); + + assign ReadDataM = FinalReadDataWordM; + + // write path + subwordwrite subwordwrite(.HRDATA(ReadDataWordM), + .HADDRD(MemPAdrM[2:0]), + .HSIZED({Funct3M[2], 1'b0, Funct3M[1:0]}), + .HWDATAIN(WriteDataM), + .HWDATA(FinalWriteDataM)); + + generate + if (`A_SUPPORTED) begin + logic [`XLEN-1:0] AMOResult; + amoalu amoalu(.srca(FinalReadDataWordM), .srcb(WriteDataM), .funct(Funct7M), .width(Funct3M[1:0]), + .result(AMOResult)); + mux2 #(`XLEN) wdmux(FinalWriteDataM, AMOResult, SelAMOWrite & AtomicM[1], FinalAMOWriteDataM); + end else + assign FinalAMOWriteDataM = FinalWriteDataM; + endgenerate + + + // register the fetch data from the next level of memory. 
+ generate + for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer + flopen #(`XLEN) fb(.clk(clk), + .en(AHBAck & AHBRead & (index == FetchCount)), + .d(HRDATA), + .q(DCacheMemWriteData[(index+1)*`XLEN-1:index*`XLEN])); + end + endgenerate + + // *** Coding style. this is just awful. The purpose is to align FetchCount to the + // size of XLEN so we can fetch XLEN bits. FetchCount needs to be padded to PA_BITS length. + // *** optimize this + mux2 #(`PA_BITS) BaseAdrMux(.d0(MemPAdrM), + .d1({VictimTag, MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), + .s(SelEvict), + .y(BasePAdrM)); + + assign BasePAdrOffsetM = CacheableM ? {{OFFSETLEN}{1'b0}} : BasePAdrM[OFFSETLEN-1:0]; + assign BasePAdrMaskedM = {BasePAdrM[`PA_BITS-1:OFFSETLEN], BasePAdrOffsetM}; + + generate + if (`XLEN == 32) begin + assign AHBPAdr = ({{`PA_BITS-4{1'b0}}, FetchCount} << 2) + BasePAdrMaskedM; + end else begin + assign AHBPAdr = ({{`PA_BITS-3{1'b0}}, FetchCount} << 3) + BasePAdrMaskedM; + end + endgenerate + + + // mux between the CPU's write and the cache fetch. + generate + for(index = 0; index < WORDSPERLINE; index++) begin + assign FinalWriteDataWordsM[((index+1)*`XLEN)-1 : (index*`XLEN)] = FinalAMOWriteDataM; + end + endgenerate + + mux2 #(BLOCKLEN) WriteDataMux(.d0(FinalWriteDataWordsM), + .d1(DCacheMemWriteData), + .s(SRAMBlockWriteEnableM), + .y(SRAMWriteData)); + + + // control path *** eventually move to own module. 
+ + + + localparam FetchCountThreshold = WORDSPERLINE - 1; + + + assign AnyCPUReqM = |MemRWM | (|AtomicM); + assign FetchCountFlag = (FetchCount == FetchCountThreshold[LOGWPL:0]); + + flopenr #(LOGWPL+1) + FetchCountReg(.clk(clk), + .reset(reset | CntReset), + .en(CntEn), + .d(NextFetchCount), + .q(FetchCount)); + + assign NextFetchCount = FetchCount + 1'b1; + + assign SRAMWriteEnable = SRAMBlockWriteEnableM | SRAMWordWriteEnableM; + + flopr #(1) + SRAMWritePipeReg(.clk(clk), + .reset(reset), + .d({SRAMWordWriteEnableM}), + .q({SRAMWordWriteEnableW})); + + + always_ff @(posedge clk, posedge reset) + if (reset) CurrState <= #1 STATE_READY; + else CurrState <= #1 NextState; + + + // next state logic and some state outputs. + always_comb begin + DCacheStall = 1'b0; + SelAdrM = 1'b0; + PreCntEn = 1'b0; + SetValidM = 1'b0; + ClearValidM = 1'b0; + SetDirtyM = 1'b0; + ClearDirtyM = 1'b0; + SelMemWriteDataM = 1'b0; + SRAMWordWriteEnableM = 1'b0; + SRAMBlockWriteEnableM = 1'b0; + SaveSRAMRead = 1'b1; + CntReset = 1'b0; + AHBRead = 1'b0; + AHBWrite = 1'b0; + SelAMOWrite = 1'b0; + CommittedM = 1'b0; + SelUncached = 1'b0; + SelEvict = 1'b0; + NextState = CurrState; // default: hold the current state so every path assigns NextState (no inferred latch) + case (CurrState) + STATE_READY: begin + // TLB Miss + if(AnyCPUReqM & DTLBMissM) begin + // the LSU arbiter has not yet selected the PTW. + // The CPU needs to be stalled until that happens. + // If we set DCacheStall for 1 cycle before going to + // PTW ready the CPU will stall. + // The page table walker asserts its control 1 cycle + // after the TLBs miss. + DCacheStall = 1'b1; + NextState = STATE_READY; + end + else if(SelPTW) begin + // Now we have activated the ptw. + // Do not assert Stall as we are now directing the stall the ptw.
+ NextState = STATE_PTW_READY; + CommittedM = 1'b1; + end + // amo hit + else if(|AtomicM & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin + NextState = STATE_AMO_UPDATE; + DCacheStall = 1'b1; + + if(StallW) NextState = STATE_CPU_BUSY; + else NextState = STATE_AMO_UPDATE; + end + // read hit valid cached + else if(MemRWM[1] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin + DCacheStall = 1'b0; + + if(StallW) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + end + // write hit valid cached + else if (MemRWM[0] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin + SelAdrM = 1'b1; + DCacheStall = 1'b0; + SRAMWordWriteEnableM = 1'b1; + SetDirtyM = 1'b1; + + if(StallW) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + end + // read or write miss valid cached + else if((|MemRWM) & CacheableM & ~(ExceptionM | PendingInterruptM) & ~CacheHit & ~DTLBMissM) begin + NextState = STATE_MISS_FETCH_WDV; + CntReset = 1'b1; + DCacheStall = 1'b1; + end + // uncached write + else if(MemRWM[0] & ~CacheableM & ~ExceptionM & ~DTLBMissM) begin + NextState = STATE_UNCACHED_WRITE; + CntReset = 1'b1; + DCacheStall = 1'b1; + AHBWrite = 1'b1; + end + // uncached read + else if(MemRWM[1] & ~CacheableM & ~ExceptionM & ~DTLBMissM) begin + NextState = STATE_UNCACHED_READ; + CntReset = 1'b1; + DCacheStall = 1'b1; + AHBRead = 1'b1; + end + // fault + else if(AnyCPUReqM & (ExceptionM | PendingInterruptM) & ~DTLBMissM) begin + NextState = STATE_READY; + end + else NextState = STATE_READY; + end + + STATE_AMO_UPDATE: begin + NextState = STATE_AMO_WRITE; + SaveSRAMRead = 1'b1; + SRAMWordWriteEnableM = 1'b1; // pipelined 1 cycle + end + STATE_AMO_WRITE: begin + SelAMOWrite = 1'b1; + if(StallW) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + end + + STATE_MISS_FETCH_WDV: begin + DCacheStall = 1'b1; + PreCntEn = 1'b1; + AHBRead = 1'b1; + SelAdrM = 1'b1; + CommittedM = 1'b1; + + if 
(FetchCountFlag & AHBAck) begin + NextState = STATE_MISS_FETCH_DONE; + end else begin + NextState = STATE_MISS_FETCH_WDV; + end + end + + STATE_MISS_FETCH_DONE: begin + DCacheStall = 1'b1; + SelAdrM = 1'b1; + CntReset = 1'b1; + CommittedM = 1'b1; + if(VictimDirty) begin + NextState = STATE_MISS_EVICT_DIRTY; + end else begin + NextState = STATE_MISS_WRITE_CACHE_BLOCK; + end + end + + STATE_MISS_WRITE_CACHE_BLOCK: begin + SRAMBlockWriteEnableM = 1'b1; + DCacheStall = 1'b1; + NextState = STATE_MISS_READ_WORD; + SelAdrM = 1'b1; + SetValidM = 1'b1; + ClearDirtyM = 1'b1; + CommittedM = 1'b1; + end + + STATE_MISS_READ_WORD: begin + SelAdrM = 1'b1; + DCacheStall = 1'b1; + CommittedM = 1'b1; + if (MemRWM[1]) begin + NextState = STATE_MISS_READ_WORD_DELAY; + // delay state is required as the read signal MemRWM[1] is still high when we + // return to the ready state because the cache is stalling the cpu. + end else begin + NextState = STATE_MISS_WRITE_WORD; + end + end + + STATE_MISS_READ_WORD_DELAY: begin + SelAdrM = 1'b1; + CommittedM = 1'b1; + if(StallW) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + end + + STATE_MISS_WRITE_WORD: begin + SRAMWordWriteEnableM = 1'b1; + SetDirtyM = 1'b1; + SelAdrM = 1'b1; + DCacheStall = 1'b0; + CommittedM = 1'b1; + if(StallW) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + end + + STATE_MISS_EVICT_DIRTY: begin + DCacheStall = 1'b1; + PreCntEn = 1'b1; + AHBWrite = 1'b1; + SelAdrM = 1'b1; + CommittedM = 1'b1; + SelEvict = 1'b1; + if( FetchCountFlag & AHBAck) begin + NextState = STATE_MISS_WRITE_CACHE_BLOCK; + end else begin + NextState = STATE_MISS_EVICT_DIRTY; + end + end + + STATE_PTW_READY: begin + // now all output connect to PTW instead of CPU. + CommittedM = 1'b1; + // return to ready if page table walk completed. 
+ if (~SelPTW & ~WalkerPageFaultM) begin + NextState = STATE_PTW_ACCESS_AFTER_WALK; + + // read hit valid cached + end else if(MemRWM[1] & CacheableM & ~ExceptionM & CacheHit) begin + NextState = STATE_PTW_READY; + DCacheStall = 1'b0; + end + + // read miss valid cached + else if((MemRWM[1]) & CacheableM & ~ExceptionM & ~CacheHit) begin + NextState = STATE_PTW_READ_MISS_FETCH_WDV; + CntReset = 1'b1; + DCacheStall = 1'b1; + end + + // walker has issue abort back to ready + else if(~SelPTW & WalkerPageFaultM) begin + NextState = STATE_READY; + DCacheStall = 1'b0; + end + end + + STATE_PTW_READ_MISS_FETCH_WDV: begin + DCacheStall = 1'b1; + PreCntEn = 1'b1; + AHBRead = 1'b1; + SelAdrM = 1'b1; + CommittedM = 1'b1; + + if (FetchCountFlag & AHBAck) begin + NextState = STATE_PTW_READ_MISS_FETCH_DONE; + end else begin + NextState = STATE_PTW_READ_MISS_FETCH_WDV; + end + end + + STATE_PTW_READ_MISS_FETCH_DONE: begin + DCacheStall = 1'b1; + SelAdrM = 1'b1; + CntReset = 1'b1; + CommittedM = 1'b1; + NextState = STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK; + end + + STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK: begin + SRAMBlockWriteEnableM = 1'b1; + DCacheStall = 1'b1; + NextState = STATE_PTW_READ_MISS_READ_WORD; + SelAdrM = 1'b1; + SetValidM = 1'b1; + ClearDirtyM = 1'b1; + CommittedM = 1'b1; + end + + STATE_PTW_READ_MISS_READ_WORD: begin + SelAdrM = 1'b1; + DCacheStall = 1'b1; + CommittedM = 1'b1; + NextState = STATE_PTW_READ_MISS_READ_WORD_DELAY; + end + + STATE_PTW_READ_MISS_READ_WORD_DELAY: begin + SelAdrM = 1'b1; + NextState = STATE_PTW_READY; + CommittedM = 1'b1; + end + + STATE_PTW_ACCESS_AFTER_WALK: begin + DCacheStall = 1'b1; + SelAdrM = 1'b1; + CommittedM = 1'b1; + NextState = STATE_READY; + end + + STATE_CPU_BUSY : begin + CommittedM = 1'b1; + if(StallW) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + end + + STATE_UNCACHED_WRITE : begin + DCacheStall = 1'b1; + AHBWrite = 1'b1; + CommittedM = 1'b1; + if(AHBAck) begin + NextState = STATE_UNCACHED_WRITE_DONE; + end 
else begin + NextState = STATE_UNCACHED_WRITE; + end + end + + STATE_UNCACHED_READ : begin + DCacheStall = 1'b1; + AHBRead = 1'b1; + CommittedM = 1'b1; + if(AHBAck) begin + NextState = STATE_UNCACHED_READ_DONE; + end else begin + NextState = STATE_UNCACHED_READ; + end + end + + STATE_UNCACHED_WRITE_DONE: begin + CommittedM = 1'b1; + if(StallW) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + end + + STATE_UNCACHED_READ_DONE: begin + CommittedM = 1'b1; + SelUncached = 1'b1; + if(StallW) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + end + + default: begin + end + endcase + end + + assign CntEn = PreCntEn & AHBAck; + +endmodule // dcache diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 4bd079e96..302b50756 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -35,53 +35,51 @@ package ahbliteState; endpackage module ahblite ( - input logic clk, reset, - input logic StallW, FlushW, + input logic clk, reset, + input logic StallW, // Load control - input logic UnsignedLoadM, - input logic [1:0] AtomicMaskedM, - input logic [6:0] Funct7M, + input logic UnsignedLoadM, + input logic [1:0] AtomicMaskedM, + input logic [6:0] Funct7M, // Signals from Instruction Cache - input logic [`PA_BITS-1:0] InstrPAdrF, // *** rename these to match block diagram - input logic InstrReadF, - output logic [`XLEN-1:0] InstrRData, - output logic InstrAckF, + input logic [`PA_BITS-1:0] InstrPAdrF, // *** rename these to match block diagram + input logic InstrReadF, + output logic [`XLEN-1:0] InstrRData, + output logic InstrAckF, // Signals from Data Cache - input logic [`PA_BITS-1:0] MemPAdrM, - input logic MemReadM, MemWriteM, - input logic [`XLEN-1:0] WriteDataM, - input logic [1:0] MemSizeM, - //output logic DataStall, - // Signals from MMU - // Signals from PMA checker - input logic DSquashBusAccessM, ISquashBusAccessF, - // Signals to PMA checker (metadata of proposed access) + input 
logic [`PA_BITS-1:0] DCtoAHBPAdrM, + input logic DCtoAHBReadM, + input logic DCtoAHBWriteM, + input logic [`XLEN-1:0] DCtoAHBWriteData, + output logic [`XLEN-1:0] DCfromAHBReadData, + input logic [1:0] MemSizeM, // *** remove + output logic DCfromAHBAck, // Return from bus - output logic [`XLEN-1:0] HRDATAW, + output logic [`XLEN-1:0] HRDATAW, // AHB-Lite external signals - input logic [`AHBW-1:0] HRDATA, - input logic HREADY, HRESP, - output logic HCLK, HRESETn, - output logic [31:0] HADDR, - output logic [`AHBW-1:0] HWDATA, - output logic HWRITE, - output logic [2:0] HSIZE, - output logic [2:0] HBURST, - output logic [3:0] HPROT, - output logic [1:0] HTRANS, - output logic HMASTLOCK, + input logic [`AHBW-1:0] HRDATA, + input logic HREADY, HRESP, + output logic HCLK, HRESETn, + output logic [31:0] HADDR, + output logic [`AHBW-1:0] HWDATA, + output logic HWRITE, + output logic [2:0] HSIZE, + output logic [2:0] HBURST, + output logic [3:0] HPROT, + output logic [1:0] HTRANS, + output logic HMASTLOCK, // Delayed signals for writes - output logic [2:0] HADDRD, - output logic [3:0] HSIZED, - output logic HWRITED, + output logic [2:0] HADDRD, + output logic [3:0] HSIZED, + output logic HWRITED, // Stalls - output logic CommitM, MemAckW + output logic CommitM ); logic GrantData; logic [31:0] AccessAddress; logic [2:0] AccessSize, PTESize, ISize; - logic [`AHBW-1:0] HRDATAMasked, ReadDataM, CapturedHRDATAMasked, HRDATANext, WriteData; + logic [`AHBW-1:0] HRDATAMasked, ReadDataM, HRDATANext, CapturedHRDATAMasked, WriteData; logic IReady, DReady; logic CaptureDataM,CapturedDataAvailable; @@ -95,7 +93,7 @@ module ahblite ( // while an instruction read is occuring, the instruction read finishes before // the data access can take place. 
import ahbliteState::*; - statetype BusState, ProposedNextBusState, NextBusState; + statetype BusState, NextBusState; flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextBusState, IDLE, BusState); @@ -109,54 +107,32 @@ module ahblite ( // interface that might be used in place of the ahblite. always_comb case (BusState) - IDLE: /*if (MMUTranslate) ProposedNextBusState = MMUTRANSLATE; - else*/ if (AtomicMaskedM[1]) ProposedNextBusState = ATOMICREAD; - else if (MemReadM) ProposedNextBusState = MEMREAD; // Memory has priority over instructions - else if (MemWriteM) ProposedNextBusState = MEMWRITE; - else if (InstrReadF) ProposedNextBusState = INSTRREAD; - else ProposedNextBusState = IDLE; -/* -----\/----- EXCLUDED -----\/----- - MMUTRANSLATE: if (~HREADY) ProposedNextBusState = MMUTRANSLATE; - else ProposedNextBusState = IDLE; - -----/\----- EXCLUDED -----/\----- */ - ATOMICREAD: if (~HREADY) ProposedNextBusState = ATOMICREAD; - else ProposedNextBusState = ATOMICWRITE; - ATOMICWRITE: if (~HREADY) ProposedNextBusState = ATOMICWRITE; - else if (InstrReadF) ProposedNextBusState = INSTRREAD; - else ProposedNextBusState = IDLE; - MEMREAD: if (~HREADY) ProposedNextBusState = MEMREAD; - else if (InstrReadF) ProposedNextBusState = INSTRREAD; - else ProposedNextBusState = IDLE; - MEMWRITE: if (~HREADY) ProposedNextBusState = MEMWRITE; - else if (InstrReadF) ProposedNextBusState = INSTRREAD; - else ProposedNextBusState = IDLE; - INSTRREAD: if (~HREADY) ProposedNextBusState = INSTRREAD; - else ProposedNextBusState = IDLE; // if (InstrReadF still high) - default: ProposedNextBusState = IDLE; + IDLE: if (AtomicMaskedM[1]) NextBusState = ATOMICREAD; + else if (DCtoAHBReadM) NextBusState = MEMREAD; // Memory has priority over instructions + else if (DCtoAHBWriteM) NextBusState = MEMWRITE; + else if (InstrReadF) NextBusState = INSTRREAD; + else NextBusState = IDLE; + ATOMICREAD: if (~HREADY) NextBusState = ATOMICREAD; + else NextBusState = ATOMICWRITE; + ATOMICWRITE: if 
(~HREADY) NextBusState = ATOMICWRITE; + else if (InstrReadF) NextBusState = INSTRREAD; + else NextBusState = IDLE; + MEMREAD: if (~HREADY) NextBusState = MEMREAD; + else if (InstrReadF) NextBusState = INSTRREAD; + else NextBusState = IDLE; + MEMWRITE: if (~HREADY) NextBusState = MEMWRITE; + else if (InstrReadF) NextBusState = INSTRREAD; + else NextBusState = IDLE; + INSTRREAD: if (~HREADY) NextBusState = INSTRREAD; + else NextBusState = IDLE; // if (InstrReadF still high) + default: NextBusState = IDLE; endcase - // Determine access type (important for determining whether to fault) -// (ProposedNextBusState == MMUTRANSLATE); - - // The PMA and PMP checkers can decide to squash the access - // *** this probably needs to be controlled by the caches rather than EBU dh 7/2/11 - assign NextBusState = (DSquashBusAccessM || ISquashBusAccessF) ? IDLE : ProposedNextBusState; - - // stall signals - // Note that we need to extend both stalls when MMUTRANSLATE goes to idle, - // since translation might not be complete. - // *** Ross Thompson remove this datastall -/* -----\/----- EXCLUDED -----\/----- - assign #2 DataStall = ((NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || - (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE)); - -----/\----- EXCLUDED -----/\----- */ - - // bus outputs - assign #1 GrantData = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == MEMWRITE) || - (ProposedNextBusState == ATOMICREAD) || (ProposedNextBusState == ATOMICWRITE); - assign #1 AccessAddress = (GrantData) ? MemPAdrM[31:0] : InstrPAdrF[31:0]; + assign #1 GrantData = (NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || + (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE); + assign #1 AccessAddress = (GrantData) ? DCtoAHBPAdrM[31:0] : InstrPAdrF[31:0]; //assign #1 HADDR = (MMUTranslate) ? 
MMUPAdr[31:0] : AccessAddress; assign #1 HADDR = AccessAddress; generate @@ -185,11 +161,11 @@ module ahblite ( //assign MMUReady = (BusState == MMUTRANSLATE && HREADY); assign InstrRData = HRDATA; + assign DCfromAHBReadData = HRDATA; assign InstrAckF = (BusState == INSTRREAD) && (NextBusState != INSTRREAD); assign CommitM = (BusState == MEMREAD) || (BusState == MEMWRITE) || (BusState == ATOMICREAD) || (BusState == ATOMICWRITE); // *** Bracker 6/5/21: why is this W stage? - assign MemAckW = (BusState == MEMREAD) && (NextBusState != MEMREAD) || (BusState == MEMWRITE) && (NextBusState != MEMWRITE) || - ((BusState == ATOMICREAD) && (NextBusState != ATOMICREAD)) || ((BusState == ATOMICWRITE) && (NextBusState != ATOMICWRITE)); + assign DCfromAHBAck = (BusState == MEMREAD) && (NextBusState != MEMREAD) || (BusState == MEMWRITE) && (NextBusState != MEMWRITE); //assign MMUReadPTE = HRDATA; // Carefully decide when to update ReadDataW // ReadDataMstored holds the most recent memory read. @@ -213,17 +189,20 @@ module ahblite ( flopr #(`XLEN) ReadDataOldWReg(clk, reset, HRDATANext, HRDATAW); // Extract and sign-extend subwords if necessary - subwordread swr(.*); + subwordread swr(.HRDATA(HRDATA), + .HADDRD(HADDRD), + .HSIZED(HSIZED), + .HRDATAMasked(HRDATAMasked)); // Handle AMO instructions if applicable generate if (`A_SUPPORTED) begin logic [`XLEN-1:0] AMOResult; - amoalu amoalu(.srca(HRDATAW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM), + amoalu amoalu(.srca(HRDATAW), .srcb(DCtoAHBWriteData), .funct(Funct7M), .width(MemSizeM), .result(AMOResult)); - mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicMaskedM[1], WriteData); + mux2 #(`XLEN) wdmux(DCtoAHBWriteData, AMOResult, AtomicMaskedM[1], WriteData); end else - assign WriteData = WriteDataM; + assign WriteData = DCtoAHBWriteData; endgenerate endmodule diff --git a/wally-pipelined/src/generic/oneHotDecoder.sv b/wally-pipelined/src/generic/oneHotDecoder.sv new file mode 100644 index 000000000..08bd2e01c --- 
/dev/null +++ b/wally-pipelined/src/generic/oneHotDecoder.sv @@ -0,0 +1,39 @@ +/////////////////////////////////////////// +// oneHotDecoder.sv +// +// Written: ross1728@gmail.com July 09, 2021 +// Modified: +// +// Purpose: Bin to one hot decoder. Power of 2 only. +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + +module oneHotDecoder + #(parameter WIDTH = 2) + (input logic [WIDTH-1:0] bin, + output logic [2**WIDTH-1:0] decoded + ); + + always_comb begin + decoded = '0; + decoded[bin] = 1'b1; + end + +endmodule diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index f55521061..e54802866 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -30,8 +30,8 @@ module hazard( input logic reset, // Detect hazards input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, - input logic LoadStallD, MulDivStallD, CSRRdStallD, - input logic DCacheStall, ICacheStallF, + input logic LoadStallD, StoreStallD, MulDivStallD, CSRRdStallD, + input logic LSUStall, ICacheStallF, input logic FPUStallD, FStallD, input logic DivBusyE,FDivBusyE, // Stall & flush outputs @@ -56,10 +56,10 @@ module hazard( // If any stages are stalled, the first stage that isn't stalled must flush. 
assign StallFCause = CSRWritePendingDEM && ~(TrapM | RetM | BPPredWrongE); - assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous + assign StallDCause = (LoadStallD | StoreStallD | MulDivStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous assign StallECause = DivBusyE | FDivBusyE; assign StallMCause = 0; - assign StallWCause = DCacheStall | ICacheStallF; + assign StallWCause = LSUStall | ICacheStallF; assign StallF = StallFCause | StallD; assign StallD = StallDCause | StallE; diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 257dc6eba..ab503483c 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -52,6 +52,7 @@ module controller( output logic [1:0] MemRWM, output logic CSRReadM, CSRWriteM, PrivilegedM, output logic SCE, + output logic [1:0] AtomicE, output logic [1:0] AtomicM, output logic [2:0] Funct3M, output logic RegWriteM, // for Hazard Unit @@ -61,7 +62,8 @@ module controller( output logic RegWriteW, // for datapath and Hazard Unit output logic [2:0] ResultSrcW, // Stall during CSRs - output logic CSRWritePendingDEM + output logic CSRWritePendingDEM, + output logic StoreStallD ); logic [6:0] OpD; @@ -83,7 +85,7 @@ module controller( logic TargetSrcD, W64D, MulDivD; logic CSRZeroSrcD; logic CSRReadD; - logic [1:0] AtomicD, AtomicE; + logic [1:0] AtomicD; logic CSRWriteD, CSRWriteE; logic InstrValidD, InstrValidE; logic PrivilegedD, PrivilegedE; @@ -217,5 +219,7 @@ module controller( {RegWriteM, ResultSrcM, InstrValidM}, {RegWriteW, ResultSrcW, InstrValidW}); - assign CSRWritePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM; + assign CSRWritePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM; + + assign StoreStallD = MemRWE[0] & (|MemRWD | 
|AtomicD); endmodule diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index f041fce63..1c8e84c82 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -50,7 +50,7 @@ module datapath ( input logic FWriteIntM, input logic [`XLEN-1:0] FIntResM, output logic [`XLEN-1:0] SrcAM, - output logic [`XLEN-1:0] WriteDataM, MemAdrM, + output logic [`XLEN-1:0] WriteDataM, MemAdrM, MemAdrE, // Writeback stage signals input logic StallW, FlushW, input logic FWriteIntW, @@ -120,6 +120,7 @@ module datapath ( flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM); flopenrc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ~StallM, ALUResultE, ALUResultM); assign MemAdrM = ALUResultM; + assign MemAdrE = ALUResultE; flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM); mux2 #(`XLEN) resultmuxM(ALUResultM, FIntResM, FWriteIntM, ResultM); diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index e7b138694..21aca59b4 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -48,9 +48,11 @@ module ieu ( // Memory stage interface input logic DataMisalignedM, // from LSU input logic SquashSCW, // from LSU + output logic [1:0] MemRWE, // read/write control goes to LSU output logic [1:0] MemRWM, // read/write control goes to LSU + output logic [1:0] AtomicE, // atomic control goes to LSU output logic [1:0] AtomicM, // atomic control goes to LSU - output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU + output logic [`XLEN-1:0] MemAdrM, MemAdrE, WriteDataM, // Address and write data to LSU output logic [2:0] Funct3M, // size and signedness to LSU output logic [`XLEN-1:0] SrcAM, // to privilege and fpu @@ -72,7 +74,8 @@ module ieu ( input logic DivDoneE, input logic DivBusyE, output logic CSRReadM, CSRWriteM, PrivilegedM, - output logic 
CSRWritePendingDEM + output logic CSRWritePendingDEM, + output logic StoreStallD ); logic [2:0] ImmSrcD; @@ -90,7 +93,6 @@ module ieu ( logic RegWriteM, RegWriteW; logic MemReadE, CSRReadE; logic JumpE; - logic [1:0] MemRWE; controller c(.*); datapath dp(.*); diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 2412ffa13..a0728a1ad 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -84,8 +84,6 @@ module ifu ( output logic InstrAccessFaultF, output logic ISquashBusAccessF -// output logic [5:0] IHSELRegionsF - ); logic [`XLEN-1:0] PCCorrectE, UnalignedPCNextF, PCNextF; @@ -103,10 +101,6 @@ module ifu ( logic PMPInstrAccessFaultF, PMAInstrAccessFaultF; - logic PMALoadAccessFaultM, PMAStoreAccessFaultM; - logic PMPLoadAccessFaultM, PMPStoreAccessFaultM; // *** these are just so that the mmu has somewhere to put these outputs, they're unused in this stage - // if you're allowed to parameterize outputs/ inputs existence, these are an easy delete. - logic [`PA_BITS-1:0] PCPFmmu, PCNextFPhys; // used to either truncate or expand PCPF and PCNextF into `PA_BITS width. generate @@ -138,6 +132,9 @@ module ifu ( .LoadAccessFaultM(), .StoreAccessFaultM(), .DisableTranslation(1'b0), + .Cacheable(), + .Idempotent(), + .AtomicAllowed(), .*); diff --git a/wally-pipelined/src/lsu/dcache.sv b/wally-pipelined/src/lsu/dcache.sv deleted file mode 100644 index e8dfeb5cd..000000000 --- a/wally-pipelined/src/lsu/dcache.sv +++ /dev/null @@ -1,184 +0,0 @@ -/////////////////////////////////////////// -// dcache.sv -// -// Written: jaallen@g.hmc.edu 2021-04-15 -// Modified: -// -// Purpose: Cache memory for the dmem so it can access memory less often, saving cycles -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-/////////////////////////////////////////// - -`include "wally-config.vh" - -module dcache( - // Basic pipeline stuff - input logic clk, reset, - input logic StallW, - input logic FlushW, - // Upper bits of physical address - input logic [`PA_BITS-1:12] UpperPAdrM, - // Lower 12 bits of virtual address, since it's faster this way - input logic [11:0] LowerVAdrM, - // Write to the dcache - input logic [`XLEN-1:0] DCacheWriteDataM, - input logic DCacheReadM, DCacheWriteM, - // Data read in from the ebu unit - input logic [`XLEN-1:0] ReadDataW, - input logic MemAckW, - // Access requested from the ebu unit - output logic [`PA_BITS-1:0] MemPAdrM, - output logic MemReadM, MemWriteM, - // High if the dcache is requesting a stall - output logic DCacheStallW, - // The data that was requested from the cache - output logic [`XLEN-1:0] DCacheReadW -); - - // Configuration parameters - // TODO Move these to a config file - localparam integer DCACHELINESIZE = 256; - localparam integer DCACHENUMLINES = 512; - - // Input signals to cache memory - logic FlushMem; - logic [`PA_BITS-1:12] DCacheMemUpperPAdr; - logic [11:0] DCacheMemLowerAdr; - logic DCacheMemWriteEnable; - logic [DCACHELINESIZE-1:0] DCacheMemWriteData; - logic [`XLEN-1:0] DCacheMemWritePAdr; - logic EndFetchState; - // Output signals from cache memory - logic [`XLEN-1:0] DCacheMemReadData; - logic DCacheMemReadValid; - - wtdirectmappedmem #(.LINESIZE(DCACHELINESIZE), .NUMLINES(DCACHENUMLINES), .WORDSIZE(`XLEN)) cachemem( - .*, - // Stall it if the pipeline is stalled, unless we're stalling it and we're ending our stall - .stall(StallW), - .flush(FlushMem), - .ReadUpperPAdr(DCacheMemUpperPAdr), - .ReadLowerAdr(DCacheMemLowerAdr), - .LoadEnable(DCacheMemWriteEnable), - .LoadLine(DCacheMemWriteData), - .LoadPAdr(DCacheMemWritePAdr), - .DataWord(DCacheMemReadData), - .DataValid(DCacheMemReadValid), - .WriteEnable(0), - .WriteWord(0), - .WritePAdr(0), - .WriteSize(2'b10) - ); - - dcachecontroller 
#(.LINESIZE(DCACHELINESIZE)) controller(.*); - - // For now, assume no writes to executable memory - assign FlushMem = 1'b0; -endmodule - -module dcachecontroller #(parameter LINESIZE = 256) ( - // Inputs from pipeline - input logic clk, reset, - input logic StallW, - input logic FlushW, - - // Input the address to read - // The upper bits of the physical pc - input logic [`PA_BITS-1:12] DCacheMemUpperPAdr, - // The lower bits of the virtual pc - input logic [11:0] DCacheMemLowerAdr, - - // Signals to/from cache memory - // The read coming out of it - input logic [`XLEN-1:0] DCacheMemReadData, - input logic DCacheMemReadValid, - // Load data into the cache - output logic DCacheMemWriteEnable, - output logic [LINESIZE-1:0] DCacheMemWriteData, - output logic [`XLEN-1:0] DCacheMemWritePAdr, - - // The read that was requested - output logic [31:0] DCacheReadW, - - // Outputs to pipeline control stuff - output logic DCacheStallW, EndFetchState, - - // Signals to/from ahblite interface - // A read containing the requested data - input logic [`XLEN-1:0] ReadDataW, - input logic MemAckW, - // The read we request from main memory - output logic [`PA_BITS-1:0] MemPAdrM, - output logic MemReadM, MemWriteM -); - - // Cache fault signals - logic FaultStall; - - // Handle happy path (data in cache) - - always_comb begin - DCacheReadW = DCacheMemReadData; - end - - - // Handle cache faults - - localparam integer WORDSPERLINE = LINESIZE/`XLEN; - localparam integer LOGWPL = $clog2(WORDSPERLINE); - localparam integer OFFSETWIDTH = $clog2(LINESIZE/8); - - logic FetchState, BeginFetchState; - logic [LOGWPL:0] FetchWordNum, NextFetchWordNum; - logic [`PA_BITS-1:0] LineAlignedPCPF; - - flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState); - flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum); - - genvar i; - generate - for (i=0; i < WORDSPERLINE; i++) begin:sb - flopenr #(`XLEN) flop(clk, reset, FetchState & (i == 
FetchWordNum), ReadDataW, DCacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]); - end - endgenerate - - // Enter the fetch state when we hit a cache fault - always_comb begin - BeginFetchState = ~DCacheMemReadValid & ~FetchState & (FetchWordNum == 0); - end - // Exit the fetch state once the cache line has been loaded - flopr #(1) EndFetchStateFlop(clk, reset, DCacheMemWriteEnable, EndFetchState); - - // Machinery to request the correct addresses from main memory - always_comb begin - MemReadM = FetchState & ~EndFetchState & ~DCacheMemWriteEnable; - LineAlignedPCPF = {DCacheMemUpperPAdr, DCacheMemLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; - MemPAdrM = LineAlignedPCPF + FetchWordNum*(`XLEN/8); - NextFetchWordNum = FetchState ? FetchWordNum+MemAckW : {LOGWPL+1{1'b0}}; - end - - // Write to cache memory when we have the line here - always_comb begin - DCacheMemWritePAdr = LineAlignedPCPF; - DCacheMemWriteEnable = FetchWordNum == {1'b1, {LOGWPL{1'b0}}} & FetchState & ~EndFetchState; - end - - // Stall the pipeline while loading a new line from memory - always_comb begin - DCacheStallW = FetchState | ~DCacheMemReadValid; - end -endmodule diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index c7a915ee4..f1db20994 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -32,19 +32,23 @@ module lsu ( input logic clk, reset, input logic StallM, FlushM, StallW, FlushW, - output logic DCacheStall, + output logic LSUStall, // Memory Stage // connected to cpu (controls) input logic [1:0] MemRWM, input logic [2:0] Funct3M, + input logic [6:0] Funct7M, input logic [1:0] AtomicM, + input logic ExceptionM, + input logic PendingInterruptM, output logic CommittedM, output logic SquashSCW, output logic DataMisalignedM, // address and write data input logic [`XLEN-1:0] MemAdrM, + input logic [`XLEN-1:0] MemAdrE, input logic [`XLEN-1:0] WriteDataM, output logic [`XLEN-1:0] ReadDataW, @@ -60,14 +64,13 @@ module lsu // connect to ahb input 
logic CommitM, // should this be generated in the abh interface? - output logic [`PA_BITS-1:0] MemPAdrM, // to ahb - output logic MemReadM, MemWriteM, - output logic [1:0] AtomicMaskedM, - input logic MemAckW, // from ahb - input logic [`XLEN-1:0] HRDATAW, // from ahb - output logic [2:0] SizeFromLSU, - output logic StallWfromLSU, - + output logic [`PA_BITS-1:0] DCtoAHBPAdrM, + output logic DCtoAHBReadM, + output logic DCtoAHBWriteM, + input logic DCfromAHBAck, + input logic [`XLEN-1:0] DCfromAHBReadData, + output logic [`XLEN-1:0] DCtoAHBWriteData, + output logic [2:0] DCtoAHBSizeM, // mmu management @@ -87,14 +90,9 @@ module lsu output logic DTLBHitM, // not connected - // PMA/PMP (inside mmu) signals - input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well. - input logic [2:0] HSIZE, HBURST, - input logic HWRITE, input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker. + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // *** this one especially has a large note attached to it in pmpchecker. 
- output logic DSquashBusAccessM // output logic [5:0] DHSELRegionsM ); @@ -103,7 +101,9 @@ module lsu logic DTLBPageFaultM; logic MemAccessM; +/* -----\/----- EXCLUDED -----\/----- logic preCommittedM; + -----/\----- EXCLUDED -----/\----- */ typedef enum {STATE_READY, STATE_FETCH, @@ -115,38 +115,41 @@ module lsu STATE_PTW_DONE} statetype; statetype CurrState, NextState; + logic [`PA_BITS-1:0] MemPAdrM; // from mmu to dcache + logic DTLBMissM; logic [`XLEN-1:0] PageTableEntryM; logic [1:0] PageTypeM; logic DTLBWriteM; - logic [`XLEN-1:0] MMUReadPTE; + logic [`XLEN-1:0] HPTWReadPTE; logic MMUReady; logic HPTWStall; - logic [`XLEN-1:0] MMUPAdr; - logic MMUTranslate; + logic [`XLEN-1:0] HPTWPAdrE; + logic [`XLEN-1:0] HPTWPAdrM; logic HPTWRead; - logic [1:0] MemRWMtoLSU; - logic [2:0] SizeToLSU; - logic [1:0] AtomicMtoLSU; - logic [`XLEN-1:0] MemAdrMtoLSU; - logic [`XLEN-1:0] WriteDataMtoLSU; - logic [`XLEN-1:0] ReadDataWFromLSU; - logic StallWtoLSU; - logic CommittedMfromLSU; - logic SquashSCWfromLSU; - logic DataMisalignedMfromLSU; + logic [1:0] MemRWMtoDCache; + logic [2:0] Funct3MtoDCache; + logic [1:0] AtomicMtoDCache; + logic [`XLEN-1:0] MemAdrMtoDCache; + logic [`XLEN-1:0] MemAdrEtoDCache; + logic [`XLEN-1:0] ReadDataWfromDCache; + logic StallWtoDCache; + logic SquashSCWfromDCache; + logic DataMisalignedMfromDCache; logic HPTWReady; - logic LSUStall; logic DisableTranslation; // used to stop intermediate PTE physical addresses being saved to TLB. - - - - - // for time being until we have a dcache the AHB Lite read bus HRDATAW will be connected to the - // CPU's read data input ReadDataW. 
- assign ReadDataWFromLSU = HRDATAW; + logic DCacheStall; + logic CacheableM; + logic CacheableMtoDCache; + logic SelPTW; + logic CommittedMfromDCache; + logic PendingInterruptMtoDCache; + logic FlushWtoDCache; + logic WalkerPageFaultM; + + pagetablewalker pagetablewalker( .clk(clk), .reset(reset), @@ -162,58 +165,63 @@ module lsu .PageTypeM(PageTypeM), .ITLBWriteF(ITLBWriteF), .DTLBWriteM(DTLBWriteM), - .MMUReadPTE(MMUReadPTE), + .HPTWReadPTE(HPTWReadPTE), .MMUReady(HPTWReady), .HPTWStall(HPTWStall), - .MMUPAdr(MMUPAdr), - .MMUTranslate(MMUTranslate), + .HPTWPAdrE(HPTWPAdrE), + .HPTWPAdrM(HPTWPAdrM), .HPTWRead(HPTWRead), + .SelPTW(SelPTW), .WalkerInstrPageFaultF(WalkerInstrPageFaultF), .WalkerLoadPageFaultM(WalkerLoadPageFaultM), .WalkerStorePageFaultM(WalkerStorePageFaultM)); - + assign WalkerPageFaultM = WalkerStorePageFaultM | WalkerLoadPageFaultM; // arbiter between IEU and pagetablewalker lsuArb arbiter(.clk(clk), .reset(reset), // HPTW connection - .HPTWTranslate(MMUTranslate), + .SelPTW(SelPTW), .HPTWRead(HPTWRead), - .HPTWPAdr(MMUPAdr), - .HPTWReadPTE(MMUReadPTE), + .HPTWPAdrE(HPTWPAdrE), + .HPTWPAdrM(HPTWPAdrM), + //.HPTWReadPTE(HPTWReadPTE), .HPTWStall(HPTWStall), // CPU connection .MemRWM(MemRWM), .Funct3M(Funct3M), .AtomicM(AtomicM), .MemAdrM(MemAdrM), - .WriteDataM(WriteDataM), // *** Need to remove this. + .MemAdrE(MemAdrE), + .CommittedM(CommittedM), + .PendingInterruptM(PendingInterruptM), .StallW(StallW), .ReadDataW(ReadDataW), - .CommittedM(CommittedM), .SquashSCW(SquashSCW), .DataMisalignedM(DataMisalignedM), - .DCacheStall(DCacheStall), - // LSU + .LSUStall(LSUStall), + // DCACHE .DisableTranslation(DisableTranslation), - .MemRWMtoLSU(MemRWMtoLSU), - .SizeToLSU(SizeToLSU), - .AtomicMtoLSU(AtomicMtoLSU), - .MemAdrMtoLSU(MemAdrMtoLSU), - .WriteDataMtoLSU(WriteDataMtoLSU), // *** ?????????????? 
- .StallWtoLSU(StallWtoLSU), - .CommittedMfromLSU(CommittedMfromLSU), - .SquashSCWfromLSU(SquashSCWfromLSU), - .DataMisalignedMfromLSU(DataMisalignedMfromLSU), - .ReadDataWFromLSU(ReadDataWFromLSU), - .DataStall(LSUStall)); - + .MemRWMtoDCache(MemRWMtoDCache), + .Funct3MtoDCache(Funct3MtoDCache), + .AtomicMtoDCache(AtomicMtoDCache), + .MemAdrMtoDCache(MemAdrMtoDCache), + .MemAdrEtoDCache(MemAdrEtoDCache), + .StallWtoDCache(StallWtoDCache), + .SquashSCWfromDCache(SquashSCWfromDCache), + .DataMisalignedMfromDCache(DataMisalignedMfromDCache), + .ReadDataWfromDCache(ReadDataWfromDCache), + .CommittedMfromDCache(CommittedMfromDCache), + .PendingInterruptMtoDCache(PendingInterruptMtoDCache), + .DCacheStall(DCacheStall)); + + mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) - dmmu(.Address(MemAdrMtoLSU), - .Size(SizeToLSU[1:0]), + dmmu(.Address(MemAdrMtoDCache), + .Size(Funct3MtoDCache[1:0]), .PTE(PageTableEntryM), .PageTypeWriteVal(PageTypeM), .TLBWrite(DTLBWriteM), @@ -223,46 +231,60 @@ module lsu .TLBHit(DTLBHitM), .TLBPageFault(DTLBPageFaultM), .ExecuteAccessF(1'b0), - .AtomicAccessM(AtomicMaskedM[1]), - .WriteAccessM(MemRWMtoLSU[0]), - .ReadAccessM(MemRWMtoLSU[1]), - .SquashBusAccess(DSquashBusAccessM), + //.AtomicAccessM(AtomicMaskedM[1]), + .AtomicAccessM(1'b0), + .WriteAccessM(MemRWMtoDCache[0]), + .ReadAccessM(MemRWMtoDCache[1]), + .SquashBusAccess(), .DisableTranslation(DisableTranslation), .InstrAccessFaultF(), + .Cacheable(CacheableM), + .Idempotent(), + .AtomicAllowed(), // .SelRegions(DHSELRegionsM), .*); // *** the pma/pmp instruction acess faults don't really matter here. is it possible to parameterize which outputs exist? + // *** BUG, this is most likely wrong + assign CacheableMtoDCache = SelPTW ? 1'b1 : CacheableM; + + generate + if (`XLEN == 32) assign DCtoAHBSizeM = CacheableMtoDCache ? 3'b010 : Funct3MtoDCache; + else assign DCtoAHBSizeM = CacheableMtoDCache ? 
3'b011 : Funct3MtoDCache; + endgenerate; + + // Specify which type of page fault is occurring - assign DTLBLoadPageFaultM = DTLBPageFaultM & MemRWMtoLSU[1]; - assign DTLBStorePageFaultM = DTLBPageFaultM & MemRWMtoLSU[0]; + assign DTLBLoadPageFaultM = DTLBPageFaultM & MemRWMtoDCache[1]; + assign DTLBStorePageFaultM = DTLBPageFaultM & MemRWMtoDCache[0]; // Determine if an Unaligned access is taking place always_comb - case(SizeToLSU[1:0]) - 2'b00: DataMisalignedMfromLSU = 0; // lb, sb, lbu - 2'b01: DataMisalignedMfromLSU = MemAdrMtoLSU[0]; // lh, sh, lhu - 2'b10: DataMisalignedMfromLSU = MemAdrMtoLSU[1] | MemAdrMtoLSU[0]; // lw, sw, flw, fsw, lwu - 2'b11: DataMisalignedMfromLSU = |MemAdrMtoLSU[2:0]; // ld, sd, fld, fsd + case(Funct3MtoDCache[1:0]) + 2'b00: DataMisalignedMfromDCache = 0; // lb, sb, lbu + 2'b01: DataMisalignedMfromDCache = MemAdrMtoDCache[0]; // lh, sh, lhu + 2'b10: DataMisalignedMfromDCache = MemAdrMtoDCache[1] | MemAdrMtoDCache[0]; // lw, sw, flw, fsw, lwu + 2'b11: DataMisalignedMfromDCache = |MemAdrMtoDCache[2:0]; // ld, sd, fld, fsd endcase // Squash unaligned data accesses and failed store conditionals // *** this is also the place to squash if the cache is hit - // Changed DataMisalignedMfromLSU to a larger combination of trap sources + // Changed DataMisalignedMfromDCache to a larger combination of trap sources // NonBusTrapM is anything that the bus doesn't contribute to producing // By contrast, using TrapM results in circular logic errors - assign MemReadM = MemRWMtoLSU[1] & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED; - assign MemWriteM = MemRWMtoLSU[0] & ~NonBusTrapM & ~DTLBMissM & ~SquashSCM & CurrState != STATE_STALLED; - assign AtomicMaskedM = CurrState != STATE_STALLED ? AtomicMtoLSU : 2'b00 ; +/* -----\/----- EXCLUDED -----\/----- + + // *** BUG for now leave this out. come back later after the d cache is working. 
July 09, 2021 + + assign MemReadM = MemRWMtoDCache[1] & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED; + assign MemWriteM = MemRWMtoDCache[0] & ~NonBusTrapM & ~DTLBMissM & ~SquashSCM & CurrState != STATE_STALLED; + assign AtomicMaskedM = CurrState != STATE_STALLED ? AtomicMtoDCache : 2'b00 ; assign MemAccessM = MemReadM | MemWriteM; // Determine if M stage committed // Reset whenever unstalled. Set when access successfully occurs - flopr #(1) committedMreg(clk,reset,(CommittedMfromLSU | CommitM) & StallM,preCommittedM); - assign CommittedMfromLSU = preCommittedM | CommitM; + flopr #(1) committedMreg(clk,reset,(CommittedMfromDCache | CommitM) & StallM,preCommittedM); + assign CommittedMfromDCache = preCommittedM | CommitM; - // Determine if address is valid - assign LoadMisalignedFaultM = DataMisalignedMfromLSU & MemRWMtoLSU[1]; - assign StoreMisalignedFaultM = DataMisalignedMfromLSU & MemRWMtoLSU[0]; // Handle atomic load reserved / store conditional generate @@ -271,9 +293,9 @@ module lsu logic ReservationValidM, ReservationValidW; logic lrM, scM, WriteAdrMatchM; - assign lrM = MemReadM && AtomicMtoLSU[0]; - assign scM = MemRWMtoLSU[0] && AtomicMtoLSU[0]; - assign WriteAdrMatchM = MemRWMtoLSU[0] && (MemPAdrM[`PA_BITS-1:2] == ReservationPAdrW) && ReservationValidW; + assign lrM = MemReadM && AtomicMtoDCache[0]; + assign scM = MemRWMtoDCache[0] && AtomicMtoDCache[0]; + assign WriteAdrMatchM = MemRWMtoDCache[0] && (MemPAdrM[`PA_BITS-1:2] == ReservationPAdrW) && ReservationValidW; assign SquashSCM = scM && ~WriteAdrMatchM; always_comb begin // ReservationValidM (next value of valid reservation) if (lrM) ReservationValidM = 1; // set valid on load reserve @@ -282,22 +304,67 @@ module lsu end flopenrc #(`PA_BITS-2) resadrreg(clk, reset, FlushW, lrM, MemPAdrM[`PA_BITS-1:2], ReservationPAdrW); // could drop clear on this one but not valid flopenrc #(1) resvldreg(clk, reset, FlushW, lrM, ReservationValidM, ReservationValidW); - flopenrc #(1) squashreg(clk, reset, 
FlushW, ~StallWtoLSU, SquashSCM, SquashSCWfromLSU); + flopenrc #(1) squashreg(clk, reset, FlushW, ~StallWtoDCache, SquashSCM, SquashSCWfromDCache); end else begin // Atomic operations not supported assign SquashSCM = 0; - assign SquashSCWfromLSU = 0; + assign SquashSCWfromDCache = 0; end endgenerate + -----/\----- EXCLUDED -----/\----- */ + + // Determine if address is valid + assign LoadMisalignedFaultM = DataMisalignedMfromDCache & MemRWMtoDCache[1]; + assign StoreMisalignedFaultM = DataMisalignedMfromDCache & MemRWMtoDCache[0]; + + dcache dcache(.clk(clk), + .reset(reset), + .StallM(StallM), + .StallW(StallWtoDCache), + .FlushM(FlushM), + .FlushW(FlushWtoDCache), + .MemRWM(MemRWMtoDCache), + .Funct3M(Funct3MtoDCache), + .Funct7M(Funct7M), + .AtomicM(AtomicMtoDCache), + .MemAdrE(MemAdrEtoDCache), + .MemPAdrM(MemPAdrM), + .WriteDataM(WriteDataM), + .ReadDataW(ReadDataWfromDCache), + .ReadDataM(HPTWReadPTE), + .DCacheStall(DCacheStall), + .CommittedM(CommittedMfromDCache), + .ExceptionM(ExceptionM), + .PendingInterruptM(PendingInterruptMtoDCache), + .DTLBMissM(DTLBMissM), + .CacheableM(CacheableMtoDCache), + .DTLBWriteM(DTLBWriteM), + .SelPTW(SelPTW), + .WalkerPageFaultM(WalkerPageFaultM), + + // AHB connection + .AHBPAdr(DCtoAHBPAdrM), + .AHBRead(DCtoAHBReadM), + .AHBWrite(DCtoAHBWriteM), + .AHBAck(DCfromAHBAck), + .HWDATA(DCtoAHBWriteData), + .HRDATA(DCfromAHBReadData) + ); + +// assign AtomicMaskedM = 2'b00; // *** Remove from AHB + // Data stall //assign LSUStall = (NextState == STATE_FETCH) || (NextState == STATE_FETCH_AMO_1) || (NextState == STATE_FETCH_AMO_2); - assign HPTWReady = (CurrState == STATE_READY); + // BUG *** July 09, 2021 + //assign HPTWReady = (CurrState == STATE_READY); // Ross Thompson April 22, 2021 // for now we need to handle the issue where the data memory interface repeately // requests data from memory rather than issuing a single request. 
+/* -----\/----- EXCLUDED -----\/----- + // *** BUG will need to modify this so we can handle the ptw. July 09, 2021 flopenl #(.TYPE(statetype)) stateReg(.clk(clk), .load(reset), @@ -315,10 +382,10 @@ module lsu end else if (AtomicMaskedM[1]) begin NextState = STATE_FETCH_AMO_1; // *** should be some misalign check LSUStall = 1'b1; - end else if((MemReadM & AtomicMtoLSU[0]) | (MemWriteM & AtomicMtoLSU[0])) begin + end else if((MemReadM & AtomicMtoDCache[0]) | (MemWriteM & AtomicMtoDCache[0])) begin NextState = STATE_FETCH_AMO_2; LSUStall = 1'b1; - end else if (MemAccessM & ~DataMisalignedMfromLSU) begin + end else if (MemAccessM & ~DataMisalignedMfromDCache) begin NextState = STATE_FETCH; LSUStall = 1'b1; end else begin @@ -327,7 +394,7 @@ module lsu end STATE_FETCH_AMO_1: begin LSUStall = 1'b1; - if (MemAckW) begin + if (DCfromAHBAck) begin NextState = STATE_FETCH_AMO_2; end else begin NextState = STATE_FETCH_AMO_1; @@ -335,9 +402,9 @@ module lsu end STATE_FETCH_AMO_2: begin LSUStall = 1'b1; - if (MemAckW & ~StallWtoLSU) begin + if (DCfromAHBAck & ~StallWtoDCache) begin NextState = STATE_FETCH_AMO_2; - end else if (MemAckW & StallWtoLSU) begin + end else if (DCfromAHBAck & StallWtoDCache) begin NextState = STATE_STALLED; end else begin NextState = STATE_FETCH_AMO_2; @@ -345,9 +412,9 @@ module lsu end STATE_FETCH: begin LSUStall = 1'b1; - if (MemAckW & ~StallWtoLSU) begin + if (DCfromAHBAck & ~StallWtoDCache) begin NextState = STATE_READY; - end else if (MemAckW & StallWtoLSU) begin + end else if (DCfromAHBAck & StallWtoDCache) begin NextState = STATE_STALLED; end else begin NextState = STATE_FETCH; @@ -355,7 +422,7 @@ module lsu end STATE_STALLED: begin LSUStall = 1'b0; - if (~StallWtoLSU) begin + if (~StallWtoDCache) begin NextState = STATE_READY; end else begin NextState = STATE_STALLED; @@ -366,7 +433,7 @@ module lsu if (DTLBWriteM) begin NextState = STATE_READY; LSUStall = 1'b1; - end else if (MemReadM & ~DataMisalignedMfromLSU) begin + end else if (MemReadM & 
~DataMisalignedMfromDCache) begin NextState = STATE_PTW_FETCH; end else begin NextState = STATE_PTW_READY; @@ -374,9 +441,9 @@ module lsu end STATE_PTW_FETCH : begin LSUStall = 1'b1; - if (MemAckW & ~DTLBWriteM) begin + if (DCfromAHBAck & ~DTLBWriteM) begin NextState = STATE_PTW_READY; - end else if (MemAckW & DTLBWriteM) begin + end else if (DCfromAHBAck & DTLBWriteM) begin NextState = STATE_READY; end else begin NextState = STATE_PTW_FETCH; @@ -391,11 +458,8 @@ module lsu end endcase end // always_comb + -----/\----- EXCLUDED -----/\----- */ - // *** for now just pass through size - assign SizeFromLSU = SizeToLSU; - assign StallWfromLSU = StallWtoLSU; - endmodule diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 23e88970f..13a772435 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -30,11 +30,12 @@ module lsuArb (input logic clk, reset, // from page table walker - input logic HPTWTranslate, + input logic SelPTW, input logic HPTWRead, - input logic [`XLEN-1:0] HPTWPAdr, + input logic [`XLEN-1:0] HPTWPAdrE, + input logic [`XLEN-1:0] HPTWPAdrM, // to page table walker. 
- output logic [`XLEN-1:0] HPTWReadPTE, + //output logic [`XLEN-1:0] HPTWReadPTE, output logic HPTWStall, // from CPU @@ -42,132 +43,67 @@ module lsuArb input logic [2:0] Funct3M, input logic [1:0] AtomicM, input logic [`XLEN-1:0] MemAdrM, - input logic [`XLEN-1:0] WriteDataM, + input logic [`XLEN-1:0] MemAdrE, input logic StallW, + input logic PendingInterruptM, // to CPU output logic [`XLEN-1:0] ReadDataW, - output logic CommittedM, output logic SquashSCW, output logic DataMisalignedM, - output logic DCacheStall, + output logic CommittedM, + output logic LSUStall, - // to LSU + // to D Cache output logic DisableTranslation, - output logic [1:0] MemRWMtoLSU, - output logic [2:0] SizeToLSU, - output logic [1:0] AtomicMtoLSU, - output logic [`XLEN-1:0] MemAdrMtoLSU, - output logic [`XLEN-1:0] WriteDataMtoLSU, - output logic StallWtoLSU, - // from LSU - input logic CommittedMfromLSU, - input logic SquashSCWfromLSU, - input logic DataMisalignedMfromLSU, - input logic [`XLEN-1:0] ReadDataWFromLSU, - input logic DataStall + output logic [1:0] MemRWMtoDCache, + output logic [2:0] Funct3MtoDCache, + output logic [1:0] AtomicMtoDCache, + output logic [`XLEN-1:0] MemAdrMtoDCache, + output logic [`XLEN-1:0] MemAdrEtoDCache, + output logic StallWtoDCache, + output logic PendingInterruptMtoDCache, + + + // from D Cache + input logic CommittedMfromDCache, + input logic SquashSCWfromDCache, + input logic DataMisalignedMfromDCache, + input logic [`XLEN-1:0] ReadDataWfromDCache, + input logic DCacheStall ); - - // HPTWTranslate is the request for memory by the page table walker. When - // this is high the page table walker gains priority over the CPU's data - // input. Note the ptw only makes a request after an instruction or data - // tlb miss. It is entirely possible the dcache is currently processing - // a data cache miss when an instruction tlb miss occurs. 
If an instruction - // in the E stage causes a d cache miss, the d cache will immediately start - // processing the request. Simultaneously the ITLB misses. By the time - // the TLB miss causes the page table walker to issue the first request - // to data memory the d cache is already busy. We can interlock by - // leveraging Stall as a d cache busy. We will need an FSM to handle this. - typedef enum{StateReady, - StatePTWPending, - StatePTWActive} statetype; - - - statetype CurrState, NextState; - logic SelPTW; - logic HPTWStallD; logic [2:0] PTWSize; - - flopenl #(.TYPE(statetype)) StateReg(.clk(clk), - .load(reset), - .en(1'b1), - .d(NextState), - .val(StateReady), - .q(CurrState)); - - always_comb begin - case(CurrState) - StateReady: - if (HPTWTranslate) NextState = StatePTWActive; - else NextState = StateReady; - StatePTWActive: - if (HPTWTranslate) NextState = StatePTWActive; - else NextState = StateReady; - default: NextState = StateReady; - endcase - end - -/* -----\/----- EXCLUDED -----\/----- - - always_comb begin - case(CurrState) - StateReady: - /-* -----\/----- EXCLUDED -----\/----- - if (HPTWTranslate & DataStall) NextState = StatePTWPending; - else - -----/\----- EXCLUDED -----/\----- *-/ - if (HPTWTranslate) NextState = StatePTWActive; - else NextState = StateReady; - StatePTWPending: - if (HPTWTranslate & ~DataStall) NextState = StatePTWActive; - else if (HPTWTranslate & DataStall) NextState = StatePTWPending; - else NextState = StateReady; - StatePTWActive: - if (HPTWTranslate) NextState = StatePTWActive; - else NextState = StateReady; - default: NextState = StateReady; - endcase - end - - -----/\----- EXCLUDED -----/\----- */ - // multiplex the outputs to LSU assign DisableTranslation = SelPTW; // change names between SelPTW would be confusing in DTLB. - assign SelPTW = (CurrState == StatePTWActive && HPTWTranslate) || (CurrState == StateReady && HPTWTranslate); - assign MemRWMtoLSU = SelPTW ? 
{HPTWRead, 1'b0} : MemRWM; + assign MemRWMtoDCache = SelPTW ? {HPTWRead, 1'b0} : MemRWM; generate assign PTWSize = (`XLEN==32 ? 3'b010 : 3'b011); // 32 or 64-bit access from htpw endgenerate - mux2 #(3) sizemux(Funct3M, PTWSize, SelPTW, SizeToLSU); + mux2 #(3) sizemux(Funct3M, PTWSize, SelPTW, Funct3MtoDCache); - assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM; - assign MemAdrMtoLSU = SelPTW ? HPTWPAdr : MemAdrM; - assign WriteDataMtoLSU = SelPTW ? `XLEN'b0 : WriteDataM; - assign StallWtoLSU = SelPTW ? 1'b0 : StallW; + assign AtomicMtoDCache = SelPTW ? 2'b00 : AtomicM; + assign MemAdrMtoDCache = SelPTW ? HPTWPAdrM : MemAdrM; + assign MemAdrEtoDCache = SelPTW ? HPTWPAdrE : MemAdrE; + assign StallWtoDCache = SelPTW ? 1'b0 : StallW; + // always block interrupts when using the hardware page table walker. + assign CommittedM = SelPTW ? 1'b1 : CommittedMfromDCache; // demux the inputs from LSU to walker or cpu's data port. - assign ReadDataW = SelPTW ? `XLEN'b0 : ReadDataWFromLSU; // probably can avoid this demux - assign HPTWReadPTE = SelPTW ? ReadDataWFromLSU : `XLEN'b0 ; // probably can avoid this demux - assign CommittedM = SelPTW ? 1'b0 : CommittedMfromLSU; - assign SquashSCW = SelPTW ? 1'b0 : SquashSCWfromLSU; - assign DataMisalignedM = SelPTW ? 1'b0 : DataMisalignedMfromLSU; + assign ReadDataW = SelPTW ? `XLEN'b0 : ReadDataWfromDCache; // probably can avoid this demux + //assign HPTWReadPTE = SelPTW ? ReadDataWfromDCache : `XLEN'b0 ; // probably can avoid this demux + assign SquashSCW = SelPTW ? 1'b0 : SquashSCWfromDCache; + assign DataMisalignedM = SelPTW ? 1'b0 : DataMisalignedMfromDCache; // *** need to rename DcacheStall and Datastall. // not clear at all. I think it should be LSUStall from the LSU, // which is demuxed to HPTWStall and CPUDataStall? (not sure on this last one). - assign HPTWStall = SelPTW ? DataStall : 1'b1; - //assign HPTWStallD = SelPTW ? DataStall : 1'b1; -/* -----\/----- EXCLUDED -----\/----- - assign HPTWStallD = SelPTW ? 
DataStall : 1'b1; - flopr #(1) HPTWStallReg (.clk(clk), - .reset(reset), - .d(HPTWStallD), - .q(HPTWStall)); - -----/\----- EXCLUDED -----/\----- */ - - assign DCacheStall = SelPTW ? 1'b1 : DataStall; // *** this is probably going to change. + assign HPTWStall = SelPTW ? DCacheStall : 1'b1; + + assign PendingInterruptMtoDCache = SelPTW ? 1'b0 : PendingInterruptM; + + assign LSUStall = SelPTW ? 1'b1 : DCacheStall; // *** this is probably going to change. endmodule diff --git a/wally-pipelined/src/mmu/mmu.sv b/wally-pipelined/src/mmu/mmu.sv index 4b6a0fdad..72abc7bae 100644 --- a/wally-pipelined/src/mmu/mmu.sv +++ b/wally-pipelined/src/mmu/mmu.sv @@ -60,6 +60,7 @@ module mmu #(parameter TLB_ENTRIES = 8, // nuber of TLB Entries output logic [`PA_BITS-1:0] PhysicalAddress, output logic TLBMiss, output logic TLBHit, + output logic Cacheable, Idempotent, AtomicAllowed, // Faults output logic TLBPageFault, @@ -78,7 +79,6 @@ module mmu #(parameter TLB_ENTRIES = 8, // nuber of TLB Entries logic [`PA_BITS-1:0] TLBPAdr; logic [`XLEN+1:0] AddressExt; logic PMPSquashBusAccess, PMASquashBusAccess; - logic Cacheable, Idempotent, AtomicAllowed; // *** here so that the pmachecker has somewhere to put these outputs. *** I'm leaving them as outputs to pma checker, but I'm stopping them here. // Translation lookaside buffer logic PMAInstrAccessFaultF, PMPInstrAccessFaultF; @@ -117,9 +117,10 @@ module mmu #(parameter TLB_ENTRIES = 8, // nuber of TLB Entries pmpchecker pmpchecker(.*); + // If TLB miss and translating we want to not have faults from the PMA and PMP checkers. 
assign SquashBusAccess = PMASquashBusAccess | PMPSquashBusAccess; - assign InstrAccessFaultF = PMAInstrAccessFaultF | PMPInstrAccessFaultF; - assign LoadAccessFaultM = PMALoadAccessFaultM | PMPLoadAccessFaultM; - assign StoreAccessFaultM = PMAStoreAccessFaultM | PMPStoreAccessFaultM; + assign InstrAccessFaultF = (PMAInstrAccessFaultF | PMPInstrAccessFaultF) & ~(Translate & ~TLBHit); + assign LoadAccessFaultM = (PMALoadAccessFaultM | PMPLoadAccessFaultM) & ~(Translate & ~TLBHit); + assign StoreAccessFaultM = (PMAStoreAccessFaultM | PMPStoreAccessFaultM) & ~(Translate & ~TLBHit); endmodule diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index 6357f1c6a..a41f5ca0d 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -9,21 +9,21 @@ // // Purpose: Page Table Walker // Part of the Memory Management Unit (MMU) -// +// // A component of the Wally configurable RISC-V project. -// +// // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University // // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software // is furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /////////////////////////////////////////// @@ -37,85 +37,95 @@ module pagetablewalker ( // Control signals - input logic clk, reset, + input logic clk, reset, input logic [`XLEN-1:0] SATP_REGW, // Signals from TLBs (addresses to translate) input logic [`XLEN-1:0] PCF, MemAdrM, - input logic ITLBMissF, DTLBMissM, - input logic [1:0] MemRWM, + input logic ITLBMissF, DTLBMissM, + input logic [1:0] MemRWM, // Outputs to the TLBs (PTEs to write) output logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM, - output logic [1:0] PageTypeF, PageTypeM, - output logic ITLBWriteF, DTLBWriteM, - - + output logic [1:0] PageTypeF, PageTypeM, + output logic ITLBWriteF, DTLBWriteM, + output logic SelPTW, // *** modify to send to LSU // *** KMG: These are inputs/results from the ahblite whose addresses should have already been checked, so I don't think they need to be sent through the LSU - input logic [`XLEN-1:0] MMUReadPTE, - input logic MMUReady, - input logic HPTWStall, + input logic [`XLEN-1:0] HPTWReadPTE, + input logic MMUReady, + input logic HPTWStall, // *** modify to send to LSU - output logic [`XLEN-1:0] MMUPAdr, // this 
probalby should be `PA_BITS wide - output logic MMUTranslate, // *** rename to HPTWReq - output logic HPTWRead, + output logic [`XLEN-1:0] HPTWPAdrE, // this probalby should be `PA_BITS wide + output logic [`XLEN-1:0] HPTWPAdrM, // this probalby should be `PA_BITS wide + output logic HPTWRead, // Faults - output logic WalkerInstrPageFaultF, - output logic WalkerLoadPageFaultM, - output logic WalkerStorePageFaultM + output logic WalkerInstrPageFaultF, + output logic WalkerLoadPageFaultM, + output logic WalkerStorePageFaultM ); + generate if (`MEM_VIRTMEM) begin // Internal signals // register TLBs translation miss requests - logic [`XLEN-1:0] TranslationVAdrQ; - logic ITLBMissFQ, DTLBMissMQ; - - logic [`PPN_BITS-1:0] BasePageTablePPN; - logic [`XLEN-1:0] TranslationVAdr; - logic [`XLEN-1:0] SavedPTE, CurrentPTE; - logic [`PA_BITS-1:0] TranslationPAdr; - logic [`PPN_BITS-1:0] CurrentPPN; - logic [`SVMODE_BITS-1:0] SvMode; - logic MemStore; + logic ITLBMissFQ, DTLBMissMQ; + + logic [`PPN_BITS-1:0] BasePageTablePPN; + logic [`XLEN-1:0] TranslationVAdr; + logic [`XLEN-1:0] SavedPTE, CurrentPTE; + logic [`PA_BITS-1:0] TranslationPAdr; + logic [`PPN_BITS-1:0] CurrentPPN; + logic [`SVMODE_BITS-1:0] SvMode; + logic MemStore; // PTE Control Bits - logic Dirty, Accessed, Global, User, - Executable, Writable, Readable, Valid; + logic Dirty, Accessed, Global, User, + Executable, Writable, Readable, Valid; // PTE descriptions - logic ValidPTE, AccessAlert, MegapageMisaligned, BadMegapage, LeafPTE; + logic ValidPTE, AccessAlert, MegapageMisaligned, BadMegapage, LeafPTE; // Outputs of walker - logic [`XLEN-1:0] PageTableEntry; - logic [1:0] PageType; - logic StartWalk; - logic EndWalk; - - typedef enum {LEVEL0_WDV, - LEVEL0, - LEVEL1_WDV, - LEVEL1, - LEVEL2_WDV, - LEVEL2, - LEVEL3_WDV, - LEVEL3, - LEAF, - IDLE, - START, - FAULT} statetype; + logic [`XLEN-1:0] PageTableEntry; + logic [1:0] PageType; + logic StartWalk; + logic EndWalk; + + typedef enum {LEVEL0_SET_ADRE, + 
LEVEL0_WDV, + LEVEL0, + LEVEL1_SET_ADRE, + LEVEL1_WDV, + LEVEL1, + LEVEL2_SET_ADRE, + LEVEL2_WDV, + LEVEL2, + LEVEL3_SET_ADRE, + LEVEL3_WDV, + LEVEL3, + LEAF, + IDLE, + FAULT} statetype; + + statetype WalkerState, NextWalkerState, PreviousWalkerState; + + logic PRegEn; + logic SelDataTranslation; + logic AnyTLBMissM; + + + + flop #(`XLEN) HPTWPAdrMReg(.clk(clk), + .d(HPTWPAdrE), + .q(HPTWPAdrM)); + - statetype WalkerState, NextWalkerState; - logic PRegEn; - logic SelDataTranslation; - - assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0]; @@ -123,47 +133,34 @@ module pagetablewalker assign MemStore = MemRWM[0]; // Prefer data address translations over instruction address translations - assign TranslationVAdr = (SelDataTranslation) ? MemAdrM : PCF; // *** need to register TranslationVAdr + assign TranslationVAdr = (SelDataTranslation) ? MemAdrM : PCF; assign SelDataTranslation = DTLBMissMQ | DTLBMissM; - flopenr #(`XLEN) - TranslationVAdrReg(.clk(clk), - .reset(reset), - .en(StartWalk), - .d(TranslationVAdr), - .q(TranslationVAdrQ)); - flopenrc #(1) DTLBMissMReg(.clk(clk), - .reset(reset), - .en(StartWalk | EndWalk), - .clear(EndWalk), - .d(DTLBMissM), - .q(DTLBMissMQ)); - + .reset(reset), + .en(StartWalk | EndWalk), + .clear(EndWalk), + .d(DTLBMissM), + .q(DTLBMissMQ)); + flopenrc #(1) ITLBMissMReg(.clk(clk), - .reset(reset), - .en(StartWalk | EndWalk), - .clear(EndWalk), - .d(ITLBMissF), - .q(ITLBMissFQ)); - + .reset(reset), + .en(StartWalk | EndWalk), + .clear(EndWalk), + .d(ITLBMissF), + .q(ITLBMissFQ)); - assign StartWalk = WalkerState == IDLE && (DTLBMissM | ITLBMissF); - assign EndWalk = WalkerState == LEAF || - //(WalkerState == LEVEL0 && ValidPTE && LeafPTE && ~AccessAlert) || - (WalkerState == LEVEL1 && ValidPTE && LeafPTE && ~AccessAlert) || - (WalkerState == LEVEL2 && ValidPTE && LeafPTE && ~AccessAlert) || - (WalkerState == LEVEL3 && ValidPTE && LeafPTE && ~AccessAlert) || - (WalkerState == FAULT); - 
- assign MMUTranslate = (DTLBMissMQ | ITLBMissFQ) & ~EndWalk; - //assign MMUTranslate = DTLBMissM | ITLBMissF; + + assign AnyTLBMissM = DTLBMissM | ITLBMissF; + + assign StartWalk = WalkerState == IDLE & AnyTLBMissM; + assign EndWalk = WalkerState == LEAF || WalkerState == FAULT; // unswizzle PTE bits assign {Dirty, Accessed, Global, User, - Executable, Writable, Readable, Valid} = CurrentPTE[7:0]; + Executable, Writable, Readable, Valid} = CurrentPTE[7:0]; // Assign PTE descriptors common across all XLEN values assign LeafPTE = Executable | Writable | Readable; @@ -177,388 +174,392 @@ module pagetablewalker assign PageTypeM = PageType; -// generate - if (`XLEN == 32) begin - logic [9:0] VPN1, VPN0; + // generate + if (`XLEN == 32) begin + logic [9:0] VPN1, VPN0; - flopenl #(.TYPE(statetype)) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); + flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); - /* -----\/----- EXCLUDED -----\/----- - assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV) && ~HPTWStall; - -----/\----- EXCLUDED -----/\----- */ + flopenl #(.TYPE(statetype)) PreviousWalkerStateReg(clk, reset, 1'b1, WalkerState, IDLE, PreviousWalkerState); + + /* -----\/----- EXCLUDED -----\/----- + assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV) && ~HPTWStall; + -----/\----- EXCLUDED -----/\----- */ - // State transition logic - always_comb begin - PRegEn = 1'b0; - TranslationPAdr = '0; - HPTWRead = 1'b0; - PageTableEntry = '0; - PageType = '0; - DTLBWriteM = '0; - ITLBWriteF = '0; - - WalkerInstrPageFaultF = 1'b0; - WalkerLoadPageFaultM = 1'b0; - WalkerStorePageFaultM = 1'b0; + // State transition logic + always_comb begin + PRegEn = 1'b0; + TranslationPAdr = '0; + HPTWRead = 1'b0; + PageTableEntry = '0; + PageType = '0; + DTLBWriteM = '0; + ITLBWriteF = '0; - case (WalkerState) - IDLE: begin - if (MMUTranslate && SvMode == `SV32) begin // *** Added SvMode - 
NextWalkerState = START; - end else begin - NextWalkerState = IDLE; - end - end + WalkerInstrPageFaultF = 1'b0; + WalkerLoadPageFaultM = 1'b0; + WalkerStorePageFaultM = 1'b0; - START: begin - NextWalkerState = LEVEL1_WDV; - TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; - HPTWRead = 1'b1; - end - - LEVEL1_WDV: begin - TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; - if (HPTWStall) begin - NextWalkerState = LEVEL1_WDV; - end else begin - NextWalkerState = LEVEL1; - PRegEn = 1'b1; - end - end - - LEVEL1: begin - // *** According to the architecture, we should - // fault upon finding a superpage that is misaligned or has 0 - // access bit. The following commented line of code is - // supposed to perform that check. However, it is untested. - if (ValidPTE && LeafPTE && ~BadMegapage) begin - NextWalkerState = LEAF; - PageTableEntry = CurrentPTE; - PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00; // *** not sure about this mux? - DTLBWriteM = DTLBMissMQ; - ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = {2'b00, TranslationVAdrQ[31:0]}; - end - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. - else if (ValidPTE && ~LeafPTE) begin - NextWalkerState = LEVEL0_WDV; - TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; - HPTWRead = 1'b1; - end else begin - NextWalkerState = FAULT; - end - end - - LEVEL0_WDV: begin - TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; - if (HPTWStall) begin - NextWalkerState = LEVEL0_WDV; - end else begin - NextWalkerState = LEVEL0; - PRegEn = 1'b1; - end - end + SelPTW = 1'b1; - LEVEL0: begin - if (ValidPTE & LeafPTE & ~AccessAlert) begin - NextWalkerState = LEAF; - PageTableEntry = CurrentPTE; - PageType = (WalkerState == LEVEL1) ? 
2'b01 : 2'b00; - DTLBWriteM = DTLBMissMQ; - ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = {2'b00, TranslationVAdrQ[31:0]}; - end else begin - NextWalkerState = FAULT; - end - end - - LEAF: begin - NextWalkerState = IDLE; - end - FAULT: begin - NextWalkerState = IDLE; - WalkerInstrPageFaultF = ~DTLBMissMQ; - WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; - WalkerStorePageFaultM = DTLBMissMQ && MemStore; - end - - // Default case should never happen, but is included for linter. - default: NextWalkerState = IDLE; - endcase - end + case (WalkerState) + IDLE: begin + SelPTW = 1'b0; + if (AnyTLBMissM & SvMode == `SV32) begin + NextWalkerState = LEVEL1_SET_ADRE; + end else begin + NextWalkerState = IDLE; + end + end - // A megapage is a Level 1 leaf page. This page must have zero PPN[0]. - assign MegapageMisaligned = |(CurrentPPN[9:0]); - assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme + LEVEL1_SET_ADRE: begin + NextWalkerState = LEVEL1_WDV; + TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; + end - assign VPN1 = TranslationVAdrQ[31:22]; - assign VPN0 = TranslationVAdrQ[21:12]; + LEVEL1_WDV: begin + TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; + HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL1_WDV; + end else begin + NextWalkerState = LEVEL1; + PRegEn = 1'b1; + end + end - + LEVEL1: begin + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. However, it is untested. + if (ValidPTE && LeafPTE && ~BadMegapage) begin + NextWalkerState = LEAF; + TranslationPAdr = {2'b00, TranslationVAdr[31:0]}; + end + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. 
+ else if (ValidPTE && ~LeafPTE) begin + NextWalkerState = LEVEL0_SET_ADRE; + TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = FAULT; + end + end - // Capture page table entry from data cache - // *** may need to delay reading this value until the next clock cycle. - // The clk to q latency of the SRAM in the data cache will be long. - // I cannot see directly using this value. This is no different than - // a load delay hazard. This will require rewriting the walker fsm. - // also need a new signal to save. Should be a mealy output of the fsm - // request followed by ~stall. - flopenr #(32) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE); - //mux2 #(32) ptemux(SavedPTE, MMUReadPTE, PRegEn, CurrentPTE); - assign CurrentPTE = SavedPTE; - assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10]; + LEVEL0_SET_ADRE: begin + NextWalkerState = LEVEL0_WDV; + TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; + end - // Assign outputs to ahblite - // *** Currently truncate address to 32 bits. This must be changed if - // we support larger physical address spaces - assign MMUPAdr = TranslationPAdr[31:0]; + LEVEL0_WDV: begin + TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; + HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL0_WDV; + end else begin + NextWalkerState = LEVEL0; + PRegEn = 1'b1; + end + end - end else begin - - logic [8:0] VPN3, VPN2, VPN1, VPN0; + LEVEL0: begin + if (ValidPTE & LeafPTE & ~AccessAlert) begin + NextWalkerState = LEAF; + TranslationPAdr = {2'b00, TranslationVAdr[31:0]}; + end else begin + NextWalkerState = FAULT; + end + end - logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage; + LEAF: begin + NextWalkerState = IDLE; + PageTableEntry = CurrentPTE; + PageType = (PreviousWalkerState == LEVEL1) ? 2'b01 : 2'b00; // *** not sure about this mux? 
+ DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = {2'b00, TranslationVAdr[31:0]}; + end - flopenl #(.TYPE(statetype)) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); + FAULT: begin + SelPTW = 1'b0; + NextWalkerState = IDLE; + WalkerInstrPageFaultF = ~DTLBMissMQ; + WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; + WalkerStorePageFaultM = DTLBMissMQ && MemStore; + end - /* -----\/----- EXCLUDED -----\/----- - assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV || - WalkerState == LEVEL2_WDV || WalkerState == LEVEL3_WDV) && ~HPTWStall; - -----/\----- EXCLUDED -----/\----- */ + // Default case should never happen, but is included for linter. + default: NextWalkerState = IDLE; + endcase + end - //assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || WalkerState == LEVEL3 || - // WalkerState == LEVEL2 || WalkerState == LEVEL1; - + // A megapage is a Level 1 leaf page. This page must have zero PPN[0]. 
+ assign MegapageMisaligned = |(CurrentPPN[9:0]); + assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme - always_comb begin - PRegEn = 1'b0; - TranslationPAdr = '0; - HPTWRead = 1'b0; - PageTableEntry = '0; - PageType = '0; - DTLBWriteM = '0; - ITLBWriteF = '0; - - WalkerInstrPageFaultF = 1'b0; - WalkerLoadPageFaultM = 1'b0; - WalkerStorePageFaultM = 1'b0; - - case (WalkerState) - IDLE: begin - if (MMUTranslate && (SvMode == `SV48 || SvMode == `SV39)) begin - NextWalkerState = START; - end else begin - NextWalkerState = IDLE; - end - end - - START: begin - if (MMUTranslate && SvMode == `SV48) begin - NextWalkerState = LEVEL3_WDV; - TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; - HPTWRead = 1'b1; - end else if (MMUTranslate && SvMode == `SV39) begin - NextWalkerState = LEVEL2_WDV; - TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000}; - HPTWRead = 1'b1; - end else begin // *** should not get here - NextWalkerState = IDLE; - TranslationPAdr = '0; - end - end - - LEVEL3_WDV: begin - TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; - if (HPTWStall) begin - NextWalkerState = LEVEL3_WDV; - end else begin - NextWalkerState = LEVEL3; - PRegEn = 1'b1; - end - end - - LEVEL3: begin - // *** According to the architecture, we should - // fault upon finding a superpage that is misaligned or has 0 - // access bit. The following commented line of code is - // supposed to perform that check. However, it is untested. - if (ValidPTE && LeafPTE && ~BadTerapage) begin - NextWalkerState = LEAF; - PageTableEntry = CurrentPTE; - PageType = (WalkerState == LEVEL3) ? 2'b11 : // *** not sure about this mux? - ((WalkerState == LEVEL2) ? 2'b10 : - ((WalkerState == LEVEL1) ? 
2'b01 : 2'b00)); - DTLBWriteM = DTLBMissMQ; - ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; - end - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. - else if (ValidPTE && ~LeafPTE) begin - NextWalkerState = LEVEL2_WDV; - TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; - HPTWRead = 1'b1; - end else begin - NextWalkerState = FAULT; - end - - end - - LEVEL2_WDV: begin - TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; - //HPTWRead = 1'b1; - if (HPTWStall) begin - NextWalkerState = LEVEL2_WDV; - end else begin - NextWalkerState = LEVEL2; - PRegEn = 1'b1; - end - end - - LEVEL2: begin - // *** According to the architecture, we should - // fault upon finding a superpage that is misaligned or has 0 - // access bit. The following commented line of code is - // supposed to perform that check. However, it is untested. - if (ValidPTE && LeafPTE && ~BadGigapage) begin - NextWalkerState = LEAF; - PageTableEntry = CurrentPTE; - PageType = (WalkerState == LEVEL3) ? 2'b11 : - ((WalkerState == LEVEL2) ? 2'b10 : - ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); - DTLBWriteM = DTLBMissMQ; - ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; - end - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. 
- else if (ValidPTE && ~LeafPTE) begin - NextWalkerState = LEVEL1_WDV; - TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; - HPTWRead = 1'b1; - end else begin - NextWalkerState = FAULT; - end - - end - - LEVEL1_WDV: begin - TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; - //HPTWRead = 1'b1; - if (HPTWStall) begin - NextWalkerState = LEVEL1_WDV; - end else begin - NextWalkerState = LEVEL1; - PRegEn = 1'b1; - end - end - - LEVEL1: begin - // *** According to the architecture, we should - // fault upon finding a superpage that is misaligned or has 0 - // access bit. The following commented line of code is - // supposed to perform that check. However, it is untested. - if (ValidPTE && LeafPTE && ~BadMegapage) begin - NextWalkerState = LEAF; - PageTableEntry = CurrentPTE; - PageType = (WalkerState == LEVEL3) ? 2'b11 : - ((WalkerState == LEVEL2) ? 2'b10 : - ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); - DTLBWriteM = DTLBMissMQ; - ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; - - end - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. - else if (ValidPTE && ~LeafPTE) begin - NextWalkerState = LEVEL0_WDV; - TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - HPTWRead = 1'b1; - end else begin - NextWalkerState = FAULT; - end - end - - LEVEL0_WDV: begin - TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - if (HPTWStall) begin - NextWalkerState = LEVEL0_WDV; - end else begin - NextWalkerState = LEVEL0; - PRegEn = 1'b1; - end - end - - LEVEL0: begin - if (ValidPTE && LeafPTE && ~AccessAlert) begin - NextWalkerState = LEAF; - PageTableEntry = CurrentPTE; - PageType = (WalkerState == LEVEL3) ? 2'b11 : - ((WalkerState == LEVEL2) ? 2'b10 : - ((WalkerState == LEVEL1) ? 
2'b01 : 2'b00)); - DTLBWriteM = DTLBMissMQ; - ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions - TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; - end else begin - NextWalkerState = FAULT; - end - end - - LEAF: begin - NextWalkerState = IDLE; - end - - FAULT: begin - NextWalkerState = IDLE; - WalkerInstrPageFaultF = ~DTLBMissMQ; - WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; - WalkerStorePageFaultM = DTLBMissMQ && MemStore; - end - - // Default case should never happen - default: begin - NextWalkerState = IDLE; - end - - endcase - end - - // A terapage is a level 3 leaf page. This page must have zero PPN[2], - // zero PPN[1], and zero PPN[0] - assign TerapageMisaligned = |(CurrentPPN[26:0]); - // A gigapage is a Level 2 leaf page. This page must have zero PPN[1] and - // zero PPN[0] - assign GigapageMisaligned = |(CurrentPPN[17:0]); - // A megapage is a Level 1 leaf page. This page must have zero PPN[0]. - assign MegapageMisaligned = |(CurrentPPN[8:0]); - - assign BadTerapage = TerapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme - assign BadGigapage = GigapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme - assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme - - assign VPN3 = TranslationVAdrQ[47:39]; - assign VPN2 = TranslationVAdrQ[38:30]; - assign VPN1 = TranslationVAdrQ[29:21]; - assign VPN0 = TranslationVAdrQ[20:12]; + assign VPN1 = TranslationVAdr[31:22]; + assign VPN0 = TranslationVAdr[21:12]; - // Capture page table entry from ahblite - flopenr #(`XLEN) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE); - //mux2 #(`XLEN) ptemux(SavedPTE, MMUReadPTE, PRegEn, CurrentPTE); - assign CurrentPTE = SavedPTE; - assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10]; - // Assign outputs to ahblite - // *** Currently truncate address to 32 bits. 
This must be changed if - // we support larger physical address spaces - assign MMUPAdr = {{(`XLEN-`PA_BITS){1'b0}}, TranslationPAdr[`PA_BITS-1:0]}; - end + // Capture page table entry from data cache + // *** may need to delay reading this value until the next clock cycle. + // The clk to q latency of the SRAM in the data cache will be long. + // I cannot see directly using this value. This is no different than + // a load delay hazard. This will require rewriting the walker fsm. + // also need a new signal to save. Should be a mealy output of the fsm + // request followed by ~stall. + flopenr #(32) ptereg(clk, reset, PRegEn, HPTWReadPTE, SavedPTE); + //mux2 #(32) ptemux(SavedPTE, HPTWReadPTE, PRegEn, CurrentPTE); + assign CurrentPTE = SavedPTE; + assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10]; + + // Assign outputs to ahblite + // *** Currently truncate address to 32 bits. This must be changed if + // we support larger physical address spaces + assign HPTWPAdrE = TranslationPAdr[31:0]; + + end else begin + + logic [8:0] VPN3, VPN2, VPN1, VPN0; + + logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage; + + flopenl #(.TYPE(statetype)) WalkerStageReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); + + flopenl #(.TYPE(statetype)) PreviousWalkerStateReg(clk, reset, 1'b1, WalkerState, IDLE, PreviousWalkerState); + + /* -----\/----- EXCLUDED -----\/----- + assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV || + WalkerState == LEVEL2_WDV || WalkerState == LEVEL3_WDV) && ~HPTWStall; + -----/\----- EXCLUDED -----/\----- */ + + //assign HPTWRead = (WalkerState == IDLE && HPTWTranslate) || WalkerState == LEVEL3 || + // WalkerState == LEVEL2 || WalkerState == LEVEL1; + + + always_comb begin + PRegEn = 1'b0; + TranslationPAdr = '0; + HPTWRead = 1'b0; + PageTableEntry = '0; + PageType = '0; + DTLBWriteM = '0; + ITLBWriteF = '0; + + WalkerInstrPageFaultF = 1'b0; + WalkerLoadPageFaultM = 1'b0; + WalkerStorePageFaultM = 1'b0; + + SelPTW 
= 1'b1; + + case (WalkerState) + IDLE: begin + SelPTW = 1'b0; + if (AnyTLBMissM & SvMode == `SV48) begin + NextWalkerState = LEVEL3_SET_ADRE; + end else if (AnyTLBMissM & SvMode == `SV39) begin + NextWalkerState = LEVEL2_SET_ADRE; + end else begin + NextWalkerState = IDLE; + end + end + + LEVEL3_SET_ADRE: begin + NextWalkerState = LEVEL3_WDV; + TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; + end + + LEVEL3_WDV: begin + TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; + HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL3_WDV; + end else begin + NextWalkerState = LEVEL3; + PRegEn = 1'b1; + end + end + + LEVEL3: begin + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. However, it is untested. + if (ValidPTE && LeafPTE && ~BadTerapage) begin + NextWalkerState = LEAF; + TranslationPAdr = TranslationVAdr[`PA_BITS-1:0]; + end + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. + else if (ValidPTE && ~LeafPTE) begin + NextWalkerState = LEVEL2_SET_ADRE; + TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; + end else begin + NextWalkerState = FAULT; + end + end + + LEVEL2_SET_ADRE: begin + NextWalkerState = LEVEL2_WDV; + TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; + end + + LEVEL2_WDV: begin + TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; + HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL2_WDV; + end else begin + NextWalkerState = LEVEL2; + PRegEn = 1'b1; + end + end + + LEVEL2: begin + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. 
However, it is untested. + if (ValidPTE && LeafPTE && ~BadGigapage) begin + NextWalkerState = LEAF; + TranslationPAdr = TranslationVAdr[`PA_BITS-1:0]; + end + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. + else if (ValidPTE && ~LeafPTE) begin + NextWalkerState = LEVEL1_SET_ADRE; + TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; + end else begin + NextWalkerState = FAULT; + end + end + + LEVEL1_SET_ADRE: begin + NextWalkerState = LEVEL1_WDV; + TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; + end + + LEVEL1_WDV: begin + TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; + HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL1_WDV; + end else begin + NextWalkerState = LEVEL1; + PRegEn = 1'b1; + end + end + + LEVEL1: begin + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. However, it is untested. + if (ValidPTE && LeafPTE && ~BadMegapage) begin + NextWalkerState = LEAF; + TranslationPAdr = TranslationVAdr[`PA_BITS-1:0]; + + end + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. 
+ else if (ValidPTE && ~LeafPTE) begin + NextWalkerState = LEVEL0_SET_ADRE; + TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; + end else begin + NextWalkerState = FAULT; + end + end + + LEVEL0_SET_ADRE: begin + NextWalkerState = LEVEL0_WDV; + TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; + end + + LEVEL0_WDV: begin + TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; + HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL0_WDV; + end else begin + NextWalkerState = LEVEL0; + PRegEn = 1'b1; + end + end + + LEVEL0: begin + if (ValidPTE && LeafPTE && ~AccessAlert) begin + NextWalkerState = LEAF; + TranslationPAdr = TranslationVAdr[`PA_BITS-1:0]; + end else begin + NextWalkerState = FAULT; + end + end + + LEAF: begin + PageTableEntry = CurrentPTE; + PageType = (PreviousWalkerState == LEVEL3) ? 2'b11 : // *** not sure about this mux? + ((PreviousWalkerState == LEVEL2) ? 2'b10 : + ((PreviousWalkerState == LEVEL1) ? 2'b01 : 2'b00)); + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = TranslationVAdr[`PA_BITS-1:0]; + NextWalkerState = IDLE; + end + + FAULT: begin + SelPTW = 1'b0; + NextWalkerState = IDLE; + WalkerInstrPageFaultF = ~DTLBMissMQ; + WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; + WalkerStorePageFaultM = DTLBMissMQ && MemStore; + end + + // Default case should never happen + default: begin + NextWalkerState = IDLE; + end + + endcase + end + + // A terapage is a level 3 leaf page. This page must have zero PPN[2], + // zero PPN[1], and zero PPN[0] + assign TerapageMisaligned = |(CurrentPPN[26:0]); + // A gigapage is a Level 2 leaf page. This page must have zero PPN[1] and + // zero PPN[0] + assign GigapageMisaligned = |(CurrentPPN[17:0]); + // A megapage is a Level 1 leaf page. This page must have zero PPN[0]. 
+ assign MegapageMisaligned = |(CurrentPPN[8:0]); + + assign BadTerapage = TerapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme + assign BadGigapage = GigapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme + assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme + + assign VPN3 = TranslationVAdr[47:39]; + assign VPN2 = TranslationVAdr[38:30]; + assign VPN1 = TranslationVAdr[29:21]; + assign VPN0 = TranslationVAdr[20:12]; + + + // Capture page table entry from ahblite + flopenr #(`XLEN) ptereg(clk, reset, PRegEn, HPTWReadPTE, SavedPTE); + //mux2 #(`XLEN) ptemux(SavedPTE, HPTWReadPTE, PRegEn, CurrentPTE); + assign CurrentPTE = SavedPTE; + assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10]; + + // *** Major issue. We need the full virtual address here. + // When the TLB's are update it use use the orignal address + // *** Currently truncate address to 32 bits. This must be changed if + // we support larger physical address spaces + assign HPTWPAdrE = {{(`XLEN-`PA_BITS){1'b0}}, TranslationPAdr[`PA_BITS-1:0]}; + end //endgenerate end else begin - assign MMUPAdr = 0; - assign MMUTranslate = 0; + assign HPTWPAdrE = 0; assign HPTWRead = 0; assign WalkerInstrPageFaultF = 0; assign WalkerLoadPageFaultM = 0; assign WalkerStorePageFaultM = 0; + assign SelPTW = 0; end endgenerate diff --git a/wally-pipelined/src/mmu/pmachecker.sv b/wally-pipelined/src/mmu/pmachecker.sv index 86abcb3f6..a95252f3b 100644 --- a/wally-pipelined/src/mmu/pmachecker.sv +++ b/wally-pipelined/src/mmu/pmachecker.sv @@ -61,7 +61,7 @@ module pmachecker ( assign AtomicAllowed = SelRegions[4]; // Detect access faults - assign PMAAccessFault = (~|SelRegions) & AccessRWX; + assign PMAAccessFault = SelRegions[6] & AccessRWX; assign PMAInstrAccessFaultF = ExecuteAccessF && PMAAccessFault; assign PMALoadAccessFaultM = ReadAccessM && PMAAccessFault; assign PMAStoreAccessFaultM = WriteAccessM && PMAAccessFault; diff --git 
a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index 1d541d0cc..d982b9047 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -64,6 +64,8 @@ module privileged ( input logic LoadAccessFaultM, input logic StoreAccessFaultM, + output logic ExceptionM, + output logic PendingInterruptM, output logic IllegalFPUInstrE, output logic [1:0] PrivilegeModeW, output logic [`XLEN-1:0] SATP_REGW, diff --git a/wally-pipelined/src/privileged/trap.sv b/wally-pipelined/src/privileged/trap.sv index 9eec51c26..7462353db 100644 --- a/wally-pipelined/src/privileged/trap.sv +++ b/wally-pipelined/src/privileged/trap.sv @@ -27,23 +27,26 @@ `include "wally-config.vh" module trap ( - input logic clk, reset, - input logic InstrMisalignedFaultM, InstrAccessFaultM, IllegalInstrFaultM, - input logic BreakpointFaultM, LoadMisalignedFaultM, StoreMisalignedFaultM, - input logic LoadAccessFaultM, StoreAccessFaultM, EcallFaultM, InstrPageFaultM, - input logic LoadPageFaultM, StorePageFaultM, - input logic mretM, sretM, uretM, - input logic [1:0] PrivilegeModeW, NextPrivilegeModeM, - input logic [`XLEN-1:0] MEPC_REGW, SEPC_REGW, UEPC_REGW, UTVEC_REGW, STVEC_REGW, MTVEC_REGW, - input logic [11:0] MIP_REGW, MIE_REGW, SIP_REGW, SIE_REGW, - input logic STATUS_MIE, STATUS_SIE, - input logic [`XLEN-1:0] PCM, - input logic [`XLEN-1:0] InstrMisalignedAdrM, MemAdrM, - input logic [31:0] InstrM, - input logic StallW, - input logic InstrValidM, CommittedM, - output logic NonBusTrapM, TrapM, MTrapM, STrapM, UTrapM, RetM, - output logic InterruptM, + input logic clk, reset, + input logic InstrMisalignedFaultM, InstrAccessFaultM, IllegalInstrFaultM, + input logic BreakpointFaultM, LoadMisalignedFaultM, StoreMisalignedFaultM, + input logic LoadAccessFaultM, StoreAccessFaultM, EcallFaultM, InstrPageFaultM, + input logic LoadPageFaultM, StorePageFaultM, + input logic mretM, sretM, uretM, + input logic [1:0] 
PrivilegeModeW, NextPrivilegeModeM, + input logic [`XLEN-1:0] MEPC_REGW, SEPC_REGW, UEPC_REGW, UTVEC_REGW, STVEC_REGW, MTVEC_REGW, + input logic [11:0] MIP_REGW, MIE_REGW, SIP_REGW, SIE_REGW, + input logic STATUS_MIE, STATUS_SIE, + input logic [`XLEN-1:0] PCM, + input logic [`XLEN-1:0] InstrMisalignedAdrM, MemAdrM, + input logic [31:0] InstrM, + input logic StallW, + input logic InstrValidM, CommittedM, + output logic NonBusTrapM, TrapM, MTrapM, STrapM, UTrapM, RetM, + output logic InterruptM, + output logic ExceptionM, + output logic PendingInterruptM, + output logic [`XLEN-1:0] PrivilegedNextPCM, CauseM, NextFaultMtvalM // output logic [11:0] MIP_REGW, SIP_REGW, UIP_REGW, MIE_REGW, SIE_REGW, UIE_REGW, // input logic WriteMIPM, WriteSIPM, WriteUIPM, WriteMIEM, WriteSIEM, WriteUIEM @@ -59,7 +62,10 @@ module trap ( assign MIntGlobalEnM = (PrivilegeModeW != `M_MODE) || STATUS_MIE; // if M ints enabled or lower priv 3.1.9 assign SIntGlobalEnM = (PrivilegeModeW == `U_MODE) || STATUS_SIE; // if S ints enabled or lower priv 3.1.9 assign PendingIntsM = ((MIP_REGW & MIE_REGW) & ({12{MIntGlobalEnM}} & 12'h888)) | ((SIP_REGW & SIE_REGW) & ({12{SIntGlobalEnM}} & 12'h222)); - assign InterruptM = (|PendingIntsM) & InstrValidM & ~CommittedM; + assign PendingInterruptM = (|PendingIntsM) & InstrValidM; + assign InterruptM = PendingInterruptM & ~CommittedM; + assign ExceptionM = BusTrapM | NonBusTrapM; + // interrupt if any sources are pending // & with a M stage valid bit to avoid interrupts from interrupt a nonexistent flushed instruction (in the M stage) // & with ~CommittedM to make sure MEPC isn't chosen so as to rerun the same instr twice diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index f8db959de..98e35d701 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -63,6 +63,7 @@ module wallypipelinedhart // new signals that must connect through DP 
logic MulDivE, W64E; logic CSRReadM, CSRWriteM, PrivilegedM; + logic [1:0] AtomicE; logic [1:0] AtomicM; logic [`XLEN-1:0] SrcAE, SrcBE; logic [`XLEN-1:0] SrcAM; @@ -73,6 +74,7 @@ module wallypipelinedhart logic [`XLEN-1:0] PCTargetE; logic [`XLEN-1:0] CSRReadValW, MulDivResultW; logic [`XLEN-1:0] PrivilegedNextPCM; + logic [1:0] MemRWE; logic [1:0] MemRWM; logic InstrValidM; logic InstrMisalignedFaultM; @@ -89,7 +91,7 @@ module wallypipelinedhart logic DivDoneE; logic DivBusyE; logic RegWriteD; - logic LoadStallD, MulDivStallD, CSRRdStallD; + logic LoadStallD, StoreStallD, MulDivStallD, CSRRdStallD; logic SquashSCM, SquashSCW; // floating point unit signals logic [2:0] FRM_REGW; @@ -125,50 +127,43 @@ module wallypipelinedhart // IMem stalls logic ICacheStallF; - logic DCacheStall; + logic LSUStall; - // bus interface to dmem - logic MemReadM, MemWriteM; - logic [1:0] AtomicMaskedM; + // cpu lsu interface logic [2:0] Funct3M; - logic [`XLEN-1:0] MemAdrM, WriteDataM; - logic [`PA_BITS-1:0] MemPAdrM; + logic [`XLEN-1:0] MemAdrM, MemAdrE, WriteDataM; logic [`XLEN-1:0] ReadDataW; + logic CommittedM; + + // AHB ifu interface logic [`PA_BITS-1:0] InstrPAdrF; logic [`XLEN-1:0] InstrRData; logic InstrReadF; - logic InstrAckF, MemAckW; - logic CommitM, CommittedM; - + logic InstrAckF; + + // AHB LSU interface + logic [`PA_BITS-1:0] DCtoAHBPAdrM; + logic DCtoAHBReadM; + logic DCtoAHBWriteM; + logic DCfromAHBAck; + logic [`XLEN-1:0] DCfromAHBReadData; + logic [`XLEN-1:0] DCtoAHBWriteData; + + logic CommitM; + logic BPPredWrongE; logic BPPredDirWrongM; logic BTBPredPCWrongM; logic RASPredPCWrongM; logic BPPredClassNonCFIWrongM; - - logic [`XLEN-1:0] WriteDatatmpM; - logic [4:0] InstrClassM; - - logic [`XLEN-1:0] HRDATAW; - - // IEU vs HPTW arbitration signals to send to LSU - logic [1:0] MemRWMtoLSU; - logic [2:0] SizeToLSU; - logic [1:0] AtomicMtoLSU; - logic [`XLEN-1:0] MemAdrMtoLSU; - logic [`XLEN-1:0] WriteDataMtoLSU; - logic [`XLEN-1:0] ReadDataWFromLSU; - logic 
CommittedMfromLSU; - logic SquashSCWfromLSU; - logic DataMisalignedMfromLSU; - logic StallWtoLSU; - logic StallWfromLSU; - logic [2:0] SizeFromLSU; logic InstrAccessFaultF; + logic [2:0] DCtoAHBSizeM; + logic ExceptionM; + logic PendingInterruptM; ifu ifu(.InstrInF(InstrRData), @@ -177,43 +172,36 @@ module wallypipelinedhart ieu ieu(.*); // integer execution unit: integer register file, datapath and controller - - // mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); - - lsu lsu(.clk(clk), .reset(reset), .StallM(StallM), .FlushM(FlushM), .StallW(StallW), .FlushW(FlushW), - // connected to arbiter (reconnect to CPU) + // CPU interface .MemRWM(MemRWM), - .Funct3M(Funct3M), - .AtomicM(AtomicM), + .Funct3M(Funct3M), + .Funct7M(InstrM[31:25]), + .AtomicM(AtomicM), + .ExceptionM(ExceptionM), + .PendingInterruptM(PendingInterruptM), .CommittedM(CommittedM), .SquashSCW(SquashSCW), .DataMisalignedM(DataMisalignedM), + .MemAdrE(MemAdrE), .MemAdrM(MemAdrM), .WriteDataM(WriteDataM), .ReadDataW(ReadDataW), // connected to ahb (all stay the same) .CommitM(CommitM), - .MemPAdrM(MemPAdrM), - .MemReadM(MemReadM), - .MemWriteM(MemWriteM), - .AtomicMaskedM(AtomicMaskedM), - .MemAckW(MemAckW), - .HRDATAW(HRDATAW), - .SizeFromLSU(SizeFromLSU), // stays the same - .StallWfromLSU(StallWfromLSU), // stays the same - .DSquashBusAccessM(DSquashBusAccessM), // probalby removed after dcache implemenation? - // currently not connected (but will need to be used for lsu talking to ahb. - .HADDR(HADDR), - .HSIZE(HSIZE), - .HBURST(HBURST), - .HWRITE(HWRITE), + .DCtoAHBPAdrM(DCtoAHBPAdrM), + .DCtoAHBReadM(DCtoAHBReadM), + .DCtoAHBWriteM(DCtoAHBWriteM), + .DCfromAHBAck(DCfromAHBAck), + .DCfromAHBReadData(DCfromAHBReadData), + .DCtoAHBWriteData(DCtoAHBWriteData), + .DCtoAHBSizeM(DCtoAHBSizeM), // connect to csr or privilege and stay the same. 
.PrivilegeModeW(PrivilegeModeW), // connects to csr @@ -235,7 +223,6 @@ module wallypipelinedhart .StoreMisalignedFaultM(StoreMisalignedFaultM), // connects to privilege .StoreAccessFaultM(StoreAccessFaultM), // connects to privilege - // connected to hptw. Move to internal. .PCF(PCF), .ITLBMissF(ITLBMissF), .PageTableEntryF(PageTableEntryF), @@ -247,19 +234,30 @@ module wallypipelinedhart .DTLBHitM(DTLBHitM), // not connected remove - .DCacheStall(DCacheStall)) // change to DCacheStall - ; + .LSUStall(LSUStall)); // change to LSUStall - ahblite ebu( - //.InstrReadF(1'b0), - //.InstrRData(InstrF), // hook up InstrF later - .ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking - .WriteDataM(WriteDataM), - .MemSizeM(SizeFromLSU[1:0]), .UnsignedLoadM(SizeFromLSU[2]), - .Funct7M(InstrM[31:25]), - .HRDATAW(HRDATAW), - .StallW(StallWfromLSU), + + + ahblite ebu(// IFU connections + .InstrPAdrF(InstrPAdrF), + .InstrReadF(InstrReadF), + .InstrRData(InstrRData), + .InstrAckF(InstrAckF), + // LSU connections + .DCtoAHBPAdrM(DCtoAHBPAdrM), // rename to DCtoAHBPAdrM + .DCtoAHBReadM(DCtoAHBReadM), // rename to DCtoAHBReadM + .DCtoAHBWriteM(DCtoAHBWriteM), // rename to DCtoAHBWriteM + .DCtoAHBWriteData(DCtoAHBWriteData), + .DCfromAHBReadData(DCfromAHBReadData), + .DCfromAHBAck(DCfromAHBAck), + // remove these + .MemSizeM(DCtoAHBSizeM[1:0]), // *** depends on XLEN should be removed + .UnsignedLoadM(1'b0), + .Funct7M(7'b0), + .HRDATAW(), + .StallW(1'b0), + .AtomicMaskedM(2'b00), .*); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 30cf419d0..9160e4a43 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -44,14 +44,14 @@ module testbench(); logic [31:0] InstrW; logic [`XLEN-1:0] meminit; - //string tests32mmu[] = '{ - //"rv32mmu/WALLY-MMU-SV32", "3000" - // }; + string tests32mmu[] = '{ + "rv32mmu/WALLY-MMU-SV32", 
"3000" + }; - //string tests64mmu[] = '{ - //"rv64mmu/WALLY-MMU-SV48", "3000", - //"rv64mmu/WALLY-MMU-SV39", "3000" - //}; + string tests64mmu[] = '{ + "rv64mmu/WALLY-MMU-SV48", "3000", + "rv64mmu/WALLY-MMU-SV39", "3000" + }; string tests32f[] = '{ @@ -212,6 +212,7 @@ string tests32f[] = '{ string tests64i[] = '{ //"rv64i/WALLY-PIPELINE-100K", "f7ff0", + //"rv64i/WALLY-LOAD", "11bf0", "rv64i/I-ADD-01", "3000", "rv64i/I-ADDI-01", "3000", "rv64i/I-ADDIW-01", "3000", @@ -282,7 +283,7 @@ string tests32f[] = '{ "rv64i/WALLY-SLLI", "3000", "rv64i/WALLY-SRLI", "3000", "rv64i/WALLY-SRAI", "3000", - "rv64i/WALLY-LOAD", "11bf0", + "rv64i/WALLY-JAL", "4000", "rv64i/WALLY-JALR", "3000", "rv64i/WALLY-STORE", "3000", @@ -509,6 +510,9 @@ string tests32f[] = '{ logic HCLK, HRESETn; logic [`XLEN-1:0] PCW; + logic DCacheFlushDone, DCacheFlushStart; + + logic [`XLEN-1:0] debug; assign debug = dut.uncore.dtim.RAM[536872960]; @@ -531,14 +535,14 @@ string tests32f[] = '{ else if (TESTSPRIV) tests = tests64p; else begin - tests = {tests64p,tests64i,tests64periph}; + tests = {tests64p,tests64i, tests64periph}; if (`C_SUPPORTED) tests = {tests, tests64ic}; else tests = {tests, tests64iNOc}; if (`M_SUPPORTED) tests = {tests, tests64m}; - if (`A_SUPPORTED) tests = {tests, tests64a}; - //if (`MEM_VIRTMEM) tests = {tests, tests64mmu}; + //if (`A_SUPPORTED) tests = {tests, tests64a}; if (`F_SUPPORTED) tests = {tests64f, tests}; if (`D_SUPPORTED) tests = {tests64d, tests}; + //if (`MEM_VIRTMEM) tests = {tests64mmu, tests}; end //tests = {tests64a, tests}; end else begin // RV32 @@ -552,7 +556,7 @@ string tests32f[] = '{ if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic}; else tests = {tests, tests32iNOc}; if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m}; - if (`A_SUPPORTED) tests = {tests, tests32a}; + //if (`A_SUPPORTED) tests = {tests, tests32a}; if (`F_SUPPORTED) tests = {tests32f, tests}; //if (`MEM_VIRTMEM) tests = {tests, tests32mmu}; end @@ -621,10 +625,17 @@ string tests32f[] = '{ 
// check results always @(negedge clk) begin +/* -----\/----- EXCLUDED -----\/----- if (dut.hart.priv.EcallFaultM && - (dut.hart.ieu.dp.regf.rf[3] == 1 || (dut.hart.ieu.dp.regf.we3 && dut.hart.ieu.dp.regf.a3 == 3 && dut.hart.ieu.dp.regf.wd3 == 1))) begin + (dut.hart.ieu.dp.regf.rf[3] == 1 || + (dut.hart.ieu.dp.regf.we3 && + dut.hart.ieu.dp.regf.a3 == 3 && + dut.hart.ieu.dp.regf.wd3 == 1))) begin + -----/\----- EXCLUDED -----/\----- */ + if (DCacheFlushDone) begin $display("Code ended with ecall with gp = 1"); - #60; // give time for instructions in pipeline to finish + + #600; // give time for instructions in pipeline to finish // clear signature to prevent contamination from previous tests for(i=0; i>(1+`XLEN/32):(`TIM_RANGE+`TIM_BASE)>>1+(`XLEN/32)]; + + generate + for(index = 0; index < numlines; index++) begin + for(way = 0; way < numways; way++) begin + for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin + copyShadow #(.tagstart(tagstart), + .logblockbytelen(logblockbytelen)) + copyShadow(.clk, + .start, + .tag(testbench.dut.hart.lsu.dcache.CacheWays[way].MemWay.CacheTagMem.StoredData[index]), + .valid(testbench.dut.hart.lsu.dcache.CacheWays[way].MemWay.ValidBits[index]), + .dirty(testbench.dut.hart.lsu.dcache.CacheWays[way].MemWay.DirtyBits[index]), + .data(testbench.dut.hart.lsu.dcache.CacheWays[way].MemWay.word[cacheWord].CacheDataMem.StoredData[index]), + .index(index), + .cacheWord(cacheWord), + .CacheData(CacheData[way][index][cacheWord]), + .CacheAdr(CacheAdr[way][index][cacheWord]), + .CacheTag(CacheTag[way][index][cacheWord]), + .CacheValid(CacheValid[way][index][cacheWord]), + .CacheDirty(CacheDirty[way][index][cacheWord])); + end + end + end + endgenerate + +/* -----\/----- EXCLUDED -----\/----- + .Adr(((testbench.dut.hart.lsu.dcache.CacheWays[way].MemWay.CacheTagMem.StoredData[index] << tagstart) + + (index << logblockbytelen) + (cacheWord << $clog2(`XLEN/8)))), + -----/\----- EXCLUDED -----/\----- */ + + integer i, j, k; + + always 
@(posedge clk) begin
+    // Testbench-only write-back: when start is asserted, copy every cache word
+    // that is both valid and dirty into ShadowRAM at its word index.
+    // The two #1 delays postpone the copy until after the clock edge;
+    // NOTE(review): presumably this lets the copyShadow outputs captured on
+    // this same edge settle before they are read -- confirm.
+    if (start) begin #1
+      #1
+      for(i = 0; i < numlines; i++) begin
+        for(j = 0; j < numways; j++) begin
+          for(k = 0; k < numwords; k++) begin
+            if (CacheValid[j][i][k] && CacheDirty[j][i][k]) begin
+              // CacheAdr is a byte address; the shift drops the byte-offset
+              // bits to form the ShadowRAM word index.
+              ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
+            end
+          end
+        end
+      end
+    end
+  end
+
+
+  // done is start delayed through one flop.
+  flop #(1) doneReg(.clk(clk),
+                    .d(start),
+                    .q(done));
+
+endmodule
+
+// copyShadow: samples one word of one cache line (tag, valid, dirty, data) on
+// a clock edge where start is asserted, and reconstructs the byte address of
+// that word from the tag, the line index and the word position.
+//
+// Parameters:
+//   tagstart        - bit position at which the tag begins in the address
+//   logblockbytelen - shift applied to index when forming the address
+// Inputs:
+//   clk, start      - outputs are captured only on posedge clk with start high
+//   tag             - stored tag bits [`PA_BITS-1:tagstart]
+//   valid, dirty    - status bits of the line
+//   data            - the stored data word
+//   index           - line index within the cache
+//   cacheWord       - word position within the line
+// Outputs:
+//   CacheData, CacheTag, CacheValid, CacheDirty - registered copies of inputs
+//   CacheAdr        - (tag << tagstart) + (index << logblockbytelen)
+//                     + (cacheWord << $clog2(`XLEN/8))
+module copyShadow
+  #(parameter tagstart, logblockbytelen)
+  (input logic                        clk,
+   input logic                        start,
+   input logic [`PA_BITS-1:tagstart]  tag,
+   input logic                        valid, dirty,
+   input logic [`XLEN-1:0]            data,
+   input logic [32-1:0]               index,
+   input logic [32-1:0]               cacheWord,
+   output logic [`XLEN-1:0]           CacheData,
+   output logic [`PA_BITS-1:0]        CacheAdr,
+   output logic [`XLEN-1:0]           CacheTag,
+   output logic                       CacheValid,
+   output logic                       CacheDirty);
+
+
+  // Non-blocking assignments: these are registers in an always_ff block, so
+  // their updates must not race with other processes triggered by the same
+  // clock edge.
+  always_ff @(posedge clk) begin
+    if(start) begin
+      CacheTag <= tag;
+      CacheValid <= valid;
+      CacheDirty <= dirty;
+      CacheData <= data;
+      CacheAdr <= (tag << tagstart) + (index << logblockbytelen) + (cacheWord << $clog2(`XLEN/8));
+    end
+  end
+
+endmodule
+