diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index 7f08e56b3..5149a2539 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -40,7 +40,7 @@ vsim workopt view wave -- display input and output signals as hexidecimal values -do ./wave-dos/peripheral-waves.do +do ./wave-dos/cache-waves.do -- Set Wave Output Items TreeUpdate [SetDefaultTree] diff --git a/wally-pipelined/regression/wave-dos/ahb-waves.do b/wally-pipelined/regression/wave-dos/ahb-waves.do index f043d779e..263693d74 100644 --- a/wally-pipelined/regression/wave-dos/ahb-waves.do +++ b/wally-pipelined/regression/wave-dos/ahb-waves.do @@ -4,7 +4,7 @@ add wave -divider #add wave /testbench/dut/hart/ebu/IReadF add wave /testbench/dut/hart/DataStall -add wave /testbench/dut/hart/InstrStall +add wave /testbench/dut/hart/ICacheStallF add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallD add wave /testbench/dut/hart/StallE @@ -19,16 +19,8 @@ add wave -divider add wave -hex /testbench/dut/hart/ifu/PCF add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/dut/hart/ifu/InstrD - add wave /testbench/InstrDName add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD -add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD -add wave -divider -add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF -add wave /testbench/dut/hart/ifu/ic/DelayF -add wave /testbench/dut/hart/ifu/ic/DelaySideF -add wave /testbench/dut/hart/ifu/ic/DelayD -add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD add wave -divider add wave -hex /testbench/dut/hart/ifu/PCE @@ -59,7 +51,6 @@ add wave -hex /testbench/dut/hart/ebu/HRDATA add wave -hex /testbench/dut/hart/ebu/HWRITE add wave -hex /testbench/dut/hart/ebu/HWDATA add wave -hex /testbench/dut/hart/ebu/CaptureDataM -add wave -hex /testbench/dut/hart/ebu/InstrStall add wave -divider add wave -hex /testbench/dut/uncore/dtim/* diff --git 
a/wally-pipelined/regression/wave-dos/cache-waves.do b/wally-pipelined/regression/wave-dos/cache-waves.do new file mode 100644 index 000000000..c7b32e1ef --- /dev/null +++ b/wally-pipelined/regression/wave-dos/cache-waves.do @@ -0,0 +1,84 @@ +add wave /testbench/clk +add wave /testbench/reset +add wave -divider + +#add wave /testbench/dut/hart/ebu/IReadF +add wave /testbench/dut/hart/DataStall +add wave /testbench/dut/hart/ICacheStallF +add wave /testbench/dut/hart/StallF +add wave /testbench/dut/hart/StallD +add wave /testbench/dut/hart/StallE +add wave /testbench/dut/hart/StallM +add wave /testbench/dut/hart/StallW +add wave /testbench/dut/hart/FlushD +add wave /testbench/dut/hart/FlushE +add wave /testbench/dut/hart/FlushM +add wave /testbench/dut/hart/FlushW + +add wave -divider +add wave -hex /testbench/dut/hart/ifu/PCF +add wave -hex /testbench/dut/hart/ifu/PCD +add wave -hex /testbench/dut/hart/ifu/InstrD + +add wave /testbench/InstrDName +add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD +add wave -hex /testbench/dut/hart/ifu/ic/controller/AlignedInstrRawD +add wave -divider +add wave -hex /testbench/dut/hart/ifu/ic/controller/FetchState +add wave -hex /testbench/dut/hart/ifu/ic/controller/FetchWordNum +add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWriteEnable +add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF +add wave -hex /testbench/dut/hart/ifu/ic/InstrAckF +add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWriteData +add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWritePAdr +add wave -hex /testbench/dut/hart/ifu/ic/controller/MisalignedState +add wave -hex /testbench/dut/hart/ifu/ic/controller/MisalignedHalfInstrF +add wave -divider + + +add wave -hex /testbench/dut/hart/ifu/PCE +add wave -hex /testbench/dut/hart/ifu/InstrE +add wave /testbench/InstrEName +add wave -hex /testbench/dut/hart/ieu/dp/SrcAE +add wave -hex /testbench/dut/hart/ieu/dp/SrcBE +add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE +#add 
wave /testbench/dut/hart/ieu/dp/PCSrcE +add wave -divider + +add wave -hex /testbench/dut/hart/ifu/PCM +add wave -hex /testbench/dut/hart/ifu/InstrM +add wave /testbench/InstrMName +add wave /testbench/dut/uncore/dtim/memwrite +add wave -hex /testbench/dut/uncore/HADDR +add wave -hex /testbench/dut/uncore/HWDATA +add wave -divider + +add wave -hex /testbench/dut/hart/ebu/MemReadM +add wave -hex /testbench/dut/hart/ebu/InstrReadF +add wave -hex /testbench/dut/hart/ebu/BusState +add wave -hex /testbench/dut/hart/ebu/NextBusState +add wave -hex /testbench/dut/hart/ebu/HADDR +add wave -hex /testbench/dut/hart/ebu/HREADY +add wave -hex /testbench/dut/hart/ebu/HTRANS +add wave -hex /testbench/dut/hart/ebu/HRDATA +add wave -hex /testbench/dut/hart/ebu/HWRITE +add wave -hex /testbench/dut/hart/ebu/HWDATA +add wave -hex /testbench/dut/hart/ebu/CaptureDataM +add wave -divider + +add wave -hex /testbench/dut/uncore/dtim/* +add wave -divider + +add wave -hex /testbench/dut/hart/ifu/PCW +add wave -hex /testbench/dut/hart/ifu/InstrW +add wave /testbench/InstrWName +add wave /testbench/dut/hart/ieu/dp/RegWriteW +add wave -hex /testbench/dut/hart/ebu/ReadDataW +add wave -hex /testbench/dut/hart/ieu/dp/ResultW +add wave -hex /testbench/dut/hart/ieu/dp/RdW +add wave -divider + +add wave -hex /testbench/dut/uncore/dtim/* +add wave -divider + +add wave -hex -r /testbench/* diff --git a/wally-pipelined/regression/wave-dos/default-waves.do b/wally-pipelined/regression/wave-dos/default-waves.do index 76e1c168a..3e26c0ccc 100644 --- a/wally-pipelined/regression/wave-dos/default-waves.do +++ b/wally-pipelined/regression/wave-dos/default-waves.do @@ -6,7 +6,7 @@ add wave /testbench/reset add wave -divider #add wave /testbench/dut/hart/ebu/IReadF add wave /testbench/dut/hart/DataStall -add wave /testbench/dut/hart/InstrStall +add wave /testbench/dut/hart/ICacheStallF add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallD add wave /testbench/dut/hart/StallE @@ -23,11 +23,6 @@ 
add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/dut/hart/ifu/InstrD add wave /testbench/InstrDName add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD -add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD -add wave /testbench/dut/hart/ifu/ic/DelayF -add wave /testbench/dut/hart/ifu/ic/DelaySideF -add wave /testbench/dut/hart/ifu/ic/DelayD -add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD add wave -divider add wave -hex /testbench/dut/hart/ifu/PCE add wave -hex /testbench/dut/hart/ifu/InstrE diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 470cc599a..7c61d4682 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -1,4 +1,5 @@ onerror {resume} +quietly virtual function -install /testbench/dut/hart/ifu/icache/cachemem -env /testbench/dut/hart/ifu/icache/cachemem { &{/testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr[4], /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr[3], /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr[2], /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr[1], /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr[0] }} offset quietly WaveActivateNextPane {} 0 add wave -noupdate /testbench/clk add wave -noupdate /testbench/reset @@ -21,14 +22,13 @@ add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/RetM -add wave -noupdate -expand -group HDU -group hazards 
/testbench/dut/hart/hzu/TrapM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/InstrStall -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/DataStall -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/MulDivStallD +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/DataStall +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE @@ -39,11 +39,6 @@ add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbe add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW -add wave -noupdate /testbench/dut/hart/hzu/StallFCause_Q -add wave -noupdate /testbench/dut/hart/hzu/StallDCause_Q -add wave -noupdate /testbench/dut/hart/hzu/StallECause_Q -add wave -noupdate /testbench/dut/hart/hzu/StallMCause_Q -add 
wave -noupdate /testbench/dut/hart/hzu/StallWCause_Q add wave -noupdate -group Bpred -expand -group direction -divider Update add wave -noupdate -group Bpred -expand -group direction /testbench/dut/hart/ifu/bpred/Predictor/DirPredictor/UpdatePC add wave -noupdate -group Bpred -expand -group direction /testbench/dut/hart/ifu/bpred/Predictor/DirPredictor/UpdateEN @@ -65,51 +60,54 @@ add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/BPPredWrongE add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrM -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCF -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredPCF -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext0F -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext1F -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/SelBPPredF -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredWrongE -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PrivilegedChangePCM +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredPCF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext0F +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext1F +add wave 
-noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/SelBPPredF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredWrongE +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PrivilegedChangePCM add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ifu/InstrD add wave -noupdate -group {Decode Stage} /testbench/InstrDName add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/c/RegWriteD add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/RdD add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs1D add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs2D -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/rf -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/a1 -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/a2 -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/a3 -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/rd1 -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/rd2 -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/we3 -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/wd3 -add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ALUResultW -add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ReadDataW -add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW -add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW -add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a -add wave -noupdate -expand -group alu 
/testbench/dut/hart/ieu/dp/alu/b -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags -add wave -noupdate -expand -group alu -divider internals -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rf +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a1 +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a2 +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a3 +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rd1 +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rd2 +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/we3 +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/wd3 +add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ALUResultW +add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ReadDataW +add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW +add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW +add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/b +add wave -noupdate -group alu 
/testbench/dut/hart/ieu/dp/alu/alucontrol +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags +add wave -noupdate -group alu -divider internals +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu add wave -noupdate /testbench/InstrFName -add wave -noupdate -expand -group dcache /testbench/dut/hart/MemAdrM -add wave -noupdate -expand -group dcache /testbench/dut/hart/MemPAdrM -add wave -noupdate -expand -group dcache /testbench/dut/hart/WriteDataM -add wave -noupdate -expand -group dcache /testbench/dut/hart/dmem/MemRWM +add wave -noupdate -expand -group {dcache memory} /testbench/dut/hart/dmem/MemReadM +add wave -noupdate -expand -group {dcache memory} /testbench/dut/hart/dmem/MemWriteM +add wave -noupdate -expand -group {dcache memory} /testbench/dut/hart/dmem/MemAckW +add wave -noupdate -group dcache /testbench/dut/hart/MemAdrM +add wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM +add wave -noupdate -group dcache /testbench/dut/hart/WriteDataM +add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemRWM add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E @@ -128,6 +126,7 @@ add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/ALURe add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/SrcAE add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/SrcBE add wave -noupdate /testbench/dut/hart/ieu/dp/ALUResultM +add wave 
-noupdate -expand -group PCS /testbench/dut/hart/ifu/PCNextF add wave -noupdate -expand -group PCS /testbench/dut/hart/PCF add wave -noupdate -expand -group PCS /testbench/dut/hart/ifu/PCD add wave -noupdate -expand -group PCS /testbench/dut/hart/PCE @@ -148,35 +147,95 @@ add wave -noupdate -group {function radix debug} /testbench/functionRadix/functi add wave -noupdate -group {function radix debug} /testbench/functionRadix/function_radix/FunctionAddr add wave -noupdate -group {function radix debug} /testbench/functionRadix/function_radix/ProgramAddrIndex add wave -noupdate -group {function radix debug} /testbench/functionRadix/function_radix/FunctionName -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/InstrD -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/SrcAE -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/SrcBE -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/Funct3E -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/MulDivE -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/W64E -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/StallM -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/StallW -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/FlushM -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/FlushW -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/MulDivResultW -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/genblk1/div/start -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/DivDoneE -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/DivBusyE -add wave -noupdate /testbench/dut/hart/mdu/genblk1/gclk -add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/fsm1/CURRENT_STATE -add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/N -add wave -noupdate -expand -group divider 
/testbench/dut/hart/mdu/genblk1/div/D -add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/Q -add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/rem0 -add wave -noupdate /testbench/dut/hart/MulDivResultW -add wave -noupdate /testbench/dut/hart/mdu/genblk1/PrelimResultE -add wave -noupdate /testbench/dut/hart/mdu/Funct3E -add wave -noupdate /testbench/dut/hart/mdu/genblk1/QuotE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/InstrD +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/SrcAE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/SrcBE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/Funct3E +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/W64E +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/StallM +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/StallW +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushM +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushW +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivResultW +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/genblk1/div/start +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivDoneE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivBusyE +add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/fsm1/CURRENT_STATE +add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N +add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/D +add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/Q +add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/rem0 +add wave -noupdate -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit +add wave -noupdate -group icache 
-expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/cachemem/OldReadPAdr +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn +add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/AHBByteLength +add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/AHBOFFETWIDTH +add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/BlockByteLength +add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/OFFSETWIDTH +add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/WORDSPERLINE +add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/LOGWPL +add wave -noupdate -group icache -group 
parameters /testbench/dut/hart/ifu/icache/controller/LINESIZE +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWritePAdr +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValid +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/ReadTag +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataTag +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadAddr +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/ReadPAdr +add wave -noupdate -group icache -expand 
-group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WritePAdr +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteSet +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteTag +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData +add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF +add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD +add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD +add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCNextPF +add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF +add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF +add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPFinalF +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/BusState +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HCLK +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HRDATA +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HREADY +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HRESP +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HADDR +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HWDATA +add wave -noupdate 
-expand -group AHB /testbench/dut/hart/ebu/HWRITE +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HSIZE +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HBURST +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HPROT +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HTRANS +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HMASTLOCK +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HADDRD +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HSIZED +add wave -noupdate -expand -group AHB /testbench/dut/hart/ebu/HWRITED TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 2} {128433 ns} 0} +WaveRestoreCursors {{Cursor 2} {9808206 ns} 0} {{Cursor 3} {9807791 ns} 0} {{Cursor 4} {85 ns} 0} quietly wave cursor active 1 configure wave -namecolwidth 250 -configure wave -valuecolwidth 229 +configure wave -valuecolwidth 513 configure wave -justifyvalue left configure wave -signalnamewidth 1 configure wave -snapdistance 10 @@ -189,4 +248,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {128007 ns} {128663 ns} +WaveRestoreZoom {0 ns} {1829700 ns} diff --git a/wally-pipelined/src/cache/cache-sram.sv b/wally-pipelined/src/cache/cache-sram.sv new file mode 100644 index 000000000..0ba0efa57 --- /dev/null +++ b/wally-pipelined/src/cache/cache-sram.sv @@ -0,0 +1,22 @@ +// Depth is number of bits in one "word" of the memory, width is number of such words +module Sram1Read1Write #(parameter DEPTH=128, WIDTH=256) ( + input logic clk, + // port 1 is read only + input logic [$clog2(WIDTH)-1:0] ReadAddr, + output logic [DEPTH-1:0] ReadData, + + // port 2 is write only + input logic [$clog2(WIDTH)-1:0] WriteAddr, + input logic [DEPTH-1:0] WriteData, + input logic WriteEnable +); + + logic [WIDTH-1:0][DEPTH-1:0] StoredData; + + always_ff @(posedge clk) begin + ReadData <= StoredData[ReadAddr]; + if (WriteEnable) begin + 
StoredData[WriteAddr] <= WriteData; + end + end +endmodule diff --git a/wally-pipelined/src/cache/dmapped.sv b/wally-pipelined/src/cache/dmapped.sv index f9d2bc8e8..34864d393 100644 --- a/wally-pipelined/src/cache/dmapped.sv +++ b/wally-pipelined/src/cache/dmapped.sv @@ -4,8 +4,7 @@ // Written: jaallen@g.hmc.edu 2021-03-23 // Modified: // -// Purpose: An implementation of a direct-mapped cache memory -// This cache is read-only, so "write"s to the memory are loading new data +// Purpose: An implementation of a direct-mapped cache memory, with read-only and write-through versions // // A component of the Wally configurable RISC-V project. // @@ -26,10 +25,12 @@ `include "wally-config.vh" -module rodirectmappedmem #(parameter LINESIZE = 256, parameter NUMLINES = 512, parameter WORDSIZE = `XLEN) ( +// Read-only direct-mapped memory +module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) ( // Pipeline stuff input logic clk, input logic reset, + input logic stall, // If flush is high, invalidate the entire cache input logic flush, // Select which address to read (broken for efficiency's sake) @@ -44,50 +45,312 @@ module rodirectmappedmem #(parameter LINESIZE = 256, parameter NUMLINES = 512, p output logic DataValid ); - localparam integer SETWIDTH = $clog2(NUMLINES); - localparam integer OFFSETWIDTH = $clog2(LINESIZE/8); - localparam integer TAGWIDTH = `XLEN-SETWIDTH-OFFSETWIDTH; + // Various compile-time constants + localparam integer WORDWIDTH = $clog2(WORDSIZE/8); + localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE); + localparam integer SETWIDTH = $clog2(NUMLINES); + localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH; - logic [NUMLINES-1:0][WORDSIZE-1:0] LineOutputs; - logic [NUMLINES-1:0] ValidOutputs; - logic [NUMLINES-1:0][TAGWIDTH-1:0] TagOutputs; - logic [OFFSETWIDTH-1:0] WordSelect; - logic [`XLEN-1:0] ReadPAdr; - logic [SETWIDTH-1:0] ReadSet, WriteSet; - logic [TAGWIDTH-1:0] 
ReadTag, WriteTag; + localparam integer OFFSETBEGIN = WORDWIDTH; + localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1; + localparam integer SETBEGIN = OFFSETEND+1; + localparam integer SETEND = SETBEGIN + SETWIDTH - 1; + localparam integer TAGBEGIN = SETEND + 1; + localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1; - // Swizzle bits to get the offset, set, and tag out of the read and write addresses + // Machinery to read from and write to the correct addresses in memory + logic [`XLEN-1:0] ReadPAdr; + logic [`XLEN-1:0] OldReadPAdr; + logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset; + logic [SETWIDTH-1:0] ReadSet, WriteSet; + logic [TAGWIDTH-1:0] ReadTag, WriteTag; + logic [LINESIZE-1:0] ReadLine; + logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed; + + // Machinery to check if a given read is valid and is the desired value + logic [TAGWIDTH-1:0] DataTag; + logic [NUMLINES-1:0] ValidOut; + logic DataValidBit; + + flopenr #(`XLEN) ReadPAdrFlop(clk, reset, ~stall, ReadPAdr, OldReadPAdr); + + // Assign the read and write addresses in cache memory always_comb begin - // Read address - WordSelect = ReadLowerAdr[OFFSETWIDTH-1:0]; + ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN]; ReadPAdr = {ReadUpperPAdr, ReadLowerAdr}; - ReadSet = ReadPAdr[SETWIDTH+OFFSETWIDTH-1:OFFSETWIDTH]; - ReadTag = ReadPAdr[`XLEN-1:SETWIDTH+OFFSETWIDTH]; - // Write address - WriteSet = WritePAdr[SETWIDTH+OFFSETWIDTH-1:OFFSETWIDTH]; - WriteTag = WritePAdr[`XLEN-1:SETWIDTH+OFFSETWIDTH]; + ReadSet = ReadPAdr[SETEND:SETBEGIN]; + ReadTag = OldReadPAdr[TAGEND:TAGBEGIN]; + + WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN]; + WriteSet = WritePAdr[SETEND:SETBEGIN]; + WriteTag = WritePAdr[TAGEND:TAGBEGIN]; end + // Depth is number of bits in one "word" of the memory, width is number of such words + Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem ( + .*, + .ReadAddr(ReadSet), + .ReadData(ReadLine), + .WriteAddr(WriteSet), + .WriteData(WriteLine) + ); + Sram1Read1Write 
#(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags ( + .*, + .ReadAddr(ReadSet), + .ReadData(DataTag), + .WriteAddr(WriteSet), + .WriteData(WriteTag) + ); + + // Pick the right bits coming out the read line + assign DataWord = ReadLineTransformed[ReadOffset]; genvar i; generate - for (i=0; i < NUMLINES; i++) begin - rocacheline #(LINESIZE, TAGWIDTH, WORDSIZE) lines ( - .*, - .WriteEnable(WriteEnable & (WriteSet == i)), - .WriteData(WriteLine), - .WriteTag(WriteTag), - .DataWord(LineOutputs[i]), - .DataTag(TagOutputs[i]), - .DataValid(ValidOutputs[i]) - ); + for (i=0; i < LINESIZE/WORDSIZE; i++) begin + assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; end endgenerate - // Get the data and valid out of the lines - always_comb begin - DataWord = LineOutputs[ReadSet]; - DataValid = ValidOutputs[ReadSet] & (TagOutputs[ReadSet] == ReadTag); + // Correctly handle the valid bits + always_ff @(posedge clk, posedge reset) begin + if (reset || flush) begin + ValidOut <= {NUMLINES{1'b0}}; + end else begin + if (WriteEnable) begin + ValidOut[WriteSet] <= 1; + end + end + DataValidBit <= ValidOut[ReadSet]; end - + assign DataValid = DataValidBit && (DataTag == ReadTag); endmodule +module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) ( + // Pipeline stuff + input logic clk, + input logic reset, + input logic re, + // If flush is high, invalidate the entire cache + input logic flush, + // Select which address to read (broken for efficiency's sake) + input logic [`XLEN-1:12] ReadUpperPAdr, + input logic [11:0] ReadLowerAdr, + // Write new data to the cache + input logic WriteEnable, + input logic [LINESIZE-1:0] WriteLine, + input logic [`XLEN-1:0] WritePAdr, + // Output the word, as well as if it is valid + output logic [31:0] DataWord, // *** was WORDSIZE-1 + output logic DataValid +); + + // Various compile-time constants + localparam integer WORDWIDTH = $clog2(WORDSIZE/8); + localparam integer OFFSETWIDTH = 
$clog2(LINESIZE/WORDSIZE); + localparam integer SETWIDTH = $clog2(NUMLINES); + localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH; + + localparam integer OFFSETBEGIN = WORDWIDTH; + localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1; + localparam integer SETBEGIN = OFFSETEND+1; + localparam integer SETEND = SETBEGIN + SETWIDTH - 1; + localparam integer TAGBEGIN = SETEND + 1; + localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1; + + // Machinery to read from and write to the correct addresses in memory + logic [`XLEN-1:0] ReadPAdr; + logic [`XLEN-1:0] OldReadPAdr; + logic [OFFSETWIDTH-1:0] ReadOffset, WriteOffset; + logic [SETWIDTH-1:0] ReadSet, WriteSet; + logic [TAGWIDTH-1:0] ReadTag, WriteTag; + logic [LINESIZE-1:0] ReadLine; + logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed; + + // Machinery to check if a given read is valid and is the desired value + logic [TAGWIDTH-1:0] DataTag; + logic [NUMLINES-1:0] ValidOut; + logic DataValidBit; + + flopenr #(`XLEN) ReadPAdrFlop(clk, reset, re, ReadPAdr, OldReadPAdr); + + // Assign the read and write addresses in cache memory + always_comb begin + ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN]; + ReadPAdr = {ReadUpperPAdr, ReadLowerAdr}; + ReadSet = ReadPAdr[SETEND:SETBEGIN]; + ReadTag = OldReadPAdr[TAGEND:TAGBEGIN]; + + WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN]; + WriteSet = WritePAdr[SETEND:SETBEGIN]; + WriteTag = WritePAdr[TAGEND:TAGBEGIN]; + end + + // Depth is number of bits in one "word" of the memory, width is number of such words + Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem ( + .*, + .ReadAddr(ReadSet), + .ReadData(ReadLine), + .WriteAddr(WriteSet), + .WriteData(WriteLine) + ); + Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags ( + .*, + .ReadAddr(ReadSet), + .ReadData(DataTag), + .WriteAddr(WriteSet), + .WriteData(WriteTag) + ); + + // Pick the right bits coming out the read line + //assign DataWord = 
ReadLineTransformed[ReadOffset]; + //logic [31:0] tempRD; + always_comb begin + case (OldReadPAdr[4:1]) + 0: DataWord = ReadLine[31:0]; + 1: DataWord = ReadLine[47:16]; + 2: DataWord = ReadLine[63:32]; + 3: DataWord = ReadLine[79:48]; + + 4: DataWord = ReadLine[95:64]; + 5: DataWord = ReadLine[111:80]; + 6: DataWord = ReadLine[127:96]; + 7: DataWord = ReadLine[143:112]; + + 8: DataWord = ReadLine[159:128]; + 9: DataWord = ReadLine[175:144]; + 10: DataWord = ReadLine[191:160]; + 11: DataWord = ReadLine[207:176]; + + 12: DataWord = ReadLine[223:192]; + 13: DataWord = ReadLine[239:208]; + 14: DataWord = ReadLine[255:224]; + 15: DataWord = {16'b0, ReadLine[255:240]}; + endcase + end + genvar i; + generate + for (i=0; i < LINESIZE/WORDSIZE; i++) begin + assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; + end + endgenerate + + // Correctly handle the valid bits + always_ff @(posedge clk, posedge reset) begin + if (reset || flush) begin + ValidOut <= {NUMLINES{1'b0}}; + end else begin + if (WriteEnable) begin + ValidOut[WriteSet] <= 1; + end + end + DataValidBit <= ValidOut[ReadSet]; + end + assign DataValid = DataValidBit && (DataTag == ReadTag); +endmodule + +// Write-through direct-mapped memory +module wtdirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) ( + // Pipeline stuff + input logic clk, + input logic reset, + input logic stall, + // If flush is high, invalidate the entire cache + input logic flush, + // Select which address to read (broken for efficiency's sake) + input logic [`XLEN-1:12] ReadUpperPAdr, + input logic [11:0] ReadLowerAdr, + // Load new data into the cache (from main memory) + input logic LoadEnable, + input logic [LINESIZE-1:0] LoadLine, + input logic [`XLEN-1:0] LoadPAdr, + // Write data to the cache (like from a store instruction) + input logic WriteEnable, + input logic [WORDSIZE-1:0] WriteWord, + input logic [`XLEN-1:0] WritePAdr, + input logic [1:0] WriteSize, // Specify 
size of the write (non-written bits should be preserved) + // Output the word, as well as if it is valid + output logic [WORDSIZE-1:0] DataWord, + output logic DataValid +); + + // Various compile-time constants + localparam integer WORDWIDTH = $clog2(WORDSIZE/8); + localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE); + localparam integer SETWIDTH = $clog2(NUMLINES); + localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH; + + localparam integer OFFSETBEGIN = WORDWIDTH; + localparam integer OFFSETEND = OFFSETBEGIN+OFFSETWIDTH-1; + localparam integer SETBEGIN = OFFSETEND+1; + localparam integer SETEND = SETBEGIN + SETWIDTH - 1; + localparam integer TAGBEGIN = SETEND + 1; + localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1; + + // Machinery to read from and write to the correct addresses in memory + logic [`XLEN-1:0] ReadPAdr; + logic [`XLEN-1:0] OldReadPAdr; + logic [OFFSETWIDTH-1:0] ReadOffset, LoadOffset; + logic [SETWIDTH-1:0] ReadSet, LoadSet; + logic [TAGWIDTH-1:0] ReadTag, LoadTag; + logic [LINESIZE-1:0] ReadLine; + logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed; + + // Machinery to check if a given read is valid and is the desired value + logic [TAGWIDTH-1:0] DataTag; + logic [NUMLINES-1:0] ValidOut; + logic DataValidBit; + + flopenr #(`XLEN) ReadPAdrFlop(clk, reset, ~stall, ReadPAdr, OldReadPAdr); + + // Assign the read and write addresses in cache memory + always_comb begin + ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN]; + ReadPAdr = {ReadUpperPAdr, ReadLowerAdr}; + ReadSet = ReadPAdr[SETEND:SETBEGIN]; + ReadTag = OldReadPAdr[TAGEND:TAGBEGIN]; + + LoadOffset = LoadPAdr[OFFSETEND:OFFSETBEGIN]; + LoadSet = LoadPAdr[SETEND:SETBEGIN]; + LoadTag = LoadPAdr[TAGEND:TAGBEGIN]; + end + + // Depth is number of bits in one "word" of the memory, width is number of such words + Sram1Read1Write #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem ( + .*, + .ReadAddr(ReadSet), + .ReadData(ReadLine), + .WriteAddr(LoadSet), + 
.WriteData(LoadLine), + .WriteEnable(LoadEnable) + ); + Sram1Read1Write #(.DEPTH(TAGWIDTH), .WIDTH(NUMLINES)) cachetags ( + .*, + .ReadAddr(ReadSet), + .ReadData(DataTag), + .WriteAddr(LoadSet), + .WriteData(LoadTag), + .WriteEnable(LoadEnable) + ); + + // Pick the right bits coming out the read line + assign DataWord = ReadLineTransformed[ReadOffset]; + genvar i; + generate + for (i=0; i < LINESIZE/WORDSIZE; i++) begin + assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; + end + endgenerate + + // Correctly handle the valid bits + always_ff @(posedge clk, posedge reset) begin + if (reset || flush) begin + ValidOut <= {NUMLINES{1'b0}}; + end else begin + if (LoadEnable) begin + ValidOut[LoadSet] <= 1; + end + end + DataValidBit <= ValidOut[ReadSet]; + end + assign DataValid = DataValidBit && (DataTag == ReadTag); +endmodule diff --git a/wally-pipelined/src/cache/line.sv b/wally-pipelined/src/cache/line.sv deleted file mode 100644 index 8b6e5e468..000000000 --- a/wally-pipelined/src/cache/line.sv +++ /dev/null @@ -1,68 +0,0 @@ -/////////////////////////////////////////// -// line.sv -// -// Written: jaallen@g.hmc.edu 2021-03-23 -// Modified: -// -// Purpose: An implementation of a single cache line -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -`include "wally-config.vh" - -// A read-only cache line ("write"ing to this line is loading new data, not writing to memory -module rocacheline #(parameter LINESIZE = 256, parameter TAGSIZE = 32, parameter WORDSIZE = `XLEN) ( - // Pipeline stuff - input logic clk, - input logic reset, - // If flush is high, invalidate this word - input logic flush, - // Select which word within the line - input logic [$clog2(LINESIZE/8)-1:0] WordSelect, - // Write new data to the line - input logic WriteEnable, - input logic [LINESIZE-1:0] WriteData, - input logic [TAGSIZE-1:0] WriteTag, - // Output the word, as well as the tag and if it is valid - output logic [WORDSIZE-1:0] DataWord, - output logic [TAGSIZE-1:0] DataTag, - output logic DataValid -); - - localparam integer OFFSETSIZE = $clog2(LINESIZE/8); - localparam integer NUMWORDS = LINESIZE/WORDSIZE; - - logic [NUMWORDS-1:0][WORDSIZE-1:0] DataLinesIn, DataLinesOut; - - flopenr #(1) ValidBitFlop(clk, reset, WriteEnable | flush, ~flush, DataValid); - flopenr #(TAGSIZE) TagFlop(clk, reset, WriteEnable, WriteTag, DataTag); - - genvar i; - generate - for (i=0; i < NUMWORDS; i++) begin - assign DataLinesIn[i] = WriteData[NUMWORDS*i+WORDSIZE-1:NUMWORDS*i]; - flopenr #(LINESIZE) LineFlop(clk, reset, WriteEnable, DataLinesIn[i], DataLinesOut[i]); - end - endgenerate - - - always_comb begin - DataWord = DataLinesOut[WordSelect[OFFSETSIZE-1:$clog2(WORDSIZE)]]; - end - -endmodule diff --git 
a/wally-pipelined/src/dmem/dcache.sv b/wally-pipelined/src/dmem/dcache.sv new file mode 100644 index 000000000..243c69759 --- /dev/null +++ b/wally-pipelined/src/dmem/dcache.sv @@ -0,0 +1,184 @@ +/////////////////////////////////////////// +// dcache.sv +// +// Written: jaallen@g.hmc.edu 2021-04-15 +// Modified: +// +// Purpose: Cache memory for the dmem so it can access memory less often, saving cycles +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + +module dcache( + // Basic pipeline stuff + input logic clk, reset, + input logic StallW, + input logic FlushW, + // Upper bits of physical address + input logic [`XLEN-1:12] UpperPAdrM, + // Lower 12 bits of virtual address, since it's faster this way + input logic [11:0] LowerVAdrM, + // Write to the dcache + input logic [`XLEN-1:0] DCacheWriteDataM, + input logic DCacheReadM, DCacheWriteM, + // Data read in from the ebu unit + input logic [`XLEN-1:0] ReadDataW, + input logic MemAckW, + // Access requested from the ebu unit + output logic [`XLEN-1:0] MemPAdrM, + output logic MemReadM, MemWriteM, + // High if the dcache is requesting a stall + output logic DCacheStallW, + // The data that was requested from the cache + output logic [`XLEN-1:0] DCacheReadW +); + + // Configuration parameters + // TODO Move these to a config file + localparam integer DCACHELINESIZE = 256; + localparam integer DCACHENUMLINES = 512; + + // Input signals to cache memory + logic FlushMem; + logic [`XLEN-1:12] DCacheMemUpperPAdr; + logic [11:0] DCacheMemLowerAdr; + logic DCacheMemWriteEnable; + logic [DCACHELINESIZE-1:0] DCacheMemWriteData; + logic [`XLEN-1:0] DCacheMemWritePAdr; + logic EndFetchState; + // Output signals from cache memory + logic [`XLEN-1:0] DCacheMemReadData; + logic DCacheMemReadValid; + + wtdirectmappedmem #(.LINESIZE(DCACHELINESIZE), .NUMLINES(DCACHENUMLINES), .WORDSIZE(`XLEN)) cachemem( + .*, + // Stall it if the pipeline is stalled, unless we're stalling it and we're ending our stall + .stall(StallW), + .flush(FlushMem), + .ReadUpperPAdr(DCacheMemUpperPAdr), + .ReadLowerAdr(DCacheMemLowerAdr), + .LoadEnable(DCacheMemWriteEnable), + .LoadLine(DCacheMemWriteData), + .LoadPAdr(DCacheMemWritePAdr), + .DataWord(DCacheMemReadData), + .DataValid(DCacheMemReadValid), + .WriteEnable(0), + .WriteWord(0), + .WritePAdr(0), + .WriteSize(2'b10) + ); + + dcachecontroller #(.LINESIZE(DCACHELINESIZE)) 
controller(.*); + + // For now, assume no writes to executable memory + assign FlushMem = 1'b0; +endmodule + +module dcachecontroller #(parameter LINESIZE = 256) ( + // Inputs from pipeline + input logic clk, reset, + input logic StallW, + input logic FlushW, + + // Input the address to read + // The upper bits of the physical address + input logic [`XLEN-1:12] DCacheMemUpperPAdr, + // The lower bits of the virtual address + input logic [11:0] DCacheMemLowerAdr, + + // Signals to/from cache memory + // The read coming out of it + input logic [`XLEN-1:0] DCacheMemReadData, + input logic DCacheMemReadValid, + // Load data into the cache + output logic DCacheMemWriteEnable, + output logic [LINESIZE-1:0] DCacheMemWriteData, + output logic [`XLEN-1:0] DCacheMemWritePAdr, + + // The read that was requested (XLEN bits wide, same as DCacheMemReadData) + output logic [`XLEN-1:0] DCacheReadW, + + // Outputs to pipeline control stuff + output logic DCacheStallW, EndFetchState, + + // Signals to/from ahblite interface + // A read containing the requested data + input logic [`XLEN-1:0] ReadDataW, + input logic MemAckW, + // The read we request from main memory (NOTE(review): MemWriteM is never assigned in this module) + output logic [`XLEN-1:0] MemPAdrM, + output logic MemReadM, MemWriteM +); + + // Cache fault signals + logic FaultStall; + + // Handle happy path (data in cache) + + always_comb begin + DCacheReadW = DCacheMemReadData; + end + + + // Handle cache faults + + localparam integer WORDSPERLINE = LINESIZE/`XLEN; + localparam integer LOGWPL = $clog2(WORDSPERLINE); + localparam integer OFFSETWIDTH = $clog2(LINESIZE/8); + + logic FetchState, BeginFetchState; + logic [LOGWPL:0] FetchWordNum, NextFetchWordNum; + logic [`XLEN-1:0] LineAlignedPCPF; + + flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState); + flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum); + + genvar i; + generate + for (i=0; i < WORDSPERLINE; i++) begin + flopenr #(`XLEN) flop(clk, reset, FetchState & (i == FetchWordNum), ReadDataW,
DCacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]); + end + endgenerate + + // Enter the fetch state when we hit a cache fault + always_comb begin + BeginFetchState = ~DCacheMemReadValid & ~FetchState & (FetchWordNum == 0); + end + // Exit the fetch state once the cache line has been loaded + flopr #(1) EndFetchStateFlop(clk, reset, DCacheMemWriteEnable, EndFetchState); + + // Machinery to request the correct addresses from main memory + always_comb begin + MemReadM = FetchState & ~EndFetchState & ~DCacheMemWriteEnable; + LineAlignedPCPF = {DCacheMemUpperPAdr, DCacheMemLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; + MemPAdrM = LineAlignedPCPF + FetchWordNum*(`XLEN/8); + NextFetchWordNum = FetchState ? FetchWordNum+MemAckW : {LOGWPL+1{1'b0}}; + end + + // Write to cache memory when we have the line here + always_comb begin + DCacheMemWritePAdr = LineAlignedPCPF; + DCacheMemWriteEnable = FetchWordNum == {1'b1, {LOGWPL{1'b0}}} & FetchState & ~EndFetchState; + end + + // Stall the pipeline while loading a new line from memory + always_comb begin + DCacheStallW = FetchState | ~DCacheMemReadValid; + end +endmodule diff --git a/wally-pipelined/src/dmem/dmem.sv b/wally-pipelined/src/dmem/dmem.sv index 1eb6fa827..16813629c 100644 --- a/wally-pipelined/src/dmem/dmem.sv +++ b/wally-pipelined/src/dmem/dmem.sv @@ -62,6 +62,14 @@ module dmem ( logic SquashSCM; logic DTLBPageFaultM; + logic MemAccessM; + + logic [1:0] CurrState, NextState; + + localparam STATE_READY = 0; + localparam STATE_FETCH = 1; + localparam STATE_STALLED = 2; + tlb #(.ENTRY_BITS(3), .ITLB(0)) dtlb(.TLBAccessType(MemRWM), .VirtualAddress(MemAdrM), .PageTableEntryWrite(PageTableEntryM), .PageTypeWrite(PageTypeM), @@ -85,8 +93,9 @@ module dmem ( // Squash unaligned data accesses and failed store conditionals // *** this is also the place to squash if the cache is hit - assign MemReadM = MemRWM[1] & ~DataMisalignedM; - assign MemWriteM = MemRWM[0] & ~DataMisalignedM && ~SquashSCM; + assign MemReadM = MemRWM[1] &
~DataMisalignedM & CurrState != STATE_STALLED; + assign MemWriteM = MemRWM[0] & ~DataMisalignedM && ~SquashSCM & CurrState != STATE_STALLED; + assign MemAccessM = |MemRWM; // Determine if address is valid assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; @@ -122,5 +131,30 @@ module dmem ( // Data stall //assign DataStall = 0; + // Ross Thompson April 22, 2021 + // for now we need to handle the issue where the data memory interface repeatedly + // requests data from memory rather than issuing a single request. + + + flopr #(2) stateReg(.clk(clk), + .reset(reset), + .d(NextState), + .q(CurrState)); + + always_comb begin + case (CurrState) + STATE_READY: if (MemAccessM & ~DataMisalignedM) NextState = STATE_FETCH; + else NextState = STATE_READY; + STATE_FETCH: if (MemAckW & ~StallW) NextState = STATE_READY; + else if (MemAckW & StallW) NextState = STATE_STALLED; + else NextState = STATE_FETCH; + STATE_STALLED: if (~StallW) NextState = STATE_READY; + else NextState = STATE_STALLED; + default: NextState = STATE_READY; + endcase // case (CurrState) + end + + + endmodule diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index ccdead7da..b14e7bcbe 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -45,6 +45,7 @@ module ahblite ( input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram input logic InstrReadF, output logic [`XLEN-1:0] InstrRData, + output logic InstrAckF, // Signals from Data Cache input logic [`XLEN-1:0] MemPAdrM, input logic MemReadM, MemWriteM, @@ -77,7 +78,8 @@ module ahblite ( output logic [3:0] HSIZED, output logic HWRITED, // Stalls - output logic InstrStall,/*InstrUpdate, */DataStall + output logic /*InstrUpdate, */DataStall, + output logic MemAckW // *** add a chip-level ready signal as part of handshake ); @@ -185,6 +187,8 @@ module ahblite ( assign MMUReady = (BusState == MMUTRANSLATE && NextBusState == IDLE); assign InstrRData = HRDATA; +
assign InstrAckF = (BusState == INSTRREAD) && (NextBusState != INSTRREAD) || (BusState == INSTRREADC) && (NextBusState != INSTRREADC); + assign MemAckW = (BusState == MEMREAD) && (NextBusState != MEMREAD) || (BusState == MEMWRITE) && (NextBusState != MEMWRITE); assign MMUReadPTE = HRDATA; assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021 assign CaptureDataM = ((BusState == MEMREAD) && (NextBusState != MEMREAD)) || diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 950bd3ba4..9e8880edc 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -31,7 +31,7 @@ module hazard( // Detect hazards input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, input logic LoadStallD, MulDivStallD, CSRRdStallD, - input logic InstrStall, DataStall, ICacheStallF, + input logic DataStall, ICacheStallF, input logic DivBusyE, // Stall & flush outputs output logic StallF, StallD, StallE, StallM, StallW, @@ -57,13 +57,12 @@ module hazard( assign BranchFlushDE = BPPredWrongE | RetM | TrapM; - assign StallFCause = CSRWritePendingDEM & ~(BranchFlushDE); + assign StallFCause = CSRWritePendingDEM & ~(BranchFlushDE); assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD) & ~(BranchFlushDE); // stall in decode if instruction is a load/mul/csr dependent on previous // assign StallDCause = LoadStallD | MulDivStallD | CSRRdStallD; // stall in decode if instruction is a load/mul/csr dependent on previous assign StallECause = DivBusyE; assign StallMCause = 0; - assign StallWCause = DataStall | InstrStall; - + assign StallWCause = DataStall | ICacheStallF; // Each stage stalls if the next stage is stalled or there is a cause to stall this stage. 
assign StallF = StallD | StallFCause; diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index ca771e404..718564636 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -31,11 +31,12 @@ module icache( input logic StallF, StallD, input logic FlushD, // Upper bits of physical address for PC - input logic [`XLEN-1:12] UpperPCPF, + input logic [`XLEN-1:12] UpperPCNextPF, // Lower 12 bits of virtual PC address, since it's faster this way - input logic [11:0] LowerPCF, + input logic [11:0] LowerPCNextF, // Data read in from the ebu unit input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, // Read requested from the ebu unit output logic [`XLEN-1:0] InstrPAdrF, output logic InstrReadF, @@ -44,95 +45,605 @@ module icache( // High if the icache is requesting a stall output logic ICacheStallF, // The raw (not decompressed) instruction that was requested - // If the next instruction is compressed, the upper 16 bits may be anything + // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros output logic [31:0] InstrRawD ); - logic DelayF, DelaySideF, FlushDLastCyclen, DelayD; - logic [1:0] InstrDMuxChoice; - logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD; - logic [31:0] InstrF, AlignedInstrD; - // Buffer the last read, for ease of accessing it again - logic LastReadDataValidF; - logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF; + // Configuration parameters + // TODO Move these to a config file + localparam integer ICACHELINESIZE = 256; + localparam integer ICACHENUMLINES = 512; - // instruction for NOP - localparam [31:0] nop = 32'h00000013; + // Input signals to cache memory + logic FlushMem; + logic [`XLEN-1:12] ICacheMemReadUpperPAdr; + logic [11:0] ICacheMemReadLowerAdr; + logic ICacheMemWriteEnable; + logic [ICACHELINESIZE-1:0] ICacheMemWriteData; + logic [`XLEN-1:0] ICacheMemWritePAdr; + logic EndFetchState; + // Output signals from cache memory +
logic [31:0] ICacheMemReadData; + logic ICacheMemReadValid; + logic ICacheReadEn; + + rodirectmappedmemre #(.LINESIZE(ICACHELINESIZE), .NUMLINES(ICACHENUMLINES), .WORDSIZE(`XLEN)) + cachemem( + .*, + // Read enable for the cache memory (ICacheReadEn is an output of the icache controller) + .re(ICacheReadEn), + .flush(FlushMem), + .ReadUpperPAdr(ICacheMemReadUpperPAdr), + .ReadLowerAdr(ICacheMemReadLowerAdr), + .WriteEnable(ICacheMemWriteEnable), + .WriteLine(ICacheMemWriteData), + .WritePAdr(ICacheMemWritePAdr), + .DataWord(ICacheMemReadData), + .DataValid(ICacheMemReadValid) + ); - // Temporary change to bridge the new interface to old behaviors - logic [`XLEN-1:0] PCPF; - assign PCPF = {UpperPCPF, LowerPCF}; + icachecontroller #(.LINESIZE(ICACHELINESIZE)) controller(.*); + // For now, assume no writes to executable memory + assign FlushMem = 1'b0; +endmodule + +module icachecontroller #(parameter LINESIZE = 256) ( + // Inputs from pipeline + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, + + // Input the address to read + // The upper bits of the physical pc + input logic [`XLEN-1:12] UpperPCNextPF, + // The lower bits of the virtual pc + input logic [11:0] LowerPCNextF, + + // Signals to/from cache memory + // The read coming out of it + input logic [31:0] ICacheMemReadData, + input logic ICacheMemReadValid, + // The address at which we want to search the cache memory + output logic [`XLEN-1:12] ICacheMemReadUpperPAdr, + output logic [11:0] ICacheMemReadLowerAdr, + output logic ICacheReadEn, + // Load data into the cache + output logic ICacheMemWriteEnable, + output logic [LINESIZE-1:0] ICacheMemWriteData, + output logic [`XLEN-1:0] ICacheMemWritePAdr, + + // Outputs to rest of ifu + // High if the instruction in the fetch stage is compressed + output logic CompressedF, + // The instruction that was requested + // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros + output logic [31:0]
InstrRawD, + + // Outputs to pipeline control stuff + output logic ICacheStallF, EndFetchState, + + // Signals to/from ahblite interface + // A read containing the requested data + input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, + // The read we request from main memory + output logic [`XLEN-1:0] InstrPAdrF, + output logic InstrReadF +); + + // FSM states + localparam STATE_READY = 0; + localparam STATE_HIT_SPILL = 1; // spill, block 0 hit + localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 2; // block 1 miss, issue read to AHB and wait data. + localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 3; // write data into SRAM/LUT + localparam STATE_HIT_SPILL_MERGE = 4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL. + + // a challenge is the spill signal gets us out of the ready state and moves us to + // 1 of the 2 spill branches. However the original fsm design had us return to + // the ready state when the spill + hits/misses were fully resolved. The problem + // is the spill signal is based on PCPF so when we return to READY to check if the + // cache has a hit it still expresses spill. We can fix in 1 of two ways. + // 1. we can add 1 extra state at the end of each spill branch to return the instruction + // to the CPU advancing the CPU and icache to the next instruction. + // 2. We can assert a signal which is delayed 1 cycle to suppress the spill when we get + // to the READY state. + // The first option is more robust and increases the number of states by 2. The + // second option seems like it should work, but I worry there is a hidden interaction + // between CPU stalling and that register. + // Picking option 1. + + localparam STATE_HIT_SPILL_FINAL = 5; // this state replicates STATE_READY's replay of the + // spill access but does not consider spill. It also does not do another operation. + + + localparam STATE_MISS_FETCH_WDV = 6; // aligned miss, issue read to AHB and wait for data.
+ localparam STATE_MISS_FETCH_DONE = 7; // write data into SRAM/LUT + localparam STATE_MISS_READ = 8; // read block 1 from SRAM/LUT + + localparam STATE_MISS_SPILL_FETCH_WDV = 9; // spill, miss on block 0, issue read to AHB and wait + localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT + localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT + localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update. + localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 13; // miss on block 1, issue read to AHB and wait + localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 14; // write data to SRAM/LUT + localparam STATE_MISS_SPILL_MERGE = 15; // read block 0 of CPU access, + + localparam STATE_MISS_SPILL_FINAL = 16; // this state replicates STATE_READY's replay of the + // spill access but does not consider spill. It also does not do another operation. + + + localparam STATE_INVALIDATE = 17; // *** not sure if invalidate or evict? invalidate by cache block or address?
+
+  // ---------------------------------------------------------------------------
+  // Derived geometry.
+  //   AHBByteLength   : bytes moved per bus beat (`XLEN / 8).
+  //   BlockByteLength : bytes per cache line (LINESIZE / 8).
+  //   OFFSETWIDTH     : number of byte-offset bits inside one cache line.
+  //   WORDSPERLINE    : bus beats needed to fill one cache line.
+  //   LOGWPL          : bits needed to count those beats.
+  // ---------------------------------------------------------------------------
+  localparam AHBByteLength = `XLEN / 8;
+  // NOTE(review): name looks like a typo for AHBOFFSETWIDTH; kept as-is because it is
+  // not referenced in the code visible here and a rename is out of scope.
+  localparam AHBOFFETWIDTH = $clog2(AHBByteLength);
+
+
+  localparam BlockByteLength = LINESIZE / 8;
+  localparam OFFSETWIDTH = $clog2(BlockByteLength);
+
+  localparam WORDSPERLINE = LINESIZE/`XLEN;
+  localparam LOGWPL = $clog2(WORDSPERLINE);
+
+  // FSM state (5 bits: the state encodings above run up to 17)
+  logic [4:0] CurrState, NextState;
+  logic hit, spill;
+  logic SavePC;
+  logic [1:0] PCMux;
+  logic CntReset;
+  logic PreCntEn, CntEn;
+  logic spillSave;
+  logic UnalignedSelect;
+  logic FetchCountFlag;
+  // the fetch is complete when the beat counter reaches the last word of the line
+  localparam FetchCountThreshold = WORDSPERLINE - 1;
+
+  logic [LOGWPL:0] FetchCount, NextFetchCount;
+
+  logic [`XLEN-1:0] PCPreFinalF, PCPFinalF, PCSpillF, PCNextPF;
+  logic [`XLEN-1:OFFSETWIDTH] PCPTrunkF;
+
+
+  logic [31:0] FinalInstrRawF;
+
+  logic [15:0] SpillDataBlock0;
+
+
+
+  // Happy path signals
+  logic [31:0] AlignedInstrRawD;
+
+  //logic [31:0] AlignedInstrRawF, AlignedInstrRawD;
+  //logic FlushDLastCycleN;
+  //logic PCPMisalignedF;
+  localparam [31:0] NOP = 32'h13;
+  logic [`XLEN-1:0] PCPF;
+
+  logic reset_q;
+
+  // Misaligned signals
+  //logic [`XLEN:0] MisalignedInstrRawF;
+  //logic MisalignedStall;
+  // Cache fault signals
+  //logic FaultStall;
+
+  // rebuild the full next PC from its upper and lower pieces
+  assign PCNextPF = {UpperPCNextPF, LowerPCNextF};
+
+  // PCPF is the saved PC: it loads PCPFinalF only when the FSM asserts SavePC and the
+  // fetch stage is not stalled; its load value is `RESET_VECTOR.
+  flopenl #(`XLEN) PCPFFlop(clk, reset, SavePC & ~StallF, PCPFinalF, `RESET_VECTOR, PCPF);
+  // on spill we want to get the first 2 bytes of the next cache block.
+  // the spill only occurs if the PCPF mod BlockByteLength == -2. Therefore we can
+  // simply add 2 to land on the next cache block.
+  assign PCSpillF = PCPF + 2'b10;
+
+  // now we have to select between these three PCs
+  //   PCMux[0] (or StallF) : hold the saved PCPF instead of taking PCNextPF
+  //   PCMux[1]             : use the spill address PCSpillF
+  // Parentheses only make the existing precedence explicit ('|' binds tighter than '?:').
+  assign PCPreFinalF = (PCMux[0] | StallF) ? PCPF : PCNextPF; // *** don't like the stallf
+  //assign PCPreFinalF = PCMux[0] ? PCPF : PCNextPF; // *** don't like the stallf
+  assign PCPFinalF = PCMux[1] ? PCSpillF : PCPreFinalF;
+
+
+
+  // truncate the offset from PCPF for memory address generation
+  assign PCPTrunkF = PCPFinalF[`XLEN-1:OFFSETWIDTH];
+
+  // Detect if the instruction is compressed
+  assign CompressedF = FinalInstrRawF[1:0] != 2'b11;
+
+
+  // Handle happy path (data in cache, reads aligned)
+/* -----\/----- EXCLUDED -----\/-----
+
+  generate
+    if (`XLEN == 32) begin
+      assign AlignedInstrRawF = PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData;
+      //assign PCPMisalignedF = PCPF[1] && ~CompressedF;
+    end else begin
+      assign AlignedInstrRawF = PCPF[2]
+          ? (PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32])
+          : (PCPF[1] ? ICacheMemReadData[47:16] : ICacheMemReadData[31:0]);
+      //assign PCPMisalignedF = PCPF[2] && PCPF[1] && ~CompressedF;
+    end
+  endgenerate
+ -----/\----- EXCLUDED -----/\----- */
+
+  //flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD);
+  //flopr #(1) FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN);
+
+  //mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD);
+
+  // Stall for faults or misaligned reads
+/* -----\/----- EXCLUDED -----\/-----
+  always_comb begin
+    assign ICacheStallF = FaultStall | MisalignedStall;
+  end
+ -----/\----- EXCLUDED -----/\----- */
+
+
+  // Handle misaligned, noncompressed reads
+
+/* -----\/----- EXCLUDED -----\/-----
+  logic MisalignedState, NextMisalignedState;
+  logic [15:0] MisalignedHalfInstrF;
+  logic [15:0] UpperHalfWord;
+ -----/\----- EXCLUDED -----/\----- */
+
+/* -----\/----- EXCLUDED -----\/-----
+  flopenr #(16) MisalignedHalfInstrFlop(clk, reset, ~FaultStall & (PCPMisalignedF & MisalignedState), AlignedInstrRawF[15:0], MisalignedHalfInstrF);
+  flopenr #(1) MisalignedStateFlop(clk, reset, ~FaultStall, NextMisalignedState, MisalignedState);
+ -----/\----- EXCLUDED -----/\----- */
+
+  // When doing a misaligned read, swizzle the bits correctly
+/* -----\/----- EXCLUDED -----\/-----
+  generate
+    if (`XLEN == 32) begin
+      assign UpperHalfWord = ICacheMemReadData[31:16];
+    end else begin
+      assign UpperHalfWord = ICacheMemReadData[63:48];
+    end
+  endgenerate
+  always_comb begin
+    if (MisalignedState) begin
+      assign MisalignedInstrRawF = {16'b0, UpperHalfWord};
+    end else begin
+      assign MisalignedInstrRawF = {ICacheMemReadData[15:0], MisalignedHalfInstrF};
+    end
+  end
+ -----/\----- EXCLUDED -----/\----- */
+
+  // Manage internal state and stall when necessary
+/* -----\/----- EXCLUDED -----\/-----
+  always_comb begin
+    assign MisalignedStall = PCPMisalignedF & MisalignedState;
+    assign NextMisalignedState = ~PCPMisalignedF | ~MisalignedState;
+  end
+ -----/\----- EXCLUDED -----/\----- */
+
+  // Pick the correct address to read
+/* -----\/----- EXCLUDED -----\/-----
+  generate
+    if (`XLEN == 32) begin
+      assign ICacheMemReadLowerAdr = {LowerPCNextF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00};
+    end else begin
+      assign ICacheMemReadLowerAdr = {LowerPCNextF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00};
+    end
+  endgenerate
+ -----/\----- EXCLUDED -----/\----- */
+  // TODO Handle reading instructions that cross page boundaries
+  //assign ICacheMemReadUpperPAdr = UpperPCNextPF;
+
+
+  // Handle cache faults
+
+
+/* -----\/----- EXCLUDED -----\/-----
+  logic FetchState, BeginFetchState;
+  logic [LOGWPL:0] FetchWordNum, NextFetchWordNum;
+  logic [`XLEN-1:0] LineAlignedPCPF;
+
+  flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState);
+  flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum);
+
+
+  // Enter the fetch state when we hit a cache fault
+  always_comb begin
+    BeginFetchState = ~ICacheMemReadValid & ~FetchState & (FetchWordNum == 0);
+  end
+  // Exit the fetch state once the cache line has been loaded
+  flopr #(1) EndFetchStateFlop(clk, reset, ICacheMemWriteEnable, EndFetchState);
+
+  // Machinery to request the correct addresses from main memory
+  always_comb begin
+    InstrReadF = FetchState & ~EndFetchState & ~ICacheMemWriteEnable; // next stage logic
+    LineAlignedPCPF = {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; // the fetch address for AHB?
+    InstrPAdrF = LineAlignedPCPF + FetchWordNum*(`XLEN/8); // ?
+    NextFetchWordNum = FetchState ? FetchWordNum+InstrAckF : {LOGWPL+1{1'b0}}; // convert to enable
+  end
+
+  // Write to cache memory when we have the line here
+  always_comb begin
+    ICacheMemWritePAdr = LineAlignedPCPF;
+    ICacheMemWriteEnable = FetchWordNum == {1'b1, {LOGWPL{1'b0}}} & FetchState & ~EndFetchState;
+  end
+
+  // Stall the pipeline while loading a new line from memory
+  always_comb begin
+    FaultStall = FetchState | ~ICacheMemReadValid;
+  end
+ -----/\----- EXCLUDED -----/\----- */
+
+  // the FSM is always running, do not stall.
+  flopr #(5) stateReg(.clk(clk),
+                      .reset(reset),
+                      .d(NextState),
+                      .q(CurrState));
+
+  // A fetch spills into the next cache block when PCPF addresses the last halfword of
+  // its block, i.e. every block-offset bit above bit 0 is set. The test is derived from
+  // OFFSETWIDTH instead of hard-coding bits [4:1], so it follows LINESIZE; for
+  // LINESIZE = 256 (OFFSETWIDTH = 5) it is identical to PCPF[4:1] == 4'b1111.
+  assign spill = &PCPF[OFFSETWIDTH-1:1];
+  assign hit = ICacheMemReadValid; // note ICacheMemReadValid is hit.
+  // high while the beat counter sits on the last word of the line being fetched
+  assign FetchCountFlag = FetchCount == FetchCountThreshold;
+
+  // Next state logic
+  //
+  // Combinational FSM: computes NextState plus the per-state control outputs.
+  // Defaults are assigned first, so each state only lists what it changes:
+  //   ICacheStallF defaults to 1 - the fetch stage is stalled unless a state clears it
+  //                (STATE_READY on an unspilled hit, and the two *_FINAL states).
+  //   PCMux[1] selects the spill address PCSpillF; PCMux[0] holds the saved PCPF
+  //                instead of the incoming PCNextPF.
+  //   CntReset is OR-ed into the reset of the fetch beat counter; PreCntEn enables
+  //                counting (qualified by InstrAckF below).
+  // Every arm must assign NextState, since there is no default assignment for it.
+  always_comb begin
+    UnalignedSelect = 1'b0;
+    CntReset = 1'b0;
+    PreCntEn = 1'b0;
+    //InstrReadF = 1'b0;
+    ICacheMemWriteEnable = 1'b0;
+    spillSave = 1'b0;
+    PCMux = 2'b00;
+    ICacheReadEn = 1'b0;
+    SavePC = 1'b0;
+    ICacheStallF = 1'b1;
+
+    case (CurrState)
+
+      STATE_READY: begin
+        PCMux = 2'b00;
+        ICacheReadEn = 1'b1;
+        if (hit & ~spill) begin
+          SavePC = 1'b1;
+          ICacheStallF = 1'b0;
+          NextState = STATE_READY;
+        end else if (hit & spill) begin
+          spillSave = 1'b1;
+          PCMux = 2'b10;
+          NextState = STATE_HIT_SPILL;
+        end else if (~hit & ~spill) begin
+          CntReset = 1'b1;
+          NextState = STATE_MISS_FETCH_WDV;
+        end else if (~hit & spill) begin
+          CntReset = 1'b1;
+          PCMux = 2'b10;
+          NextState = STATE_MISS_SPILL_FETCH_WDV;
+        end else begin
+          NextState = STATE_READY;
+        end
+      end
+
+      // branch 1, hit spill and 2, miss spill hit
+      STATE_HIT_SPILL: begin
+        PCMux = 2'b10;
+        UnalignedSelect = 1'b1;
+        ICacheReadEn = 1'b1;
+        if (hit) begin
+          NextState = STATE_HIT_SPILL_FINAL;
+        end else begin
+          // Both miss-path statements must be inside begin/end. Without the block only
+          // CntReset was conditional and NextState was overwritten unconditionally, so
+          // a hit on the second block still entered the refetch state (and did so
+          // without resetting the fetch counter).
+          CntReset = 1'b1;
+          NextState = STATE_HIT_SPILL_MISS_FETCH_WDV;
+        end
+      end
+      STATE_HIT_SPILL_MISS_FETCH_WDV: begin
+        PCMux = 2'b10;
+        //InstrReadF = 1'b1;
+        PreCntEn = 1'b1;
+        if (FetchCountFlag & InstrAckF) begin
+          NextState = STATE_HIT_SPILL_MISS_FETCH_DONE;
+        end else begin
+          NextState = STATE_HIT_SPILL_MISS_FETCH_WDV;
+        end
+      end
+      STATE_HIT_SPILL_MISS_FETCH_DONE: begin
+        PCMux = 2'b10;
+        ICacheMemWriteEnable = 1'b1;
+        NextState = STATE_HIT_SPILL_MERGE;
+      end
+      STATE_HIT_SPILL_MERGE: begin
+        PCMux = 2'b10;
+        UnalignedSelect = 1'b1;
+        ICacheReadEn = 1'b1;
+        NextState = STATE_HIT_SPILL_FINAL;
+      end
+      STATE_HIT_SPILL_FINAL: begin
+        ICacheReadEn = 1'b1;
+        PCMux = 2'b00;
+        UnalignedSelect = 1'b1;
+        SavePC = 1'b1;
+        NextState = STATE_READY;
+        ICacheStallF = 1'b0;
+      end
+
+      // branch 3 miss no spill
+      STATE_MISS_FETCH_WDV: begin
+        PCMux = 2'b01;
+        //InstrReadF = 1'b1;
+        PreCntEn = 1'b1;
+        if (FetchCountFlag & InstrAckF) begin
+          NextState = STATE_MISS_FETCH_DONE;
+        end else begin
+          NextState = STATE_MISS_FETCH_WDV;
+        end
+      end
+      STATE_MISS_FETCH_DONE: begin
+        PCMux = 2'b01;
+        ICacheMemWriteEnable = 1'b1;
+        NextState = STATE_MISS_READ;
+      end
+      STATE_MISS_READ: begin
+        PCMux = 2'b01;
+        ICacheReadEn = 1'b1;
+        NextState = STATE_READY;
+      end
+
+      // branch 4 miss spill hit, and 5 miss spill miss
+      STATE_MISS_SPILL_FETCH_WDV: begin
+        PCMux = 2'b01;
+        PreCntEn = 1'b1;
+        //InstrReadF = 1'b1;
+        if (FetchCountFlag & InstrAckF) begin
+          NextState = STATE_MISS_SPILL_FETCH_DONE;
+        end else begin
+          NextState = STATE_MISS_SPILL_FETCH_WDV;
+        end
+      end
+      STATE_MISS_SPILL_FETCH_DONE: begin
+        PCMux = 2'b01;
+        ICacheMemWriteEnable = 1'b1;
+        NextState = STATE_MISS_SPILL_READ1;
+      end
+      STATE_MISS_SPILL_READ1: begin // always be a hit as we just wrote that cache block.
+        PCMux = 2'b10; // there is a 1 cycle delay after setting the address before the data arrives.
+        spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm.
+        ICacheReadEn = 1'b1;
+        NextState = STATE_MISS_SPILL_2;
+      end
+      STATE_MISS_SPILL_2: begin
+        PCMux = 2'b10;
+        UnalignedSelect = 1'b1;
+        if (~hit) begin
+          CntReset = 1'b1;
+          NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
+        end else begin
+          NextState = STATE_MISS_SPILL_FINAL;
+        end
+      end
+      STATE_MISS_SPILL_MISS_FETCH_WDV: begin
+        PCMux = 2'b10;
+        PreCntEn = 1'b1;
+        //InstrReadF = 1'b1;
+        if (FetchCountFlag & InstrAckF) begin
+          NextState = STATE_MISS_SPILL_MISS_FETCH_DONE;
+        end else begin
+          NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
+        end
+      end
+      STATE_MISS_SPILL_MISS_FETCH_DONE: begin
+        PCMux = 2'b10;
+        ICacheMemWriteEnable = 1'b1;
+        NextState = STATE_MISS_SPILL_MERGE;
+      end
+      STATE_MISS_SPILL_MERGE: begin
+        PCMux = 2'b10;
+        UnalignedSelect = 1'b1;
+        ICacheReadEn = 1'b1;
+        NextState = STATE_MISS_SPILL_FINAL;
+      end
+      STATE_MISS_SPILL_FINAL: begin
+        ICacheReadEn = 1'b1;
+        PCMux = 2'b00;
+        UnalignedSelect = 1'b1;
+        SavePC = 1'b1;
+        ICacheStallF = 1'b0;
+        NextState = STATE_READY;
+      end
+      default: begin
+        PCMux = 2'b01;
+        NextState = STATE_READY;
+      end
+      // *** add in error handling and invalidate/evict
+    endcase
+  end
+
+  // fsm outputs
+  // stall CPU any time we are not in the ready state. any other state means the
+  // cache is either requesting data from the memory interface or handling a
+  // spill over two cycles.
+  // *** BUG this logic will need to change
+  //assign ICacheStallF = ((CurrState != STATE_READY) | ~hit | spill) | reset_q ? 1'b1 : 1'b0;
+  // save the PC anytime we are in the ready state. The saved value will be used as the PC may not be stable.
+  //assign SavePC = ((CurrState == STATE_READY) & hit) & ~spill ? 1'b1 : 1'b0;
+
+  // count one beat per acknowledged bus read while a fetch state has counting enabled
+  assign CntEn = PreCntEn & InstrAckF;
+
+  // request a bus read in each of the four wait-for-data-valid (WDV) fetch states
+  assign InstrReadF = (CurrState == STATE_HIT_SPILL_MISS_FETCH_WDV) ||
+                      (CurrState == STATE_MISS_FETCH_WDV) ||
+                      (CurrState == STATE_MISS_SPILL_FETCH_WDV) ||
+                      (CurrState == STATE_MISS_SPILL_MISS_FETCH_WDV);
+
+  // to compute the fetch address we need to add the bit shifted
+  // counter output to the address.
+
+  // beat counter: cleared by reset or CntReset, advances by one when CntEn is high
+  flopenr #(LOGWPL+1)
+  FetchCountReg(.clk(clk),
+                .reset(reset | CntReset),
+                .en(CntEn),
+                .d(NextFetchCount),
+                .q(FetchCount));
+
+  assign NextFetchCount = FetchCount + 1'b1;
+
+  // This part is confusing.
+  // we need to remove the offset bits (PCPTrunkF). Because the AHB interface is XLEN wide
+  // we need to address on that number of bits so the PC is extended to the right by AHBByteLength with zeros.
+  // fetch count is already aligned to AHBByteLength, but we need to extend back to the full address width with
+  // more zeros after the addition. This will be the number of offset bits less the AHBByteLength.
+  // *** now a bug need to mux between PCPF and PCPF+2
+  assign InstrPAdrF = {{PCPTrunkF, {{LOGWPL}{1'b0}}} + FetchCount, {{OFFSETWIDTH-LOGWPL}{1'b0}}};
+
+
+  // store read data from memory interface before writing into SRAM.
+  // one XLEN-wide register per beat; register i captures InstrInF on the ack of beat i
+  genvar i;
+  generate
+    for (i = 0; i < WORDSPERLINE; i++) begin
+      flopenr #(`XLEN) flop(.clk(clk),
+                            .reset(reset),
+                            .en(InstrAckF & (i == FetchCount)),
+                            .d(InstrInF),
+                            .q(ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]));
+    end
+  endgenerate
+
+  // what address is used to write the SRAM?
+
+
+  // spills require storing the first cache block so it can be merged
+  // with the second
+  // can optimize size, for now just make it the size of the data
+  // leaving the cache memory.
+  flopenr #(16) SpillInstrReg(.clk(clk),
+                              .en(spillSave),
+                              .reset(reset),
+                              .d(ICacheMemReadData[15:0]),
+                              .q(SpillDataBlock0));
+
+  // use the not quite final PC to do the final selection.
+  // Registered copy of bit 1 of PCPreFinalF, updated whenever the fetch stage is not stalled.
+  // NOTE(review): no reader of PCPreFinalF_q is visible in this part of the module - confirm
+  // whether it is still needed.
+  logic [1:1] PCPreFinalF_q;
+  flopenr #(1) PCFReg(.clk(clk),
+                      .reset(reset),
+                      .en(~StallF),
+                      .d(PCPreFinalF[1]),
+                      .q(PCPreFinalF_q[1]));
+  // On a spill the instruction is assembled from the low halfword of the current read
+  // (upper 16 bits) and the halfword saved in SpillDataBlock0 (lower 16 bits); otherwise
+  // the read data is used directly.
+  // NOTE(review): the select is `spill`, not the FSM's UnalignedSelect output - confirm intent.
+  assign FinalInstrRawF = spill ? {ICacheMemReadData[15:0], SpillDataBlock0} : ICacheMemReadData;
+
+  // There is a frustrating issue on the first access.
+  // The cache will not contain any valid data but will contain x's on
+  // reset. This makes FinalInstrRawF invalid. On the first cycle out of
+  // reset this register will pick up this x and it will propagate throughout
+  // the cpu causing simulation failure, most likely a trap for invalid instruction.
+  // Reset must be held 1 cycle longer to prevent this issue. Additionally the
+  // reset should be to a NOP rather than 0.
+
+  // register reset
+  flop #(1) resetReg (.clk(clk),
+                      .d(reset),
+                      .q(reset_q));
+
+  // decode-stage instruction register: enabled by ~StallD, with NOP supplied as its load
+  // value and reset | reset_q as its load control (this is the one-cycle-longer reset
+  // described above)
+  flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FinalInstrRawF, NOP, AlignedInstrRawD);
+  // cannot have this mux as it creates a combo loop.
   // This flop doesn't stall if StallF is high because we should output a nop
   // when FlushD happens, even if the pipeline is also stalled.
flopr #(1) flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen); + mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCyclen, InstrRawD); + //assign InstrRawD = AlignedInstrRawD; + + + assign {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr} = PCPFinalF; - flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF & ~CompressedF, DelayD); - flopenrc#(1) delayStateFlop(clk, reset, FlushD, ~StallF, DelayF & ~DelaySideF, DelaySideF); - // This flop stores the first half of a misaligned instruction while waiting for the other half - flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD); + assign ICacheMemWritePAdr = PCPFinalF; - // This flop is here to simulate pulling data out of the cache, which is edge-triggered - flopenr #(32) instrFlop(clk, reset, ~StallF, InstrF, AlignedInstrD); - - // These flops cache the previous read, to accelerate things - flopenr #(`XLEN) lastReadDataFlop(clk, reset, InstrReadF & ~StallF, InstrInF, LastReadDataF); - flopenr #(1) lastReadDataVFlop(clk, reset, InstrReadF & ~StallF, 1'b1, LastReadDataValidF); - flopenr #(`XLEN) lastReadAdrFlop(clk, reset, InstrReadF & ~StallF, InstrPAdrF, LastReadAdrF); - - // Decide which address needs to be fetched and sent out over InstrPAdrF - // If the requested address fits inside one read from memory, we fetch that - // address, adjusted to the bit width. Otherwise, we request the lower word - // and then the upper word, in that order. - generate - if (`XLEN == 32) begin - assign InstrPAdrF = PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[31:2], 2'b00} : {PCPF[31:2], 2'b00}) : PCPF; - end else begin - assign InstrPAdrF = PCPF[2] ? (PCPF[1] ? ((DelaySideF & ~CompressedF) ? 
{PCPF[63:3]+1, 3'b000} : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}; - end - endgenerate - - // Read from memory if we don't have the address we want - always_comb if (LastReadDataValidF & (InstrPAdrF == LastReadAdrF)) begin - InstrReadF = 0; - end else begin - InstrReadF = 1; - end - - // Pick from the memory input or from the previous read, as appropriate - mux2 #(`XLEN) inDataMux(LastReadDataF, InstrInF, InstrReadF, InDataF); - - // If the instruction fits in one memory read, then we put the right bits - // into InstrF. Otherwise, we activate DelayF to signal the rest of the - // machinery to swizzle bits. - generate - if (`XLEN == 32) begin - assign InstrF = PCPF[1] ? {16'b0, InDataF[31:16]} : InDataF; - assign DelayF = PCPF[1]; - assign MisalignedHalfInstrF = InDataF[31:16]; - end else begin - assign InstrF = PCPF[2] ? (PCPF[1] ? {16'b0, InDataF[63:48]} : InDataF[63:32]) : (PCPF[1] ? InDataF[47:16] : InDataF[31:0]); - assign DelayF = PCPF[1] && PCPF[2]; - assign MisalignedHalfInstrF = InDataF[63:48]; - end - endgenerate - // We will likely need to stall later, but stalls are handled by the rest of the pipeline for now - assign ICacheStallF = 0; - - // Detect if the instruction is compressed - assign CompressedF = InstrF[1:0] != 2'b11; - - // Pick the correct output, depending on whether we have to assemble this - // instruction from two reads or not. - // Output the requested instruction (we don't need to worry if the read is - // incomplete, since the pipeline stalls for us when it isn't), or a NOP for - // the cycle when the first of two reads comes in. 
- always_comb if (~FlushDLastCyclen) begin - InstrDMuxChoice = 2'b10; - end else if (DelayD & (MisalignedHalfInstrD[1:0] != 2'b11)) begin - InstrDMuxChoice = 2'b11; - end else begin - InstrDMuxChoice = {1'b0, DelayD}; - end - mux4 #(32) instrDMux (AlignedInstrD, {InstrInF[15:0], MisalignedHalfInstrD}, nop, {16'b0, MisalignedHalfInstrD}, InstrDMuxChoice, InstrRawD); + + endmodule diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 388352d2e..22af97ac3 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -32,6 +32,7 @@ module ifu ( input logic FlushF, FlushD, FlushE, FlushM, FlushW, // Fetch input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] InstrPAdrF, output logic InstrReadF, @@ -72,10 +73,11 @@ module ifu ( logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM; logic PrivilegedChangePCM; logic IllegalCompInstrD; - logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM, PCPF; + logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM, PCNextPF; logic CompressedF; logic [31:0] InstrRawD, InstrE, InstrW; localparam [31:0] nop = 32'h00000013; // instruction for NOP + logic reset_q; // *** look at this later. 
tlb #(.ENTRY_BITS(3), .ITLB(1)) itlb(.TLBAccessType(2'b10), .VirtualAddress(PCF), .PageTableEntryWrite(PageTableEntryF), .PageTypeWrite(PageTypeF), @@ -86,7 +88,7 @@ module ifu ( // branch predictor signals logic SelBPPredF; - logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F; + logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F, PCNext2F, PCNext3F; logic [3:0] InstrClassD, InstrClassE; @@ -96,10 +98,11 @@ module ifu ( // assign InstrReadF = 1; // *** & ICacheMissF; add later // jarred 2021-03-14 Add instrution cache block to remove rd2 - icache ic( + assign PCNextPF = PCNextF; // Temporary workaround until iTLB is live + icache icache( .*, - .UpperPCPF(PCPF[`XLEN-1:12]), - .LowerPCF(PCF[11:0]) + .UpperPCNextPF(PCNextPF[`XLEN-1:12]), + .LowerPCNextF(PCNextPF[11:0]) ); assign PrivilegedChangePCM = RetM | TrapM; @@ -118,7 +121,26 @@ module ifu ( mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM), - .y(UnalignedPCNextF)); + .y(PCNext2F)); + + // *** try to remove this in the future as it can add a long path. + // StallF may arrive late. +/* -----\/----- EXCLUDED -----\/----- + mux2 #(`XLEN) pcmux3(.d0(PCNext2F), + .d1(PCF), + .s(StallF), + .y(PCNext3F)); + -----/\----- EXCLUDED -----/\----- */ + + mux2 #(`XLEN) pcmux4(.d0(PCNext2F), + .d1(`RESET_VECTOR), + .s(reset_q), + .y(UnalignedPCNextF)); + + flop #(1) resetReg (.clk(clk), + .d(reset), + .q(reset_q)); + assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 
21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF); diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 46803ff0b..69275e9c7 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -130,8 +130,8 @@ module wallypipelinedhart ( logic [`XLEN-1:0] InstrPAdrF; logic [`XLEN-1:0] InstrRData; logic InstrReadF; - logic DataStall, InstrStall; - logic InstrAckD, MemAckW; + logic DataStall; + logic InstrAckF, MemAckW; logic BPPredWrongE, BPPredWrongM; logic [3:0] InstrClassM; diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index cc4bcccb7..334dca78c 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -447,9 +447,10 @@ module testbench(); // Track names of instructions instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, - dut.hart.ifu.ic.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, - dut.hart.ifu.InstrM, InstrW, InstrFName, InstrDName, - InstrEName, InstrMName, InstrWName); + dut.hart.ifu.icache.controller.FinalInstrRawF, + dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, + dut.hart.ifu.InstrM, dut.hart.ifu.InstrW, + InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); // initialize tests initial