diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 5ba353aab..6023519de 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -25,6 +25,7 @@ add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePen add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/RetM add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/TrapM add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/ICacheStallF add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/DataStall add wave -noupdate -group HDU -group hazards /testbench/dut/hart/MulDivStallD add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF @@ -37,25 +38,53 @@ add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/ add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW -add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPPredF -add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBValidF -add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPInstrClassF -add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBPredPCF -add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/RASPCF -add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePC -add wave -noupdate -expand -group Bpred 
-expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdateEN -add wave -noupdate -expand -group Bpred -expand -group update -expand -group dir /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/UpdatePrediction -add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateEN -add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdatePC -add wave -noupdate -expand -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateTarget -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/TargetWrongE -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/FallThroughWrongE -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionPCWrongE -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/InstrClassE -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionInstrClassWrongE -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredClassNonCFIWrongE -add wave -noupdate -expand -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE -add wave -noupdate -expand -group Bpred /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE +add wave -noupdate -group Bpred -color Orange /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHR +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} 
{/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]} +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPInstrClassE[0]} +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPPredDirWrongE +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} -divider {class check} +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPClassRightNonCFI +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPClassWrongCFI +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPClassWrongNonCFI +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPClassRightBPRight +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPClassRightBPWrong +add wave -noupdate -group Bpred -radix hexadecimal -childformat {{{/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[6]} -radix binary} {{/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[5]} -radix binary} {{/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[4]} -radix binary} {{/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[3]} -radix binary} {{/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[2]} -radix binary} {{/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[1]} -radix binary} {{/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[0]} -radix binary}} 
-subitemconfig {{/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[6]} {-height 16 -radix binary} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[5]} {-height 16 -radix binary} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[4]} {-height 16 -radix binary} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[3]} {-height 16 -radix binary} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[2]} {-height 16 -radix binary} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[1]} {-height 16 -radix binary} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[0]} {-height 16 -radix binary}} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel +add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRNext +add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRUpdateEN +add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr +add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr0 +add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr1 +add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateEN +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHRLookup +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/PCNextF +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/PHT/RA1 +add wave -noupdate -group Bpred -expand -group prediction -radix binary /testbench/dut/hart/ifu/bpred/bpred/BPPredF +add wave -noupdate -group Bpred -expand -group prediction 
/testbench/dut/hart/ifu/bpred/bpred/BTBValidF +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BPInstrClassF +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/BTBPredPCF +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/RASPCF +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/LookUpPCIndex +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/TargetPC +add wave -noupdate -group Bpred -expand -group prediction -expand -group ex -radix binary /testbench/dut/hart/ifu/bpred/bpred/BPPredE +add wave -noupdate -group Bpred -expand -group prediction -expand -group ex /testbench/dut/hart/ifu/bpred/bpred/PCSrcE +add wave -noupdate -group Bpred -expand -group prediction -expand -group ex /testbench/dut/hart/ifu/bpred/bpred/BPPredDirWrongE +add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdatePCIndex +add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateTarget +add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateEN +add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdatePC +add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/hart/ifu/bpred/bpred/TargetPredictor/UpdateTarget +add wave -noupdate -group Bpred -expand -group update -expand -group direction /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr +add wave -noupdate -group Bpred -expand -group update -expand -group direction /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/PCE +add wave -noupdate 
-group Bpred -expand -group update -expand -group direction /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/PHT/WA1 +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/TargetWrongE +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/FallThroughWrongE +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionPCWrongE +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/InstrClassE +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/PredictionInstrClassWrongE +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredClassNonCFIWrongE +add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE +add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE add wave -noupdate -group {instruction pipeline} /testbench/InstrFName add wave -noupdate -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD add wave -noupdate -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE @@ -130,13 +159,12 @@ add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/Write add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/ALUResultE add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/SrcAE add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/SrcBE -add wave -noupdate /testbench/dut/hart/ieu/dp/ALUResultM -add wave -noupdate -expand -group PCS /testbench/dut/hart/ifu/PCNextF -add wave -noupdate -expand -group PCS /testbench/dut/hart/PCF -add wave -noupdate -expand -group PCS /testbench/dut/hart/ifu/PCD -add wave -noupdate -expand -group PCS /testbench/dut/hart/PCE -add wave -noupdate -expand -group PCS /testbench/dut/hart/PCM -add wave -noupdate -expand 
-group PCS /testbench/PCW +add wave -noupdate -group PCS /testbench/dut/hart/ifu/PCNextF +add wave -noupdate -group PCS /testbench/dut/hart/PCF +add wave -noupdate -group PCS /testbench/dut/hart/ifu/PCD +add wave -noupdate -group PCS /testbench/dut/hart/PCE +add wave -noupdate -group PCS /testbench/dut/hart/PCM +add wave -noupdate -group PCS /testbench/PCW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/InstrD add wave -noupdate -group muldiv /testbench/dut/hart/mdu/SrcAE add wave -noupdate -group muldiv /testbench/dut/hart/mdu/SrcBE @@ -156,48 +184,47 @@ add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/D add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/Q add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/rem0 -add wave -noupdate -expand -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/controller/NextState -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux -add wave -noupdate 
-expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn -add wave -noupdate -expand -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/AHBByteLength -add wave -noupdate -expand -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/AHBOFFETWIDTH -add wave -noupdate -expand -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/BlockByteLength -add wave -noupdate -expand -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/OFFSETWIDTH -add wave -noupdate -expand -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/WORDSPERLINE -add wave -noupdate -expand -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/LOGWPL -add wave -noupdate -expand -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/LINESIZE -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF 
-add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData -add wave -noupdate -expand -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit -add wave -noupdate -expand -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValid -add wave -noupdate -expand -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataTag -add wave -noupdate -expand -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData -add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable -add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine -add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FlushDLastCyclen -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD -add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF -add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF -add wave -noupdate -expand -group icache -expand -group pc 
/testbench/dut/hart/ifu/icache/controller/PCPFinalF +add wave -noupdate -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/controller/NextState +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn +add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/AHBByteLength +add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/AHBOFFETWIDTH +add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/BlockByteLength +add wave -noupdate -group icache -group parameters 
/testbench/dut/hart/ifu/icache/controller/OFFSETWIDTH +add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/WORDSPERLINE +add wave -noupdate -group icache -group parameters /testbench/dut/hart/ifu/icache/controller/LOGWPL +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValid +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataTag +add wave -noupdate -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine +add wave -noupdate -group icache -expand -group memory -group {tag write} 
/testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData +add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF +add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD +add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FlushDLastCyclen +add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD +add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF +add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF +add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPFinalF add wave -noupdate -group AHB /testbench/dut/hart/ebu/BusState add wave -noupdate -group AHB /testbench/dut/hart/ebu/HCLK add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATA @@ -214,10 +241,12 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED -add wave -noupdate /testbench/dut/hart/ifu/icache/PCTagF +add wave -noupdate -group csr -color Aquamarine -label {br executed} -radix unsigned {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} +add wave -noupdate -group csr -color Aquamarine -label {br miss predicted} -radix unsigned {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} +add wave -noupdate -group csr -childformat {{{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} -radix unsigned} {{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} -radix unsigned}} -subitemconfig 
{{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} {-height 16 -radix unsigned} {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} {-height 16 -radix unsigned}} /testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 2} {252630976 ns} 1} {{Cursor 4} {72696935 ns} 0} {{Cursor 3} {252631405 ns} 0} -quietly wave cursor active 1 +WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {2172501 ns} 0} +quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 configure wave -justifyvalue left @@ -232,4 +261,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {252630882 ns} {252631198 ns} +WaveRestoreZoom {0 ns} {391986 ns} diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv new file mode 100644 index 000000000..8bf19bd64 --- /dev/null +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -0,0 +1,471 @@ +/////////////////////////////////////////// +// icache.sv +// +// Written: ross1728@gmail.com June 04, 2021 +// Modified: +// +// Purpose: I Cache controller +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +module ICacheCntrl #(parameter BLOCKLEN = 256) ( + // Inputs from pipeline + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, + + // Input the address to read + // The upper bits of the physical pc + input logic [`XLEN-1:0] PCNextF, + input logic [`XLEN-1:0] PCPF, + // Signals to/from cache memory + // The read coming out of it + input logic [31:0] ICacheMemReadData, + input logic ICacheMemReadValid, + // The address at which we want to search the cache memory + output logic [`XLEN-1:0] PCTagF, + output logic [`XLEN-1:0] PCNextIndexF, + output logic ICacheReadEn, + // Load data into the cache + output logic ICacheMemWriteEnable, + output logic [BLOCKLEN-1:0] ICacheMemWriteData, + + // Outputs to rest of ifu + // High if the instruction in the fetch stage is compressed + output logic CompressedF, + // The instruction that was requested + // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros + output logic [31:0] InstrRawD, + + // Outputs to pipeline control stuff + output logic ICacheStallF, EndFetchState, + + // Signals to/from ahblite interface + // A read containing the requested data + input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, + // The read we request from main memory + output logic [`XLEN-1:0] InstrPAdrF, + output logic InstrReadF +); + + // FSM states + localparam STATE_READY = 0; + localparam STATE_HIT_SPILL = 1; // spill, block 0 
hit + localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 2; // block 1 miss, issue read to AHB and wait for data. + localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 3; // write data into SRAM/LUT + localparam STATE_HIT_SPILL_MERGE = 4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL. + + // a challenge is the spill signal gets us out of the ready state and moves us to + // 1 of the 2 spill branches. However the original fsm design had us return to + // the ready state when the spill + hits/misses were fully resolved. The problem + // is the spill signal is based on PCPF so when we return to READY to check if the + // cache has a hit it still expresses spill. We can fix in 1 of two ways. + // 1. we can add 1 extra state at the end of each spill branch to return the instruction + // to the CPU advancing the CPU and icache to the next instruction. + // 2. We can assert a signal which is delayed 1 cycle to suppress the spill when we get + // to the READY state. + // The first option is more robust and increases the number of states by 2. The + // second option seems like it should work, but I worry there is a hidden interaction + // between CPU stalling and that register. + // Picking option 1. + + localparam STATE_HIT_SPILL_FINAL = 5; // this state replicates STATE_READY's replay of the + // spill access but does not consider spill. It also does not do another operation. + + + localparam STATE_MISS_FETCH_WDV = 6; // aligned miss, issue read to AHB and wait for data. + localparam STATE_MISS_FETCH_DONE = 7; // write data into SRAM/LUT + localparam STATE_MISS_READ = 8; // read block 1 from SRAM/LUT + + localparam STATE_MISS_SPILL_FETCH_WDV = 9; // spill, miss on block 0, issue read to AHB and wait + localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT + localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT + localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update. 
+ localparam STATE_MISS_SPILL_2_START = 13; // return to ready if hit or do second block update. + localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 14; // miss on block 1, issue read to AHB and wait + localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 15; // write data to SRAM/LUT + localparam STATE_MISS_SPILL_MERGE = 16; // read block 0 of CPU access, + + localparam STATE_MISS_SPILL_FINAL = 17; // this state replicates STATE_READY's replay of the + // spill access but does not consider spill. It also does not do another operation. + + + localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address? + + localparam AHBByteLength = `XLEN / 8; + localparam AHBOFFETWIDTH = $clog2(AHBByteLength); + + + localparam BlockByteLength = BLOCKLEN / 8; + localparam OFFSETWIDTH = $clog2(BlockByteLength); + + localparam WORDSPERLINE = BLOCKLEN/`XLEN; + localparam LOGWPL = $clog2(WORDSPERLINE); + + logic [4:0] CurrState, NextState; + logic hit, spill; + logic SavePC; + logic [1:0] PCMux; + logic CntReset; + logic PreCntEn, CntEn; + logic spillSave; + logic UnalignedSelect; + logic FetchCountFlag; + localparam FetchCountThreshold = WORDSPERLINE - 1; + + logic [LOGWPL:0] FetchCount, NextFetchCount; + + logic [`XLEN-1:0] PCPreFinalF, PCPFinalF, PCSpillF; + logic [`XLEN-1:OFFSETWIDTH] PCPTrunkF; + + + logic [31:0] FinalInstrRawF; + + logic [15:0] SpillDataBlock0; + + logic FlushDLastCyclen; + + // Happy path signals + logic [31:0] AlignedInstrRawD; + + //logic [31:0] AlignedInstrRawF, AlignedInstrRawD; + //logic FlushDLastCycleN; + //logic PCPMisalignedF; + localparam [31:0] NOP = 32'h13; + //logic [`XLEN-1:0] PCPF; + + logic reset_q; + logic [1:0] PCMux_q; + + + // Misaligned signals + //logic [`XLEN:0] MisalignedInstrRawF; + //logic MisalignedStall; + // Cache fault signals + //logic FaultStall; + + // on spill we want to get the first 2 bytes of the next cache block. + // the spill only occurs if the PCPF mod BlockByteLength == -2. 
Therefore we can + // simply add 2 to land on the next cache block. + assign PCSpillF = PCPF + 2'b10; + + // now we have to select between these three PCs + assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextF; // *** don't like the stallf, but it is necessary + assign PCPFinalF = PCMux[1] ? PCSpillF : PCPreFinalF; + + // this mux needs to be delayed 1 cycle as it occurs 1 pipeline stage later. + // *** read enable may not be necessary. + flopenr #(2) PCMuxReg(.clk(clk), + .reset(reset), + .en(ICacheReadEn), + .d(PCMux), + .q(PCMux_q)); + + assign PCTagF = PCMux_q[1] ? PCSpillF : PCPF; + assign PCNextIndexF = PCPFinalF; + + // truncate the offset from PCPF for memory address generation + assign PCPTrunkF = PCTagF[`XLEN-1:OFFSETWIDTH]; + + // Detect if the instruction is compressed + assign CompressedF = FinalInstrRawF[1:0] != 2'b11; + + + // the FSM is always running, do not stall. + flopr #(5) stateReg(.clk(clk), + .reset(reset), + .d(NextState), + .q(CurrState)); + + assign spill = PCPF[4:1] == 4'b1111 ? 1'b1 : 1'b0; + assign hit = ICacheMemReadValid; // note ICacheMemReadValid is hit. 
+ assign FetchCountFlag = FetchCount == FetchCountThreshold; + + // Next state logic + always_comb begin + UnalignedSelect = 1'b0; + CntReset = 1'b0; + PreCntEn = 1'b0; + //InstrReadF = 1'b0; + ICacheMemWriteEnable = 1'b0; + spillSave = 1'b0; + PCMux = 2'b00; + ICacheReadEn = 1'b0; + SavePC = 1'b0; + ICacheStallF = 1'b1; + + case (CurrState) + + STATE_READY: begin + PCMux = 2'b00; + ICacheReadEn = 1'b1; + if (hit & ~spill) begin + SavePC = 1'b1; + ICacheStallF = 1'b0; + NextState = STATE_READY; + end else if (hit & spill) begin + spillSave = 1'b1; + PCMux = 2'b10; + NextState = STATE_HIT_SPILL; + end else if (~hit & ~spill) begin + CntReset = 1'b1; + NextState = STATE_MISS_FETCH_WDV; + end else if (~hit & spill) begin + CntReset = 1'b1; + PCMux = 2'b01; + NextState = STATE_MISS_SPILL_FETCH_WDV; + end else begin + NextState = STATE_READY; + end + end + + // branch 1, hit spill and 2, miss spill hit + STATE_HIT_SPILL: begin + PCMux = 2'b10; + UnalignedSelect = 1'b1; + ICacheReadEn = 1'b1; + if (hit) begin + NextState = STATE_HIT_SPILL_FINAL; + end else begin + CntReset = 1'b1; + NextState = STATE_HIT_SPILL_MISS_FETCH_WDV; + end + end + STATE_HIT_SPILL_MISS_FETCH_WDV: begin + PCMux = 2'b10; + //InstrReadF = 1'b1; + PreCntEn = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_HIT_SPILL_MISS_FETCH_DONE; + end else begin + NextState = STATE_HIT_SPILL_MISS_FETCH_WDV; + end + end + STATE_HIT_SPILL_MISS_FETCH_DONE: begin + PCMux = 2'b10; + ICacheMemWriteEnable = 1'b1; + NextState = STATE_HIT_SPILL_MERGE; + end + STATE_HIT_SPILL_MERGE: begin + PCMux = 2'b10; + UnalignedSelect = 1'b1; + ICacheReadEn = 1'b1; + NextState = STATE_HIT_SPILL_FINAL; + end + STATE_HIT_SPILL_FINAL: begin + ICacheReadEn = 1'b1; + PCMux = 2'b00; + UnalignedSelect = 1'b1; + SavePC = 1'b1; + NextState = STATE_READY; + ICacheStallF = 1'b0; + end + + // branch 3 miss no spill + STATE_MISS_FETCH_WDV: begin + PCMux = 2'b01; + //InstrReadF = 1'b1; + PreCntEn = 1'b1; + if (FetchCountFlag & 
InstrAckF) begin + NextState = STATE_MISS_FETCH_DONE; + end else begin + NextState = STATE_MISS_FETCH_WDV; + end + end + STATE_MISS_FETCH_DONE: begin + PCMux = 2'b01; + ICacheMemWriteEnable = 1'b1; + NextState = STATE_MISS_READ; + end + STATE_MISS_READ: begin + PCMux = 2'b01; + ICacheReadEn = 1'b1; + NextState = STATE_READY; + end + + // branch 4 miss spill hit, and 5 miss spill miss + STATE_MISS_SPILL_FETCH_WDV: begin + PCMux = 2'b01; + PreCntEn = 1'b1; + //InstrReadF = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_MISS_SPILL_FETCH_DONE; + end else begin + NextState = STATE_MISS_SPILL_FETCH_WDV; + end + end + STATE_MISS_SPILL_FETCH_DONE: begin + PCMux = 2'b01; + ICacheMemWriteEnable = 1'b1; + NextState = STATE_MISS_SPILL_READ1; + end + STATE_MISS_SPILL_READ1: begin // always be a hit as we just wrote that cache block. + PCMux = 2'b01; // there is a 1 cycle delay after setting the address before the date arrives. + ICacheReadEn = 1'b1; + NextState = STATE_MISS_SPILL_2; + end + STATE_MISS_SPILL_2: begin + PCMux = 2'b10; + UnalignedSelect = 1'b1; + spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm. 
+ ICacheReadEn = 1'b1; + NextState = STATE_MISS_SPILL_2_START; + end + STATE_MISS_SPILL_2_START: begin + if (~hit) begin + CntReset = 1'b1; + NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; + end else begin + NextState = STATE_READY; + ICacheReadEn = 1'b1; + PCMux = 2'b00; + UnalignedSelect = 1'b1; + SavePC = 1'b1; + ICacheStallF = 1'b0; + end + end + STATE_MISS_SPILL_MISS_FETCH_WDV: begin + PCMux = 2'b10; + PreCntEn = 1'b1; + //InstrReadF = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_MISS_SPILL_MISS_FETCH_DONE; + end else begin + NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; + end + end + STATE_MISS_SPILL_MISS_FETCH_DONE: begin + PCMux = 2'b10; + ICacheMemWriteEnable = 1'b1; + NextState = STATE_MISS_SPILL_MERGE; + end + STATE_MISS_SPILL_MERGE: begin + PCMux = 2'b10; + UnalignedSelect = 1'b1; + ICacheReadEn = 1'b1; + NextState = STATE_MISS_SPILL_FINAL; + end + STATE_MISS_SPILL_FINAL: begin + ICacheReadEn = 1'b1; + PCMux = 2'b00; + UnalignedSelect = 1'b1; + SavePC = 1'b1; + ICacheStallF = 1'b0; + NextState = STATE_READY; + end + default: begin + PCMux = 2'b01; + NextState = STATE_READY; + end + // *** add in error handling and invalidate/evict + endcase + end + + assign CntEn = PreCntEn & InstrAckF; + assign InstrReadF = (CurrState == STATE_HIT_SPILL_MISS_FETCH_WDV) || + (CurrState == STATE_MISS_FETCH_WDV) || + (CurrState == STATE_MISS_SPILL_FETCH_WDV) || + (CurrState == STATE_MISS_SPILL_MISS_FETCH_WDV); + + // to compute the fetch address we need to add the bit shifted + // counter output to the address. + + flopenr #(LOGWPL+1) + FetchCountReg(.clk(clk), + .reset(reset | CntReset), + .en(CntEn), + .d(NextFetchCount), + .q(FetchCount)); + + assign NextFetchCount = FetchCount + 1'b1; + + // This part is confusing. + // we need to remove the offset bits (PCPTrunkF). Because the AHB interface is XLEN wide + // we need to address on that number of bits so the PC is extended to the right by AHBByteLength with zeros. 
+ // fetch count is already aligned to AHBByteLength, but we need to extend back to the full address width with + // more zeros after the addition. This will be the number of offset bits less the AHBByteLength. + logic [`XLEN-1:OFFSETWIDTH-LOGWPL] PCPTrunkExtF, InstrPAdrTrunkF ; + + assign PCPTrunkExtF = {PCPTrunkF, {{LOGWPL}{1'b0}}}; + assign InstrPAdrTrunkF = PCPTrunkExtF + FetchCount; + + //assign InstrPAdrF = {{PCPTrunkF, {{LOGWPL}{1'b0}}} + FetchCount, {{OFFSETWIDTH-LOGWPL}{1'b0}}}; + assign InstrPAdrF = {InstrPAdrTrunkF, {{OFFSETWIDTH-LOGWPL}{1'b0}}}; + + + + // store read data from memory interface before writing into SRAM. + genvar i; + generate + for (i = 0; i < WORDSPERLINE; i++) begin + flopenr #(`XLEN) flop(.clk(clk), + .reset(reset), + .en(InstrAckF & (i == FetchCount)), + .d(InstrInF), + .q(ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN])); + end + endgenerate + + // what address is used to write the SRAM? + + + // spills require storing the first cache block so it can merged + // with the second + // can optimize size, for now just make it the size of the data + // leaving the cache memory. + flopenr #(16) SpillInstrReg(.clk(clk), + .en(spillSave), + .reset(reset), + .d(ICacheMemReadData[15:0]), + .q(SpillDataBlock0)); + + // use the not quite final PC to do the final selection. + logic [1:1] PCPreFinalF_q; + flopenr #(1) PCFReg(.clk(clk), + .reset(reset), + .en(~StallF), + .d(PCPreFinalF[1]), + .q(PCPreFinalF_q[1])); + assign FinalInstrRawF = spill ? {ICacheMemReadData[15:0], SpillDataBlock0} : ICacheMemReadData; + + // There is a frustrating issue on the first access. + // The cache will not contain any valid data but will contain x's on + // reset. This makes FinalInstrRawF invalid. On the first cycle out of + // reset this register will pickup this x and it will propagate throughout + // the cpu causing simulation failure, most likely a trap for invalid instruction. + // Reset must be held 1 cycle longer to prevent this issue. 
additionally the + // reset should be to a NOP rather than 0. + + // register reset + flop #(1) resetReg (.clk(clk), + .d(reset), + .q(reset_q)); + + flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FinalInstrRawF, NOP, AlignedInstrRawD); + // cannot have this mux as it creates a combo loop. + // This flop doesn't stall if StallF is high because we should output a nop + // when FlushD happens, even if the pipeline is also stalled. + flopr #(1) flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen); + mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCyclen, InstrRawD); + //assign InstrRawD = AlignedInstrRawD; + + +endmodule diff --git a/wally-pipelined/src/cache/icacheMem.sv b/wally-pipelined/src/cache/ICacheMem.sv similarity index 83% rename from wally-pipelined/src/cache/icacheMem.sv rename to wally-pipelined/src/cache/ICacheMem.sv index 9c1321dd9..d3e876cfc 100644 --- a/wally-pipelined/src/cache/icacheMem.sv +++ b/wally-pipelined/src/cache/ICacheMem.sv @@ -1,6 +1,6 @@ `include "wally-config.vh" -module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, parameter WORDSIZE = `XLEN) ( +module ICacheMem #(parameter NUMLINES=512, parameter BLOCKLEN = 256) ( // Pipeline stuff input logic clk, input logic reset, @@ -12,15 +12,15 @@ module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, p input logic [`XLEN-1:0] PCNextIndexF, // Write new data to the cache input logic WriteEnable, - input logic [LINESIZE-1:0] WriteLine, + input logic [BLOCKLEN-1:0] WriteLine, // Output the word, as well as if it is valid - output logic [31:0] DataWord, // *** was WORDSIZE-1 + output logic [31:0] DataWord, // *** was `XLEN-1 output logic DataValid ); // Various compile-time constants - localparam integer WORDWIDTH = $clog2(WORDSIZE/8); - localparam integer OFFSETWIDTH = $clog2(LINESIZE/WORDSIZE); + localparam integer WORDWIDTH = $clog2(`XLEN/8); + localparam integer OFFSETWIDTH = 
$clog2(BLOCKLEN/`XLEN); localparam integer SETWIDTH = $clog2(NUMLINES); localparam integer TAGWIDTH = `XLEN - OFFSETWIDTH - SETWIDTH - WORDWIDTH; @@ -32,8 +32,8 @@ module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, p localparam integer TAGEND = TAGBEGIN + TAGWIDTH - 1; // Machinery to read from and write to the correct addresses in memory - logic [LINESIZE-1:0] ReadLine; - logic [LINESIZE/WORDSIZE-1:0][WORDSIZE-1:0] ReadLineTransformed; + logic [BLOCKLEN-1:0] ReadLine; + logic [BLOCKLEN/`XLEN-1:0][`XLEN-1:0] ReadLineTransformed; // Machinery to check if a given read is valid and is the desired value logic [TAGWIDTH-1:0] DataTag; @@ -41,7 +41,7 @@ module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, p logic DataValidBit; // Depth is number of bits in one "word" of the memory, width is number of such words - sram1rw #(.DEPTH(LINESIZE), .WIDTH(NUMLINES)) cachemem ( + sram1rw #(.DEPTH(BLOCKLEN), .WIDTH(NUMLINES)) cachemem ( .*, .Addr(PCNextIndexF[SETEND:SETBEGIN]), .ReadData(ReadLine), @@ -82,8 +82,8 @@ module rodirectmappedmemre #(parameter NUMLINES=512, parameter LINESIZE = 256, p end genvar i; generate - for (i=0; i < LINESIZE/WORDSIZE; i++) begin - assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; + for (i=0; i < BLOCKLEN/`XLEN; i++) begin + assign ReadLineTransformed[i] = ReadLine[(i+1)*`XLEN-1:i*`XLEN]; end endgenerate diff --git a/wally-pipelined/src/cache/icache.sv b/wally-pipelined/src/cache/icache.sv index 4f51edd79..3f226bb29 100644 --- a/wally-pipelined/src/cache/icache.sv +++ b/wally-pipelined/src/cache/icache.sv @@ -49,13 +49,13 @@ module icache( // Configuration parameters // TODO Move these to a config file - localparam integer ICACHELINESIZE = 256; - localparam integer ICACHENUMLINES = 512; + localparam integer BLOCKLEN = 256; + localparam integer NUMLINES = 512; // Input signals to cache memory logic FlushMem; logic ICacheMemWriteEnable; - logic [ICACHELINESIZE-1:0] 
ICacheMemWriteData; + logic [BLOCKLEN-1:0] ICacheMemWriteData; logic EndFetchState; logic [`XLEN-1:0] PCTagF, PCNextIndexF; // Output signals from cache memory @@ -63,7 +63,7 @@ module icache( logic ICacheMemReadValid; logic ICacheReadEn; - rodirectmappedmemre #(.LINESIZE(ICACHELINESIZE), .NUMLINES(ICACHENUMLINES), .WORDSIZE(`XLEN)) + ICacheMem #(.BLOCKLEN(BLOCKLEN), .NUMLINES(NUMLINES)) cachemem( .*, // Stall it if the pipeline is stalled, unless we're stalling it and we're ending our stall @@ -74,453 +74,9 @@ module icache( .DataValid(ICacheMemReadValid) ); - icachecontroller #(.LINESIZE(ICACHELINESIZE)) controller(.*); + ICacheCntrl #(.BLOCKLEN(BLOCKLEN)) controller(.*); // For now, assume no writes to executable memory assign FlushMem = 1'b0; endmodule -module icachecontroller #(parameter LINESIZE = 256) ( - // Inputs from pipeline - input logic clk, reset, - input logic StallF, StallD, - input logic FlushD, - - // Input the address to read - // The upper bits of the physical pc - input logic [`XLEN-1:0] PCNextF, - input logic [`XLEN-1:0] PCPF, - // Signals to/from cache memory - // The read coming out of it - input logic [31:0] ICacheMemReadData, - input logic ICacheMemReadValid, - // The address at which we want to search the cache memory - output logic [`XLEN-1:0] PCTagF, - output logic [`XLEN-1:0] PCNextIndexF, - output logic ICacheReadEn, - // Load data into the cache - output logic ICacheMemWriteEnable, - output logic [LINESIZE-1:0] ICacheMemWriteData, - - // Outputs to rest of ifu - // High if the instruction in the fetch stage is compressed - output logic CompressedF, - // The instruction that was requested - // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros - output logic [31:0] InstrRawD, - - // Outputs to pipeline control stuff - output logic ICacheStallF, EndFetchState, - - // Signals to/from ahblite interface - // A read containing the requested data - input logic [`XLEN-1:0] InstrInF, - input logic 
InstrAckF, - // The read we request from main memory - output logic [`XLEN-1:0] InstrPAdrF, - output logic InstrReadF -); - - // FSM states - localparam STATE_READY = 0; - localparam STATE_HIT_SPILL = 1; // spill, block 0 hit - localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 2; // block 1 miss, issue read to AHB and wait data. - localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 3; // write data into SRAM/LUT - localparam STATE_HIT_SPILL_MERGE = 4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL. - - // a challenge is the spill signal gets us out of the ready state and moves us to - // 1 of the 2 spill branches. However the original fsm design had us return to - // the ready state when the spill + hits/misses were fully resolved. The problem - // is the spill signal is based on PCPF so when we return to READY to check if the - // cache has a hit it still expresses spill. We can fix in 1 of two ways. - // 1. we can add 1 extra state at the end of each spill branch to returns the instruction - // to the CPU advancing the CPU and icache to the next instruction. - // 2. We can assert a signal which is delayed 1 cycle to suppress the spill when we get - // to the READY state. - // The first first option is more robust and increases the number of states by 2. The - // second option is seams like it should work, but I worry there is a hidden interaction - // between CPU stalling and that register. - // Picking option 1. - - localparam STATE_HIT_SPILL_FINAL = 5; // this state replicates STATE_READY's replay of the - // spill access but does nto consider spill. It also does not do another operation. - - - localparam STATE_MISS_FETCH_WDV = 6; // aligned miss, issue read to AHB and wait for data. 
- localparam STATE_MISS_FETCH_DONE = 7; // write data into SRAM/LUT - localparam STATE_MISS_READ = 8; // read block 1 from SRAM/LUT - - localparam STATE_MISS_SPILL_FETCH_WDV = 9; // spill, miss on block 0, issue read to AHB and wait - localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT - localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT - localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update. - localparam STATE_MISS_SPILL_2_START = 13; // return to ready if hit or do second block update. - localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 14; // miss on block 1, issue read to AHB and wait - localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 15; // write data to SRAM/LUT - localparam STATE_MISS_SPILL_MERGE = 16; // read block 0 of CPU access, - - localparam STATE_MISS_SPILL_FINAL = 17; // this state replicates STATE_READY's replay of the - // spill access but does nto consider spill. It also does not do another operation. - - - localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address? 
- - localparam AHBByteLength = `XLEN / 8; - localparam AHBOFFETWIDTH = $clog2(AHBByteLength); - - - localparam BlockByteLength = LINESIZE / 8; - localparam OFFSETWIDTH = $clog2(BlockByteLength); - - localparam WORDSPERLINE = LINESIZE/`XLEN; - localparam LOGWPL = $clog2(WORDSPERLINE); - - logic [4:0] CurrState, NextState; - logic hit, spill; - logic SavePC; - logic [1:0] PCMux; - logic CntReset; - logic PreCntEn, CntEn; - logic spillSave; - logic UnalignedSelect; - logic FetchCountFlag; - localparam FetchCountThreshold = WORDSPERLINE - 1; - - logic [LOGWPL:0] FetchCount, NextFetchCount; - - logic [`XLEN-1:0] PCPreFinalF, PCPFinalF, PCSpillF; - logic [`XLEN-1:OFFSETWIDTH] PCPTrunkF; - - - logic [31:0] FinalInstrRawF; - - logic [15:0] SpillDataBlock0; - - logic FlushDLastCyclen; - - // Happy path signals - logic [31:0] AlignedInstrRawD; - - //logic [31:0] AlignedInstrRawF, AlignedInstrRawD; - //logic FlushDLastCycleN; - //logic PCPMisalignedF; - localparam [31:0] NOP = 32'h13; - //logic [`XLEN-1:0] PCPF; - - logic reset_q; - logic [1:0] PCMux_q; - - - // Misaligned signals - //logic [`XLEN:0] MisalignedInstrRawF; - //logic MisalignedStall; - // Cache fault signals - //logic FaultStall; - - // on spill we want to get the first 2 bytes of the next cache block. - // the spill only occurs if the PCPF mod BlockByteLength == -2. Therefore we can - // simply add 2 to land on the next cache block. - assign PCSpillF = PCPF + 2'b10; - - // now we have to select between these three PCs - assign PCPreFinalF = PCMux[0] | StallF ? PCPF : PCNextF; // *** don't like the stallf, but it is necessary - assign PCPFinalF = PCMux[1] ? PCSpillF : PCPreFinalF; - - // this mux needs to be delayed 1 cycle as it occurs 1 pipeline stage later. - // *** read enable may not be necessary. - flopenr #(2) PCMuxReg(.clk(clk), - .reset(reset), - .en(ICacheReadEn), - .d(PCMux), - .q(PCMux_q)); - - assign PCTagF = PCMux_q[1] ? 
PCSpillF : PCPF; - assign PCNextIndexF = PCPFinalF; - - // truncate the offset from PCPF for memory address generation - assign PCPTrunkF = PCTagF[`XLEN-1:OFFSETWIDTH]; - - // Detect if the instruction is compressed - assign CompressedF = FinalInstrRawF[1:0] != 2'b11; - - - // the FSM is always runing, do not stall. - flopr #(5) stateReg(.clk(clk), - .reset(reset), - .d(NextState), - .q(CurrState)); - - assign spill = PCPF[4:1] == 4'b1111 ? 1'b1 : 1'b0; - assign hit = ICacheMemReadValid; // note ICacheMemReadValid is hit. - assign FetchCountFlag = FetchCount == FetchCountThreshold; - - // Next state logic - always_comb begin - UnalignedSelect = 1'b0; - CntReset = 1'b0; - PreCntEn = 1'b0; - //InstrReadF = 1'b0; - ICacheMemWriteEnable = 1'b0; - spillSave = 1'b0; - PCMux = 2'b00; - ICacheReadEn = 1'b0; - SavePC = 1'b0; - ICacheStallF = 1'b1; - - case (CurrState) - - STATE_READY: begin - PCMux = 2'b00; - ICacheReadEn = 1'b1; - if (hit & ~spill) begin - SavePC = 1'b1; - ICacheStallF = 1'b0; - NextState = STATE_READY; - end else if (hit & spill) begin - spillSave = 1'b1; - PCMux = 2'b10; - NextState = STATE_HIT_SPILL; - end else if (~hit & ~spill) begin - CntReset = 1'b1; - NextState = STATE_MISS_FETCH_WDV; - end else if (~hit & spill) begin - CntReset = 1'b1; - PCMux = 2'b01; - NextState = STATE_MISS_SPILL_FETCH_WDV; - end else begin - NextState = STATE_READY; - end - end - - // branch 1, hit spill and 2, miss spill hit - STATE_HIT_SPILL: begin - PCMux = 2'b10; - UnalignedSelect = 1'b1; - ICacheReadEn = 1'b1; - if (hit) begin - NextState = STATE_HIT_SPILL_FINAL; - end else begin - CntReset = 1'b1; - NextState = STATE_HIT_SPILL_MISS_FETCH_WDV; - end - end - STATE_HIT_SPILL_MISS_FETCH_WDV: begin - PCMux = 2'b10; - //InstrReadF = 1'b1; - PreCntEn = 1'b1; - if (FetchCountFlag & InstrAckF) begin - NextState = STATE_HIT_SPILL_MISS_FETCH_DONE; - end else begin - NextState = STATE_HIT_SPILL_MISS_FETCH_WDV; - end - end - STATE_HIT_SPILL_MISS_FETCH_DONE: begin - PCMux = 2'b10; - 
ICacheMemWriteEnable = 1'b1; - NextState = STATE_HIT_SPILL_MERGE; - end - STATE_HIT_SPILL_MERGE: begin - PCMux = 2'b10; - UnalignedSelect = 1'b1; - ICacheReadEn = 1'b1; - NextState = STATE_HIT_SPILL_FINAL; - end - STATE_HIT_SPILL_FINAL: begin - ICacheReadEn = 1'b1; - PCMux = 2'b00; - UnalignedSelect = 1'b1; - SavePC = 1'b1; - NextState = STATE_READY; - ICacheStallF = 1'b0; - end - - // branch 3 miss no spill - STATE_MISS_FETCH_WDV: begin - PCMux = 2'b01; - //InstrReadF = 1'b1; - PreCntEn = 1'b1; - if (FetchCountFlag & InstrAckF) begin - NextState = STATE_MISS_FETCH_DONE; - end else begin - NextState = STATE_MISS_FETCH_WDV; - end - end - STATE_MISS_FETCH_DONE: begin - PCMux = 2'b01; - ICacheMemWriteEnable = 1'b1; - NextState = STATE_MISS_READ; - end - STATE_MISS_READ: begin - PCMux = 2'b01; - ICacheReadEn = 1'b1; - NextState = STATE_READY; - end - - // branch 4 miss spill hit, and 5 miss spill miss - STATE_MISS_SPILL_FETCH_WDV: begin - PCMux = 2'b01; - PreCntEn = 1'b1; - //InstrReadF = 1'b1; - if (FetchCountFlag & InstrAckF) begin - NextState = STATE_MISS_SPILL_FETCH_DONE; - end else begin - NextState = STATE_MISS_SPILL_FETCH_WDV; - end - end - STATE_MISS_SPILL_FETCH_DONE: begin - PCMux = 2'b01; - ICacheMemWriteEnable = 1'b1; - NextState = STATE_MISS_SPILL_READ1; - end - STATE_MISS_SPILL_READ1: begin // always be a hit as we just wrote that cache block. - PCMux = 2'b01; // there is a 1 cycle delay after setting the address before the date arrives. - ICacheReadEn = 1'b1; - NextState = STATE_MISS_SPILL_2; - end - STATE_MISS_SPILL_2: begin - PCMux = 2'b10; - UnalignedSelect = 1'b1; - spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm. 
- ICacheReadEn = 1'b1; - NextState = STATE_MISS_SPILL_2_START; - end - STATE_MISS_SPILL_2_START: begin - if (~hit) begin - CntReset = 1'b1; - NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; - end else begin - NextState = STATE_READY; - ICacheReadEn = 1'b1; - PCMux = 2'b00; - UnalignedSelect = 1'b1; - SavePC = 1'b1; - ICacheStallF = 1'b0; - end - end - STATE_MISS_SPILL_MISS_FETCH_WDV: begin - PCMux = 2'b10; - PreCntEn = 1'b1; - //InstrReadF = 1'b1; - if (FetchCountFlag & InstrAckF) begin - NextState = STATE_MISS_SPILL_MISS_FETCH_DONE; - end else begin - NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; - end - end - STATE_MISS_SPILL_MISS_FETCH_DONE: begin - PCMux = 2'b10; - ICacheMemWriteEnable = 1'b1; - NextState = STATE_MISS_SPILL_MERGE; - end - STATE_MISS_SPILL_MERGE: begin - PCMux = 2'b10; - UnalignedSelect = 1'b1; - ICacheReadEn = 1'b1; - NextState = STATE_MISS_SPILL_FINAL; - end - STATE_MISS_SPILL_FINAL: begin - ICacheReadEn = 1'b1; - PCMux = 2'b00; - UnalignedSelect = 1'b1; - SavePC = 1'b1; - ICacheStallF = 1'b0; - NextState = STATE_READY; - end - default: begin - PCMux = 2'b01; - NextState = STATE_READY; - end - // *** add in error handling and invalidate/evict - endcase - end - - assign CntEn = PreCntEn & InstrAckF; - assign InstrReadF = (CurrState == STATE_HIT_SPILL_MISS_FETCH_WDV) || - (CurrState == STATE_MISS_FETCH_WDV) || - (CurrState == STATE_MISS_SPILL_FETCH_WDV) || - (CurrState == STATE_MISS_SPILL_MISS_FETCH_WDV); - - // to compute the fetch address we need to add the bit shifted - // counter output to the address. - - flopenr #(LOGWPL+1) - FetchCountReg(.clk(clk), - .reset(reset | CntReset), - .en(CntEn), - .d(NextFetchCount), - .q(FetchCount)); - - assign NextFetchCount = FetchCount + 1'b1; - - // This part is confusing. - // we need to remove the offset bits (PCPTrunkF). Because the AHB interface is XLEN wide - // we need to address on that number of bits so the PC is extended to the right by AHBByteLength with zeros. 
- // fetch count is already aligned to AHBByteLength, but we need to extend back to the full address width with - // more zeros after the addition. This will be the number of offset bits less the AHBByteLength. - logic [`XLEN-1:OFFSETWIDTH-LOGWPL] PCPTrunkExtF, InstrPAdrTrunkF ; - - assign PCPTrunkExtF = {PCPTrunkF, {{LOGWPL}{1'b0}}}; - assign InstrPAdrTrunkF = PCPTrunkExtF + FetchCount; - - //assign InstrPAdrF = {{PCPTrunkF, {{LOGWPL}{1'b0}}} + FetchCount, {{OFFSETWIDTH-LOGWPL}{1'b0}}}; - assign InstrPAdrF = {InstrPAdrTrunkF, {{OFFSETWIDTH-LOGWPL}{1'b0}}}; - - - - // store read data from memory interface before writing into SRAM. - genvar i; - generate - for (i = 0; i < WORDSPERLINE; i++) begin - flopenr #(`XLEN) flop(.clk(clk), - .reset(reset), - .en(InstrAckF & (i == FetchCount)), - .d(InstrInF), - .q(ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN])); - end - endgenerate - - // what address is used to write the SRAM? - - - // spills require storing the first cache block so it can merged - // with the second - // can optimize size, for now just make it the size of the data - // leaving the cache memory. - flopenr #(16) SpillInstrReg(.clk(clk), - .en(spillSave), - .reset(reset), - .d(ICacheMemReadData[15:0]), - .q(SpillDataBlock0)); - - // use the not quite final PC to do the final selection. - logic [1:1] PCPreFinalF_q; - flopenr #(1) PCFReg(.clk(clk), - .reset(reset), - .en(~StallF), - .d(PCPreFinalF[1]), - .q(PCPreFinalF_q[1])); - assign FinalInstrRawF = spill ? {ICacheMemReadData[15:0], SpillDataBlock0} : ICacheMemReadData; - - // There is a frustrating issue on the first access. - // The cache will not contain any valid data but will contain x's on - // reset. This makes FinalInstrRawF invalid. On the first cycle out of - // reset this register will pickup this x and it will propagate throughout - // the cpu causing simulation failure, most likely a trap for invalid instruction. - // Reset must be held 1 cycle longer to prevent this issue. 
additionally the - // reset should be to a NOP rather than 0. - - // register reset - flop #(1) resetReg (.clk(clk), - .d(reset), - .q(reset_q)); - - flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FinalInstrRawF, NOP, AlignedInstrRawD); - // cannot have this mux as it creates a combo loop. - // This flop doesn't stall if StallF is high because we should output a nop - // when FlushD happens, even if the pipeline is also stalled. - flopr #(1) flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen); - mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCyclen, InstrRawD); - //assign InstrRawD = AlignedInstrRawD; - - -endmodule