From 251ece20fec50df6c7cee2bfc61e44ae9bb4626a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 20 Apr 2021 19:55:49 -0500 Subject: [PATCH] Broken icache. Design is done. Time to debug. --- wally-pipelined/regression/wave.do | 148 ++++--- wally-pipelined/src/ifu/icache.sv | 396 ++++++++++++++++-- wally-pipelined/src/ifu/ifu.sv | 20 +- .../testbench/testbench-imperas.sv | 2 +- 4 files changed, 468 insertions(+), 98 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 470cc599..36401fd9 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -26,7 +26,6 @@ add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/CSR add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/RetM add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/TrapM add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/InstrStall add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/DataStall add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/MulDivStallD add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF @@ -39,11 +38,6 @@ add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbe add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW -add wave -noupdate /testbench/dut/hart/hzu/StallFCause_Q -add wave -noupdate /testbench/dut/hart/hzu/StallDCause_Q -add wave -noupdate /testbench/dut/hart/hzu/StallECause_Q -add wave -noupdate /testbench/dut/hart/hzu/StallMCause_Q -add wave 
-noupdate /testbench/dut/hart/hzu/StallWCause_Q add wave -noupdate -group Bpred -expand -group direction -divider Update add wave -noupdate -group Bpred -expand -group direction /testbench/dut/hart/ifu/bpred/Predictor/DirPredictor/UpdatePC add wave -noupdate -group Bpred -expand -group direction /testbench/dut/hart/ifu/bpred/Predictor/DirPredictor/UpdateEN @@ -80,36 +74,36 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/c/RegWriteD add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/RdD add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs1D add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs2D -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/rf -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/a1 -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/a2 -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/a3 -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/rd1 -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/rd2 -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/we3 -add wave -noupdate -expand -group RegFile /testbench/dut/hart/ieu/dp/regf/wd3 -add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ALUResultW -add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ReadDataW -add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW -add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW -add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b -add wave 
-noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags -add wave -noupdate -expand -group alu -divider internals -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rf +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a1 +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a2 +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a3 +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rd1 +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rd2 +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/we3 +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/wd3 +add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ALUResultW +add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ReadDataW +add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW +add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW +add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/b +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol +add 
wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags +add wave -noupdate -group alu -divider internals +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu add wave -noupdate /testbench/InstrFName -add wave -noupdate -expand -group dcache /testbench/dut/hart/MemAdrM -add wave -noupdate -expand -group dcache /testbench/dut/hart/MemPAdrM -add wave -noupdate -expand -group dcache /testbench/dut/hart/WriteDataM -add wave -noupdate -expand -group dcache /testbench/dut/hart/dmem/MemRWM +add wave -noupdate -group dcache /testbench/dut/hart/MemAdrM +add wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM +add wave -noupdate -group dcache /testbench/dut/hart/WriteDataM +add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemRWM add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E @@ -148,32 +142,64 @@ add wave -noupdate -group {function radix debug} /testbench/functionRadix/functi add wave -noupdate -group {function radix debug} /testbench/functionRadix/function_radix/FunctionAddr add wave -noupdate -group {function radix debug} /testbench/functionRadix/function_radix/ProgramAddrIndex add wave -noupdate -group {function radix debug} /testbench/functionRadix/function_radix/FunctionName -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/InstrD -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/SrcAE -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/SrcBE 
-add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/Funct3E -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/MulDivE -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/W64E -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/StallM -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/StallW -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/FlushM -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/FlushW -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/MulDivResultW -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/genblk1/div/start -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/DivDoneE -add wave -noupdate -expand -group muldiv /testbench/dut/hart/mdu/DivBusyE -add wave -noupdate /testbench/dut/hart/mdu/genblk1/gclk -add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/fsm1/CURRENT_STATE -add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/N -add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/D -add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/Q -add wave -noupdate -expand -group divider /testbench/dut/hart/mdu/genblk1/div/rem0 -add wave -noupdate /testbench/dut/hart/MulDivResultW -add wave -noupdate /testbench/dut/hart/mdu/genblk1/PrelimResultE -add wave -noupdate /testbench/dut/hart/mdu/Funct3E -add wave -noupdate /testbench/dut/hart/mdu/genblk1/QuotE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/InstrD +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/SrcAE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/SrcBE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/Funct3E +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/W64E +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/StallM +add wave 
-noupdate -group muldiv /testbench/dut/hart/mdu/StallW +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushM +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushW +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivResultW +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/genblk1/div/start +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivDoneE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivBusyE +add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/fsm1/CURRENT_STATE +add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N +add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/D +add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/Q +add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/rem0 +add wave -noupdate -expand -group parameters /testbench/dut/hart/ifu/icache/controller/AHBByteLength +add wave -noupdate -expand -group parameters /testbench/dut/hart/ifu/icache/controller/AHBOFFETWIDTH +add wave -noupdate -expand -group parameters /testbench/dut/hart/ifu/icache/controller/BlockByteLength +add wave -noupdate -expand -group parameters /testbench/dut/hart/ifu/icache/controller/OFFSETWIDTH +add wave -noupdate -expand -group parameters /testbench/dut/hart/ifu/icache/controller/WORDSPERLINE +add wave -noupdate -expand -group parameters /testbench/dut/hart/ifu/icache/controller/LOGWPL +add wave -noupdate -expand -group parameters /testbench/dut/hart/ifu/icache/controller/LINESIZE +add wave -noupdate /testbench/dut/hart/ifu/icache/controller/CurrState +add wave -noupdate -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF +add wave -noupdate -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag +add wave -noupdate -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount +add wave -noupdate -expand -group memory 
/testbench/dut/hart/ifu/icache/controller/InstrReadF +add wave -noupdate -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF +add wave -noupdate -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable +add wave -noupdate -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData +add wave -noupdate -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWritePAdr +add wave -noupdate /testbench/dut/hart/ifu/icache/controller/PCPF +add wave -noupdate /testbench/dut/hart/ifu/icache/controller/PCPreFinalF +add wave -noupdate -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF +add wave -noupdate -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD +add wave -noupdate -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD +add wave -noupdate /testbench/dut/hart/ifu/icache/controller/hit +add wave -noupdate /testbench/dut/hart/ifu/icache/controller/spill +add wave -noupdate /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate /testbench/dut/hart/ifu/icache/controller/UnalignedSelect +add wave -noupdate /testbench/dut/hart/ifu/icache/controller/PCMux +add wave -noupdate /testbench/dut/hart/ifu/icache/controller/PCPFinalF +add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/DataValidBit +add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/DataValid +add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/ReadTag +add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/DataTag +add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/WriteEnable +add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/WriteLine +add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/WritePAdr +add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/WriteSet +add wave -noupdate /testbench/dut/hart/ifu/icache/cachemem/WriteTag +add wave -noupdate 
/testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 2} {128433 ns} 0} +WaveRestoreCursors {{Cursor 2} {237 ns} 0} quietly wave cursor active 1 configure wave -namecolwidth 250 configure wave -valuecolwidth 229 @@ -189,4 +215,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {128007 ns} {128663 ns} +WaveRestoreZoom {96 ns} {400 ns} diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv index f6440fcf..573e885a 100644 --- a/wally-pipelined/src/ifu/icache.sv +++ b/wally-pipelined/src/ifu/icache.sv @@ -129,57 +129,141 @@ module icachecontroller #(parameter LINESIZE = 256) ( output logic InstrReadF ); - // Happy path signals - logic [31:0] AlignedInstrRawF, AlignedInstrRawD; - logic FlushDLastCycleN; - logic PCPMisalignedF; - const logic [31:0] NOP = 32'h13; - logic [`XLEN-1:0] PCPF; - // Misaligned signals - logic [`XLEN:0] MisalignedInstrRawF; - logic MisalignedStall; - // Cache fault signals - logic FaultStall; + // FSM states + localparam STATE_READY = 0; + localparam STATE_HIT_SPILL = 1; // spill, block 0 hit + localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 2; // block 1 miss, issue read to AHB and wait data. + localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 3; // write data into SRAM/LUT + localparam STATE_HIT_SPILL_MERGE = 4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL. + localparam STATE_MISS_FETCH_WDV = 5; // aligned miss, issue read to AHB and wait for data. 
+ localparam STATE_MISS_FETCH_DONE = 6; // write data into SRAM/LUT + localparam STATE_MISS_READ = 7; // read block 1 from SRAM/LUT + + localparam STATE_MISS_SPILL_FETCH_WDV = 8; // spill, miss on block 0, issue read to AHB and wait + localparam STATE_MISS_SPILL_FETCH_DONE = 9; // write data into SRAM/LUT + localparam STATE_MISS_SPILL_READ1 = 10; // read block 0 from SRAM/LUT + localparam STATE_MISS_SPILL_2 = 11; // return to ready if hit or do second block update. + localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 12; // miss on block 1, issue read to AHB and wait + localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 13; // write data to SRAM/LUT + localparam STATE_MISS_SPILL_MERGE = 14; // read block 0 of CPU access, + + localparam STATE_INVALIDATE = 15; // *** not sure if invalidate or evict? invalidate by cache block or address? + + localparam AHBByteLength = `XLEN / 8; + localparam AHBOFFETWIDTH = $clog2(AHBByteLength); + + + localparam BlockByteLength = LINESIZE / 8; + localparam OFFSETWIDTH = $clog2(BlockByteLength); + + localparam WORDSPERLINE = LINESIZE/`XLEN; + localparam LOGWPL = $clog2(WORDSPERLINE); + + logic [3:0] CurrState, NextState; + logic hit, spill; + logic SavePC; + logic [1:0] PCMux; + logic CntReset; + logic PreCntEn, CntEn; + logic spillSave; + logic UnalignedSelect; + logic FetchCountFlag; + localparam FetchCountThreshold = WORDSPERLINE - 1; + + logic [LOGWPL:0] FetchCount, NextFetchCount; + + logic [`XLEN-1:0] PCPreFinalF, PCPFinalF, PCSpillF; + logic [`XLEN-1:OFFSETWIDTH] PCPTrunkF; + + + logic [31:0] FinalInstrRawF; + + logic [15:0] SpillDataBlock0; + + + + // Happy path signals + logic [31:0] AlignedInstrRawD; + + //logic [31:0] AlignedInstrRawF, AlignedInstrRawD; + //logic FlushDLastCycleN; + //logic PCPMisalignedF; + const logic [31:0] NOP = 32'h13; + logic [`XLEN-1:0] PCPF; + + logic reset_q; + + // Misaligned signals + //logic [`XLEN:0] MisalignedInstrRawF; + //logic MisalignedStall; + // Cache fault signals + //logic FaultStall; + + + 
flopenr #(`XLEN) PCPFFlop(clk, reset, SavePC, {UpperPCNextPF, LowerPCNextF}, PCPF); + // on spill we want to get the first 2 bytes of the next cache block. + // the spill only occurs if the PCPF mod BlockByteLength == -2. Therefore we can + // simply add 2 to land on the next cache block. + assign PCSpillF = PCPF + 2'b10; + + // now we have to select between these three PCs + assign PCPreFinalF = PCMux[0] ? PCPF : {UpperPCNextPF, LowerPCNextF}; + assign PCPFinalF = PCMux[1] ? PCSpillF : PCPreFinalF; + + + + // truncate the offset from PCPF for memory address generation + assign PCPTrunkF = PCPFinalF[`XLEN-1:OFFSETWIDTH]; + // Detect if the instruction is compressed - assign CompressedF = AlignedInstrRawF[1:0] != 2'b11; + assign CompressedF = FinalInstrRawF[1:0] != 2'b11; // Handle happy path (data in cache, reads aligned) +/* -----\/----- EXCLUDED -----\/----- generate if (`XLEN == 32) begin assign AlignedInstrRawF = PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData; - assign PCPMisalignedF = PCPF[1] && ~CompressedF; + //assign PCPMisalignedF = PCPF[1] && ~CompressedF; end else begin assign AlignedInstrRawF = PCPF[2] ? (PCPF[1] ? MisalignedInstrRawF : ICacheMemReadData[63:32]) : (PCPF[1] ? 
ICacheMemReadData[47:16] : ICacheMemReadData[31:0]); - assign PCPMisalignedF = PCPF[2] && PCPF[1] && ~CompressedF; + //assign PCPMisalignedF = PCPF[2] && PCPF[1] && ~CompressedF; end endgenerate + -----/\----- EXCLUDED -----/\----- */ - flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD); - flopr #(1) FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN); - flopenr #(`XLEN) PCPFFlop(clk, reset, ~StallF, {UpperPCNextPF, LowerPCNextF}, PCPF); - mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD); + //flopenr #(32) AlignedInstrRawDFlop(clk, reset, ~StallD, AlignedInstrRawF, AlignedInstrRawD); + //flopr #(1) FlushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCycleN | ~StallF), FlushDLastCycleN); + + //mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, ~FlushDLastCycleN, InstrRawD); // Stall for faults or misaligned reads +/* -----\/----- EXCLUDED -----\/----- always_comb begin assign ICacheStallF = FaultStall | MisalignedStall; end + -----/\----- EXCLUDED -----/\----- */ // Handle misaligned, noncompressed reads +/* -----\/----- EXCLUDED -----\/----- logic MisalignedState, NextMisalignedState; logic [15:0] MisalignedHalfInstrF; logic [15:0] UpperHalfWord; + -----/\----- EXCLUDED -----/\----- */ +/* -----\/----- EXCLUDED -----\/----- flopenr #(16) MisalignedHalfInstrFlop(clk, reset, ~FaultStall & (PCPMisalignedF & MisalignedState), AlignedInstrRawF[15:0], MisalignedHalfInstrF); flopenr #(1) MisalignedStateFlop(clk, reset, ~FaultStall, NextMisalignedState, MisalignedState); + -----/\----- EXCLUDED -----/\----- */ // When doing a misaligned read, swizzle the bits correctly +/* -----\/----- EXCLUDED -----\/----- generate if (`XLEN == 32) begin assign UpperHalfWord = ICacheMemReadData[31:16]; @@ -194,14 +278,18 @@ module icachecontroller #(parameter LINESIZE = 256) ( assign MisalignedInstrRawF = {ICacheMemReadData[15:0], MisalignedHalfInstrF}; end end + -----/\----- EXCLUDED 
-----/\----- */ // Manage internal state and stall when necessary +/* -----\/----- EXCLUDED -----\/----- always_comb begin assign MisalignedStall = PCPMisalignedF & MisalignedState; assign NextMisalignedState = ~PCPMisalignedF | ~MisalignedState; end + -----/\----- EXCLUDED -----/\----- */ // Pick the correct address to read +/* -----\/----- EXCLUDED -----\/----- generate if (`XLEN == 32) begin assign ICacheMemReadLowerAdr = {LowerPCNextF[11:2] + (PCPMisalignedF & ~MisalignedState), 2'b00}; @@ -209,16 +297,15 @@ module icachecontroller #(parameter LINESIZE = 256) ( assign ICacheMemReadLowerAdr = {LowerPCNextF[11:3] + (PCPMisalignedF & ~MisalignedState), 3'b00}; end endgenerate + -----/\----- EXCLUDED -----/\----- */ // TODO Handle reading instructions that cross page boundaries - assign ICacheMemReadUpperPAdr = UpperPCNextPF; + //assign ICacheMemReadUpperPAdr = UpperPCNextPF; // Handle cache faults - localparam integer WORDSPERLINE = LINESIZE/`XLEN; - localparam integer LOGWPL = $clog2(WORDSPERLINE); - localparam integer OFFSETWIDTH = $clog2(LINESIZE/8); +/* -----\/----- EXCLUDED -----\/----- logic FetchState, BeginFetchState; logic [LOGWPL:0] FetchWordNum, NextFetchWordNum; logic [`XLEN-1:0] LineAlignedPCPF; @@ -226,12 +313,6 @@ module icachecontroller #(parameter LINESIZE = 256) ( flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState); flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum); - genvar i; - generate - for (i=0; i < WORDSPERLINE; i++) begin - flopenr #(`XLEN) flop(clk, reset, FetchState & (i == FetchWordNum), InstrInF, ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]); - end - endgenerate // Enter the fetch state when we hit a cache fault always_comb begin @@ -242,10 +323,10 @@ module icachecontroller #(parameter LINESIZE = 256) ( // Machinery to request the correct addresses from main memory always_comb begin - InstrReadF = FetchState & ~EndFetchState & ~ICacheMemWriteEnable; - 
LineAlignedPCPF = {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; - InstrPAdrF = LineAlignedPCPF + FetchWordNum*(`XLEN/8); - NextFetchWordNum = FetchState ? FetchWordNum+InstrAckF : {LOGWPL+1{1'b0}}; + InstrReadF = FetchState & ~EndFetchState & ~ICacheMemWriteEnable; // next stage logic + LineAlignedPCPF = {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}}; // the fetch address for AHB? + InstrPAdrF = LineAlignedPCPF + FetchWordNum*(`XLEN/8); // ? + NextFetchWordNum = FetchState ? FetchWordNum+InstrAckF : {LOGWPL+1{1'b0}}; // convert to enable end // Write to cache memory when we have the line here @@ -258,4 +339,255 @@ module icachecontroller #(parameter LINESIZE = 256) ( always_comb begin FaultStall = FetchState | ~ICacheMemReadValid; end + -----/\----- EXCLUDED -----/\----- */ + + // the FSM is always running, do not stall. + flopr #(4) stateReg(.clk(clk), + .reset(reset), + .d(NextState), + .q(CurrState)); + + assign spill = &PCPF[OFFSETWIDTH-1:1]; // spill when PCPF is the last halfword of its cache block (offset == BlockByteLength-2) + assign hit = ICacheMemReadValid; // note ICacheMemReadValid is hit. 
+ assign FetchCountFlag = FetchCount == FetchCountThreshold; + + // Next state logic + always_comb begin + UnalignedSelect = 1'b0; + CntReset = 1'b0; + PreCntEn = 1'b0; + InstrReadF = 1'b0; + ICacheMemWriteEnable = 1'b0; + spillSave = 1'b0; + PCMux = 2'b00; + + case (CurrState) + + STATE_READY: begin + PCMux = 2'b00; + if (hit & ~spill) begin + NextState = STATE_READY; + end else if (hit & spill) begin + spillSave = 1'b1; + NextState = STATE_HIT_SPILL; + end else if (~hit & ~spill) begin + CntReset = 1'b1; + NextState = STATE_MISS_FETCH_WDV; + end else if (~hit & spill) begin + CntReset = 1'b1; + NextState = STATE_MISS_SPILL_FETCH_WDV; + end else begin + NextState = STATE_READY; + end + end + + // branch 1, hit spill and 2, miss spill hit + STATE_HIT_SPILL: begin + PCMux = 2'b10; + UnalignedSelect = 1'b1; + if (hit) NextState = STATE_READY; + else begin // begin/end required: both statements belong to the miss path only + CntReset = 1'b1; + NextState = STATE_HIT_SPILL_MISS_FETCH_WDV; + end + end + STATE_HIT_SPILL_MISS_FETCH_WDV: begin + PCMux = 2'b10; + InstrReadF = 1'b1; + PreCntEn = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_HIT_SPILL_MISS_FETCH_DONE; + end else begin + NextState = STATE_HIT_SPILL_MISS_FETCH_WDV; + end + end + STATE_HIT_SPILL_MISS_FETCH_DONE: begin + PCMux = 2'b10; + ICacheMemWriteEnable = 1'b1; + NextState = STATE_HIT_SPILL_MERGE; + end + STATE_HIT_SPILL_MERGE: begin + PCMux = 2'b10; + UnalignedSelect = 1'b1; + NextState = STATE_READY; + end + + // branch 3 miss no spill + STATE_MISS_FETCH_WDV: begin + PCMux = 2'b01; + InstrReadF = 1'b1; + PreCntEn = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_MISS_FETCH_DONE; + end else begin + NextState = STATE_MISS_FETCH_WDV; + end + end + STATE_MISS_FETCH_DONE: begin + PCMux = 2'b01; + ICacheMemWriteEnable = 1'b1; + NextState = STATE_MISS_READ; + end + STATE_MISS_READ: begin + PCMux = 2'b01; + NextState = STATE_READY; + end + + // branch 4 miss spill hit, and 5 miss spill miss + STATE_MISS_SPILL_FETCH_WDV: begin + PCMux = 2'b01; + 
PreCntEn = 1'b1; + InstrReadF = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_MISS_SPILL_FETCH_DONE; + end else begin + NextState = STATE_MISS_SPILL_FETCH_WDV; + end + end + STATE_MISS_SPILL_FETCH_DONE: begin + PCMux = 2'b01; + ICacheMemWriteEnable = 1'b1; + NextState = STATE_MISS_SPILL_READ1; + end + STATE_MISS_SPILL_READ1: begin // always be a hit as we just wrote that cache block. + PCMux = 2'b10; // there is a 1 cycle delay after setting the address before the data arrives. + spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm. + NextState = STATE_MISS_SPILL_2; + end + STATE_MISS_SPILL_2: begin + PCMux = 2'b10; + UnalignedSelect = 1'b1; + if (~hit) begin + CntReset = 1'b1; + NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; + end else begin + NextState = STATE_READY; + end + end + STATE_MISS_SPILL_MISS_FETCH_WDV: begin + PCMux = 2'b10; + PreCntEn = 1'b1; + InstrReadF = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_MISS_SPILL_MISS_FETCH_DONE; + end else begin + NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; + end + end + STATE_MISS_SPILL_MISS_FETCH_DONE: begin + PCMux = 2'b10; + ICacheMemWriteEnable = 1'b1; + NextState = STATE_MISS_SPILL_MERGE; + end + STATE_MISS_SPILL_MERGE: begin + PCMux = 2'b10; + UnalignedSelect = 1'b1; + NextState = STATE_READY; + end + default: begin + PCMux = 2'b01; + NextState = STATE_READY; + end + // *** add in error handling and invalidate/evict + endcase + end + + // fsm outputs + // stall CPU any time we are not in the ready state. any other state means the + // cache is either requesting data from the memory interface or handling a + // spill over two cycles. + assign ICacheStallF = (CurrState != STATE_READY) | reset_q ? 1'b1 : 1'b0; + // save the PC anytime we are in the ready state. The saved value will be used as the PC may not be stable. + assign SavePC = CurrState == STATE_READY ? 
1'b1 : 1'b0; + assign CntEn = PreCntEn & InstrAckF; + + // to compute the fetch address we need to add the bit shifted + // counter output to the address. + + flopenr #(LOGWPL+1) + FetchCountReg(.clk(clk), + .reset(reset | CntReset), + .en(CntEn), + .d(NextFetchCount), + .q(FetchCount)); + + assign NextFetchCount = FetchCount + 1'b1; + + // This part is confusing. + // we need to remove the offset bits (PCPTrunkF). Because the AHB interface is XLEN wide + // we need to address on that number of bits so the PC is extended to the right by AHBByteLength with zeros. + // fetch count is already aligned to AHBByteLength, but we need to extend back to the full address width with + // more zeros after the addition. This will be the number of offset bits less the AHBByteLength. + assign InstrPAdrF = {{PCPTrunkF, {{LOGWPL}{1'b0}}} + FetchCount, {{OFFSETWIDTH-LOGWPL}{1'b0}}}; + + + // store read data from memory interface before writing into SRAM. + genvar i; + generate + for (i = 0; i < WORDSPERLINE; i++) begin // one XLEN-wide register per bus word of the cache line + flopenr #(`XLEN) flop(.clk(clk), + .reset(reset), + .en(InstrAckF & (i == FetchCount)), + .d(InstrInF), + .q(ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN])); + end + endgenerate + + // what address is used to write the SRAM? + + + // spills require storing the first cache block so it can be merged + // with the second + // can optimize size, for now just make it the size of the data + // leaving the cache memory. + flopenr #(16) SpillInstrReg(.clk(clk), + .en(spillSave), + .reset(reset), + .d(ICacheMemReadData[15:0]), + .q(SpillDataBlock0)); + + // use the not quite final PC to do the final selection. + generate + if( `XLEN == 32) begin + logic [1:1] PCPreFinalF_q; + flop #(1) PCFReg(.clk(clk), + .d(PCPreFinalF[1]), + .q(PCPreFinalF_q[1])); + assign FinalInstrRawF = PCPreFinalF[1] ? 
{SpillDataBlock0, ICacheMemReadData[31:16]} : ICacheMemReadData; + end else begin + logic [2:1] PCPreFinalF_q; + flop #(2) PCFReg(.clk(clk), + .d(PCPreFinalF[2:1]), + .q(PCPreFinalF_q[2:1])); + mux4 #(32) AlignmentMux(.d0(ICacheMemReadData[31:0]), + .d1(ICacheMemReadData[47:16]), + .d2(ICacheMemReadData[63:32]), + .d3({SpillDataBlock0, ICacheMemReadData[63:48]}), + .s(PCPreFinalF[2:1]), + .y(FinalInstrRawF)); + end + endgenerate + + // There is a frustrating issue on the first access. + // The cache will not contain any valid data but will contain x's on + // reset. This makes FinalInstrRawF invalid. On the first cycle out of + // reset this register will pickup this x and it will propagate throughout + // the cpu causing simulation failure, most likely a trap for invalid instruction. + // Reset must be held 1 cycle longer to prevent this issue. additionally the + // reset should be to a NOP rather than 0. + + // register reset + flop #(1) resetReg (.clk(clk), + .d(reset), + .q(reset_q)); + + flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FinalInstrRawF, NOP, AlignedInstrRawD); + mux2 #(32) InstrRawDMux(AlignedInstrRawD, NOP, FlushD, InstrRawD); + + assign {ICacheMemReadUpperPAdr, ICacheMemReadLowerAdr} = PCPFinalF; + + assign ICacheMemWritePAdr = PCPFinalF; + + + endmodule diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 5a2d1b42..58b144f5 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -77,6 +77,8 @@ module ifu ( logic [31:0] nop = 32'h00000013; // instruction for NOP // *** send this to the trap unit logic ITLBPageFaultF; + logic reset_q; // *** look at this later. 
+ tlb #(3) itlb(.TLBAccess(1'b1), .VirtualAddress(PCF), .PageTableEntryWrite(PageTableEntryF), .PageTypeWrite(PageTypeF), @@ -87,7 +89,7 @@ module ifu ( // branch predictor signals logic SelBPPredF; - logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F; + logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F, PCNext2F; logic [3:0] InstrClassD, InstrClassE; @@ -98,10 +100,10 @@ module ifu ( // jarred 2021-03-14 Add instrution cache block to remove rd2 assign PCNextPF = PCNextF; // Temporary workaround until iTLB is live - icache ic( + icache icache( .*, - .UpperPCPF(PCPF[`XLEN-1:12]), - .LowerPCF(PCF[11:0]) + .UpperPCNextPF(PCNextPF[`XLEN-1:12]), + .LowerPCNextF(PCNextPF[11:0]) ); assign PrivilegedChangePCM = RetM | TrapM; @@ -120,7 +122,17 @@ module ifu ( mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM), + .y(PCNext2F)); + + mux2 #(`XLEN) pcmux3(.d0(PCNext2F), + .d1(`RESET_VECTOR), + .s(reset_q), .y(UnalignedPCNextF)); + + flop #(1) resetReg (.clk(clk), + .d(reset), + .q(reset_q)); + assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 2d11bcc8..bd51596d 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -447,7 +447,7 @@ module testbench(); // Track names of instructions instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, - dut.hart.ifu.ic.controller.AlignedInstrRawF, + dut.hart.ifu.icache.controller.FinalInstrRawF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, dut.hart.ifu.InstrM, dut.hart.ifu.InstrW, InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);