From cab6b9dfc8a80ac95eeb6f142b650752c05aeac4 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 3 Mar 2023 17:49:44 -0600 Subject: [PATCH 01/17] Fixed a bunch of odd bugs with the test bench preventing correct measurement of performance counters. --- sim/wave.do | 27 +++++++++++++++++++-------- testbench/common/functionName.sv | 30 ++++++++++++++++++++++-------- testbench/testbench.sv | 6 ------ 3 files changed, 41 insertions(+), 22 deletions(-) diff --git a/sim/wave.do b/sim/wave.do index d9e8869a..fd95f6f1 100644 --- a/sim/wave.do +++ b/sim/wave.do @@ -6,6 +6,17 @@ add wave -noupdate /testbench/reset add wave -noupdate /testbench/reset_ext add wave -noupdate /testbench/memfilename add wave -noupdate /testbench/dut/core/SATP_REGW +add wave -noupdate /testbench/FunctionName/FunctionName/PCD +add wave -noupdate /testbench/FunctionName/FunctionName/PCE +add wave -noupdate /testbench/FunctionName/FunctionName/PCF +add wave -noupdate /testbench/FunctionName/FunctionName/PCM +add wave -noupdate /testbench/FunctionName/FunctionName/PCM_temp +add wave -noupdate /testbench/FunctionName/FunctionName/PCMOld +add wave -noupdate /testbench/dut/core/InstrValidM +add wave -noupdate /testbench/FunctionName/FunctionName/FunctionAddr +add wave -noupdate /testbench/FunctionName/FunctionName/ProgramAddrIndex +add wave -noupdate /testbench/FunctionName/FunctionName/FunctionName +add wave -noupdate /testbench/FunctionName/FunctionName/ProgramAddrMapLineCount add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM add wave -noupdate -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD @@ -55,11 +66,12 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/c/RegWriteD add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/RdD add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D -add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/PCE -add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/InstrE -add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName -add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE -add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName +add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE +add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE +add wave -noupdate -group {Execution Stage} /testbench/InstrEName +add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE +add wave -noupdate -expand -group {Memory Stage} /testbench/FunctionName/FunctionName/FunctionName +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrValidM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName @@ -622,10 +634,9 @@ add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/ICacheAcc add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/DCacheMiss add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/InstrValidNotFlushedM add wave -noupdate /testbench/clk -add wave -noupdate /testbench/HPMCSample/FinalHPMCOUNTERH add wave -noupdate /testbench/HPMCSample/InitialHPMCOUNTERH TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {717301 ns} 0} {{Cursor 5} {394987 ns} 1} +WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {49231900 ns} 0} {{Cursor 5} {394987 ns} 1} quietly wave cursor active 4 configure wave -namecolwidth 250 configure wave -valuecolwidth 194 @@ -641,4 +652,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {717254 ns} {717585 ns} +WaveRestoreZoom {49231842 ns} {49231960 ns} diff --git a/testbench/common/functionName.sv b/testbench/common/functionName.sv index 17b27ec5..2c658a26 100644 --- a/testbench/common/functionName.sv +++ b/testbench/common/functionName.sv @@ -35,22 +35,29 @@ module FunctionName(reset, clk, ProgramAddrMapFile, ProgramLabelMapFile); string FunctionName; - logic [`XLEN-1:0] PCF, PCD, PCE, FunctionAddr; - logic StallD, StallE, FlushD, FlushE; + logic [`XLEN-1:0] PCF, PCD, PCE, PCM, FunctionAddr, PCM_temp, PCMOld; + logic StallD, StallE, StallM, FlushD, FlushE, FlushM; + logic InstrValidM; integer ProgramAddrIndex, ProgramAddrIndexQ; assign PCF = testbench.dut.core.ifu.PCF; assign StallD = testbench.dut.core.StallD; assign StallE = testbench.dut.core.StallE; + assign StallM = testbench.dut.core.StallM; assign FlushD = testbench.dut.core.FlushD; assign FlushE = testbench.dut.core.FlushE; + assign FlushM = testbench.dut.core.FlushM; + assign InstrValidM = testbench.dut.core.InstrValidM; // copy from ifu // when the F and D stages are flushed we need to ensure the PCE is held so that the function name does not // erroneously change. - flopenrc #(`XLEN) PCDReg(clk, reset, 1'b0, ~StallD, FlushE & FlushD ? PCE : PCF, PCD); - flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, FlushE ? PCE : PCD, PCE); - + // also need to hold the old value not an erroneously fetched PC. + flopenr #(`XLEN) PCDReg(clk, reset, ~StallD, FlushD ? PCE : PCF, PCD); + flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, FlushD & FlushE ? PCF : FlushE ? PCE : PCD, PCE); + flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, FlushD & FlushE & FlushM ? PCF : FlushE & FlushM ? PCE : FlushM ? PCM : PCE, PCM_temp); + flopenr #(`XLEN) PCMOldReg(clk, reset, InstrValidM, PCM_temp, PCMOld); + assign PCM = InstrValidM ? PCM_temp : PCMOld; task automatic bin_search_min; @@ -111,7 +118,11 @@ module FunctionName(reset, clk, ProgramAddrMapFile, ProgramLabelMapFile); // preload // initial begin - always @ (posedge reset) begin + always @ (negedge reset) begin + // clear out the old mapping between programs. + foreach(ProgramAddrMapMemory[i]) ProgramAddrMapMemory.delete(i); + foreach(ProgramLabelMapMemory[i]) ProgramLabelMapMemory.delete(i); + $readmemh(ProgramAddrMapFile, ProgramAddrMapMemory); // we need to count the number of lines in the file so we can set FunctionRadixLineCount. @@ -147,11 +158,14 @@ module FunctionName(reset, clk, ProgramAddrMapFile, ProgramLabelMapFile); $display("Cannot open file %s for reading.", ProgramLabelMapFile); end $fclose(ProgramLabelMapFP); + + foreach(ProgramAddrMapMemory[i]) $display("%x", ProgramAddrMapMemory[i]); + foreach(ProgramLabelMapMemory[i]) $display("%s", ProgramLabelMapMemory[i]); end - always @(PCE) begin - bin_search_min(PCE, ProgramAddrMapLineCount, ProgramAddrMapMemory, FunctionAddr, ProgramAddrIndex); + always @(PCM) begin + bin_search_min(PCM, ProgramAddrMapLineCount, ProgramAddrMapMemory, FunctionAddr, ProgramAddrIndex); end logic OrReducedAdr, AnyUnknown; diff --git a/testbench/testbench.sv b/testbench/testbench.sv index e6f025e8..c4581fa8 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -408,7 +408,6 @@ logic [3:0] dummy; logic StartSample; logic EndSample, EndSampleFirst, EndSampleDelayed; logic [`XLEN-1:0] InitialHPMCOUNTERH[`COUNTERS-1:0]; - logic [`XLEN-1:0] FinalHPMCOUNTERH[`COUNTERS-1:0]; string HPMCnames[] = '{"Mcycle", "------", @@ -464,11 +463,6 @@ logic [3:0] dummy; InitialHPMCOUNTERH[HPMCindex] <= dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[HPMCindex]; end end - if(EndSample) begin - for(HPMCindex = 0; HPMCindex < 32; HPMCindex += 1) begin - FinalHPMCOUNTERH[HPMCindex] <= dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[HPMCindex]; - end - end if(EndSample) begin for(HPMCindex = 0; HPMCindex < HPMCnames.size(); HPMCindex += 1) begin // unlikely to have more than 10M in any counter. From 7599b563a6585f4be48a8b29ceb3851a25738db1 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 3 Mar 2023 17:52:00 -0600 Subject: [PATCH 02/17] Removed debugging code. --- testbench/common/functionName.sv | 3 --- 1 file changed, 3 deletions(-) diff --git a/testbench/common/functionName.sv b/testbench/common/functionName.sv index 2c658a26..c986c2e5 100644 --- a/testbench/common/functionName.sv +++ b/testbench/common/functionName.sv @@ -159,9 +159,6 @@ module FunctionName(reset, clk, ProgramAddrMapFile, ProgramLabelMapFile); end $fclose(ProgramLabelMapFP); - foreach(ProgramAddrMapMemory[i]) $display("%x", ProgramAddrMapMemory[i]); - foreach(ProgramLabelMapMemory[i]) $display("%s", ProgramLabelMapMemory[i]); - end always @(PCM) begin From f13017a92750653d8a667651c867df16c9312225 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sat, 4 Mar 2023 13:45:15 -0600 Subject: [PATCH 03/17] Updated parsing script. --- bin/parseHPMC.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 1ce084fc..dcac5182 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -39,20 +39,20 @@ def ComputeCPI(benchmark): def ComputeBranchDirMissRate(benchmark): 'Computes and inserts branch direction miss prediction rate.' (nameString, opt, dataDict) = benchmark - branchDirMissRate = 100.0 * int(dataDict['Br Dir Wrong']) / int(dataDict['Br Count']) + branchDirMissRate = 100.0 * int(dataDict['BP Dir Wrong']) / int(dataDict['Br Count']) dataDict['BDMR'] = branchDirMissRate def ComputeBranchTargetMissRate(benchmark): 'Computes and inserts branch target miss prediction rate.' # *** this is wrong in the verilog test bench (nameString, opt, dataDict) = benchmark - branchTargetMissRate = 100.0 * int(dataDict['Br Target Wrong']) / (int(dataDict['Br Count']) + int(dataDict['Jump, JR, Jal']) + int(dataDict['ret'])) + branchTargetMissRate = 100.0 * int(dataDict['BP Target Wrong']) / (int(dataDict['Br Count']) + int(dataDict['Jump Not Return'])) dataDict['BTMR'] = branchTargetMissRate def ComputeRASMissRate(benchmark): 'Computes and inserts return address stack miss prediction rate.' (nameString, opt, dataDict) = benchmark - RASMPR = 100.0 * int(dataDict['RAS Wrong']) / int(dataDict['ret']) + RASMPR = 100.0 * int(dataDict['RAS Wrong']) / int(dataDict['Return']) dataDict['RASMPR'] = RASMPR def ComputeInstrClassMissRate(benchmark): @@ -70,7 +70,9 @@ def ComputeICacheMissRate(benchmark): def ComputeICacheMissTime(benchmark): 'Computes and inserts instruction class miss prediction rate.' (nameString, opt, dataDict) = benchmark - ICacheMR = 100.0 * int(dataDict['I Cache Cycles']) / int(dataDict['I Cache Miss']) + cycles = int(dataDict['I Cache Miss']) + if(cycles == 0): ICacheMR = 0 + else: ICacheMR = 100.0 * int(dataDict['I Cache Cycles']) / cycles dataDict['ICacheMT'] = ICacheMR def ComputeDCacheMissRate(benchmark): @@ -82,8 +84,10 @@ def ComputeDCacheMissRate(benchmark): def ComputeDCacheMissTime(benchmark): 'Computes and inserts instruction class miss prediction rate.' (nameString, opt, dataDict) = benchmark - ICacheMR = 100.0 * int(dataDict['D Cache Cycles']) / int(dataDict['D Cache Miss']) - dataDict['DCacheMT'] = ICacheMR + cycles = int(dataDict['D Cache Miss']) + if(cycles == 0): DCacheMR = 0 + else: DCacheMR = 100.0 * int(dataDict['D Cache Cycles']) / cycles + dataDict['DCacheMT'] = DCacheMR def ComputeAll(benchmarks): for benchmark in benchmarks: From da9627708ec8fbb28c6670c1c6bc322bfc635b5d Mon Sep 17 00:00:00 2001 From: Kip Macsai-Goren Date: Fri, 3 Mar 2023 09:46:29 -0800 Subject: [PATCH 04/17] Added correct causing and handling of S time interrupts to test suite. --- .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 42 +++++++++++++++---- .../rv64i_m/privilege/src/WALLY-TEST-LIB-64.h | 35 ++++++++++++---- 2 files changed, 59 insertions(+), 18 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index 1171786f..7a52b08d 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -125,20 +125,34 @@ cause_m_time_interrupt: lw t2, 0(t5) // low word of MTIME lw t6, 4(t5) // high word of MTIME add t3, t2, t3 // add desired offset to the current time - bgtu t3, t2, nowrap // check new time exceeds current time (no wraparound) + bgtu t3, t2, nowrap_m // check new time exceeds current time (no wraparound) addi t6, t6, 1 // if wrap, increment most significant word sw t6,4(t4) // store into most significant word of MTIMECMP -nowrap: +nowrap_m: sw t3, 0(t4) // store into least significant word of MTIMECMP -time_loop: +time_loop_m: addi a3, a3, -1 - bnez a3, time_loop // go through this loop for [a3 value] iterations before returning without performing interrupt + bnez a3, time_loop_m // go through this loop for [a3 value] iterations before returning without performing interrupt ret cause_s_time_interrupt: - li t3, 0x20 - csrs mip, t3 // set supervisor time interrupt pending. SIP is a subset of MIP, so writing this should also change MIP. - nop // added extra nops in so the csrs can get through the pipeline before returning. + li t3, 0x2 + csrs mcounteren, t3 // set mcounteren.TM to 1 to attempt to allow us to write to stimecmp + li t3, 0x30 // Desired offset from the present time + mv a3, t3 // copy value in to know to stop waiting for interrupt after this many cycles + // la t4, 0x02004000 // MTIMECMP register in CLINT + la t5, 0x0200BFF8 // MTIME register in CLINT *** we still read from mtime since stimecmp is compared to it + lw t2, 0(t5) // low word of MTIME + lw t6, 4(t5) // high word of MTIME + add t3, t2, t3 // add desired offset to the current time + bgtu t3, t2, nowrap_s // check new time exceeds current time (no wraparound) + addi t6, t6, 1 // if wrap, increment most significant word +nowrap_s: + csrw 0x14D, t3 // store into STIMECMP + csrw 0x15D, t6 // store into STIMECMPH +time_loop_s: + addi a3, a3, -1 + bnez a3, time_loop_s // go through this loop for [a3 value] iterations before returning without performing interrupt ret cause_m_soft_interrupt: @@ -353,6 +367,9 @@ trap_stack_saved_\MODE\(): // jump here after handling vectored interupt since w .endif + li t3, 0x2 + csrs \MODE\()counteren, t3 // set mcounteren.TM to 1 to attempt to allow us to write to stimecmp + // Respond to trap based on cause // All interrupts should return after being logged csrr ra, \MODE\()cause @@ -423,6 +440,9 @@ trapreturn_specified_\MODE\(): li a2, 0 // reset trapreturn inputs to the trap handler trapreturn_finished_\MODE\(): + li t3, 0x2 + csrs \MODE\()counteren, t3 // set mcounteren.TM to 1 to attempt to allow us to write to stimecmp + csrw \MODE\()epc, ra // update the mepc with address of next instruction lw t2, -12(sp) // restore registers from stack before returning lw t0, -8(sp) @@ -545,8 +565,12 @@ soft_interrupt_\MODE\(): time_interrupt_\MODE\(): la t0, 0x02004000 // MTIMECMP register in CLINT li t2, 0xFFFFFFFF - sw t2, 0(t0) // reset interrupt by setting mtimecmp to 0xFFFFFFFF - + sw t2, 0(t0) // reset interrupt by setting mtimecmp to max + //sw t2, 4(t0) // reset interrupt by setting mtimecmpH to max + csrw 0x14D, t2 // reset stime interrupts by doing the same to stimecmp and stimecmpH. + csrw 0x15D, t2 + + li t0, 0x20 csrc \MODE\()ip, t0 lw ra, -4(sp) // load return address from stack into ra (the address to return to after the loop is complete) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h index 576636de..4a6fa94a 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h @@ -127,22 +127,32 @@ cause_m_time_interrupt: lw t2, 0(t5) // low word of MTIME lw t6, 4(t5) // high word of MTIME add t3, t2, t3 // add desired offset to the current time - bgtu t3, t2, nowrap // check new time exceeds current time (no wraparound) + bgtu t3, t2, nowrap_m // check new time exceeds current time (no wraparound) addi t6, t6, 1 // if wrap, increment most significant word sw t6,4(t4) // store into most significant word of MTIMECMP -nowrap: +nowrap_m: sw t3, 0(t4) // store into least significant word of MTIMECMP -time_loop: +time_loop_m: addi a3, a3, -1 - bnez a3, time_loop // go through this loop for [a3 value] iterations before returning without performing interrupt + bnez a3, time_loop_m // go through this loop for [a3 value] iterations before returning without performing interrupt ret cause_s_time_interrupt: - li t3, 0x20 - csrs mip, t3 // set supervisor time interrupt pending. - nop // added extra nops in so the csrs can get through the pipeline before returning. + li t3, 0x2 + csrs mcounteren, t3 // set mcounteren.TM to 1 to attempt to allow us to write to stimecmp + li t3, 0x30 // Desired offset from the present time + mv a3, t3 // copy value in to know to stop waiting for interrupt after this many cycles + // la t4, 0x02004000 // MTIMECMP register in CLINT + la t5, 0x0200BFF8 // MTIME register in CLINT *** we still read from mtime since stimecmp is compared to it + lw t2, 0(t5) // low word of MTIME + lw t6, 4(t5) // high word of MTIME + add t3, t2, t3 // add desired offset to the current time + csrw 0x14D, t3 // store into most significant word of STIMECMP +time_loop_s: + addi a3, a3, -1 + bnez a3, time_loop_s // go through this loop for [a3 value] iterations before returning without performing interrupt ret - + cause_m_soft_interrupt: la t3, 0x02000000 // MSIP register in CLINT li t4, 1 // 1 in the lsb @@ -347,6 +357,9 @@ trap_stack_saved_\MODE\(): // jump here after handling vectored interupt since w .endif + li t3, 0x2 + csrs \MODE\()counteren, t3 // set mcounteren.TM to 1 to attempt to allow us to write to stimecmp + // Respond to trap based on cause // All interrupts should return after being logged csrr ra, \MODE\()cause @@ -417,6 +430,9 @@ trapreturn_specified_\MODE\(): li a2, 0 // reset trapreturn inputs to the trap handler trapreturn_finished_\MODE\(): + li t3, 0x2 + csrc \MODE\()counteren, t3 // set mcounteren.TM to 1 to attempt to allow us to write to stimecmp + csrw \MODE\()epc, ra // update the epc with address of next instruction ld t2, -24(sp) // restore registers from stack before returning ld t0, -16(sp) @@ -539,7 +555,8 @@ soft_interrupt_\MODE\(): time_interrupt_\MODE\(): la t0, 0x02004000 // MTIMECMP register in CLINT li t2, 0xFFFFFFFF - sd t2, 0(t0) // reset interrupt by setting mtimecmp to 0xFFFFFFFF + sd t2, 0(t0) // reset interrupt by setting mtimecmp to max + csrw 0x14D, t2 // reset stime interrupts by doing the same. li t0, 0x20 csrc \MODE\()ip, t0 From 0ba1a59a709e635eb9c270c3146f68e54f700751 Mon Sep 17 00:00:00 2001 From: Kip Macsai-Goren Date: Thu, 23 Feb 2023 13:45:44 -0800 Subject: [PATCH 05/17] added reset values to stime and stimecmp registers --- src/privileged/csrs.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/privileged/csrs.sv b/src/privileged/csrs.sv index 253d0245..e47a4442 100644 --- a/src/privileged/csrs.sv +++ b/src/privileged/csrs.sv @@ -102,10 +102,10 @@ module csrs #(parameter flopens #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], SCOUNTEREN_REGW); if (`SSTC_SUPPORTED) begin if (`XLEN == 64) - flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW); + flopenl #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, 64'hFFFFFFFFFFFFFFFF, STIMECMP_REGW); else begin - flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW[31:0]); - flopenr #(`XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, STIMECMP_REGW[63:32]); + flopenl #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, 32'hFFFFFFFF, STIMECMP_REGW[31:0]); + flopenl #(`XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, 32'hFFFFFFFF, STIMECMP_REGW[63:32]); end end else assign STIMECMP_REGW = 0; From 9c4a69bb0e8be96e4668d81072d759091d43c766 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sat, 4 Mar 2023 17:10:58 -0600 Subject: [PATCH 06/17] Partial automation of branch predictor embenching. --- bin/parseHPMC.py | 4 +- sim/bpred-sim.py | 143 +++++++++++++++++++++++++++++++++++++++++++++ sim/wally-batch.do | 32 ++++++++++ 3 files changed, 177 insertions(+), 2 deletions(-) create mode 100755 sim/bpred-sim.py diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index dcac5182..3229d1c7 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -221,9 +221,9 @@ if(sys.argv[1] == '-b'): for benchmark in benchmarkAll: (name, opt, config, dataDict) = benchmark if name+'_'+opt in benchmarkDict: - benchmarkDict[name+'_'+opt].append((config, dataDict['BDMR'])) + benchmarkDict[name+'_'+opt].append((config, dataDict['BTMR'])) else: - benchmarkDict[name+'_'+opt] = [(config, dataDict['BDMR'])] + benchmarkDict[name+'_'+opt] = [(config, dataDict['BTMR'])] size = len(benchmarkDict) index = 1 diff --git a/sim/bpred-sim.py b/sim/bpred-sim.py new file mode 100755 index 00000000..3c5aa2d5 --- /dev/null +++ b/sim/bpred-sim.py @@ -0,0 +1,143 @@ +#!/usr/bin/python3 +################################## +# +# regression-wally +# David_Harris@Hmc.edu 25 January 2021 +# Modified by Jarred Allen +# +# Run a regression with multiple configurations in parallel and exit with +# non-zero status code if an error happened, as well as printing human-readable +# output. +# +################################## +import sys,os,shutil + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +from collections import namedtuple +regressionDir = os.path.dirname(os.path.abspath(__file__)) +os.chdir(regressionDir) + +TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr']) +# name: the name of this test configuration (used in printing human-readable +# output and picking logfile names) +# cmd: the command to run to test (should include the logfile as '{}', and +# the command needs to write to that file) +# grepstr: the string to grep through the log file for. The test succeeds iff +# grep finds that string in the logfile (is used by grep, so it may +# be any pattern grep accepts, see `man 1 grep` for more info). + +# edit this list to add more test cases +configs = [ + TestCase( + name="lints", + variant="all", + cmd="./lint-wally | tee {}", + grepstr="All lints run with no errors or warnings" + ) +] + +configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_TWOBIT\" +define+BPRED_SIZE=6" +tc = TestCase( + name="twobit6", + variant="rv32gc", + cmd="vsim > {} -c < {} -c < {} -c < /dev/null" % (text, logfile) + return os.system(grepcmd) == 0 + +def run_test_case(config): + """Run the given test case, and return 0 if the test suceeds and 1 if it fails""" + logname = "logs/"+config.variant+"_"+config.name+".log" + cmd = config.cmd.format(logname) + print(cmd) + os.chdir(regressionDir) + os.system(cmd) + if search_log_for_text(config.grepstr, logname): + print(f"{bcolors.OKGREEN}%s_%s: Success{bcolors.ENDC}" % (config.variant, config.name)) + return 0 + else: + print(f"{bcolors.FAIL}%s_%s: Failures detected in output{bcolors.ENDC}" % (config.variant, config.name)) + print(" Check %s" % logname) + return 1 + +def main(): + """Run the tests and count the failures""" + TIMEOUT_DUR = 10800 # 3 hours + + global configs + try: + os.chdir(regressionDir) + os.mkdir("logs") + #print(os.getcwd()) + #print(regressionDir) + except: + pass + try: + shutil.rmtree("wkdir") + except: + pass + finally: + os.mkdir("wkdir") + + if '-makeTests' in sys.argv: + os.chdir(regressionDir) + os.system('./make-tests.sh | tee ./logs/make-tests.log') + + # Scale the number of concurrent processes to the number of test cases, but + # max out at a limited number of concurrent processes to not overwhelm the system + with Pool(processes=min(len(configs),40)) as pool: + num_fail = 0 + results = {} + for config in configs: + results[config] = pool.apply_async(run_test_case,(config,)) + for (config,result) in results.items(): + try: + num_fail+=result.get(timeout=TIMEOUT_DUR) + except TimeoutError: + num_fail+=1 + print(f"{bcolors.FAIL}%s_%s: Timeout - runtime exceeded %d seconds{bcolors.ENDC}" % (config.variant, config.name, TIMEOUT_DUR)) + + # Count the number of failures + if num_fail: + print(f"{bcolors.FAIL}Regression failed with %s failed configurations{bcolors.ENDC}" % num_fail) + else: + print(f"{bcolors.OKGREEN}SUCCESS! All tests ran without failures{bcolors.ENDC}") + return num_fail + +if __name__ == '__main__': + exit(main()) diff --git a/sim/wally-batch.do b/sim/wally-batch.do index 7e63de8a..0f03f903 100644 --- a/sim/wally-batch.do +++ b/sim/wally-batch.do @@ -26,6 +26,14 @@ if {$2 eq "ahb"} { vdel -lib wkdir/work_${1}_${2}_${3}_${4} -all } vlib wkdir/work_${1}_${2}_${3}_${4} + + +} elseif {$2 eq "configOptions"} { + if [file exists wkdir/work_${1}_${3}_${4}] { + vdel -lib wkdir/work_${1}_${3}_${4} -all + } + vlib wkdir/work_${1}_${3}_${4} + } else { if [file exists wkdir/work_${1}_${2}] { vdel -lib wkdir/work_${1}_${2} -all @@ -76,6 +84,30 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { # power add -r /dut/core/* run -all # power off -r /dut/core/* + +} elseif {$2 eq "configOptions"} { + # set arguments " " + # for {set i 5} {$i <= $argc} {incr i} { + # append arguments "\$$i " + # } + # puts $arguments + # set options eval $arguments + # **** fix this so we can pass any number of +defines. + # only allows 3 right now + + vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7 + # start and run simulation + # remove +acc flag for faster sim during regressions if there is no need to access internal signals + vopt wkdir/work_${1}_${3}_${4}.testbench -work wkdir/work_${1}_${3}_${4} -G TEST=$4 -o testbenchopt + vsim -lib wkdir/work_${1}_${3}_${4} testbenchopt -fatal 7 -suppress 3829 + # Adding coverage increases runtime from 2:00 to 4:29. Can't run it all the time + #vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf + #vsim -coverage -lib work_$2 workopt_$2 + # power add generates the logging necessary for said generation. + # power add -r /dut/core/* + run -all + # power off -r /dut/core/* + } else { vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 # start and run simulation From 00baa062347a07f63ff50957e1212b4f0c84a942 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sat, 4 Mar 2023 17:20:45 -0600 Subject: [PATCH 07/17] Mostly working bpred launch script. --- sim/bpred-sim.py | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/sim/bpred-sim.py b/sim/bpred-sim.py index 3c5aa2d5..1ec8b304 100755 --- a/sim/bpred-sim.py +++ b/sim/bpred-sim.py @@ -46,31 +46,18 @@ configs = [ ) ] -configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_TWOBIT\" +define+BPRED_SIZE=6" -tc = TestCase( - name="twobit6", - variant="rv32gc", - cmd="vsim > {} -c < {} -c < {} -c < {} -c < Date: Sat, 4 Mar 2023 15:46:26 -0800 Subject: [PATCH 08/17] added S time compare to gc configs --- config/rv32gc/wally-config.vh | 2 +- config/rv64gc/wally-config.vh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/rv32gc/wally-config.vh b/config/rv32gc/wally-config.vh index d7475cdb..359e2d83 100644 --- a/config/rv32gc/wally-config.vh +++ b/config/rv32gc/wally-config.vh @@ -43,7 +43,7 @@ `define COUNTERS 32 `define ZICOUNTERS_SUPPORTED 1 `define ZFH_SUPPORTED 0 -`define SSTC_SUPPORTED 0 +`define SSTC_SUPPORTED 1 // LSU microarchitectural Features `define BUS_SUPPORTED 1 diff --git a/config/rv64gc/wally-config.vh b/config/rv64gc/wally-config.vh index 4e2ab3df..b5038be5 100644 --- a/config/rv64gc/wally-config.vh +++ b/config/rv64gc/wally-config.vh @@ -44,7 +44,7 @@ `define COUNTERS 32 `define ZICOUNTERS_SUPPORTED 1 `define ZFH_SUPPORTED 0 -`define SSTC_SUPPORTED 0 +`define SSTC_SUPPORTED 1 // LSU microarchitectural Features `define BUS_SUPPORTED 1 From 4fa78a02b7035a2830fa247db1d848da3aa97418 Mon Sep 17 00:00:00 2001 From: Kip Macsai-Goren Date: Sat, 4 Mar 2023 15:46:57 -0800 Subject: [PATCH 09/17] removed changes to counteren from stimecmp tests --- .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 9 --------- .../rv64i_m/privilege/src/WALLY-TEST-LIB-64.h | 8 -------- 2 files changed, 17 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index 7a52b08d..ad892e95 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -136,11 +136,8 @@ time_loop_m: ret cause_s_time_interrupt: - li t3, 0x2 - csrs mcounteren, t3 // set mcounteren.TM to 1 to attempt to allow us to write to stimecmp li t3, 0x30 // Desired offset from the present time mv a3, t3 // copy value in to know to stop waiting for interrupt after this many cycles - // la t4, 0x02004000 // MTIMECMP register in CLINT la t5, 0x0200BFF8 // MTIME register in CLINT *** we still read from mtime since stimecmp is compared to it lw t2, 0(t5) // low word of MTIME lw t6, 4(t5) // high word of MTIME @@ -367,9 +364,6 @@ trap_stack_saved_\MODE\(): // jump here after handling vectored interupt since w .endif - li t3, 0x2 - csrs \MODE\()counteren, t3 // set mcounteren.TM to 1 to attempt to allow us to write to stimecmp - // Respond to trap based on cause // All interrupts should return after being logged csrr ra, \MODE\()cause @@ -440,9 +434,6 @@ trapreturn_specified_\MODE\(): li a2, 0 // reset trapreturn inputs to the trap handler trapreturn_finished_\MODE\(): - li t3, 0x2 - csrs \MODE\()counteren, t3 // set mcounteren.TM to 1 to attempt to allow us to write to stimecmp - csrw \MODE\()epc, ra // update the mepc with address of next instruction lw t2, -12(sp) // restore registers from stack before returning lw t0, -8(sp) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h index 4a6fa94a..1e86d963 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h @@ -138,8 +138,6 @@ time_loop_m: ret cause_s_time_interrupt: - li t3, 0x2 - csrs mcounteren, t3 // set mcounteren.TM to 1 to attempt to allow us to write to stimecmp li t3, 0x30 // Desired offset from the present time mv a3, t3 // copy value in to know to stop waiting for interrupt after this many cycles // la t4, 0x02004000 // MTIMECMP register in CLINT @@ -357,9 +355,6 @@ trap_stack_saved_\MODE\(): // jump here after handling vectored interupt since w .endif - li t3, 0x2 - csrs \MODE\()counteren, t3 // set mcounteren.TM to 1 to attempt to allow us to write to stimecmp - // Respond to trap based on cause // All interrupts should return after being logged csrr ra, \MODE\()cause @@ -430,9 +425,6 @@ trapreturn_specified_\MODE\(): li a2, 0 // reset trapreturn inputs to the trap handler trapreturn_finished_\MODE\(): - li t3, 0x2 - csrc \MODE\()counteren, t3 // set mcounteren.TM to 1 to attempt to allow us to write to stimecmp - csrw \MODE\()epc, ra // update the epc with address of next instruction ld t2, -24(sp) // restore registers from stack before returning ld t0, -16(sp) From 5c3f5fe8c6d62b3d31626696f761196f2209ac29 Mon Sep 17 00:00:00 2001 From: Kip Macsai-Goren Date: Sat, 4 Mar 2023 15:53:03 -0800 Subject: [PATCH 10/17] added in the CSR name for stimecmp(h) --- .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 8 ++++---- .../rv64i_m/privilege/src/WALLY-TEST-LIB-64.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index ad892e95..f3f963d8 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -145,8 +145,8 @@ cause_s_time_interrupt: bgtu t3, t2, nowrap_s // check new time exceeds current time (no wraparound) addi t6, t6, 1 // if wrap, increment most significant word nowrap_s: - csrw 0x14D, t3 // store into STIMECMP - csrw 0x15D, t6 // store into STIMECMPH + csrw stimecmp, t3 // store into STIMECMP + csrw stimecmph, t6 // store into STIMECMPH time_loop_s: addi a3, a3, -1 bnez a3, time_loop_s // go through this loop for [a3 value] iterations before returning without performing interrupt @@ -558,8 +558,8 @@ time_interrupt_\MODE\(): li t2, 0xFFFFFFFF sw t2, 0(t0) // reset interrupt by setting mtimecmp to max //sw t2, 4(t0) // reset interrupt by setting mtimecmpH to max - csrw 0x14D, t2 // reset stime interrupts by doing the same to stimecmp and stimecmpH. - csrw 0x15D, t2 + csrw stimecmp, t2 // reset stime interrupts by doing the same to stimecmp and stimecmpH. + csrw stimecmph, t2 li t0, 0x20 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h index 1e86d963..00e235f3 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h @@ -145,7 +145,7 @@ cause_s_time_interrupt: lw t2, 0(t5) // low word of MTIME lw t6, 4(t5) // high word of MTIME add t3, t2, t3 // add desired offset to the current time - csrw 0x14D, t3 // store into most significant word of STIMECMP + csrw stimecmp, t3 // store into most significant word of STIMECMP time_loop_s: addi a3, a3, -1 bnez a3, time_loop_s // go through this loop for [a3 value] iterations before returning without performing interrupt @@ -548,7 +548,7 @@ time_interrupt_\MODE\(): la t0, 0x02004000 // MTIMECMP register in CLINT li t2, 0xFFFFFFFF sd t2, 0(t0) // reset interrupt by setting mtimecmp to max - csrw 0x14D, t2 // reset stime interrupts by doing the same. + csrw stimecmp, t2 // reset stime interrupts by doing the same. li t0, 0x20 csrc \MODE\()ip, t0 From 22367e4c207222c2056616760f8b3965164ed838 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sat, 4 Mar 2023 17:59:16 -0600 Subject: [PATCH 11/17] Working batch mode branch prediction simulations. --- sim/bpred-sim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sim/bpred-sim.py b/sim/bpred-sim.py index 1ec8b304..60574e37 100755 --- a/sim/bpred-sim.py +++ b/sim/bpred-sim.py @@ -47,7 +47,7 @@ configs = [ ] bpdSize = [6, 8, 10, 12, 14, 16] -bpdType = ['twobit', 'gshare'] +bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic'] for CurrBPType in bpdType: for CurrBPSize in bpdSize: name = CurrBPType+str(CurrBPSize) From fe163bbab3c04df51ec8040196d6b3d8a7965ad6 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 6 Mar 2023 13:14:48 -0600 Subject: [PATCH 12/17] Updated fpga ila script. --- fpga/constraints/debug2.xdc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fpga/constraints/debug2.xdc b/fpga/constraints/debug2.xdc index 7ea5bf6b..8c1e0947 100644 --- a/fpga/constraints/debug2.xdc +++ b/fpga/constraints/debug2.xdc @@ -266,7 +266,7 @@ connect_debug_port u_ila_0/probe50 [get_nets [list wallypipelinedsoc/uncore.unco create_debug_port u_ila_0 probe set_property port_width 1 [get_debug_ports u_ila_0/probe51] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe51] -connect_debug_port u_ila_0/probe51 [get_nets [list wallypipelinedsoc/core/hzu/BPPredWrongE ]] +connect_debug_port u_ila_0/probe51 [get_nets [list wallypipelinedsoc/core/hzu/BPWrongE ]] create_debug_port u_ila_0 probe set_property port_width 1 [get_debug_ports u_ila_0/probe52] From 82ada79b119c89bef27ba3c50541055c08f8e141 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 6 Mar 2023 17:47:55 -0600 Subject: [PATCH 13/17] Renamed ebuarbfsm to ebufsmarb to match figures. --- src/ebu/{ebuarbfsm.sv => ebufsmarb.sv} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/ebu/{ebuarbfsm.sv => ebufsmarb.sv} (100%) diff --git a/src/ebu/ebuarbfsm.sv b/src/ebu/ebufsmarb.sv similarity index 100% rename from src/ebu/ebuarbfsm.sv rename to src/ebu/ebufsmarb.sv From e831efddafc9320b96b7183c8a85bb35d502b79a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 6 Mar 2023 17:48:57 -0600 Subject: [PATCH 14/17] Renamed InstrFirstHalf to InstrFirstHalfF. --- src/ifu/spill.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ifu/spill.sv b/src/ifu/spill.sv index 4b89a3ce..4d971cba 100644 --- a/src/ifu/spill.sv +++ b/src/ifu/spill.sv @@ -58,7 +58,7 @@ module spill #( logic SpillF; logic SelSpillF; logic SpillSaveF; - logic [15:0] InstrFirstHalf; + logic [15:0] InstrFirstHalfF; //////////////////////////////////////////////////////////////////////////////////////////////////// // PC logic @@ -102,10 +102,10 @@ module spill #( //////////////////////////////////////////////////////////////////////////////////////////////////// // save the first 2 bytes - flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalf); + flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalfF); // merge together - mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalf}, SpillF, PostSpillInstrRawF); + mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SpillF, PostSpillInstrRawF); // Need to use always comb to avoid pessimistic x propagation if PostSpillInstrRawF is x always_comb From 6fc157e6288978b6d48fc0f2648451f491fa2b5b Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 6 Mar 2023 17:50:57 -0600 Subject: [PATCH 15/17] Renamed PCFSpill to PCSpillF. --- src/ifu/ifu.sv | 8 ++++---- src/ifu/spill.sv | 4 ++-- src/lsu/lsu.sv | 4 ++-- src/mmu/hptw.sv | 4 ++-- src/wally/wallypipelinedcore.sv | 6 +++--- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 2c2ee7b4..fa3eab16 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -47,7 +47,7 @@ module ifu ( output logic [2:0] IFUHBURST, // Bus burst from IFU to EBU output logic [1:0] IFUHTRANS, // Bus transaction type from IFU to EBU - output logic [`XLEN-1:0] PCFSpill, // PCF with possible + 2 to handle spill to HPTW + output logic [`XLEN-1:0] PCSpillF, // PCF with possible + 2 to handle spill to HPTW // Execute output logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) input logic PCSrcE, // Executation stage branch is taken @@ -136,7 +136,7 @@ module ifu ( logic CacheCommittedF; // I$ memory operation started, delay interrupts logic SelIROM; // PMA indicates instruction address is in the IROM - assign PCFExt = {2'b00, PCFSpill}; + assign PCFExt = {2'b00, PCSpillF}; ///////////////////////////////////////////////////////////////////////////////////////////// // Spill Support @@ -144,10 +144,10 @@ module ifu ( if(`C_SUPPORTED) begin : Spill spill #(`ICACHE_SUPPORTED) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF, - .InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); + .InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCSpillF, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpill assign PCNextFSpill = PCNextF; - assign PCFSpill = PCF; + assign PCSpillF = PCF; assign PostSpillInstrRawF = InstrRawF; assign {SelNextSpillF, CompressedF} = 0; end diff --git a/src/ifu/spill.sv b/src/ifu/spill.sv index 4d971cba..d4e85faf 100644 --- a/src/ifu/spill.sv +++ b/src/ifu/spill.sv @@ -44,7 +44,7 @@ module spill #( input logic ITLBMissF, // ITLB miss, ignore memory request input logic InstrUpdateDAF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active) output logic [`XLEN-1:0] PCNextFSpill, // The next PCF for one of the two memory addresses of the spill - output logic [`XLEN-1:0] PCFSpill, // PCF for one of the two memory addresses of the spill + output logic [`XLEN-1:0] PCSpillF, // PCF for one of the two memory addresses of the spill output logic SelNextSpillF, // During the transition between the two spill operations, the IFU should stall the pipeline output logic [31:0] PostSpillInstrRawF,// The final 32 bit instruction after merging the two spilled fetches into 1 instruction output logic CompressedF); // The fetched instruction is compressed @@ -69,7 +69,7 @@ module spill #( // select between PCNextF and PCF+2 mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~FlushD), .y(PCNextFSpill)); // select between PCF and PCF+2 - mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCFSpill)); + mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCSpillF)); //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 9f11f700..91ad694e 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -80,7 +80,7 @@ module lsu ( input logic [`XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable readable, supervisor user memory, machine privilege input logic [1:0] STATUS_MPP, // Machine previous privilege mode - input logic [`XLEN-1:0] PCFSpill, // Fetch PC + input logic [`XLEN-1:0] PCSpillF, // Fetch PC input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk input logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits output logic [`XLEN-1:0] PTE, // Page table entry write to ITLB @@ -152,7 +152,7 @@ module lsu ( if(`VIRTMEM_SUPPORTED) begin : VIRTMEM_SUPPORTED hptw hptw(.clk, .reset, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, .DTLBMissM, .DTLBWriteM, .InstrUpdateDAF, .DataUpdateDAM, - .FlushW, .DCacheStallM, .SATP_REGW, .PCFSpill, + .FlushW, .DCacheStallM, .SATP_REGW, .PCSpillF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadDataM(ReadDataM[`XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index f2df8ea9..aac0a606 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -34,7 +34,7 @@ module hptw ( input logic clk, reset, input logic [`XLEN-1:0] SATP_REGW, // includes SATP.MODE to determine number of levels in page table - input logic [`XLEN-1:0] PCFSpill, // addresses to translate + input logic [`XLEN-1:0] PCSpillF, // addresses to translate input logic [`XLEN+1:0] IEUAdrExtM, // addresses to translate input logic [1:0] MemRWM, AtomicM, // system status @@ -111,7 +111,7 @@ module hptw ( assign TLBMiss = (DTLBMissOrDAFaultM | ITLBMissOrDAFaultF); // Determine which address to translate - mux2 #(`XLEN) vadrmux(PCFSpill, IEUAdrExtM[`XLEN-1:0], DTLBWalk, TranslationVAdr); + mux2 #(`XLEN) vadrmux(PCSpillF, IEUAdrExtM[`XLEN-1:0], DTLBWalk, TranslationVAdr); assign CurrentPPN = PTE[`PPN_BITS+9:10]; // State flops diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 6c2d5816..a226c783 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -63,7 +63,7 @@ module wallypipelinedcore ( logic [2:0] Funct3E; logic [31:0] InstrD; logic [31:0] InstrM; - logic [`XLEN-1:0] PCFSpill, PCE, PCLinkE; + logic [`XLEN-1:0] PCSpillF, PCE, PCLinkE; logic [`XLEN-1:0] PCM; logic [`XLEN-1:0] CSRReadValW, MDUResultW; logic [`XLEN-1:0] UnalignedPCNextF, PC2NextF; @@ -170,7 +170,7 @@ module wallypipelinedcore ( .InstrValidM, .InstrValidE, .InstrValidD, .BranchD, .BranchE, .JumpD, .JumpE, .ICacheStallF, // Fetch - .HRDATA, .PCFSpill, .IFUHADDR, .PC2NextF, + .HRDATA, .PCSpillF, .IFUHADDR, .PC2NextF, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE, .ICacheAccess, .ICacheMiss, // Execute @@ -241,7 +241,7 @@ module wallypipelinedcore ( .StoreAmoMisalignedFaultM, // connects to privilege .StoreAmoAccessFaultM, // connects to privilege .InstrUpdateDAF, - .PCFSpill, .ITLBMissF, .PTE, .PageType, .ITLBWriteF, .SelHPTW, + .PCSpillF, .ITLBMissF, .PTE, .PageType, .ITLBWriteF, .SelHPTW, .LSUStallM); if(`BUS_SUPPORTED) begin : ebu From 4b539de18442e6fd3b8ace5d2e9d23e5364cbfa5 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 6 Mar 2023 18:29:21 -0600 Subject: [PATCH 16/17] Renamed signals to be consistent with textbook. --- src/ifu/ifu.sv | 18 +++++++++--------- src/ifu/spill.sv | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index fa3eab16..41b0de00 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -101,7 +101,7 @@ module ifu ( logic [`XLEN-1:0] PCNextF; // Next PCF, selected from Branch predictor, Privilege, or PC+2/4 logic BranchMisalignedFaultE; // Branch target not aligned to 4 bytes if no compressed allowed (2 bytes if allowed) logic [`XLEN-1:0] PCPlus2or4F; // PCF + 2 (CompressedF) or PCF + 4 (Non-compressed) - logic [`XLEN-1:0] PCNextFSpill; // Next PCF after possible + 2 to handle spill + logic [`XLEN-1:0] PCSpillNextF; // Next PCF after possible + 2 to handle spill logic [`XLEN-1:0] PCLinkD; // PCF2or4F delayed 1 cycle. This is next PC after a control flow instruction (br or j) logic [`XLEN-1:2] PCPlus4F; // PCPlus4F is always PCF + 4. Fancy way to compute PCPlus2or4F logic [`XLEN-1:0] PCD; // Decode stage instruction address @@ -126,7 +126,7 @@ module ifu ( logic CacheableF; // PMA indicates instruction address is cacheable - logic SelNextSpillF; // In a spill, stall pipeline and gate local stallF + logic SelSpillNextF; // In a spill, stall pipeline and gate local stallF logic BusStall; // Bus interface busy with multicycle operation logic IFUCacheBusStallD; // EIther I$ or bus busy with multicycle operation logic GatedStallD; // StallD gated by selected next spill @@ -144,12 +144,12 @@ module ifu ( if(`C_SUPPORTED) begin : Spill spill #(`ICACHE_SUPPORTED) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF, - .InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCSpillF, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); + .InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCSpillNextF, .PCSpillF, .SelSpillNextF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpill - assign PCNextFSpill = PCNextF; + assign PCSpillNextF = PCNextF; assign PCSpillF = PCF; assign PostSpillInstrRawF = InstrRawF; - assign {SelNextSpillF, CompressedF} = 0; + assign {SelSpillNextF, CompressedF} = 0; end //////////////////////////////////////////////////////////////////////////////////////////////// @@ -213,7 +213,7 @@ module ifu ( logic IROMce; assign IROMce = ~GatedStallD | reset; assign IFURWF = 2'b10; - irom irom(.clk, .ce(IROMce), .Adr(PCNextFSpill[`XLEN-1:0]), .IROMInstrF); + irom irom(.clk, .ce(IROMce), .Adr(PCSpillNextF[`XLEN-1:0]), .IROMInstrF); end else begin assign IFURWF = 2'b10; end @@ -245,7 +245,7 @@ module ifu ( .CacheWriteData('0), .CacheRW(CacheRWF), .CacheAtomic('0), .FlushCache('0), - .NextAdr(PCNextFSpill[11:0]), + .NextAdr(PCSpillNextF[11:0]), .PAdr(PCPF), .CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM)); ahbcacheinterface #(WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW) @@ -286,8 +286,8 @@ module ifu ( end assign IFUCacheBusStallD = ICacheStallF | BusStall; - assign IFUStallF = IFUCacheBusStallD | SelNextSpillF; - assign GatedStallD = StallD & ~SelNextSpillF; + assign IFUStallF = IFUCacheBusStallD | SelSpillNextF; + assign GatedStallD = StallD & ~SelSpillNextF; flopenl #(32) AlignedInstrRawDFlop(clk, reset | FlushD, ~StallD, PostSpillInstrRawF, nop, InstrRawD); diff --git a/src/ifu/spill.sv b/src/ifu/spill.sv index d4e85faf..73f302ca 100644 --- a/src/ifu/spill.sv +++ b/src/ifu/spill.sv @@ -43,9 +43,9 @@ module spill #( input logic IFUCacheBusStallD, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic ITLBMissF, // ITLB miss, ignore memory request input logic InstrUpdateDAF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active) - output logic [`XLEN-1:0] PCNextFSpill, // The next PCF for one of the two memory addresses of the spill + output logic [`XLEN-1:0] PCSpillNextF, // The next PCF for one of the two memory addresses of the spill output logic [`XLEN-1:0] PCSpillF, // PCF for one of the two memory addresses of the spill - output logic SelNextSpillF, // During the transition between the two spill operations, the IFU should stall the pipeline + output logic SelSpillNextF, // During the transition between the two spill operations, the IFU should stall the pipeline output logic [31:0] PostSpillInstrRawF,// The final 32 bit instruction after merging the two spilled fetches into 1 instruction output logic CompressedF); // The fetched instruction is compressed @@ -57,7 +57,7 @@ module spill #( logic TakeSpillF; logic SpillF; logic SelSpillF; - logic SpillSaveF; + logic SpillSaveF; logic [15:0] InstrFirstHalfF; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -67,7 +67,7 @@ module spill #( // compute PCF+2 from the raw PC+4 mux2 #(`XLEN) pcplus2mux(.d0({PCF[`XLEN-1:2], 2'b10}), .d1({PCPlus4F, 2'b00}), .s(PCF[1]), .y(PCPlus2F)); // select between PCNextF and PCF+2 - mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~FlushD), .y(PCNextFSpill)); + mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelSpillNextF & ~FlushD), .y(PCSpillNextF)); // select between PCF and PCF+2 mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCSpillF)); @@ -94,7 +94,7 @@ module spill #( end assign SelSpillF = (CurrState == STATE_SPILL); - assign SelNextSpillF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallD); + assign SelSpillNextF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallD); assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushD; //////////////////////////////////////////////////////////////////////////////////////////////////// From fc9081b64c0c5980de557ea868db9d65c5b0d09f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 7 Mar 2023 10:49:59 -0600 Subject: [PATCH 17/17] Added Yujun Lin's branch predictor simulator. This is a C baseline module for common branch predictor algorithms. --- .gitmodules | 3 +++ addins/branch-predictor-simulator | 1 + 2 files changed, 4 insertions(+) create mode 160000 addins/branch-predictor-simulator diff --git a/.gitmodules b/.gitmodules index ab45d3f9..78243a1d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -21,3 +21,6 @@ [submodule "addins/coremark"] path = addins/coremark url = https://github.com/eembc/coremark +[submodule "addins/branch-predictor-simulator"] + path = addins/branch-predictor-simulator + url = https://github.com/synxlin/branch-predictor-simulator.git diff --git a/addins/branch-predictor-simulator b/addins/branch-predictor-simulator new file mode 160000 index 00000000..af0c6f8c --- /dev/null +++ b/addins/branch-predictor-simulator @@ -0,0 +1 @@ +Subproject commit af0c6f8cb62f48ee43e74c21e799102e03951ce2