From bb39570576f59666446697728a6aa47c7deb0381 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 6 Feb 2023 15:38:57 -0800 Subject: [PATCH 01/14] Fixed floating point crash in debug.S --- tests/custom/debug/Makefile | 8 +++++--- tests/custom/debug/debug.S | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/custom/debug/Makefile b/tests/custom/debug/Makefile index 9ee0b2e7..4616dde6 100644 --- a/tests/custom/debug/Makefile +++ b/tests/custom/debug/Makefile @@ -17,11 +17,13 @@ $(TARGET).elf: $(TARGET).S Makefile sim: spike --isa=rv64gc +signature=$(TARGET).signature.output +signature-granularity=8 $(TARGET).elf - diff --ignore-case $(TARGET).signature.output $(TARGET).reference_output || exit - echo "Signature matches! Success!" +# diff --ignore-case $(TARGET).signature.output $(TARGET).reference_output || exit +# echo "Signature matches! Success!" + mkdir -p ../work + cp -f * ../work clean: - rm -f $(TARGET).elf $(TARGET).elf.* + rm -f $(TARGET).elf $(TARGET).elf.* *.signature.output diff --git a/tests/custom/debug/debug.S b/tests/custom/debug/debug.S index 431a261e..5be3c201 100644 --- a/tests/custom/debug/debug.S +++ b/tests/custom/debug/debug.S @@ -5,6 +5,8 @@ .global rvtest_entry_point rvtest_entry_point: + lui t0, 0x1e # turn on Floating point and XS + csrs mstatus, t0 # openhwgroup/cvw Issue #55 la a6, begin_signature From 93637fd9cbffd3059986c2ba9098db7e2b4edbb3 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 6 Feb 2023 16:47:56 -0800 Subject: [PATCH 02/14] debug simulating, producing discrepancy --- tests/custom/debug/Makefile | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/custom/debug/Makefile b/tests/custom/debug/Makefile index 4616dde6..ddabe4e3 100644 --- a/tests/custom/debug/Makefile +++ b/tests/custom/debug/Makefile @@ -2,8 +2,15 @@ TARGET = debug +$(TARGET).signature.output: $(TARGET).elf.memfile $(TARGET).elf + spike --isa=rv64gc +signature=$(TARGET).signature.output 
+signature-granularity=4 $(TARGET).elf +# diff --ignore-case $(TARGET).signature.output $(TARGET).reference_output || exit +# echo "Signature matches! Success!" + mkdir -p ../work + cp -f * ../work + $(TARGET).elf.memfile:$(TARGET).elf $(TARGET).elf.objdump.addr - riscv64-unknown-elf-elf2hex --bit-width $(if $(findstring rv64,$*),64,32) --input $< --output $@ + riscv64-unknown-elf-elf2hex --bit-width 64 --input $< --output $@ $(TARGET).elf.objdump.addr: $(TARGET).elf.objdump extractFunctionRadix.sh $< @@ -15,12 +22,6 @@ $(TARGET).elf: $(TARGET).S Makefile riscv64-unknown-elf-gcc -g -o $(TARGET).elf -march=rv64gc -mabi=lp64 -mcmodel=medany \ -nostartfiles -T$(WALLY)/examples/link/link.ld $(TARGET).S -sim: - spike --isa=rv64gc +signature=$(TARGET).signature.output +signature-granularity=8 $(TARGET).elf -# diff --ignore-case $(TARGET).signature.output $(TARGET).reference_output || exit -# echo "Signature matches! Success!" - mkdir -p ../work - cp -f * ../work clean: rm -f $(TARGET).elf $(TARGET).elf.* *.signature.output From 0712fa8f672d3363ae441a2173144818d4926975 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 7 Feb 2023 06:31:14 -0800 Subject: [PATCH 03/14] Disabled STATUS_FS at reset, fixing issue #71 --- src/privileged/csrsr.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/privileged/csrsr.sv b/src/privileged/csrsr.sv index c5d5e7a1..92f0f504 100644 --- a/src/privileged/csrsr.sv +++ b/src/privileged/csrsr.sv @@ -145,7 +145,7 @@ module csrsr ( STATUS_MXR_INT <= #1 0; STATUS_SUM_INT <= #1 0; STATUS_MPRV_INT <= #1 0; // Per Priv 3.3 - STATUS_FS_INT <= #1 `F_SUPPORTED ? 2'b01 : 2'b00; + STATUS_FS_INT <= #1 `F_SUPPORTED ? 
2'b00 : 2'b00; // leave floating-point off until activated, even if F_SUPPORTED STATUS_MPP <= #1 0; STATUS_SPP <= #1 0; STATUS_MPIE <= #1 0; From 195e7c1a9c6baf2ade54c7baa956fd48ec961539 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 7 Feb 2023 06:55:42 -0800 Subject: [PATCH 04/14] Moved STATUS_FS_INT write to if statement to properly prioritize --- src/privileged/csrsr.sv | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/privileged/csrsr.sv b/src/privileged/csrsr.sv index 92f0f504..1fa1fe8e 100644 --- a/src/privileged/csrsr.sv +++ b/src/privileged/csrsr.sv @@ -156,8 +156,6 @@ module csrsr ( STATUS_SBE <= #1 0; STATUS_UBE <= #1 0; end else if (~StallW) begin - if (FRegWriteM | WriteFRMM | WriteFFLAGSM) STATUS_FS_INT <= #1 2'b11; // mark Float State dirty *** this should happen in M stage, be part of if/else; - if (TrapM) begin // Update interrupt enables per Privileged Spec p. 21 // y = PrivilegeModeW @@ -211,6 +209,6 @@ module csrsr ( STATUS_SPIE <= #1 `S_SUPPORTED & CSRWriteValM[5]; STATUS_SIE <= #1 `S_SUPPORTED & CSRWriteValM[1]; STATUS_UBE <= #1 CSRWriteValM[6] & `U_SUPPORTED & `BIGENDIAN_SUPPORTED; - end + end else if (FRegWriteM | WriteFRMM | WriteFFLAGSM) STATUS_FS_INT <= #1 2'b11; end endmodule From bcd90bdb4eb69b73abc30691352f0a079e04532c Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 7 Feb 2023 16:49:50 -0800 Subject: [PATCH 05/14] Paths changed in latest GCC --- README.md | 6 +++--- bin/wally-tool-chain-install.sh | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 400df6b3..a7e2b99d 100644 --- a/README.md +++ b/README.md @@ -110,11 +110,11 @@ Ubuntu users may need to install and update various tools. Beware when cutting ### Install RISC-V GCC Cross-Compiler -To install GCC from source can take hours to compile. This configuration enables multilib to target many flavors of RISC-V. 
This book is tested with GCC 12.2 (tagged 2022.09.21), but will likely work with newer versions as well. +To install GCC from source can take hours to compile. This configuration enables multilib to target many flavors of RISC-V. This book is tested with GCC 12.2 (tagged 2023.01.31), but will likely work with newer versions as well. $ git clone https://github.com/riscv/riscv-gnu-toolchain $ cd riscv-gnu-toolchain - $ git checkout 2022.09.21 + $ git checkout 2023.01.31 $ ./configure --prefix=$RISCV --enable-multilib --with-multilib-generator="rv32e-ilp32e--;rv32i-ilp32--;rv32im-ilp32--;rv32iac-ilp32--;rv32imac-ilp32--;rv32imafc-ilp32f--;rv32imafdc-ilp32d--;rv64i-lp64--;rv64ic-lp64--;rv64iac-lp64--;rv64imac-lp64--;rv64imafdc-lp64d--;rv64im-lp64--;" $ make --jobs @@ -143,7 +143,7 @@ Spike also takes a while to install and compile, but this can be done concurrent $ git clone https://github.com/riscv-software-src/riscv-isa-sim $ mkdir riscv-isa-sim/build $ cd riscv-isa-sim/build - $ ../configure --prefix=$RISCV --enable-commitlog + $ ../configure --prefix=$RISCV $ make --jobs $ make install $ cd ../arch_test_target/spike/device diff --git a/bin/wally-tool-chain-install.sh b/bin/wally-tool-chain-install.sh index 2b45d8e7..c4d9ed74 100755 --- a/bin/wally-tool-chain-install.sh +++ b/bin/wally-tool-chain-install.sh @@ -54,13 +54,15 @@ fi cd $RISCV git clone https://github.com/riscv/riscv-gnu-toolchain cd riscv-gnu-toolchain +git checkout 2023.01.31 ./configure --prefix=${RISCV} --enable-multilib --with-multilib-generator="rv32e-ilp32e--;rv32i-ilp32--;rv32im-ilp32--;rv32iac-ilp32--;rv32imac-ilp32--;rv32imafc-ilp32f--;rv32imafdc-ilp32d--;rv64i-lp64--;rv64ic-lp64--;rv64iac-lp64--;rv64imac-lp64--;rv64imafdc-lp64d--;rv64im-lp64--;" make -j ${NUM_THREADS} make install # elf2hex cd $RISCV -export PATH=$RISCV/riscv-gnu-toolchain/bin:$PATH +#export PATH=$RISCV/riscv-gnu-toolchain/bin:$PATH +export PATH=$RISCV/bin:$PATH git clone https://github.com/sifive/elf2hex.git cd elf2hex 
autoreconf -i @@ -87,7 +89,7 @@ cd $RISCV git clone https://github.com/riscv-software-src/riscv-isa-sim mkdir -p riscv-isa-sim/build cd riscv-isa-sim/build -../configure --prefix=$RISCV --enable-commitlog +../configure --prefix=$RISCV make -j ${NUM_THREADS} make install cd ../arch_test_target/spike/device From 7383fbd14427b3889a63fa328460af8c9452f11b Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 8 Feb 2023 13:02:21 -0800 Subject: [PATCH 06/14] Removed unnecessary --enable-multilib from gcc build commands because --with-multilib-generator implies it --- README.md | 2 +- bin/wally-tool-chain-install.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index efd7e26c..d8c2432d 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ To install GCC from source can take hours to compile. This configuration enables $ git clone https://github.com/riscv/riscv-gnu-toolchain $ cd riscv-gnu-toolchain $ git checkout 2023.01.31 - $ ./configure --prefix=$RISCV --enable-multilib --with-multilib-generator="rv32e-ilp32e--;rv32i-ilp32--;rv32im-ilp32--;rv32iac-ilp32--;rv32imac-ilp32--;rv32imafc-ilp32f--;rv32imafdc-ilp32d--;rv64i-lp64--;rv64ic-lp64--;rv64iac-lp64--;rv64imac-lp64--;rv64imafdc-lp64d--;rv64im-lp64--;" + $ ./configure --prefix=$RISCV --with-multilib-generator="rv32e-ilp32e--;rv32i-ilp32--;rv32im-ilp32--;rv32iac-ilp32--;rv32imac-ilp32--;rv32imafc-ilp32f--;rv32imafdc-ilp32d--;rv64i-lp64--;rv64ic-lp64--;rv64iac-lp64--;rv64imac-lp64--;rv64imafdc-lp64d--;rv64im-lp64--;" $ make --jobs Note: make --jobs will reduce compile time by compiling in parallel. However, adding this option could dramatically increase the memory utilization of your local machine. 
diff --git a/bin/wally-tool-chain-install.sh b/bin/wally-tool-chain-install.sh index c4d9ed74..331ca13d 100755 --- a/bin/wally-tool-chain-install.sh +++ b/bin/wally-tool-chain-install.sh @@ -55,7 +55,7 @@ cd $RISCV git clone https://github.com/riscv/riscv-gnu-toolchain cd riscv-gnu-toolchain git checkout 2023.01.31 -./configure --prefix=${RISCV} --enable-multilib --with-multilib-generator="rv32e-ilp32e--;rv32i-ilp32--;rv32im-ilp32--;rv32iac-ilp32--;rv32imac-ilp32--;rv32imafc-ilp32f--;rv32imafdc-ilp32d--;rv64i-lp64--;rv64ic-lp64--;rv64iac-lp64--;rv64imac-lp64--;rv64imafdc-lp64d--;rv64im-lp64--;" +./configure --prefix=${RISCV} --with-multilib-generator="rv32e-ilp32e--;rv32i-ilp32--;rv32im-ilp32--;rv32iac-ilp32--;rv32imac-ilp32--;rv32imafc-ilp32f--;rv32imafdc-ilp32d--;rv64i-lp64--;rv64ic-lp64--;rv64iac-lp64--;rv64imac-lp64--;rv64imafdc-lp64d--;rv64im-lp64--;" make -j ${NUM_THREADS} make install From 996bb289d3a2797f8b545ff4ef754e8a36343408 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 8 Feb 2023 18:24:38 -0600 Subject: [PATCH 07/14] Simplified branch predictor. 
--- src/ifu/bpred/bpred.sv | 62 ++++++++++++++---------------------------- src/ifu/bpred/btb.sv | 4 +-- 2 files changed, 23 insertions(+), 43 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 700ec409..2d4dfe5c 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -69,12 +69,12 @@ module bpred ( logic PredValidF; logic [1:0] DirPredictionF; - logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD, PredInstrClassE; + logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD; logic [`XLEN-1:0] PredPCF, RASPCF; logic PredictionPCWrongE; - logic PredictionInstrClassWrongE; + logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; logic [3:0] InstrClassF, InstrClassD, InstrClassE, InstrClassW; - logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE; + logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE; logic SelBPPredF; logic [`XLEN-1:0] BPPredPCF; @@ -82,7 +82,6 @@ module bpred ( logic [`XLEN-1:0] PCCorrectE; logic [3:0] WrongPredInstrClassD; - logic BTBTargetWrongE; logic RASTargetWrongE; logic JumpOrTakenBranchE; @@ -132,20 +131,15 @@ module bpred ( -----/\----- EXCLUDED -----/\----- */ end - // this predictor will have two pieces of data, - // 1) A direction (1 = Taken, 0 = Not Taken) - // 2) Any information which is necessary for the predictor to build its next state. - // For a 2 bit table this is the prediction count. 
- // Part 2 Branch target address prediction - // *** For now the BTB will house the direct and indirect targets + // BTB contains target address for all CFI btb TargetPredictor(.clk, .reset, .StallF, .StallD, .StallM, .FlushD, .FlushM, .PCNextF, .PCF, .PCD, .PCE, .PredPCF, .BTBPredInstrClassF, .PredValidF, - .PredictionInstrClassWrongE, + .AnyWrongPredInstrClassE, .IEUAdrE, .InstrClassD, .InstrClassE); @@ -205,16 +199,15 @@ module bpred ( flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); flopenrc #(4) InstrClassRegW(clk, reset, FlushW, ~StallW, InstrClassM, InstrClassW); flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); - flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); // branch predictor flopenrc #(4) BPPredWrongRegM(clk, reset, FlushM, ~StallM, - {DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE, PredictionInstrClassWrongE}, + {DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE, AnyWrongPredInstrClassE}, {DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM}); // pipeline the class flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); - flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE); + flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); // Check the prediction // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. @@ -223,11 +216,13 @@ module bpred ( // The next instruction is always valid as no other flush would occur at the same time as the branch and not // also flush the branch. This will change in a superscaler cpu. 
assign PredictionPCWrongE = PCCorrectE != PCD; - assign BPPredWrongE = PredictionPCWrongE & (|InstrClassE | BPPredClassNonCFIWrongE); - // The branch direction is checked inside each branch predictor, but does not actually matter for - // branch miss prediction recovery. If the class or direction is wrong, but the target is correct - // we an ignore the branch miss-prediction. + // branch class prediction wrong. + assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD; + assign AnyWrongPredInstrClassD = |WrongPredInstrClassD; + + // Finally indicate if the branch predictor was wrong + assign BPPredWrongE = PredictionPCWrongE & (|InstrClassE | AnyWrongPredInstrClassE); // Output the predicted PC or corrected PC on miss-predict. // Selects the BP or PC+2/4. @@ -242,27 +237,6 @@ module bpred ( if(`INSTR_CLASS_PRED) mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPPredWrongM, NextValidPCE); else assign NextValidPCE = PCE; - // Finally we need to check if the class is wrong. When the class is wrong the BTB needs to be updated. - // Also we want to track this in a performance counter. - assign PredictionInstrClassWrongE = InstrClassE != PredInstrClassE; - // The remaining checks are used for performance counters. - - - - // If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter. - //assign BTBPredPCWrongE = (InstrClassE[3] | InstrClassE[1] | InstrClassE[0]) & PredictionPCWrongE; - //assign BTBPredPCWrongE = TargetWrongE & (InstrClassE[3] | InstrClassE[1] | InstrClassE[0]) & PCSrcE; - assign BTBPredPCWrongE = BTBTargetWrongE; - - // similar with RAS. Over counts ras if the class prediction was wrong. - //assign RASPredPCWrongE = TargetWrongE & InstrClassE[2] & PCSrcE; - assign RASPredPCWrongE = RASTargetWrongE; - // Finally if the real instruction class is non CFI but the predictor said it was we need to count. 
- assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE; - - // branch class prediction wrong. - assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD; - // performance counters // 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now @@ -270,11 +244,17 @@ module bpred ( // 3. target ras (ras target wrong / class[2]) // 4. direction (br dir wrong / class[0]) - assign BTBTargetWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] | InstrClassE[3]) & PCSrcE; - assign RASTargetWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE; + // Unfortunately we can't rely on PCD to infer the correctness of the BTB or RAS because the class prediction + // could be wrong or the fall through address selected for branch predict not taken. + // By pipelining the BTB's PC and RAS address through the pipeline we can measure the accuracy of + // both without the above inaccuracies. + assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] | InstrClassE[3]) & PCSrcE; + assign RASPredPCWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE; assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1] | InstrClassE[3]; + flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); + flopenrc #(`XLEN) BTBTargetDReg(clk, reset, FlushD, ~StallD, PredPCF, PredPCD); flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, PredPCD, PredPCE); diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 7cf9ed99..c538636d 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -39,7 +39,7 @@ module btb #(parameter int Depth = 10 ) ( output logic [3:0] BTBPredInstrClassF, // BTB's guess at instruction class output logic PredValidF, // BTB's guess is valid // update - input logic PredictionInstrClassWrongE, // BTB's instruction class guess was wrong + input logic AnyWrongPredInstrClassE, // BTB's instruction class guess was wrong input 
logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb input logic [3:0] InstrClassD, // Instruction class to insert into btb input logic [3:0] InstrClassE // Instruction class to insert into btb @@ -98,7 +98,7 @@ module btb #(parameter int Depth = 10 ) ( //assign PredValidF = MatchXF ? 1'b1 : TablePredValidF; - assign UpdateEn = |InstrClassE | PredictionInstrClassWrongE; + assign UpdateEn = |InstrClassE | AnyWrongPredInstrClassE; // An optimization may be using a PC relative address. ram2p1r1wbe #(2**Depth, `XLEN+4) memory( From faf7cd8c8a72daa0deba78cdbf39fbcd3a038971 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 9 Feb 2023 14:48:02 -0600 Subject: [PATCH 08/14] Updated globalhistory predictor. --- src/ifu/bpred/bpred.sv | 3 +- src/ifu/bpred/speculativeglobalhistory.sv | 102 +++++++++------------- 2 files changed, 44 insertions(+), 61 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 2d4dfe5c..fd9e8a92 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -103,8 +103,7 @@ module bpred ( end else if (`BPRED_TYPE == "BPSPECULATIVEGLOBAL") begin:Predictor speculativeglobalhistory #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), - .BranchInstrW(InstrClassW[0]), .WrongPredInstrClassD, .PCSrcE); + .PredInstrClassF, .InstrClassD, .InstrClassE, .WrongPredInstrClassD, .PCSrcE); end else if (`BPRED_TYPE == "BPGSHARE") begin:Predictor gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, diff --git a/src/ifu/bpred/speculativeglobalhistory.sv b/src/ifu/bpred/speculativeglobalhistory.sv index 3ed9ba3e..51dbb422 100644 --- a/src/ifu/bpred/speculativeglobalhistory.sv +++ 
b/src/ifu/bpred/speculativeglobalhistory.sv @@ -29,35 +29,31 @@ `include "wally-config.vh" module speculativeglobalhistory #(parameter int k = 10 ) ( - input logic clk, - input logic reset, - input logic StallF, StallD, StallE, StallM, StallW, - input logic FlushD, FlushE, FlushM, FlushW, - output logic [1:0] DirPredictionF, - output logic DirPredictionWrongE, + input logic clk, + input logic reset, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, + output logic [1:0] DirPredictionF, + output logic DirPredictionWrongE, // update - input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, BranchInstrW, - input logic [3:0] WrongPredInstrClassD, - input logic PCSrcE + input logic [3:0] PredInstrClassF, InstrClassD, InstrClassE, + input logic [3:0] WrongPredInstrClassD, + input logic PCSrcE ); logic MatchF, MatchD, MatchE; logic MatchNextX, MatchXF; logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; - logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE; - - logic [k-1:0] GHRF; - logic GHRExtraF; - logic [k-1:0] GHRD, GHRE, GHRM, GHRW; - logic [k-1:0] GHRNextF; - logic [k-1:0] GHRNextD; - logic [k-1:0] GHRNextE, GHRNextM, GHRNextW; - logic [k-1:0] IndexNextF, IndexF; - logic [k-1:0] IndexD, IndexE; - + logic [1:0] NewDirPredictionE; + logic [k-1:0] GHRF, GHRD, GHRE; + logic GHRLastF; + logic [k-1:0] GHRNextF, GHRNextD, GHRNextE; + logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE; logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF; + + logic FlushDOrDirWrong; assign IndexNextF = GHRNextF; assign IndexF = GHRF; @@ -70,20 +66,20 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( .rd1(TableDirPredictionF), .wa2(IndexE), .wd2(NewDirPredictionE), - .we2(BranchInstrE & ~StallM & ~FlushM), + .we2(InstrClassE[0]), .bwe2(1'b1)); // if there are non-flushed branches in the pipeline we need to forward the prediction from that stage to the NextF demi stage // and then 
register for use in the Fetch stage. - assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF); - assign MatchD = BranchInstrD & ~FlushE & (IndexNextF == IndexD); - assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE); + assign MatchF = PredInstrClassF[0] & ~FlushD & (IndexNextF == IndexF); + assign MatchD = InstrClassD[0] & ~FlushE & (IndexNextF == IndexD); + assign MatchE = InstrClassE[0] & ~FlushM & (IndexNextF == IndexE); assign MatchNextX = MatchF | MatchD | MatchE; flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); - assign ForwardNewDirPrediction = MatchF ? NewDirPredictionF : - MatchD ? NewDirPredictionD : + assign ForwardNewDirPrediction = MatchF ? {2{DirPredictionF[1]}} : + MatchD ? {2{DirPredictionD[1]}} : NewDirPredictionE ; flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF); @@ -94,49 +90,37 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( flopenr #(2) PredictionRegD(clk, reset, ~StallD, DirPredictionF, DirPredictionD); flopenr #(2) PredictionRegE(clk, reset, ~StallE, DirPredictionD, DirPredictionE); - // New prediction pipeline - assign NewDirPredictionF = {DirPredictionF[1], DirPredictionF[1]}; - flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD); satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); // GHR pipeline - // this version fails the regression test do to pessimistic x propagation. - // assign GHRNextF = FlushD | DirPredictionWrongE ? GHRNextD[k-1:0] : - // BranchInstrF ? 
{DirPredictionF[1], GHRF[k-1:1]} : - // GHRF; - always_comb begin - if(FlushD | DirPredictionWrongE) begin - GHRNextF = GHRNextD[k-1:0]; - end else if(BranchInstrF) GHRNextF = {DirPredictionF[1], GHRF[k-1:1]}; - else GHRNextF = GHRF; - end + // If Fetch has a branch, speculatively insert prediction into the GHR + // If the front end is flushed or the direction prediction is wrong, reset to + // most recent valid GHR. For a BP wrong this is GHRD with the correct prediction shifted in. + // For FlushE this is GHRE. GHRNextE is both. + assign FlushDOrDirWrong = FlushD | DirPredictionWrongE; + mux3 #(k) GHRFMux(GHRF, {DirPredictionF[1], GHRF[k-1:1]}, GHRNextE[k-1:0], + {FlushDOrDirWrong, PredInstrClassF[0]}, GHRNextF); - flopenr #(k) GHRFReg(clk, reset, (~StallF) | FlushD, GHRNextF, GHRF); - flopenr #(1) GHRFExtraReg(clk, reset, (~StallF) | FlushD, GHRF[0], GHRExtraF); + // Need 1 extra bit to store the shifted out GHRF if repair needs to back shift. + flopenr #(k) GHRFReg(clk, reset, ~StallF | FlushDOrDirWrong, GHRNextF, GHRF); + flopenr #(1) GHRFLastReg(clk, reset, ~StallF | FlushDOrDirWrong, GHRF[0], GHRLastF); - // use with out instruction class prediction - //assign GHRNextD = FlushD ? GHRNextE[k-1:0] : GHRF[k-1:0]; - // with instruction class prediction - assign GHRNextD = (FlushD | DirPredictionWrongE) ? GHRNextE[k-1:0] : - WrongPredInstrClassD[0] & BranchInstrD ? {DirPredictionD[1], GHRF[k-1:1]} : // shift right - WrongPredInstrClassD[0] & ~BranchInstrD ? {GHRF[k-2:0], GHRExtraF}: // shift left - GHRF[k-1:0]; + // With instruction class prediction, the class could be wrong and is checked in Decode. + // If it is wrong and branch does exist then shift right and insert the prediction. + // If the branch does not exist then shift left and use GHRLastF to restore the LSB. 
+ logic [k-1:0] GHRClassWrong; + mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, InstrClassD[0], GHRClassWrong); + // As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE. + mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD); - flopenr #(k) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, GHRD); + flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD); - assign GHRNextE = BranchInstrE & ~FlushM ? {PCSrcE, GHRD[k-2:0]} : // if the branch is not flushed - FlushE ? GHRNextM : // branch is flushed - GHRD; - flopenr #(k) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, GHRE); + mux3 #(k) GHREMux(GHRD, GHRE, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); - assign GHRNextM = FlushM ? GHRNextW : GHRE; - flopenr #(k) GHRMReg(clk, reset, (~StallM) | FlushM, GHRNextM, GHRM); - - assign GHRNextW = FlushW ? GHRW : GHRM; - flopenr #(k) GHRWReg(clk, reset, (BranchInstrW & ~StallW) | FlushW, GHRNextW, GHRW); + flopenr #(k) GHREReg(clk, reset, ((InstrClassE[0] & ~FlushM) & ~StallE) | FlushE, GHRNextE, GHRE); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0]; endmodule From f2c7a489b222c0719fda349819775abc02703377 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 9 Feb 2023 18:14:26 -0800 Subject: [PATCH 09/14] Test gen header --- tests/testgen/testgen_header.S | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/tests/testgen/testgen_header.S b/tests/testgen/testgen_header.S index 4129782f..44a74f5d 100644 --- a/tests/testgen/testgen_header.S +++ b/tests/testgen/testgen_header.S @@ -1,21 +1,12 @@ // // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University // -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 
associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 /////////////////////////////////////////// #include "model_test.h" #include "arch_test.h" +RVTEST_ISA("RV64I") .section .text.init .globl rvtest_entry_point From 51a792431f9a62d7f3ce80f900668ca1a000222b Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 9 Feb 2023 18:24:48 -0800 Subject: [PATCH 10/14] Moved test generators --- .../testgen}/testgen-ADD-SUB-SLT-SLTU-XOR-OR-AND.py | 0 .../testgen}/testgen-ADDI-XORI-ORI-ANDI-SLTI.py | 0 .../testgen}/testgen-ADDIW-SLLIW-SRLIW-SRAIW.py | 0 .../testgen}/testgen-ADDW-SUBW-SLLW-SRLW-SRAW.py | 0 {tests/testgen/imperas => studies/testgen}/testgen-BRANCH.py | 0 {tests/testgen/imperas => studies/testgen}/testgen-CSR.py | 0 {tests/testgen/imperas => studies/testgen}/testgen-JAL-JALR.py | 0 {tests/testgen/imperas => studies/testgen}/testgen-LOAD.py | 0 {tests/testgen/imperas => studies/testgen}/testgen-PIPELINE.py | 0 {tests/testgen/imperas => studies/testgen}/testgen-SLL-SRL-SRA.py | 0 .../testgen/imperas => 
studies/testgen}/testgen-SLLI-SRLI-SRAI.py | 0 {tests/testgen/imperas => studies/testgen}/testgen-SLTIU.py | 0 {tests/testgen/imperas => studies/testgen}/testgen-STORE.py | 0 .../testgen/imperas => studies/testgen}/testgen-VIRTUALMEMORY.py | 0 14 files changed, 0 insertions(+), 0 deletions(-) rename {tests/testgen/imperas => studies/testgen}/testgen-ADD-SUB-SLT-SLTU-XOR-OR-AND.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-ADDI-XORI-ORI-ANDI-SLTI.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-ADDIW-SLLIW-SRLIW-SRAIW.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-ADDW-SUBW-SLLW-SRLW-SRAW.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-BRANCH.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-CSR.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-JAL-JALR.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-LOAD.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-PIPELINE.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-SLL-SRL-SRA.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-SLLI-SRLI-SRAI.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-SLTIU.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-STORE.py (100%) rename {tests/testgen/imperas => studies/testgen}/testgen-VIRTUALMEMORY.py (100%) diff --git a/tests/testgen/imperas/testgen-ADD-SUB-SLT-SLTU-XOR-OR-AND.py b/studies/testgen/testgen-ADD-SUB-SLT-SLTU-XOR-OR-AND.py similarity index 100% rename from tests/testgen/imperas/testgen-ADD-SUB-SLT-SLTU-XOR-OR-AND.py rename to studies/testgen/testgen-ADD-SUB-SLT-SLTU-XOR-OR-AND.py diff --git a/tests/testgen/imperas/testgen-ADDI-XORI-ORI-ANDI-SLTI.py b/studies/testgen/testgen-ADDI-XORI-ORI-ANDI-SLTI.py similarity index 100% rename from tests/testgen/imperas/testgen-ADDI-XORI-ORI-ANDI-SLTI.py rename to studies/testgen/testgen-ADDI-XORI-ORI-ANDI-SLTI.py 
diff --git a/tests/testgen/imperas/testgen-ADDIW-SLLIW-SRLIW-SRAIW.py b/studies/testgen/testgen-ADDIW-SLLIW-SRLIW-SRAIW.py similarity index 100% rename from tests/testgen/imperas/testgen-ADDIW-SLLIW-SRLIW-SRAIW.py rename to studies/testgen/testgen-ADDIW-SLLIW-SRLIW-SRAIW.py diff --git a/tests/testgen/imperas/testgen-ADDW-SUBW-SLLW-SRLW-SRAW.py b/studies/testgen/testgen-ADDW-SUBW-SLLW-SRLW-SRAW.py similarity index 100% rename from tests/testgen/imperas/testgen-ADDW-SUBW-SLLW-SRLW-SRAW.py rename to studies/testgen/testgen-ADDW-SUBW-SLLW-SRLW-SRAW.py diff --git a/tests/testgen/imperas/testgen-BRANCH.py b/studies/testgen/testgen-BRANCH.py similarity index 100% rename from tests/testgen/imperas/testgen-BRANCH.py rename to studies/testgen/testgen-BRANCH.py diff --git a/tests/testgen/imperas/testgen-CSR.py b/studies/testgen/testgen-CSR.py similarity index 100% rename from tests/testgen/imperas/testgen-CSR.py rename to studies/testgen/testgen-CSR.py diff --git a/tests/testgen/imperas/testgen-JAL-JALR.py b/studies/testgen/testgen-JAL-JALR.py similarity index 100% rename from tests/testgen/imperas/testgen-JAL-JALR.py rename to studies/testgen/testgen-JAL-JALR.py diff --git a/tests/testgen/imperas/testgen-LOAD.py b/studies/testgen/testgen-LOAD.py similarity index 100% rename from tests/testgen/imperas/testgen-LOAD.py rename to studies/testgen/testgen-LOAD.py diff --git a/tests/testgen/imperas/testgen-PIPELINE.py b/studies/testgen/testgen-PIPELINE.py similarity index 100% rename from tests/testgen/imperas/testgen-PIPELINE.py rename to studies/testgen/testgen-PIPELINE.py diff --git a/tests/testgen/imperas/testgen-SLL-SRL-SRA.py b/studies/testgen/testgen-SLL-SRL-SRA.py similarity index 100% rename from tests/testgen/imperas/testgen-SLL-SRL-SRA.py rename to studies/testgen/testgen-SLL-SRL-SRA.py diff --git a/tests/testgen/imperas/testgen-SLLI-SRLI-SRAI.py b/studies/testgen/testgen-SLLI-SRLI-SRAI.py similarity index 100% rename from tests/testgen/imperas/testgen-SLLI-SRLI-SRAI.py 
rename to studies/testgen/testgen-SLLI-SRLI-SRAI.py diff --git a/tests/testgen/imperas/testgen-SLTIU.py b/studies/testgen/testgen-SLTIU.py similarity index 100% rename from tests/testgen/imperas/testgen-SLTIU.py rename to studies/testgen/testgen-SLTIU.py diff --git a/tests/testgen/imperas/testgen-STORE.py b/studies/testgen/testgen-STORE.py similarity index 100% rename from tests/testgen/imperas/testgen-STORE.py rename to studies/testgen/testgen-STORE.py diff --git a/tests/testgen/imperas/testgen-VIRTUALMEMORY.py b/studies/testgen/testgen-VIRTUALMEMORY.py similarity index 100% rename from tests/testgen/imperas/testgen-VIRTUALMEMORY.py rename to studies/testgen/testgen-VIRTUALMEMORY.py From 8ad5f2b18109300e2aa5d3927066a964e1b71741 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 9 Feb 2023 18:25:24 -0800 Subject: [PATCH 11/14] Added RVTEST_CASE to testgen header --- tests/testgen/testgen_header.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/testgen/testgen_header.S b/tests/testgen/testgen_header.S index 44a74f5d..a93e5af1 100644 --- a/tests/testgen/testgen_header.S +++ b/tests/testgen/testgen_header.S @@ -14,4 +14,7 @@ rvtest_entry_point: RVMODEL_BOOT RVTEST_CODE_BEGIN +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",temp) + + RVTEST_SIGBASE( x6, wally_signature) From 282ffd131363f34c9d2c39f07eb2c63ae3390f86 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 10 Feb 2023 09:06:51 -0600 Subject: [PATCH 12/14] RAS and RAS documentation now consistent. 
--- src/ifu/bpred/RASPredictor.sv | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 624d8e64..0a841ae1 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -42,7 +42,7 @@ module RASPredictor #(parameter int StackSize = 16 )( logic CounterEn; localparam Depth = $clog2(StackSize); - logic [Depth-1:0] NextPtr, Ptr, PtrP1, PtrM1; + logic [Depth-1:0] NextPtr, Ptr, P1, M1, IncDecPtr; logic [StackSize-1:0] [`XLEN-1:0] memory; integer index; @@ -71,10 +71,11 @@ module RASPredictor #(parameter int StackSize = 16 )( assign CounterEn = PopF | PushE | RepairD; assign DecrementPtr = (PopF | DecRepairD) & ~IncrRepairD; - mux2 #(Depth) PtrMux(PtrP1, PtrM1, DecrementPtr, NextPtr); - assign PtrM1 = Ptr - 1'b1; - assign PtrP1 = Ptr + 1'b1; + assign P1 = 1; + assign M1 = '1; // -1 + mux2 #(Depth) PtrMux(P1, M1, DecrementPtr, IncDecPtr); + assign NextPtr = Ptr + IncDecPtr; flopenr #(Depth) PTR(clk, reset, CounterEn, NextPtr, Ptr); @@ -84,7 +85,7 @@ module RASPredictor #(parameter int StackSize = 16 )( for(index=0; index Date: Fri, 10 Feb 2023 09:07:06 -0600 Subject: [PATCH 13/14] Added new features to branch predictor analysis script. --- bin/parseHPMC.py | 62 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 35bb9c82..5b5ce522 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -28,6 +28,7 @@ import os import sys import matplotlib.pyplot as plt +import re def ComputeCPI(benchmark): 'Computes and inserts CPI into benchmark stats.' 
@@ -145,6 +146,11 @@ def FormatToPlot(currBenchmark): if(sys.argv[1] == '-b'): configList = [] + summery = 0 + if(sys.argv[2] == '-s'): + summery = 1 + sys.argv = sys.argv[1::] + print('summery = %d' % summery) for config in sys.argv[2::]: benchmarks = ProcessFile(config) ComputeAverage(benchmarks) @@ -171,18 +177,50 @@ if(sys.argv[1] == '-b'): size = len(benchmarkDict) index = 1 - print('Number of plots', size) - for benchmarkName in benchmarkDict: - currBenchmark = benchmarkDict[benchmarkName] - (names, values) = FormatToPlot(currBenchmark) - print(names, values) - plt.subplot(6, 7, index) - plt.bar(names, values) - plt.title(benchmarkName) - plt.ylabel('BR Dir Miss Rate (%)') - #plt.xlabel('Predictor') - index += 1 - #plt.tight_layout() + print('summery = %d' % summery) + if(summery == 0): + print('Number of plots', size) + for benchmarkName in benchmarkDict: + currBenchmark = benchmarkDict[benchmarkName] + (names, values) = FormatToPlot(currBenchmark) + print(names, values) + plt.subplot(6, 7, index) + plt.bar(names, values) + plt.title(benchmarkName) + plt.ylabel('BR Dir Miss Rate (%)') + #plt.xlabel('Predictor') + index += 1 + else: + combined = benchmarkDict['All_'] + (name, value) = FormatToPlot(combined) + lst = [] + dct = {} + category = [] + length = [] + accuracy = [] + for index in range(0, len(name)): + match = re.match(r"([a-z]+)([0-9]+)", name[index], re.I) + percent = 100 -value[index] + if match: + (PredType, size) = match.groups() + category.append(PredType) + length.append(size) + accuracy.append(percent) + if(PredType not in dct): + dct[PredType] = ([size], [percent]) + else: + (currSize, currPercent) = dct[PredType] + currSize.append(size) + currPercent.append(percent) + dct[PredType] = (currSize, currPercent) + print(dct) + for cat in dct: + (x, y) = dct[cat] + plt.scatter(x, y, label=cat) + plt.plot(x, y) + plt.ylabel('Prediction Accuracy') + plt.xlabel('Size (b or k)') + plt.legend(loc='upper left') plt.show() From 
c229f0064e38136416557bf274878ef524b8fb24 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 10 Feb 2023 10:33:10 -0600 Subject: [PATCH 14/14] Modified branch predictor to use InstrValidE and InstrValidD rather than the more complex InstrClassE | WrongClassE logic. --- src/ieu/controller.sv | 4 ++-- src/ieu/ieu.sv | 4 ++-- src/ifu/bpred/bpred.sv | 6 +++--- src/ifu/ifu.sv | 3 ++- src/wally/wallypipelinedcore.sv | 5 +++-- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 1819de17..108b0bb1 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -62,7 +62,8 @@ module controller( output logic [2:0] Funct3M, // Instruction's funct3 field output logic RegWriteM, // Instruction writes a register (needed for Hazard unit) output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$ - output logic InstrValidM, // Instruction is valid + output logic InstrValidD, InstrValidE, InstrValidM, // Instruction is valid + output logic FWriteIntM, // FPU controller writes integer register file // Writeback stage control signals input logic StallW, FlushW, // Stall, flush Writeback stage @@ -96,7 +97,6 @@ module controller( logic FenceXD; // Fence instruction logic InvalidateICacheD, FlushDCacheD;// Invalidate I$, flush D$ logic CSRWriteD, CSRWriteE; // CSR write - logic InstrValidD, InstrValidE; // Instruction is valid logic PrivilegedD, PrivilegedE; // Privileged instruction logic InvalidateICacheE, FlushDCacheE;// Invalidate I$, flush D$ logic [`CTRLW-1:0] ControlsD; // Main Instruction Decoder control signals diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index 681bd982..9df95040 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -54,7 +54,7 @@ module ieu ( output logic [4:0] RdM, // Destination register input logic [`XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp) output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$ - output logic InstrValidM, // 
Instruction is valid + output logic InstrValidD, InstrValidE, InstrValidM,// Instruction is valid // Writeback stage signals input logic [`XLEN-1:0] FIntDivResultW, // Integer divide result from FPU fdivsqrt) input logic [`XLEN-1:0] CSRReadValW, // CSR read value, @@ -97,7 +97,7 @@ module ieu ( .PCSrcE, .ALUControlE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .MemReadE, .CSRReadE, .Funct3E, .IntDivE, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, - .RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .FWriteIntM, + .RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM, .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .StoreStallD); datapath dp( diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index fd9e8a92..c2ad9ac9 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -51,6 +51,7 @@ module bpred ( input logic [31:0] PostSpillInstrRawF, // Instruction // Branch and jump outcome + input logic InstrValidD, InstrValidE, input logic PCSrcE, // Executation stage branch is taken input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) @@ -220,8 +221,8 @@ module bpred ( assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD; assign AnyWrongPredInstrClassD = |WrongPredInstrClassD; - // Finally indicate if the branch predictor was wrong - assign BPPredWrongE = PredictionPCWrongE & (|InstrClassE | AnyWrongPredInstrClassE); + // branch is wrong only if the PC does not match and both the Execute and Decode stages have valid instructions. + assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; // Output the predicted PC or corrected PC on miss-predict. // Selects the BP or PC+2/4.
@@ -236,7 +237,6 @@ module bpred ( if(`INSTR_CLASS_PRED) mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPPredWrongM, NextValidPCE); else assign NextValidPCE = PCE; - // performance counters // 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now // 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 956144de..68350bac 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -35,6 +35,7 @@ module ifu ( // Command from CPU input logic InvalidateICacheM, // Clears all instruction cache valid bits input logic CSRWriteFenceM, // CSR write or fence instruction, PCNextF = the next valid PC (typically PCE) + input logic InstrValidD, InstrValidE, InstrValidM, // Bus interface output logic [`PA_BITS-1:0] IFUHADDR, // Bus address from IFU to EBU input logic [`XLEN-1:0] HRDATA, // Bus read data from IFU to EBU @@ -322,7 +323,7 @@ module ifu ( if (`BPRED_SUPPORTED) begin : bpred bpred bpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, - .FlushD, .FlushE, .FlushM, .FlushW, + .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, .InstrD, .PCNextF, .PCPlus2or4F, .PCNext1F, .PCE, .PCM, .PCSrcE, .IEUAdrE, .PCF, .NextValidPCE, .PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPPredWrongM, .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM); diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 2df96d1f..3a57b9ae 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -68,7 +68,7 @@ module wallypipelinedcore ( logic [`XLEN-1:0] CSRReadValW, MDUResultW; logic [`XLEN-1:0] UnalignedPCNextF, PCNext2F; logic [1:0] MemRWM; - logic InstrValidM; + logic InstrValidD, InstrValidE, InstrValidM; logic InstrMisalignedFaultM; logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD; logic InstrPageFaultF, LoadPageFaultM, 
StoreAmoPageFaultM; @@ -166,6 +166,7 @@ module wallypipelinedcore ( // instruction fetch unit: PC, branch prediction, instruction cache ifu ifu(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, + .InstrValidM, .InstrValidE, .InstrValidD, // Fetch .HRDATA, .PCFSpill, .IFUHADDR, .PCNext2F, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE, @@ -201,7 +202,7 @@ module wallypipelinedcore ( .RdE, .RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM, // Writeback stage .CSRReadValW, .MDUResultW, .FIntDivResultW, .RdW, .ReadDataW(ReadDataW[`XLEN-1:0]), - .InstrValidM, .FCvtIntResW, .FCvtIntW, + .InstrValidM, .InstrValidE, .InstrValidD, .FCvtIntResW, .FCvtIntW, // hazards .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .FCvtIntStallD, .LoadStallD, .MDUStallD, .CSRRdStallD, .PCSrcE,