From 9172e522867f1176561974f2e8cfab18417eb0d5 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 31 Mar 2021 11:54:02 -0500 Subject: [PATCH] Corrected a number of bugs in the branch predictor. Added performance counters to individually track branches; jumps, jump register, jal, and jalr; return. Jump and jump register are special cases of jal and jalr. Similarly, return is a special case of jalr. Also added counters to track if the branch direction was wrong, BTB target wrong, or the RAS target was wrong. Finally, added one more counter to track if the BP incorrectly predicts a non-cfi instruction. --- wally-pipelined/src/ifu/BTBPredictor.sv | 4 +- wally-pipelined/src/ifu/RAsPredictor.sv | 2 +- wally-pipelined/src/ifu/bpred.sv | 46 ++++++++++++++---- wally-pipelined/src/ifu/ifu.sv | 47 +++++++++++-------- wally-pipelined/src/privileged/csr.sv | 8 +++- wally-pipelined/src/privileged/csrc.sv | 31 +++++++----- wally-pipelined/src/privileged/privileged.sv | 8 +++- .../src/wally/wallypipelinedhart.sv | 9 +++- 8 files changed, 107 insertions(+), 48 deletions(-) diff --git a/wally-pipelined/src/ifu/BTBPredictor.sv b/wally-pipelined/src/ifu/BTBPredictor.sv index b342c11b..4b54c0bb 100644 --- a/wally-pipelined/src/ifu/BTBPredictor.sv +++ b/wally-pipelined/src/ifu/BTBPredictor.sv @@ -42,7 +42,7 @@ module BTBPredictor input logic UpdateEN, input logic [`XLEN-1:0] UpdatePC, input logic [`XLEN-1:0] UpdateTarget, - input logic [3:0] UpdateInstrClass, + input logic [4:0] UpdateInstrClass, input logic UpdateInvalid ); @@ -89,7 +89,7 @@ module BTBPredictor .WEN1(UpdateEN)); -----/\----- EXCLUDED -----/\----- */ - flopenr #() UpdateENReg(.clk(clk), + flopenr #(1) UpdateENReg(.clk(clk), .reset(reset), .en(~StallF), .d(UpdateEN), diff --git a/wally-pipelined/src/ifu/RAsPredictor.sv b/wally-pipelined/src/ifu/RAsPredictor.sv index 166ff911..bde30be5 100644 --- a/wally-pipelined/src/ifu/RAsPredictor.sv +++ b/wally-pipelined/src/ifu/RAsPredictor.sv @@ -55,7 +55,7 @@ module 
RASPredictor // may have to handle a push and an incr at the same time. // *** what happens if jal is executing and there is a return being flushed in Decode? - flopenr #(StackSize) PTR(.clk(clk), + flopenr #(Depth) PTR(.clk(clk), .reset(reset), .en(CounterEn), .d(PtrD), diff --git a/wally-pipelined/src/ifu/bpred.sv b/wally-pipelined/src/ifu/bpred.sv index ba9c688d..5de3f4ec 100644 --- a/wally-pipelined/src/ifu/bpred.sv +++ b/wally-pipelined/src/ifu/bpred.sv @@ -49,7 +49,11 @@ module bpred input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) input logic [4:0] InstrClassE, // Report branch prediction status - output logic BPPredWrongE + output logic BPPredWrongE, + output logic BPPredDirWrongE, + output logic BTBPredPCWrongE, + output logic RASPredPCWrongE, + output logic BPPredClassNonCFIWrongE ); logic BTBValidF; @@ -59,7 +63,6 @@ module bpred logic [`XLEN-1:0] BTBPredPCF, RASPCF; logic TargetWrongE; logic FallThroughWrongE; - logic PredictionDirWrongE; logic PredictionPCWrongE; logic PredictionInstrClassWrongE; @@ -172,14 +175,14 @@ module bpred .q(BPPredE)); // pipeline the class - flopenrc #(4) InstrClassRegD(.clk(clk), + flopenrc #(5) InstrClassRegD(.clk(clk), .reset(reset), .en(~StallD), .clear(FlushD), .d(BPInstrClassF), .q(BPInstrClassD)); - flopenrc #(4) InstrClassRegE(.clk(clk), + flopenrc #(5) InstrClassRegE(.clk(clk), .reset(reset), .en(~StallE), .clear(FlushE), @@ -189,13 +192,40 @@ module bpred // Check the prediction makes execution. + + // first check if the target or fallthrough address matches what was predicted. assign TargetWrongE = PCTargetE != PCD; assign FallThroughWrongE = PCLinkE != PCD; - assign PredictionDirWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0]; - assign PredictionPCWrongE = PCSrcE ? 
TargetWrongE : FallThroughWrongE; - assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE; - assign BPPredWrongE = ((PredictionPCWrongE | PredictionDirWrongE) & (|InstrClassE)) | PredictionInstrClassWrongE; + // If the target is taken check the target rather than fallthrough. The instruction needs to be a branch if PCSrcE is selected + // Remember the bpred can incorrectly predict a non cfi instruction as a branch taken. If the real instruction is non cfi + // it must have selected the fall through. + assign PredictionPCWrongE = (PCSrcE & (|InstrClassE) ? TargetWrongE : FallThroughWrongE); + // The branch direction also needs to be checked. + // However if the direction is wrong then the pc will be wrong. This is only relevant to checking the + // accuracy of the direction prediction. + assign BPPredDirWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0]; + + // Finally we need to check if the class is wrong. When the class is wrong the BTB needs to be updated. + // Also we want to track this in a performance counter. + assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE; + + // We want to output to the instruction fetch if the PC fetched was wrong. If by chance the predictor was wrong about + // the direction or class, but correct about the target we don't have to flush the pipeline. However we still + // need this information to verify the accuracy of the predictors. + + + //assign BPPredWrongE = ((PredictionPCWrongE | BPPredDirWrongE) & (|InstrClassE)) | PredictionInstrClassWrongE; + + assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | BPPredClassNonCFIWrongE; + + // If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter. 
+ assign BTBPredPCWrongE = (InstrClassE[4] | InstrClassE[2] | InstrClassE[1]) & PredictionPCWrongE; + // similar with RAS + assign RASPredPCWrongE = InstrClassE[3] & PredictionPCWrongE; + // Finally if the real instruction class is non CFI but the predictor said it was we need to count. + assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE; + // Update predictors satCounter2 BPDirUpdate(.BrDir(PCSrcE), diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 30e25bea..79d5878a 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -27,14 +27,14 @@ `include "wally-config.vh" module ifu ( - input logic clk, reset, - input logic StallF, StallD, StallE, StallM, StallW, - input logic FlushF, FlushD, FlushE, FlushM, FlushW, + input logic clk, reset, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushF, FlushD, FlushE, FlushM, FlushW, // Fetch - input logic [`XLEN-1:0] InstrInF, + input logic [`XLEN-1:0] InstrInF, output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] InstrPAdrF, - output logic InstrReadF, + output logic InstrReadF, // Decode // Execute output logic [`XLEN-1:0] PCLinkE, @@ -47,23 +47,26 @@ module ifu ( input logic [`XLEN-1:0] PrivilegedNextPCM, output logic [31:0] InstrD, InstrM, output logic [`XLEN-1:0] PCM, - output logic [3:0] InstrClassM, - output logic BPPredWrongM, + output logic [4:0] InstrClassM, + output logic BPPredDirWrongM, + output logic BTBPredPCWrongM, + output logic RASPredPCWrongM, + output logic BPPredClassNonCFIWrongM, // Writeback // output logic [`XLEN-1:0] PCLinkW, // Faults - input logic IllegalBaseInstrFaultD, - output logic IllegalIEUInstrFaultD, - output logic InstrMisalignedFaultM, + input logic IllegalBaseInstrFaultD, + output logic IllegalIEUInstrFaultD, + output logic InstrMisalignedFaultM, output logic [`XLEN-1:0] InstrMisalignedAdrM, // TLB management - input logic [1:0] PrivilegeModeW, - input logic [`XLEN-1:0] 
PageTableEntryF, - input logic [`XLEN-1:0] SATP_REGW, - input logic ITLBWriteF, // ITLBFlushF, - output logic ITLBMissF, ITLBHitF, + input logic [1:0] PrivilegeModeW, + input logic [`XLEN-1:0] PageTableEntryF, + input logic [`XLEN-1:0] SATP_REGW, + input logic ITLBWriteF, // ITLBFlushF, + output logic ITLBMissF, ITLBHitF, // bogus - input logic [15:0] rd2 + input logic [15:0] rd2 ); @@ -135,7 +138,11 @@ module ifu ( .PCD(PCD), .PCLinkE(PCLinkE), .InstrClassE(InstrClassE), - .BPPredWrongE(BPPredWrongE)); + .BPPredWrongE(BPPredWrongE), + .BPPredDirWrongE(BPPredDirWrongE), + .BTBPredPCWrongE(BTBPredPCWrongE), + .RASPredPCWrongE(RASPredPCWrongE), + .BPPredClassNonCFIWrongE(BPPredClassNonCFIWrongE)); // The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE. assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE; @@ -216,12 +223,12 @@ module ifu ( .d(InstrClassE), .q(InstrClassM)); - flopenrc #(1) BPPredWrongRegM(.clk(clk), + flopenrc #(4) BPPredWrongRegM(.clk(clk), .reset(reset), .en(~StallM), .clear(FlushM), - .d(BPPredWrongE), - .q(BPPredWrongM)); + .d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}), + .q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM})); // seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL. 
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index 79e81303..2148d885 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -33,8 +33,12 @@ module csr ( input logic [`XLEN-1:0] PCM, SrcAM, input logic CSRReadM, CSRWriteM, TrapM, MTrapM, STrapM, UTrapM, mretM, sretM, uretM, input logic TimerIntM, ExtIntM, SwIntM, - input logic InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM, - input logic [3:0] InstrClassM, + input logic InstrValidW, FloatRegWriteW, LoadStallD, + input logic BPPredDirWrongM, + input logic BTBPredPCWrongM, + input logic RASPredPCWrongM, + input logic BPPredClassNonCFIWrongM, + input logic [4:0] InstrClassM, input logic [1:0] NextPrivilegeModeM, PrivilegeModeW, input logic [`XLEN-1:0] CauseM, NextFaultMtvalM, output logic [1:0] STATUS_MPP, diff --git a/wally-pipelined/src/privileged/csrc.sv b/wally-pipelined/src/privileged/csrc.sv index ba90a48a..01e3a168 100644 --- a/wally-pipelined/src/privileged/csrc.sv +++ b/wally-pipelined/src/privileged/csrc.sv @@ -28,16 +28,20 @@ `include "wally-config.vh" module csrc ( - input logic clk, reset, - input logic StallD, StallE, StallM, StallW, - input logic InstrValidW, LoadStallD, CSRMWriteM, BPPredWrongM, - input logic [3:0] InstrClassM, - input logic [11:0] CSRAdrM, - input logic [1:0] PrivilegeModeW, - input logic [`XLEN-1:0] CSRWriteValM, - input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, + input logic clk, reset, + input logic StallD, StallE, StallM, StallW, + input logic InstrValidW, LoadStallD, CSRMWriteM, + input logic BPPredDirWrongM, + input logic BTBPredPCWrongM, + input logic RASPredPCWrongM, + input logic BPPredClassNonCFIWrongM, + input logic [4:0] InstrClassM, + input logic [11:0] CSRAdrM, + input logic [1:0] PrivilegeModeW, + input logic [`XLEN-1:0] CSRWriteValM, + input logic [31:0] 
MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, output logic [`XLEN-1:0] CSRCReadValM, - output logic IllegalCSRCAccessM); + output logic IllegalCSRCAccessM); // create Counter arrays to store address of each counter integer MHPMCOUNTER [`COUNTERS:0]; @@ -64,9 +68,14 @@ module csrc ( assign MCOUNTEN[1] = 1'b0; assign MCOUNTEN[2] = InstrValidW & ~StallW; assign MCOUNTEN[3] = LoadStallD & ~StallD; - assign MCOUNTEN[4] = BPPredWrongM & ~StallM; + assign MCOUNTEN[4] = BPPredDirWrongM & ~StallM; assign MCOUNTEN[5] = InstrClassM[0] & ~StallM; - assign MCOUNTEN[`COUNTERS:6] = 0; + assign MCOUNTEN[6] = BTBPredPCWrongM & ~StallM; + assign MCOUNTEN[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & ~StallM; + assign MCOUNTEN[8] = RASPredPCWrongM & ~StallM; + assign MCOUNTEN[9] = InstrClassM[3] & ~StallM; + assign MCOUNTEN[10] = BPPredClassNonCFIWrongM & ~StallM; + assign MCOUNTEN[`COUNTERS:11] = 0; genvar j; generate diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index f863b7fa..bb8e7e5e 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -36,8 +36,12 @@ module privileged ( output logic [`XLEN-1:0] CSRReadValW, output logic [`XLEN-1:0] PrivilegedNextPCM, output logic RetM, TrapM, - input logic InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM, - input logic [3:0] InstrClassM, + input logic InstrValidW, FloatRegWriteW, LoadStallD, + input logic BPPredDirWrongM, + input logic BTBPredPCWrongM, + input logic RASPredPCWrongM, + input logic BPPredClassNonCFIWrongM, + input logic [4:0] InstrClassM, input logic PrivilegedM, input logic InstrMisalignedFaultM, InstrAccessFaultF, IllegalIEUInstrFaultD, input logic LoadMisalignedFaultM, LoadAccessFaultM, diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 5975015f..1559c906 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ 
b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -111,8 +111,13 @@ module wallypipelinedhart ( logic DataStall, InstrStall; logic InstrAckD, MemAckW; - logic BPPredWrongE, BPPredWrongM; - logic [3:0] InstrClassM; + logic BPPredWrongE; + logic BPPredDirWrongM; + logic BTBPredPCWrongM; + logic RASPredPCWrongM; + logic BPPredClassNonCFIWrongM; + + logic [4:0] InstrClassM; ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache