diff --git a/wally-pipelined/src/ifu/BTBPredictor.sv b/wally-pipelined/src/ifu/BTBPredictor.sv index b342c11b..4b54c0bb 100644 --- a/wally-pipelined/src/ifu/BTBPredictor.sv +++ b/wally-pipelined/src/ifu/BTBPredictor.sv @@ -42,7 +42,7 @@ module BTBPredictor input logic UpdateEN, input logic [`XLEN-1:0] UpdatePC, input logic [`XLEN-1:0] UpdateTarget, - input logic [3:0] UpdateInstrClass, + input logic [4:0] UpdateInstrClass, input logic UpdateInvalid ); @@ -89,7 +89,7 @@ module BTBPredictor .WEN1(UpdateEN)); -----/\----- EXCLUDED -----/\----- */ - flopenr #() UpdateENReg(.clk(clk), + flopenr #(1) UpdateENReg(.clk(clk), .reset(reset), .en(~StallF), .d(UpdateEN), diff --git a/wally-pipelined/src/ifu/RAsPredictor.sv b/wally-pipelined/src/ifu/RAsPredictor.sv index 166ff911..bde30be5 100644 --- a/wally-pipelined/src/ifu/RAsPredictor.sv +++ b/wally-pipelined/src/ifu/RAsPredictor.sv @@ -55,7 +55,7 @@ module RASPredictor // may have to handle a push and an incr at the same time. // *** what happens if jal is executing and there is a return being flushed in Decode? - flopenr #(StackSize) PTR(.clk(clk), + flopenr #(Depth) PTR(.clk(clk), .reset(reset), .en(CounterEn), .d(PtrD), diff --git a/wally-pipelined/src/ifu/bpred.sv b/wally-pipelined/src/ifu/bpred.sv index ba9c688d..5de3f4ec 100644 --- a/wally-pipelined/src/ifu/bpred.sv +++ b/wally-pipelined/src/ifu/bpred.sv @@ -49,7 +49,11 @@ module bpred input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. 
(AKA Fall through address) input logic [4:0] InstrClassE, // Report branch prediction status - output logic BPPredWrongE + output logic BPPredWrongE, + output logic BPPredDirWrongE, + output logic BTBPredPCWrongE, + output logic RASPredPCWrongE, + output logic BPPredClassNonCFIWrongE ); logic BTBValidF; @@ -59,7 +63,6 @@ module bpred logic [`XLEN-1:0] BTBPredPCF, RASPCF; logic TargetWrongE; logic FallThroughWrongE; - logic PredictionDirWrongE; logic PredictionPCWrongE; logic PredictionInstrClassWrongE; @@ -172,14 +175,14 @@ module bpred .q(BPPredE)); // pipeline the class - flopenrc #(4) InstrClassRegD(.clk(clk), + flopenrc #(5) InstrClassRegD(.clk(clk), .reset(reset), .en(~StallD), .clear(FlushD), .d(BPInstrClassF), .q(BPInstrClassD)); - flopenrc #(4) InstrClassRegE(.clk(clk), + flopenrc #(5) InstrClassRegE(.clk(clk), .reset(reset), .en(~StallE), .clear(FlushE), @@ -189,13 +192,40 @@ module bpred // Check the prediction makes execution. + + // first check if the target or fallthrough address matches what was predicted. assign TargetWrongE = PCTargetE != PCD; assign FallThroughWrongE = PCLinkE != PCD; - assign PredictionDirWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0]; - assign PredictionPCWrongE = PCSrcE ? TargetWrongE : FallThroughWrongE; - assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE; - assign BPPredWrongE = ((PredictionPCWrongE | PredictionDirWrongE) & (|InstrClassE)) | PredictionInstrClassWrongE; + // If the target is taken check the target rather than fallthrough. The instruction needs to be a branch if PCSrcE is selected + // Remember the bpred can incorrectly predict a non cfi instruction as a branch taken. If the real instruction is non cfi + // it must have selected the fall through. + assign PredictionPCWrongE = (PCSrcE & (|InstrClassE) ? TargetWrongE : FallThroughWrongE); + // The branch direction also needs to be checked. + // However if the direction is wrong then the pc will be wrong. 
This is only relevant to checking the + // accuracy of the direction prediction. + assign BPPredDirWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0]; + + // Finally we need to check if the class is wrong. When the class is wrong the BTB needs to be updated. + // Also we want to track this in a performance counter. + assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE; + + // We want to output to the instruction fetch if the PC fetched was wrong. If by chance the predictor was wrong about + // the direction or class, but correct about the target we don't have to flush the pipeline. However we still + // need this information to verify the accuracy of the predictors. + + + //assign BPPredWrongE = ((PredictionPCWrongE | BPPredDirWrongE) & (|InstrClassE)) | PredictionInstrClassWrongE; + + assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | BPPredClassNonCFIWrongE; + + // If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter. + assign BTBPredPCWrongE = (InstrClassE[4] | InstrClassE[2] | InstrClassE[1]) & PredictionPCWrongE; + // similar with RAS + assign RASPredPCWrongE = InstrClassE[3] & PredictionPCWrongE; + // Finally if the real instruction class is non CFI but the predictor said it was we need to count. 
+ assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE; + // Update predictors satCounter2 BPDirUpdate(.BrDir(PCSrcE), diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 30e25bea..79d5878a 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -27,14 +27,14 @@ `include "wally-config.vh" module ifu ( - input logic clk, reset, - input logic StallF, StallD, StallE, StallM, StallW, - input logic FlushF, FlushD, FlushE, FlushM, FlushW, + input logic clk, reset, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushF, FlushD, FlushE, FlushM, FlushW, // Fetch - input logic [`XLEN-1:0] InstrInF, + input logic [`XLEN-1:0] InstrInF, output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] InstrPAdrF, - output logic InstrReadF, + output logic InstrReadF, // Decode // Execute output logic [`XLEN-1:0] PCLinkE, @@ -47,23 +47,26 @@ module ifu ( input logic [`XLEN-1:0] PrivilegedNextPCM, output logic [31:0] InstrD, InstrM, output logic [`XLEN-1:0] PCM, - output logic [3:0] InstrClassM, - output logic BPPredWrongM, + output logic [4:0] InstrClassM, + output logic BPPredDirWrongM, + output logic BTBPredPCWrongM, + output logic RASPredPCWrongM, + output logic BPPredClassNonCFIWrongM, // Writeback // output logic [`XLEN-1:0] PCLinkW, // Faults - input logic IllegalBaseInstrFaultD, - output logic IllegalIEUInstrFaultD, - output logic InstrMisalignedFaultM, + input logic IllegalBaseInstrFaultD, + output logic IllegalIEUInstrFaultD, + output logic InstrMisalignedFaultM, output logic [`XLEN-1:0] InstrMisalignedAdrM, // TLB management - input logic [1:0] PrivilegeModeW, - input logic [`XLEN-1:0] PageTableEntryF, - input logic [`XLEN-1:0] SATP_REGW, - input logic ITLBWriteF, // ITLBFlushF, - output logic ITLBMissF, ITLBHitF, + input logic [1:0] PrivilegeModeW, + input logic [`XLEN-1:0] PageTableEntryF, + input logic [`XLEN-1:0] SATP_REGW, + input logic ITLBWriteF, // ITLBFlushF, + output 
logic ITLBMissF, ITLBHitF, // bogus - input logic [15:0] rd2 + input logic [15:0] rd2 ); @@ -135,7 +138,11 @@ module ifu ( .PCD(PCD), .PCLinkE(PCLinkE), .InstrClassE(InstrClassE), - .BPPredWrongE(BPPredWrongE)); + .BPPredWrongE(BPPredWrongE), + .BPPredDirWrongE(BPPredDirWrongE), + .BTBPredPCWrongE(BTBPredPCWrongE), + .RASPredPCWrongE(RASPredPCWrongE), + .BPPredClassNonCFIWrongE(BPPredClassNonCFIWrongE)); // The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE. assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE; @@ -216,12 +223,12 @@ module ifu ( .d(InstrClassE), .q(InstrClassM)); - flopenrc #(1) BPPredWrongRegM(.clk(clk), + flopenrc #(4) BPPredWrongRegM(.clk(clk), .reset(reset), .en(~StallM), .clear(FlushM), - .d(BPPredWrongE), - .q(BPPredWrongM)); + .d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}), + .q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM})); // seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL. 
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index 79e81303..2148d885 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -33,8 +33,12 @@ module csr ( input logic [`XLEN-1:0] PCM, SrcAM, input logic CSRReadM, CSRWriteM, TrapM, MTrapM, STrapM, UTrapM, mretM, sretM, uretM, input logic TimerIntM, ExtIntM, SwIntM, - input logic InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM, - input logic [3:0] InstrClassM, + input logic InstrValidW, FloatRegWriteW, LoadStallD, + input logic BPPredDirWrongM, + input logic BTBPredPCWrongM, + input logic RASPredPCWrongM, + input logic BPPredClassNonCFIWrongM, + input logic [4:0] InstrClassM, input logic [1:0] NextPrivilegeModeM, PrivilegeModeW, input logic [`XLEN-1:0] CauseM, NextFaultMtvalM, output logic [1:0] STATUS_MPP, diff --git a/wally-pipelined/src/privileged/csrc.sv b/wally-pipelined/src/privileged/csrc.sv index ba90a48a..01e3a168 100644 --- a/wally-pipelined/src/privileged/csrc.sv +++ b/wally-pipelined/src/privileged/csrc.sv @@ -28,16 +28,20 @@ `include "wally-config.vh" module csrc ( - input logic clk, reset, - input logic StallD, StallE, StallM, StallW, - input logic InstrValidW, LoadStallD, CSRMWriteM, BPPredWrongM, - input logic [3:0] InstrClassM, - input logic [11:0] CSRAdrM, - input logic [1:0] PrivilegeModeW, - input logic [`XLEN-1:0] CSRWriteValM, - input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, + input logic clk, reset, + input logic StallD, StallE, StallM, StallW, + input logic InstrValidW, LoadStallD, CSRMWriteM, + input logic BPPredDirWrongM, + input logic BTBPredPCWrongM, + input logic RASPredPCWrongM, + input logic BPPredClassNonCFIWrongM, + input logic [4:0] InstrClassM, + input logic [11:0] CSRAdrM, + input logic [1:0] PrivilegeModeW, + input logic [`XLEN-1:0] CSRWriteValM, + input logic [31:0] 
MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, output logic [`XLEN-1:0] CSRCReadValM, - output logic IllegalCSRCAccessM); + output logic IllegalCSRCAccessM); // create Counter arrays to store address of each counter integer MHPMCOUNTER [`COUNTERS:0]; @@ -64,9 +68,14 @@ module csrc ( assign MCOUNTEN[1] = 1'b0; assign MCOUNTEN[2] = InstrValidW & ~StallW; assign MCOUNTEN[3] = LoadStallD & ~StallD; - assign MCOUNTEN[4] = BPPredWrongM & ~StallM; + assign MCOUNTEN[4] = BPPredDirWrongM & ~StallM; assign MCOUNTEN[5] = InstrClassM[0] & ~StallM; - assign MCOUNTEN[`COUNTERS:6] = 0; + assign MCOUNTEN[6] = BTBPredPCWrongM & ~StallM; + assign MCOUNTEN[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & ~StallM; + assign MCOUNTEN[8] = RASPredPCWrongM & ~StallM; + assign MCOUNTEN[9] = InstrClassM[3] & ~StallM; + assign MCOUNTEN[10] = BPPredClassNonCFIWrongM & ~StallM; + assign MCOUNTEN[`COUNTERS:11] = 0; genvar j; generate diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index f863b7fa..bb8e7e5e 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -36,8 +36,12 @@ module privileged ( output logic [`XLEN-1:0] CSRReadValW, output logic [`XLEN-1:0] PrivilegedNextPCM, output logic RetM, TrapM, - input logic InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM, - input logic [3:0] InstrClassM, + input logic InstrValidW, FloatRegWriteW, LoadStallD, + input logic BPPredDirWrongM, + input logic BTBPredPCWrongM, + input logic RASPredPCWrongM, + input logic BPPredClassNonCFIWrongM, + input logic [4:0] InstrClassM, input logic PrivilegedM, input logic InstrMisalignedFaultM, InstrAccessFaultF, IllegalIEUInstrFaultD, input logic LoadMisalignedFaultM, LoadAccessFaultM, diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 5975015f..1559c906 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ 
b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -111,8 +111,13 @@ module wallypipelinedhart ( logic DataStall, InstrStall; logic InstrAckD, MemAckW; - logic BPPredWrongE, BPPredWrongM; - logic [3:0] InstrClassM; + logic BPPredWrongE; + logic BPPredDirWrongM; + logic BTBPredPCWrongM; + logic RASPredPCWrongM; + logic BPPredClassNonCFIWrongM; + + logic [4:0] InstrClassM; ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache