forked from Github_Repos/cvw
Corrected a number of bugs in the branch predictor.
Added performance counters to individually track branches; jumps, jump register, jal, and jalr; return. jump and jump register are special cases of jal and jalr. Similarlly return is a special case of jalr. Also added counters to track if the branch direction was wrong, btb target wrong, or the ras target was wrong. Finally added one more counter to track if the BP incorrectly predicts a non-cfi instruction.
This commit is contained in:
parent
2a308309e4
commit
9172e52286
@ -42,7 +42,7 @@ module BTBPredictor
|
||||
input logic UpdateEN,
|
||||
input logic [`XLEN-1:0] UpdatePC,
|
||||
input logic [`XLEN-1:0] UpdateTarget,
|
||||
input logic [3:0] UpdateInstrClass,
|
||||
input logic [4:0] UpdateInstrClass,
|
||||
input logic UpdateInvalid
|
||||
);
|
||||
|
||||
@ -89,7 +89,7 @@ module BTBPredictor
|
||||
.WEN1(UpdateEN));
|
||||
-----/\----- EXCLUDED -----/\----- */
|
||||
|
||||
flopenr #() UpdateENReg(.clk(clk),
|
||||
flopenr #(1) UpdateENReg(.clk(clk),
|
||||
.reset(reset),
|
||||
.en(~StallF),
|
||||
.d(UpdateEN),
|
||||
|
@ -55,7 +55,7 @@ module RASPredictor
|
||||
// may have to handle a push and an incr at the same time.
|
||||
// *** what happens if jal is executing and there is a return being flushed in Decode?
|
||||
|
||||
flopenr #(StackSize) PTR(.clk(clk),
|
||||
flopenr #(Depth) PTR(.clk(clk),
|
||||
.reset(reset),
|
||||
.en(CounterEn),
|
||||
.d(PtrD),
|
||||
|
@ -49,7 +49,11 @@ module bpred
|
||||
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
|
||||
input logic [4:0] InstrClassE,
|
||||
// Report branch prediction status
|
||||
output logic BPPredWrongE
|
||||
output logic BPPredWrongE,
|
||||
output logic BPPredDirWrongE,
|
||||
output logic BTBPredPCWrongE,
|
||||
output logic RASPredPCWrongE,
|
||||
output logic BPPredClassNonCFIWrongE
|
||||
);
|
||||
|
||||
logic BTBValidF;
|
||||
@ -59,7 +63,6 @@ module bpred
|
||||
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
|
||||
logic TargetWrongE;
|
||||
logic FallThroughWrongE;
|
||||
logic PredictionDirWrongE;
|
||||
logic PredictionPCWrongE;
|
||||
logic PredictionInstrClassWrongE;
|
||||
|
||||
@ -172,14 +175,14 @@ module bpred
|
||||
.q(BPPredE));
|
||||
|
||||
// pipeline the class
|
||||
flopenrc #(4) InstrClassRegD(.clk(clk),
|
||||
flopenrc #(5) InstrClassRegD(.clk(clk),
|
||||
.reset(reset),
|
||||
.en(~StallD),
|
||||
.clear(FlushD),
|
||||
.d(BPInstrClassF),
|
||||
.q(BPInstrClassD));
|
||||
|
||||
flopenrc #(4) InstrClassRegE(.clk(clk),
|
||||
flopenrc #(5) InstrClassRegE(.clk(clk),
|
||||
.reset(reset),
|
||||
.en(~StallE),
|
||||
.clear(FlushE),
|
||||
@ -189,13 +192,40 @@ module bpred
|
||||
|
||||
|
||||
// Check the prediction makes execution.
|
||||
|
||||
// first check if the target or fallthrough address matches what was predicted.
|
||||
assign TargetWrongE = PCTargetE != PCD;
|
||||
assign FallThroughWrongE = PCLinkE != PCD;
|
||||
assign PredictionDirWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0];
|
||||
assign PredictionPCWrongE = PCSrcE ? TargetWrongE : FallThroughWrongE;
|
||||
assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE;
|
||||
assign BPPredWrongE = ((PredictionPCWrongE | PredictionDirWrongE) & (|InstrClassE)) | PredictionInstrClassWrongE;
|
||||
// If the target is taken check the target rather than fallthrough. The instruction needs to be a branch if PCSrcE is selected
|
||||
// Remember the bpred can incorrectly predict a non cfi instruction as a branch taken. If the real instruction is non cfi
|
||||
// it must have selected teh fall through.
|
||||
assign PredictionPCWrongE = (PCSrcE & (|InstrClassE) ? TargetWrongE : FallThroughWrongE);
|
||||
|
||||
// The branch direction also need to checked.
|
||||
// However if the direction is wrong then the pc will be wrong. This is only relavent to checking the
|
||||
// accuracy of the direciton prediction.
|
||||
assign BPPredDirWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0];
|
||||
|
||||
// Finally we need to check if the class is wrong. When the class is wrong the BTB needs to be updated.
|
||||
// Also we want to track this in a performance counter.
|
||||
assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE;
|
||||
|
||||
// We want to output to the instruction fetch if the PC fetched was wrong. If by chance the predictor was wrong about
|
||||
// the direction or class, but correct about the target we don't have the flush the pipeline. However we still
|
||||
// need this information to verify the accuracy of the predictors.
|
||||
|
||||
|
||||
//assign BPPredWrongE = ((PredictionPCWrongE | BPPredDirWrongE) & (|InstrClassE)) | PredictionInstrClassWrongE;
|
||||
|
||||
assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | BPPredClassNonCFIWrongE;
|
||||
|
||||
// If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter.
|
||||
assign BTBPredPCWrongE = (InstrClassE[4] | InstrClassE[2] | InstrClassE[1]) & PredictionPCWrongE;
|
||||
// similar with RAS
|
||||
assign RASPredPCWrongE = InstrClassE[3] & PredictionPCWrongE;
|
||||
// Finally if the real instruction class is non CFI but the predictor said it was we need to count.
|
||||
assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE;
|
||||
|
||||
// Update predictors
|
||||
|
||||
satCounter2 BPDirUpdate(.BrDir(PCSrcE),
|
||||
|
@ -27,14 +27,14 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module ifu (
|
||||
input logic clk, reset,
|
||||
input logic StallF, StallD, StallE, StallM, StallW,
|
||||
input logic FlushF, FlushD, FlushE, FlushM, FlushW,
|
||||
input logic clk, reset,
|
||||
input logic StallF, StallD, StallE, StallM, StallW,
|
||||
input logic FlushF, FlushD, FlushE, FlushM, FlushW,
|
||||
// Fetch
|
||||
input logic [`XLEN-1:0] InstrInF,
|
||||
input logic [`XLEN-1:0] InstrInF,
|
||||
output logic [`XLEN-1:0] PCF,
|
||||
output logic [`XLEN-1:0] InstrPAdrF,
|
||||
output logic InstrReadF,
|
||||
output logic InstrReadF,
|
||||
// Decode
|
||||
// Execute
|
||||
output logic [`XLEN-1:0] PCLinkE,
|
||||
@ -47,23 +47,26 @@ module ifu (
|
||||
input logic [`XLEN-1:0] PrivilegedNextPCM,
|
||||
output logic [31:0] InstrD, InstrM,
|
||||
output logic [`XLEN-1:0] PCM,
|
||||
output logic [3:0] InstrClassM,
|
||||
output logic BPPredWrongM,
|
||||
output logic [4:0] InstrClassM,
|
||||
output logic BPPredDirWrongM,
|
||||
output logic BTBPredPCWrongM,
|
||||
output logic RASPredPCWrongM,
|
||||
output logic BPPredClassNonCFIWrongM,
|
||||
// Writeback
|
||||
// output logic [`XLEN-1:0] PCLinkW,
|
||||
// Faults
|
||||
input logic IllegalBaseInstrFaultD,
|
||||
output logic IllegalIEUInstrFaultD,
|
||||
output logic InstrMisalignedFaultM,
|
||||
input logic IllegalBaseInstrFaultD,
|
||||
output logic IllegalIEUInstrFaultD,
|
||||
output logic InstrMisalignedFaultM,
|
||||
output logic [`XLEN-1:0] InstrMisalignedAdrM,
|
||||
// TLB management
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic [`XLEN-1:0] PageTableEntryF,
|
||||
input logic [`XLEN-1:0] SATP_REGW,
|
||||
input logic ITLBWriteF, // ITLBFlushF,
|
||||
output logic ITLBMissF, ITLBHitF,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic [`XLEN-1:0] PageTableEntryF,
|
||||
input logic [`XLEN-1:0] SATP_REGW,
|
||||
input logic ITLBWriteF, // ITLBFlushF,
|
||||
output logic ITLBMissF, ITLBHitF,
|
||||
// bogus
|
||||
input logic [15:0] rd2
|
||||
input logic [15:0] rd2
|
||||
|
||||
);
|
||||
|
||||
@ -135,7 +138,11 @@ module ifu (
|
||||
.PCD(PCD),
|
||||
.PCLinkE(PCLinkE),
|
||||
.InstrClassE(InstrClassE),
|
||||
.BPPredWrongE(BPPredWrongE));
|
||||
.BPPredWrongE(BPPredWrongE),
|
||||
.BPPredDirWrongE(BPPredDirWrongE),
|
||||
.BTBPredPCWrongE(BTBPredPCWrongE),
|
||||
.RASPredPCWrongE(RASPredPCWrongE),
|
||||
.BPPredClassNonCFIWrongE(BPPredClassNonCFIWrongE));
|
||||
// The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE.
|
||||
assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE;
|
||||
|
||||
@ -216,12 +223,12 @@ module ifu (
|
||||
.d(InstrClassE),
|
||||
.q(InstrClassM));
|
||||
|
||||
flopenrc #(1) BPPredWrongRegM(.clk(clk),
|
||||
flopenrc #(4) BPPredWrongRegM(.clk(clk),
|
||||
.reset(reset),
|
||||
.en(~StallM),
|
||||
.clear(FlushM),
|
||||
.d(BPPredWrongE),
|
||||
.q(BPPredWrongM));
|
||||
.d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}),
|
||||
.q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM}));
|
||||
|
||||
// seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.
|
||||
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
|
||||
|
@ -33,8 +33,12 @@ module csr (
|
||||
input logic [`XLEN-1:0] PCM, SrcAM,
|
||||
input logic CSRReadM, CSRWriteM, TrapM, MTrapM, STrapM, UTrapM, mretM, sretM, uretM,
|
||||
input logic TimerIntM, ExtIntM, SwIntM,
|
||||
input logic InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
|
||||
input logic [3:0] InstrClassM,
|
||||
input logic InstrValidW, FloatRegWriteW, LoadStallD,
|
||||
input logic BPPredDirWrongM,
|
||||
input logic BTBPredPCWrongM,
|
||||
input logic RASPredPCWrongM,
|
||||
input logic BPPredClassNonCFIWrongM,
|
||||
input logic [4:0] InstrClassM,
|
||||
input logic [1:0] NextPrivilegeModeM, PrivilegeModeW,
|
||||
input logic [`XLEN-1:0] CauseM, NextFaultMtvalM,
|
||||
output logic [1:0] STATUS_MPP,
|
||||
|
@ -28,16 +28,20 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module csrc (
|
||||
input logic clk, reset,
|
||||
input logic StallD, StallE, StallM, StallW,
|
||||
input logic InstrValidW, LoadStallD, CSRMWriteM, BPPredWrongM,
|
||||
input logic [3:0] InstrClassM,
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic [`XLEN-1:0] CSRWriteValM,
|
||||
input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW,
|
||||
input logic clk, reset,
|
||||
input logic StallD, StallE, StallM, StallW,
|
||||
input logic InstrValidW, LoadStallD, CSRMWriteM,
|
||||
input logic BPPredDirWrongM,
|
||||
input logic BTBPredPCWrongM,
|
||||
input logic RASPredPCWrongM,
|
||||
input logic BPPredClassNonCFIWrongM,
|
||||
input logic [4:0] InstrClassM,
|
||||
input logic [11:0] CSRAdrM,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic [`XLEN-1:0] CSRWriteValM,
|
||||
input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW,
|
||||
output logic [`XLEN-1:0] CSRCReadValM,
|
||||
output logic IllegalCSRCAccessM);
|
||||
output logic IllegalCSRCAccessM);
|
||||
|
||||
// create Counter arrays to store address of each counter
|
||||
integer MHPMCOUNTER [`COUNTERS:0];
|
||||
@ -64,9 +68,14 @@ module csrc (
|
||||
assign MCOUNTEN[1] = 1'b0;
|
||||
assign MCOUNTEN[2] = InstrValidW & ~StallW;
|
||||
assign MCOUNTEN[3] = LoadStallD & ~StallD;
|
||||
assign MCOUNTEN[4] = BPPredWrongM & ~StallM;
|
||||
assign MCOUNTEN[4] = BPPredDirWrongM & ~StallM;
|
||||
assign MCOUNTEN[5] = InstrClassM[0] & ~StallM;
|
||||
assign MCOUNTEN[`COUNTERS:6] = 0;
|
||||
assign MCOUNTEN[6] = BTBPredPCWrongM & ~StallM;
|
||||
assign MCOUNTEN[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & ~StallM;
|
||||
assign MCOUNTEN[8] = RASPredPCWrongM & ~StallM;
|
||||
assign MCOUNTEN[9] = InstrClassM[3] & ~StallM;
|
||||
assign MCOUNTEN[10] = BPPredClassNonCFIWrongM & ~StallM;
|
||||
assign MCOUNTEN[`COUNTERS:11] = 0;
|
||||
|
||||
genvar j;
|
||||
generate
|
||||
|
@ -36,8 +36,12 @@ module privileged (
|
||||
output logic [`XLEN-1:0] CSRReadValW,
|
||||
output logic [`XLEN-1:0] PrivilegedNextPCM,
|
||||
output logic RetM, TrapM,
|
||||
input logic InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
|
||||
input logic [3:0] InstrClassM,
|
||||
input logic InstrValidW, FloatRegWriteW, LoadStallD,
|
||||
input logic BPPredDirWrongM,
|
||||
input logic BTBPredPCWrongM,
|
||||
input logic RASPredPCWrongM,
|
||||
input logic BPPredClassNonCFIWrongM,
|
||||
input logic [4:0] InstrClassM,
|
||||
input logic PrivilegedM,
|
||||
input logic InstrMisalignedFaultM, InstrAccessFaultF, IllegalIEUInstrFaultD,
|
||||
input logic LoadMisalignedFaultM, LoadAccessFaultM,
|
||||
|
@ -111,8 +111,13 @@ module wallypipelinedhart (
|
||||
logic DataStall, InstrStall;
|
||||
logic InstrAckD, MemAckW;
|
||||
|
||||
logic BPPredWrongE, BPPredWrongM;
|
||||
logic [3:0] InstrClassM;
|
||||
logic BPPredWrongE;
|
||||
logic BPPredDirWrongM;
|
||||
logic BTBPredPCWrongM;
|
||||
logic RASPredPCWrongM;
|
||||
logic BPPredClassNonCFIWrongM;
|
||||
|
||||
logic [4:0] InstrClassM;
|
||||
|
||||
|
||||
ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache
|
||||
|
Loading…
Reference in New Issue
Block a user