forked from Github_Repos/cvw
Corrected a number of bugs in the branch predictor.
Added performance counters to individually track branches; jumps, jump register, jal, and jalr; and returns. Jump and jump register are special cases of jal and jalr. Similarly, return is a special case of jalr. Also added counters to track whether the branch direction was wrong, the BTB target was wrong, or the RAS target was wrong. Finally, added one more counter to track when the BP incorrectly predicts a non-CFI instruction as a CFI instruction.
This commit is contained in:
parent
2a308309e4
commit
9172e52286
@ -42,7 +42,7 @@ module BTBPredictor
|
|||||||
input logic UpdateEN,
|
input logic UpdateEN,
|
||||||
input logic [`XLEN-1:0] UpdatePC,
|
input logic [`XLEN-1:0] UpdatePC,
|
||||||
input logic [`XLEN-1:0] UpdateTarget,
|
input logic [`XLEN-1:0] UpdateTarget,
|
||||||
input logic [3:0] UpdateInstrClass,
|
input logic [4:0] UpdateInstrClass,
|
||||||
input logic UpdateInvalid
|
input logic UpdateInvalid
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -89,7 +89,7 @@ module BTBPredictor
|
|||||||
.WEN1(UpdateEN));
|
.WEN1(UpdateEN));
|
||||||
-----/\----- EXCLUDED -----/\----- */
|
-----/\----- EXCLUDED -----/\----- */
|
||||||
|
|
||||||
flopenr #() UpdateENReg(.clk(clk),
|
flopenr #(1) UpdateENReg(.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
.en(~StallF),
|
.en(~StallF),
|
||||||
.d(UpdateEN),
|
.d(UpdateEN),
|
||||||
|
@ -55,7 +55,7 @@ module RASPredictor
|
|||||||
// may have to handle a push and an incr at the same time.
|
// may have to handle a push and an incr at the same time.
|
||||||
// *** what happens if jal is executing and there is a return being flushed in Decode?
|
// *** what happens if jal is executing and there is a return being flushed in Decode?
|
||||||
|
|
||||||
flopenr #(StackSize) PTR(.clk(clk),
|
flopenr #(Depth) PTR(.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
.en(CounterEn),
|
.en(CounterEn),
|
||||||
.d(PtrD),
|
.d(PtrD),
|
||||||
|
@ -49,7 +49,11 @@ module bpred
|
|||||||
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
|
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
|
||||||
input logic [4:0] InstrClassE,
|
input logic [4:0] InstrClassE,
|
||||||
// Report branch prediction status
|
// Report branch prediction status
|
||||||
output logic BPPredWrongE
|
output logic BPPredWrongE,
|
||||||
|
output logic BPPredDirWrongE,
|
||||||
|
output logic BTBPredPCWrongE,
|
||||||
|
output logic RASPredPCWrongE,
|
||||||
|
output logic BPPredClassNonCFIWrongE
|
||||||
);
|
);
|
||||||
|
|
||||||
logic BTBValidF;
|
logic BTBValidF;
|
||||||
@ -59,7 +63,6 @@ module bpred
|
|||||||
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
|
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
|
||||||
logic TargetWrongE;
|
logic TargetWrongE;
|
||||||
logic FallThroughWrongE;
|
logic FallThroughWrongE;
|
||||||
logic PredictionDirWrongE;
|
|
||||||
logic PredictionPCWrongE;
|
logic PredictionPCWrongE;
|
||||||
logic PredictionInstrClassWrongE;
|
logic PredictionInstrClassWrongE;
|
||||||
|
|
||||||
@ -172,14 +175,14 @@ module bpred
|
|||||||
.q(BPPredE));
|
.q(BPPredE));
|
||||||
|
|
||||||
// pipeline the class
|
// pipeline the class
|
||||||
flopenrc #(4) InstrClassRegD(.clk(clk),
|
flopenrc #(5) InstrClassRegD(.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
.en(~StallD),
|
.en(~StallD),
|
||||||
.clear(FlushD),
|
.clear(FlushD),
|
||||||
.d(BPInstrClassF),
|
.d(BPInstrClassF),
|
||||||
.q(BPInstrClassD));
|
.q(BPInstrClassD));
|
||||||
|
|
||||||
flopenrc #(4) InstrClassRegE(.clk(clk),
|
flopenrc #(5) InstrClassRegE(.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
.en(~StallE),
|
.en(~StallE),
|
||||||
.clear(FlushE),
|
.clear(FlushE),
|
||||||
@ -189,13 +192,40 @@ module bpred
|
|||||||
|
|
||||||
|
|
||||||
// Check the prediction makes execution.
|
// Check the prediction makes execution.
|
||||||
|
|
||||||
|
// first check if the target or fallthrough address matches what was predicted.
|
||||||
assign TargetWrongE = PCTargetE != PCD;
|
assign TargetWrongE = PCTargetE != PCD;
|
||||||
assign FallThroughWrongE = PCLinkE != PCD;
|
assign FallThroughWrongE = PCLinkE != PCD;
|
||||||
assign PredictionDirWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0];
|
// If the target is taken check the target rather than fallthrough. The instruction needs to be a branch if PCSrcE is selected
|
||||||
assign PredictionPCWrongE = PCSrcE ? TargetWrongE : FallThroughWrongE;
|
// Remember the bpred can incorrectly predict a non cfi instruction as a branch taken. If the real instruction is non cfi
|
||||||
assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE;
|
// it must have selected the fall through.
|
||||||
assign BPPredWrongE = ((PredictionPCWrongE | PredictionDirWrongE) & (|InstrClassE)) | PredictionInstrClassWrongE;
|
assign PredictionPCWrongE = (PCSrcE & (|InstrClassE) ? TargetWrongE : FallThroughWrongE);
|
||||||
|
|
||||||
|
// The branch direction also needs to be checked.
|
||||||
|
// However, if the direction is wrong then the PC will be wrong. This is only relevant to checking the
|
||||||
|
// accuracy of the direction prediction.
|
||||||
|
assign BPPredDirWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0];
|
||||||
|
|
||||||
|
// Finally we need to check if the class is wrong. When the class is wrong the BTB needs to be updated.
|
||||||
|
// Also we want to track this in a performance counter.
|
||||||
|
assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE;
|
||||||
|
|
||||||
|
// We want to output to the instruction fetch if the PC fetched was wrong. If by chance the predictor was wrong about
|
||||||
|
// the direction or class, but correct about the target, we don't have to flush the pipeline. However we still
|
||||||
|
// need this information to verify the accuracy of the predictors.
|
||||||
|
|
||||||
|
|
||||||
|
//assign BPPredWrongE = ((PredictionPCWrongE | BPPredDirWrongE) & (|InstrClassE)) | PredictionInstrClassWrongE;
|
||||||
|
|
||||||
|
assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | BPPredClassNonCFIWrongE;
|
||||||
|
|
||||||
|
// If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter.
|
||||||
|
assign BTBPredPCWrongE = (InstrClassE[4] | InstrClassE[2] | InstrClassE[1]) & PredictionPCWrongE;
|
||||||
|
// similar with RAS
|
||||||
|
assign RASPredPCWrongE = InstrClassE[3] & PredictionPCWrongE;
|
||||||
|
// Finally if the real instruction class is non CFI but the predictor said it was we need to count.
|
||||||
|
assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE;
|
||||||
|
|
||||||
// Update predictors
|
// Update predictors
|
||||||
|
|
||||||
satCounter2 BPDirUpdate(.BrDir(PCSrcE),
|
satCounter2 BPDirUpdate(.BrDir(PCSrcE),
|
||||||
|
@ -27,14 +27,14 @@
|
|||||||
`include "wally-config.vh"
|
`include "wally-config.vh"
|
||||||
|
|
||||||
module ifu (
|
module ifu (
|
||||||
input logic clk, reset,
|
input logic clk, reset,
|
||||||
input logic StallF, StallD, StallE, StallM, StallW,
|
input logic StallF, StallD, StallE, StallM, StallW,
|
||||||
input logic FlushF, FlushD, FlushE, FlushM, FlushW,
|
input logic FlushF, FlushD, FlushE, FlushM, FlushW,
|
||||||
// Fetch
|
// Fetch
|
||||||
input logic [`XLEN-1:0] InstrInF,
|
input logic [`XLEN-1:0] InstrInF,
|
||||||
output logic [`XLEN-1:0] PCF,
|
output logic [`XLEN-1:0] PCF,
|
||||||
output logic [`XLEN-1:0] InstrPAdrF,
|
output logic [`XLEN-1:0] InstrPAdrF,
|
||||||
output logic InstrReadF,
|
output logic InstrReadF,
|
||||||
// Decode
|
// Decode
|
||||||
// Execute
|
// Execute
|
||||||
output logic [`XLEN-1:0] PCLinkE,
|
output logic [`XLEN-1:0] PCLinkE,
|
||||||
@ -47,23 +47,26 @@ module ifu (
|
|||||||
input logic [`XLEN-1:0] PrivilegedNextPCM,
|
input logic [`XLEN-1:0] PrivilegedNextPCM,
|
||||||
output logic [31:0] InstrD, InstrM,
|
output logic [31:0] InstrD, InstrM,
|
||||||
output logic [`XLEN-1:0] PCM,
|
output logic [`XLEN-1:0] PCM,
|
||||||
output logic [3:0] InstrClassM,
|
output logic [4:0] InstrClassM,
|
||||||
output logic BPPredWrongM,
|
output logic BPPredDirWrongM,
|
||||||
|
output logic BTBPredPCWrongM,
|
||||||
|
output logic RASPredPCWrongM,
|
||||||
|
output logic BPPredClassNonCFIWrongM,
|
||||||
// Writeback
|
// Writeback
|
||||||
// output logic [`XLEN-1:0] PCLinkW,
|
// output logic [`XLEN-1:0] PCLinkW,
|
||||||
// Faults
|
// Faults
|
||||||
input logic IllegalBaseInstrFaultD,
|
input logic IllegalBaseInstrFaultD,
|
||||||
output logic IllegalIEUInstrFaultD,
|
output logic IllegalIEUInstrFaultD,
|
||||||
output logic InstrMisalignedFaultM,
|
output logic InstrMisalignedFaultM,
|
||||||
output logic [`XLEN-1:0] InstrMisalignedAdrM,
|
output logic [`XLEN-1:0] InstrMisalignedAdrM,
|
||||||
// TLB management
|
// TLB management
|
||||||
input logic [1:0] PrivilegeModeW,
|
input logic [1:0] PrivilegeModeW,
|
||||||
input logic [`XLEN-1:0] PageTableEntryF,
|
input logic [`XLEN-1:0] PageTableEntryF,
|
||||||
input logic [`XLEN-1:0] SATP_REGW,
|
input logic [`XLEN-1:0] SATP_REGW,
|
||||||
input logic ITLBWriteF, // ITLBFlushF,
|
input logic ITLBWriteF, // ITLBFlushF,
|
||||||
output logic ITLBMissF, ITLBHitF,
|
output logic ITLBMissF, ITLBHitF,
|
||||||
// bogus
|
// bogus
|
||||||
input logic [15:0] rd2
|
input logic [15:0] rd2
|
||||||
|
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -135,7 +138,11 @@ module ifu (
|
|||||||
.PCD(PCD),
|
.PCD(PCD),
|
||||||
.PCLinkE(PCLinkE),
|
.PCLinkE(PCLinkE),
|
||||||
.InstrClassE(InstrClassE),
|
.InstrClassE(InstrClassE),
|
||||||
.BPPredWrongE(BPPredWrongE));
|
.BPPredWrongE(BPPredWrongE),
|
||||||
|
.BPPredDirWrongE(BPPredDirWrongE),
|
||||||
|
.BTBPredPCWrongE(BTBPredPCWrongE),
|
||||||
|
.RASPredPCWrongE(RASPredPCWrongE),
|
||||||
|
.BPPredClassNonCFIWrongE(BPPredClassNonCFIWrongE));
|
||||||
// The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE.
|
// The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE.
|
||||||
assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE;
|
assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE;
|
||||||
|
|
||||||
@ -216,12 +223,12 @@ module ifu (
|
|||||||
.d(InstrClassE),
|
.d(InstrClassE),
|
||||||
.q(InstrClassM));
|
.q(InstrClassM));
|
||||||
|
|
||||||
flopenrc #(1) BPPredWrongRegM(.clk(clk),
|
flopenrc #(4) BPPredWrongRegM(.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
.en(~StallM),
|
.en(~StallM),
|
||||||
.clear(FlushM),
|
.clear(FlushM),
|
||||||
.d(BPPredWrongE),
|
.d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}),
|
||||||
.q(BPPredWrongM));
|
.q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM}));
|
||||||
|
|
||||||
// seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.
|
// seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.
|
||||||
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
|
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
|
||||||
|
@ -33,8 +33,12 @@ module csr (
|
|||||||
input logic [`XLEN-1:0] PCM, SrcAM,
|
input logic [`XLEN-1:0] PCM, SrcAM,
|
||||||
input logic CSRReadM, CSRWriteM, TrapM, MTrapM, STrapM, UTrapM, mretM, sretM, uretM,
|
input logic CSRReadM, CSRWriteM, TrapM, MTrapM, STrapM, UTrapM, mretM, sretM, uretM,
|
||||||
input logic TimerIntM, ExtIntM, SwIntM,
|
input logic TimerIntM, ExtIntM, SwIntM,
|
||||||
input logic InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
|
input logic InstrValidW, FloatRegWriteW, LoadStallD,
|
||||||
input logic [3:0] InstrClassM,
|
input logic BPPredDirWrongM,
|
||||||
|
input logic BTBPredPCWrongM,
|
||||||
|
input logic RASPredPCWrongM,
|
||||||
|
input logic BPPredClassNonCFIWrongM,
|
||||||
|
input logic [4:0] InstrClassM,
|
||||||
input logic [1:0] NextPrivilegeModeM, PrivilegeModeW,
|
input logic [1:0] NextPrivilegeModeM, PrivilegeModeW,
|
||||||
input logic [`XLEN-1:0] CauseM, NextFaultMtvalM,
|
input logic [`XLEN-1:0] CauseM, NextFaultMtvalM,
|
||||||
output logic [1:0] STATUS_MPP,
|
output logic [1:0] STATUS_MPP,
|
||||||
|
@ -28,16 +28,20 @@
|
|||||||
`include "wally-config.vh"
|
`include "wally-config.vh"
|
||||||
|
|
||||||
module csrc (
|
module csrc (
|
||||||
input logic clk, reset,
|
input logic clk, reset,
|
||||||
input logic StallD, StallE, StallM, StallW,
|
input logic StallD, StallE, StallM, StallW,
|
||||||
input logic InstrValidW, LoadStallD, CSRMWriteM, BPPredWrongM,
|
input logic InstrValidW, LoadStallD, CSRMWriteM,
|
||||||
input logic [3:0] InstrClassM,
|
input logic BPPredDirWrongM,
|
||||||
input logic [11:0] CSRAdrM,
|
input logic BTBPredPCWrongM,
|
||||||
input logic [1:0] PrivilegeModeW,
|
input logic RASPredPCWrongM,
|
||||||
input logic [`XLEN-1:0] CSRWriteValM,
|
input logic BPPredClassNonCFIWrongM,
|
||||||
input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW,
|
input logic [4:0] InstrClassM,
|
||||||
|
input logic [11:0] CSRAdrM,
|
||||||
|
input logic [1:0] PrivilegeModeW,
|
||||||
|
input logic [`XLEN-1:0] CSRWriteValM,
|
||||||
|
input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW,
|
||||||
output logic [`XLEN-1:0] CSRCReadValM,
|
output logic [`XLEN-1:0] CSRCReadValM,
|
||||||
output logic IllegalCSRCAccessM);
|
output logic IllegalCSRCAccessM);
|
||||||
|
|
||||||
// create Counter arrays to store address of each counter
|
// create Counter arrays to store address of each counter
|
||||||
integer MHPMCOUNTER [`COUNTERS:0];
|
integer MHPMCOUNTER [`COUNTERS:0];
|
||||||
@ -64,9 +68,14 @@ module csrc (
|
|||||||
assign MCOUNTEN[1] = 1'b0;
|
assign MCOUNTEN[1] = 1'b0;
|
||||||
assign MCOUNTEN[2] = InstrValidW & ~StallW;
|
assign MCOUNTEN[2] = InstrValidW & ~StallW;
|
||||||
assign MCOUNTEN[3] = LoadStallD & ~StallD;
|
assign MCOUNTEN[3] = LoadStallD & ~StallD;
|
||||||
assign MCOUNTEN[4] = BPPredWrongM & ~StallM;
|
assign MCOUNTEN[4] = BPPredDirWrongM & ~StallM;
|
||||||
assign MCOUNTEN[5] = InstrClassM[0] & ~StallM;
|
assign MCOUNTEN[5] = InstrClassM[0] & ~StallM;
|
||||||
assign MCOUNTEN[`COUNTERS:6] = 0;
|
assign MCOUNTEN[6] = BTBPredPCWrongM & ~StallM;
|
||||||
|
assign MCOUNTEN[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & ~StallM;
|
||||||
|
assign MCOUNTEN[8] = RASPredPCWrongM & ~StallM;
|
||||||
|
assign MCOUNTEN[9] = InstrClassM[3] & ~StallM;
|
||||||
|
assign MCOUNTEN[10] = BPPredClassNonCFIWrongM & ~StallM;
|
||||||
|
assign MCOUNTEN[`COUNTERS:11] = 0;
|
||||||
|
|
||||||
genvar j;
|
genvar j;
|
||||||
generate
|
generate
|
||||||
|
@ -36,8 +36,12 @@ module privileged (
|
|||||||
output logic [`XLEN-1:0] CSRReadValW,
|
output logic [`XLEN-1:0] CSRReadValW,
|
||||||
output logic [`XLEN-1:0] PrivilegedNextPCM,
|
output logic [`XLEN-1:0] PrivilegedNextPCM,
|
||||||
output logic RetM, TrapM,
|
output logic RetM, TrapM,
|
||||||
input logic InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
|
input logic InstrValidW, FloatRegWriteW, LoadStallD,
|
||||||
input logic [3:0] InstrClassM,
|
input logic BPPredDirWrongM,
|
||||||
|
input logic BTBPredPCWrongM,
|
||||||
|
input logic RASPredPCWrongM,
|
||||||
|
input logic BPPredClassNonCFIWrongM,
|
||||||
|
input logic [4:0] InstrClassM,
|
||||||
input logic PrivilegedM,
|
input logic PrivilegedM,
|
||||||
input logic InstrMisalignedFaultM, InstrAccessFaultF, IllegalIEUInstrFaultD,
|
input logic InstrMisalignedFaultM, InstrAccessFaultF, IllegalIEUInstrFaultD,
|
||||||
input logic LoadMisalignedFaultM, LoadAccessFaultM,
|
input logic LoadMisalignedFaultM, LoadAccessFaultM,
|
||||||
|
@ -111,8 +111,13 @@ module wallypipelinedhart (
|
|||||||
logic DataStall, InstrStall;
|
logic DataStall, InstrStall;
|
||||||
logic InstrAckD, MemAckW;
|
logic InstrAckD, MemAckW;
|
||||||
|
|
||||||
logic BPPredWrongE, BPPredWrongM;
|
logic BPPredWrongE;
|
||||||
logic [3:0] InstrClassM;
|
logic BPPredDirWrongM;
|
||||||
|
logic BTBPredPCWrongM;
|
||||||
|
logic RASPredPCWrongM;
|
||||||
|
logic BPPredClassNonCFIWrongM;
|
||||||
|
|
||||||
|
logic [4:0] InstrClassM;
|
||||||
|
|
||||||
|
|
||||||
ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache
|
ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache
|
||||||
|
Loading…
Reference in New Issue
Block a user