forked from Github_Repos/cvw
		
	Corrected a number of bugs in the branch predictor.
Added performance counters to individually track branches; jumps (jump, jump register, jal, and jalr); and returns. Jump and jump register are special cases of jal and jalr. Similarly, return is a special case of jalr. Also added counters to track whether the branch direction was wrong, the BTB target was wrong, or the RAS target was wrong. Finally, added one more counter to track whether the BP incorrectly predicts a non-CFI instruction as a CFI instruction.
This commit is contained in:
		
							parent
							
								
									2a308309e4
								
							
						
					
					
						commit
						9172e52286
					
				@ -42,7 +42,7 @@ module BTBPredictor
 | 
			
		||||
   input logic 		    UpdateEN,
 | 
			
		||||
   input logic [`XLEN-1:0]  UpdatePC,
 | 
			
		||||
   input logic [`XLEN-1:0]  UpdateTarget,
 | 
			
		||||
   input logic [3:0] 	    UpdateInstrClass,
 | 
			
		||||
   input logic [4:0] 	    UpdateInstrClass,
 | 
			
		||||
   input logic 		    UpdateInvalid
 | 
			
		||||
   );
 | 
			
		||||
 | 
			
		||||
@ -89,7 +89,7 @@ module BTBPredictor
 | 
			
		||||
				  .WEN1(UpdateEN));
 | 
			
		||||
 -----/\----- EXCLUDED -----/\----- */
 | 
			
		||||
  
 | 
			
		||||
  flopenr #() UpdateENReg(.clk(clk),
 | 
			
		||||
  flopenr #(1) UpdateENReg(.clk(clk),
 | 
			
		||||
			  .reset(reset),
 | 
			
		||||
			  .en(~StallF),
 | 
			
		||||
			  .d(UpdateEN),
 | 
			
		||||
 | 
			
		||||
@ -55,7 +55,7 @@ module RASPredictor
 | 
			
		||||
  // may have to handle a push and an incr at the same time.
 | 
			
		||||
  // *** what happens if jal is executing and there is a return being flushed in Decode?
 | 
			
		||||
 | 
			
		||||
  flopenr #(StackSize) PTR(.clk(clk),
 | 
			
		||||
  flopenr #(Depth) PTR(.clk(clk),
 | 
			
		||||
			   .reset(reset),
 | 
			
		||||
			   .en(CounterEn),
 | 
			
		||||
			   .d(PtrD),
 | 
			
		||||
 | 
			
		||||
@ -49,7 +49,11 @@ module bpred
 | 
			
		||||
   input logic [`XLEN-1:0]  PCLinkE, // The address following the branch instruction. (AKA Fall through address)
 | 
			
		||||
   input logic [4:0] 	    InstrClassE,
 | 
			
		||||
   // Report branch prediction status
 | 
			
		||||
   output logic 	    BPPredWrongE
 | 
			
		||||
   output logic 	    BPPredWrongE,
 | 
			
		||||
   output logic 	    BPPredDirWrongE,
 | 
			
		||||
   output logic 	    BTBPredPCWrongE,
 | 
			
		||||
   output logic 	    RASPredPCWrongE,
 | 
			
		||||
   output logic 	    BPPredClassNonCFIWrongE
 | 
			
		||||
   );
 | 
			
		||||
 | 
			
		||||
  logic 		    BTBValidF;
 | 
			
		||||
@ -59,7 +63,6 @@ module bpred
 | 
			
		||||
  logic [`XLEN-1:0] 	    BTBPredPCF, RASPCF;
 | 
			
		||||
  logic 		    TargetWrongE;
 | 
			
		||||
  logic 		    FallThroughWrongE;
 | 
			
		||||
  logic 		    PredictionDirWrongE;
 | 
			
		||||
  logic 		    PredictionPCWrongE;
 | 
			
		||||
  logic 		    PredictionInstrClassWrongE;
 | 
			
		||||
  
 | 
			
		||||
@ -172,14 +175,14 @@ module bpred
 | 
			
		||||
			   .q(BPPredE));
 | 
			
		||||
 | 
			
		||||
  // pipeline the class
 | 
			
		||||
  flopenrc #(4) InstrClassRegD(.clk(clk),
 | 
			
		||||
  flopenrc #(5) InstrClassRegD(.clk(clk),
 | 
			
		||||
			       .reset(reset),
 | 
			
		||||
			       .en(~StallD),
 | 
			
		||||
			       .clear(FlushD),
 | 
			
		||||
			       .d(BPInstrClassF),
 | 
			
		||||
			       .q(BPInstrClassD));
 | 
			
		||||
 | 
			
		||||
  flopenrc #(4) InstrClassRegE(.clk(clk),
 | 
			
		||||
  flopenrc #(5) InstrClassRegE(.clk(clk),
 | 
			
		||||
			       .reset(reset),
 | 
			
		||||
			       .en(~StallE),
 | 
			
		||||
			       .clear(FlushE),
 | 
			
		||||
@ -189,13 +192,40 @@ module bpred
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  // Check the prediction once the instruction reaches the execute stage.
 | 
			
		||||
 | 
			
		||||
  // first check if the target or fallthrough address matches what was predicted.
 | 
			
		||||
  assign TargetWrongE = PCTargetE != PCD;
 | 
			
		||||
  assign FallThroughWrongE = PCLinkE != PCD;
 | 
			
		||||
  assign PredictionDirWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0];
 | 
			
		||||
  assign PredictionPCWrongE = PCSrcE ? TargetWrongE : FallThroughWrongE;
 | 
			
		||||
  assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE;  
 | 
			
		||||
  assign BPPredWrongE = ((PredictionPCWrongE | PredictionDirWrongE) & (|InstrClassE)) | PredictionInstrClassWrongE;
 | 
			
		||||
  // If the target is taken check the target rather than fallthrough.  The instruction needs to be a branch if PCSrcE is selected
 | 
			
		||||
  // Remember the bpred can incorrectly predict a non cfi instruction as a branch taken.  If the real instruction is non cfi
 | 
			
		||||
  // it must have selected the fall through.
 | 
			
		||||
  assign PredictionPCWrongE = (PCSrcE  & (|InstrClassE) ? TargetWrongE : FallThroughWrongE);
 | 
			
		||||
 | 
			
		||||
  // The branch direction also needs to be checked.
 | 
			
		||||
  // However, if the direction is wrong then the PC will be wrong.  This is only relevant to checking the
 | 
			
		||||
  // accuracy of the direction prediction.
 | 
			
		||||
  assign BPPredDirWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0];
 | 
			
		||||
  
 | 
			
		||||
  // Finally we need to check if the class is wrong.  When the class is wrong the BTB needs to be updated.
 | 
			
		||||
  // Also we want to track this in a performance counter.
 | 
			
		||||
  assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE;
 | 
			
		||||
 | 
			
		||||
  // We want to output to the instruction fetch if the PC fetched was wrong.  If by chance the predictor was wrong about
 | 
			
		||||
  // the direction or class, but correct about the target we don't have to flush the pipeline.  However we still
 | 
			
		||||
  // need this information to verify the accuracy of the predictors.
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
  //assign BPPredWrongE = ((PredictionPCWrongE | BPPredDirWrongE) & (|InstrClassE)) | PredictionInstrClassWrongE;
 | 
			
		||||
 | 
			
		||||
  assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | BPPredClassNonCFIWrongE;
 | 
			
		||||
 | 
			
		||||
  // If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter.
 | 
			
		||||
  assign BTBPredPCWrongE = (InstrClassE[4] | InstrClassE[2] | InstrClassE[1]) & PredictionPCWrongE;
 | 
			
		||||
  // similar with RAS
 | 
			
		||||
  assign RASPredPCWrongE = InstrClassE[3] & PredictionPCWrongE;
 | 
			
		||||
  // Finally, if the real instruction class is non-CFI but the predictor said it was a CFI, we need to count it.
 | 
			
		||||
  assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE;
 | 
			
		||||
  
 | 
			
		||||
  // Update predictors
 | 
			
		||||
 | 
			
		||||
  satCounter2 BPDirUpdate(.BrDir(PCSrcE),
 | 
			
		||||
 | 
			
		||||
@ -27,14 +27,14 @@
 | 
			
		||||
`include "wally-config.vh"
 | 
			
		||||
 | 
			
		||||
module ifu (
 | 
			
		||||
  input  logic             clk, reset,
 | 
			
		||||
  input  logic             StallF, StallD, StallE, StallM, StallW,
 | 
			
		||||
  input  logic             FlushF, FlushD, FlushE, FlushM, FlushW,
 | 
			
		||||
  input logic 		   clk, reset,
 | 
			
		||||
  input logic 		   StallF, StallD, StallE, StallM, StallW,
 | 
			
		||||
  input logic 		   FlushF, FlushD, FlushE, FlushM, FlushW,
 | 
			
		||||
  // Fetch
 | 
			
		||||
  input  logic [`XLEN-1:0] InstrInF,
 | 
			
		||||
  input logic [`XLEN-1:0]  InstrInF,
 | 
			
		||||
  output logic [`XLEN-1:0] PCF, 
 | 
			
		||||
  output logic [`XLEN-1:0] InstrPAdrF,
 | 
			
		||||
  output logic             InstrReadF,
 | 
			
		||||
  output logic 		   InstrReadF,
 | 
			
		||||
  // Decode  
 | 
			
		||||
  // Execute
 | 
			
		||||
  output logic [`XLEN-1:0] PCLinkE,
 | 
			
		||||
@ -47,23 +47,26 @@ module ifu (
 | 
			
		||||
  input logic [`XLEN-1:0]  PrivilegedNextPCM, 
 | 
			
		||||
  output logic [31:0] 	   InstrD, InstrM,
 | 
			
		||||
  output logic [`XLEN-1:0] PCM, 
 | 
			
		||||
  output logic [3:0] InstrClassM,
 | 
			
		||||
  output logic BPPredWrongM,
 | 
			
		||||
  output logic [4:0] 	   InstrClassM,
 | 
			
		||||
  output logic 		   BPPredDirWrongM,
 | 
			
		||||
  output logic 		   BTBPredPCWrongM,
 | 
			
		||||
  output logic 		   RASPredPCWrongM,
 | 
			
		||||
  output logic 		   BPPredClassNonCFIWrongM,
 | 
			
		||||
  // Writeback
 | 
			
		||||
  // output logic [`XLEN-1:0] PCLinkW,
 | 
			
		||||
  // Faults
 | 
			
		||||
  input  logic             IllegalBaseInstrFaultD,
 | 
			
		||||
  output logic             IllegalIEUInstrFaultD,
 | 
			
		||||
  output logic             InstrMisalignedFaultM,
 | 
			
		||||
  input logic 		   IllegalBaseInstrFaultD,
 | 
			
		||||
  output logic 		   IllegalIEUInstrFaultD,
 | 
			
		||||
  output logic 		   InstrMisalignedFaultM,
 | 
			
		||||
  output logic [`XLEN-1:0] InstrMisalignedAdrM,
 | 
			
		||||
  // TLB management
 | 
			
		||||
  input logic  [1:0]       PrivilegeModeW,
 | 
			
		||||
  input logic  [`XLEN-1:0] PageTableEntryF,
 | 
			
		||||
  input logic  [`XLEN-1:0] SATP_REGW,
 | 
			
		||||
  input logic              ITLBWriteF, // ITLBFlushF,
 | 
			
		||||
  output logic             ITLBMissF, ITLBHitF,
 | 
			
		||||
  input logic [1:0] 	   PrivilegeModeW,
 | 
			
		||||
  input logic [`XLEN-1:0]  PageTableEntryF,
 | 
			
		||||
  input logic [`XLEN-1:0]  SATP_REGW,
 | 
			
		||||
  input logic 		   ITLBWriteF, // ITLBFlushF,
 | 
			
		||||
  output logic 		   ITLBMissF, ITLBHitF,
 | 
			
		||||
  // bogus
 | 
			
		||||
  input  logic [15:0] rd2
 | 
			
		||||
  input logic [15:0] 	   rd2
 | 
			
		||||
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
@ -135,7 +138,11 @@ module ifu (
 | 
			
		||||
	      .PCD(PCD),
 | 
			
		||||
	      .PCLinkE(PCLinkE),
 | 
			
		||||
	      .InstrClassE(InstrClassE),
 | 
			
		||||
	      .BPPredWrongE(BPPredWrongE));
 | 
			
		||||
	      .BPPredWrongE(BPPredWrongE),
 | 
			
		||||
 	      .BPPredDirWrongE(BPPredDirWrongE),
 | 
			
		||||
 	      .BTBPredPCWrongE(BTBPredPCWrongE),
 | 
			
		||||
 	      .RASPredPCWrongE(RASPredPCWrongE),
 | 
			
		||||
 	      .BPPredClassNonCFIWrongE(BPPredClassNonCFIWrongE));
 | 
			
		||||
  // The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE.
 | 
			
		||||
  assign PCCorrectE =  PCSrcE ? PCTargetE : PCLinkE;
 | 
			
		||||
 | 
			
		||||
@ -216,12 +223,12 @@ module ifu (
 | 
			
		||||
			       .d(InstrClassE),
 | 
			
		||||
			       .q(InstrClassM));
 | 
			
		||||
 | 
			
		||||
  flopenrc #(1) BPPredWrongRegM(.clk(clk),
 | 
			
		||||
  flopenrc #(4) BPPredWrongRegM(.clk(clk),
 | 
			
		||||
			       .reset(reset),
 | 
			
		||||
			       .en(~StallM),
 | 
			
		||||
			       .clear(FlushM),
 | 
			
		||||
			       .d(BPPredWrongE),
 | 
			
		||||
			       .q(BPPredWrongM));
 | 
			
		||||
			       .d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}),
 | 
			
		||||
			       .q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM}));
 | 
			
		||||
 | 
			
		||||
  // seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.  
 | 
			
		||||
  // either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
 | 
			
		||||
 | 
			
		||||
@ -33,8 +33,12 @@ module csr (
 | 
			
		||||
  input  logic [`XLEN-1:0] PCM, SrcAM,
 | 
			
		||||
  input  logic             CSRReadM, CSRWriteM, TrapM, MTrapM, STrapM, UTrapM, mretM, sretM, uretM,
 | 
			
		||||
  input  logic             TimerIntM, ExtIntM, SwIntM,
 | 
			
		||||
  input  logic             InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
 | 
			
		||||
  input  logic [3:0]       InstrClassM,
 | 
			
		||||
  input  logic             InstrValidW, FloatRegWriteW, LoadStallD,
 | 
			
		||||
  input  logic 		   BPPredDirWrongM,
 | 
			
		||||
  input  logic 		   BTBPredPCWrongM,
 | 
			
		||||
  input  logic 		   RASPredPCWrongM,
 | 
			
		||||
  input  logic 		   BPPredClassNonCFIWrongM,
 | 
			
		||||
  input  logic [4:0]       InstrClassM,
 | 
			
		||||
  input  logic [1:0]       NextPrivilegeModeM, PrivilegeModeW,
 | 
			
		||||
  input  logic [`XLEN-1:0] CauseM, NextFaultMtvalM,
 | 
			
		||||
  output logic [1:0]       STATUS_MPP,
 | 
			
		||||
 | 
			
		||||
@ -28,16 +28,20 @@
 | 
			
		||||
`include "wally-config.vh"
 | 
			
		||||
 | 
			
		||||
module csrc ( 
 | 
			
		||||
    input  logic             clk, reset,
 | 
			
		||||
    input  logic             StallD, StallE, StallM, StallW,
 | 
			
		||||
    input  logic             InstrValidW, LoadStallD, CSRMWriteM, BPPredWrongM,
 | 
			
		||||
    input  logic [3:0]       InstrClassM,
 | 
			
		||||
    input  logic [11:0]      CSRAdrM,
 | 
			
		||||
    input  logic [1:0]       PrivilegeModeW,
 | 
			
		||||
    input  logic [`XLEN-1:0] CSRWriteValM,
 | 
			
		||||
    input  logic [31:0]      MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW,
 | 
			
		||||
    input logic 	     clk, reset,
 | 
			
		||||
    input logic 	     StallD, StallE, StallM, StallW,
 | 
			
		||||
    input logic 	     InstrValidW, LoadStallD, CSRMWriteM,
 | 
			
		||||
    input logic 	     BPPredDirWrongM,
 | 
			
		||||
    input logic 	     BTBPredPCWrongM,
 | 
			
		||||
    input logic 	     RASPredPCWrongM,
 | 
			
		||||
    input logic 	     BPPredClassNonCFIWrongM,
 | 
			
		||||
    input logic [4:0] 	     InstrClassM,
 | 
			
		||||
    input logic [11:0] 	     CSRAdrM,
 | 
			
		||||
    input logic [1:0] 	     PrivilegeModeW,
 | 
			
		||||
    input logic [`XLEN-1:0]  CSRWriteValM,
 | 
			
		||||
    input logic [31:0] 	     MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW,
 | 
			
		||||
    output logic [`XLEN-1:0] CSRCReadValM,
 | 
			
		||||
    output logic             IllegalCSRCAccessM);
 | 
			
		||||
    output logic 	     IllegalCSRCAccessM);
 | 
			
		||||
 | 
			
		||||
    // create Counter arrays to store address of each counter 
 | 
			
		||||
    integer MHPMCOUNTER [`COUNTERS:0];
 | 
			
		||||
@ -64,9 +68,14 @@ module csrc (
 | 
			
		||||
    assign MCOUNTEN[1] = 1'b0;
 | 
			
		||||
    assign MCOUNTEN[2] = InstrValidW & ~StallW;
 | 
			
		||||
    assign MCOUNTEN[3] = LoadStallD & ~StallD;
 | 
			
		||||
    assign MCOUNTEN[4] = BPPredWrongM & ~StallM;
 | 
			
		||||
    assign MCOUNTEN[4] = BPPredDirWrongM & ~StallM;
 | 
			
		||||
    assign MCOUNTEN[5] = InstrClassM[0] & ~StallM;
 | 
			
		||||
    assign MCOUNTEN[`COUNTERS:6] = 0; 
 | 
			
		||||
    assign MCOUNTEN[6] = BTBPredPCWrongM & ~StallM;
 | 
			
		||||
    assign MCOUNTEN[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & ~StallM;
 | 
			
		||||
    assign MCOUNTEN[8] = RASPredPCWrongM & ~StallM;
 | 
			
		||||
    assign MCOUNTEN[9] = InstrClassM[3] & ~StallM;
 | 
			
		||||
    assign MCOUNTEN[10] = BPPredClassNonCFIWrongM & ~StallM;
 | 
			
		||||
    assign MCOUNTEN[`COUNTERS:11] = 0; 
 | 
			
		||||
 | 
			
		||||
    genvar j;       
 | 
			
		||||
    generate
 | 
			
		||||
 | 
			
		||||
@ -36,8 +36,12 @@ module privileged (
 | 
			
		||||
  output logic [`XLEN-1:0] CSRReadValW,
 | 
			
		||||
  output logic [`XLEN-1:0] PrivilegedNextPCM,
 | 
			
		||||
  output logic             RetM, TrapM,
 | 
			
		||||
  input  logic             InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
 | 
			
		||||
  input  logic [3:0]       InstrClassM,
 | 
			
		||||
  input  logic             InstrValidW, FloatRegWriteW, LoadStallD,
 | 
			
		||||
  input  logic 		   BPPredDirWrongM,
 | 
			
		||||
  input  logic 		   BTBPredPCWrongM,
 | 
			
		||||
  input  logic 		   RASPredPCWrongM,
 | 
			
		||||
  input  logic 		   BPPredClassNonCFIWrongM,
 | 
			
		||||
  input  logic [4:0]       InstrClassM,
 | 
			
		||||
  input  logic             PrivilegedM,
 | 
			
		||||
  input  logic             InstrMisalignedFaultM, InstrAccessFaultF, IllegalIEUInstrFaultD,
 | 
			
		||||
  input  logic             LoadMisalignedFaultM, LoadAccessFaultM,
 | 
			
		||||
 | 
			
		||||
@ -111,8 +111,13 @@ module wallypipelinedhart (
 | 
			
		||||
  logic             DataStall, InstrStall;
 | 
			
		||||
  logic             InstrAckD, MemAckW;
 | 
			
		||||
 | 
			
		||||
  logic             BPPredWrongE, BPPredWrongM;
 | 
			
		||||
  logic [3:0]       InstrClassM;
 | 
			
		||||
  logic             BPPredWrongE;
 | 
			
		||||
  logic 	    BPPredDirWrongM;
 | 
			
		||||
  logic 	    BTBPredPCWrongM;
 | 
			
		||||
  logic 	    RASPredPCWrongM;
 | 
			
		||||
  logic 	    BPPredClassNonCFIWrongM;
 | 
			
		||||
 | 
			
		||||
  logic [4:0]       InstrClassM;
 | 
			
		||||
  
 | 
			
		||||
           
 | 
			
		||||
  ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user