forked from Github_Repos/cvw
		
	Converted to using the BTB to predict the instruction class.
This commit is contained in:
		
							parent
							
								
									7592a0dacb
								
							
						
					
					
						commit
						52d95d415f
					
				@ -35,11 +35,13 @@ module BTBPredictor
 | 
				
			|||||||
   input logic 		    reset,
 | 
					   input logic 		    reset,
 | 
				
			||||||
   input logic [`XLEN-1:0]  LookUpPC,
 | 
					   input logic [`XLEN-1:0]  LookUpPC,
 | 
				
			||||||
   output logic [`XLEN-1:0] TargetPC,
 | 
					   output logic [`XLEN-1:0] TargetPC,
 | 
				
			||||||
 | 
					   output logic [3:0] 	    InstrClass,
 | 
				
			||||||
   output logic 	    Valid,
 | 
					   output logic 	    Valid,
 | 
				
			||||||
   // update
 | 
					   // update
 | 
				
			||||||
   input logic 		    UpdateEN,
 | 
					   input logic 		    UpdateEN,
 | 
				
			||||||
   input logic [`XLEN-1:0]  UpdatePC,
 | 
					   input logic [`XLEN-1:0]  UpdatePC,
 | 
				
			||||||
   input logic [`XLEN-1:0]  UpdateTarget
 | 
					   input logic [`XLEN-1:0]  UpdateTarget,
 | 
				
			||||||
 | 
					   input logic [3:0] 	    UpdateInstrClass
 | 
				
			||||||
   );
 | 
					   );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  localparam TotalDepth = 2 ** Depth;
 | 
					  localparam TotalDepth = 2 ** Depth;
 | 
				
			||||||
@ -82,15 +84,15 @@ module BTBPredictor
 | 
				
			|||||||
  // and other indirection branch data.
 | 
					  // and other indirection branch data.
 | 
				
			||||||
  // Another optimization may be using a PC relative address.
 | 
					  // Another optimization may be using a PC relative address.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  SRAM2P1R1W #(Depth, `XLEN) memory(.clk(clk),
 | 
					  SRAM2P1R1W #(Depth, `XLEN+4) memory(.clk(clk),
 | 
				
			||||||
				    .reset(reset),
 | 
									      .reset(reset),
 | 
				
			||||||
				    .RA1(LookUpPCIndex),
 | 
									      .RA1(LookUpPCIndex),
 | 
				
			||||||
				    .RD1(TargetPC),
 | 
									      .RD1({{InstrClass, TargetPC}}),
 | 
				
			||||||
				    .REN1(1'b1),
 | 
									      .REN1(1'b1),
 | 
				
			||||||
				    .WA1(UpdatePCIndex),
 | 
									      .WA1(UpdatePCIndex),
 | 
				
			||||||
				    .WD1(UpdateTarget),
 | 
									      .WD1({UpdateInstrClass, UpdateTarget}),
 | 
				
			||||||
				    .WEN1(UpdateEN),
 | 
									      .WEN1(UpdateEN),
 | 
				
			||||||
				    .BitWEN1({`XLEN{1'b1}}));
 | 
									      .BitWEN1({`XLEN{1'b1}}));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
endmodule
 | 
					endmodule
 | 
				
			||||||
 | 
				
			|||||||
@ -36,9 +36,6 @@ module bpred
 | 
				
			|||||||
   input logic [`XLEN-1:0]  PCNextF, // *** forgot to include this one on the I/O list
 | 
					   input logic [`XLEN-1:0]  PCNextF, // *** forgot to include this one on the I/O list
 | 
				
			||||||
   output logic [`XLEN-1:0] BPPredPCF,
 | 
					   output logic [`XLEN-1:0] BPPredPCF,
 | 
				
			||||||
   output logic 	    SelBPPredF,
 | 
					   output logic 	    SelBPPredF,
 | 
				
			||||||
   input logic [31:0] 	    InstrF, // we are going to use the opcode to indicate what type instruction this is.
 | 
					 | 
				
			||||||
   // if this is too slow we will have to predict the type of instruction.
 | 
					 | 
				
			||||||
   // Execute state
 | 
					 | 
				
			||||||
   // Update Predictor
 | 
					   // Update Predictor
 | 
				
			||||||
   input logic [`XLEN-1:0]  PCE, // The address of the currently executing instruction
 | 
					   input logic [`XLEN-1:0]  PCE, // The address of the currently executing instruction
 | 
				
			||||||
   // 1 hot encoding
 | 
					   // 1 hot encoding
 | 
				
			||||||
@ -50,6 +47,7 @@ module bpred
 | 
				
			|||||||
   input logic [`XLEN-1:0]  PCTargetE, // The branch destination if the branch is taken.
 | 
					   input logic [`XLEN-1:0]  PCTargetE, // The branch destination if the branch is taken.
 | 
				
			||||||
   input logic [`XLEN-1:0]  PCD, // The address the branch predictor took.
 | 
					   input logic [`XLEN-1:0]  PCD, // The address the branch predictor took.
 | 
				
			||||||
   input logic [`XLEN-1:0]  PCLinkE, // The address following the branch instruction. (AKA Fall through address)
 | 
					   input logic [`XLEN-1:0]  PCLinkE, // The address following the branch instruction. (AKA Fall through address)
 | 
				
			||||||
 | 
					   input logic [3:0] 	    InstrClassE,
 | 
				
			||||||
   // Report branch prediction status
 | 
					   // Report branch prediction status
 | 
				
			||||||
   output logic 	    BPPredWrongE
 | 
					   output logic 	    BPPredWrongE
 | 
				
			||||||
   );
 | 
					   );
 | 
				
			||||||
@ -57,7 +55,7 @@ module bpred
 | 
				
			|||||||
  logic 		    BTBValidF;
 | 
					  logic 		    BTBValidF;
 | 
				
			||||||
  logic [1:0] 		    BPPredF, BPPredD, BPPredE, UpdateBPPredE;
 | 
					  logic [1:0] 		    BPPredF, BPPredD, BPPredE, UpdateBPPredE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  logic [3:0] 		    InstrClassD, InstrClassF, InstrClassE;
 | 
					  logic [3:0] 		    BPInstrClassF, BPInstrClassD, BPInstrClassE;
 | 
				
			||||||
  logic [`XLEN-1:0] 	    BTBPredPCF, RASPCF;
 | 
					  logic [`XLEN-1:0] 	    BTBPredPCF, RASPCF;
 | 
				
			||||||
  logic 		    TargetWrongE;
 | 
					  logic 		    TargetWrongE;
 | 
				
			||||||
  logic 		    FallThroughWrongE;
 | 
					  logic 		    FallThroughWrongE;
 | 
				
			||||||
@ -65,17 +63,8 @@ module bpred
 | 
				
			|||||||
  logic 		    PredictionPCWrongE;
 | 
					  logic 		    PredictionPCWrongE;
 | 
				
			||||||
  logic [`XLEN-1:0] 	    CorrectPCE;
 | 
					  logic [`XLEN-1:0] 	    CorrectPCE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Part 1 decode the instruction class.
 | 
					 | 
				
			||||||
  // *** for now I'm skiping the compressed instructions
 | 
					 | 
				
			||||||
  assign InstrClassF[3] = InstrF[6:0] == 7'h67 && InstrF[19:15] == 5'h01; // return
 | 
					 | 
				
			||||||
  // This is probably too much logic. 
 | 
					 | 
				
			||||||
  // *** This also encourages me to switch to predicting the class.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  assign InstrClassF[2] = InstrF[6:0] == 7'h67 && InstrF[19:15] != 5'h01; // jump register, but not return
 | 
					  // Part 1 branch direction prediction
 | 
				
			||||||
  assign InstrClassF[1] = InstrF[6:0] == 7'h6F; // jump
 | 
					 | 
				
			||||||
  assign InstrClassF[0] = InstrF[6:0] == 7'h63; // branch
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  // Part 2 branch direction prediction
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  twoBitPredictor DirPredictor(.clk(clk),
 | 
					  twoBitPredictor DirPredictor(.clk(clk),
 | 
				
			||||||
			       .reset(reset),
 | 
								       .reset(reset),
 | 
				
			||||||
@ -91,40 +80,42 @@ module bpred
 | 
				
			|||||||
  // 2) Any information which is necessary for the predictor to build its next state.
 | 
					  // 2) Any information which is necessary for the predictor to build its next state.
 | 
				
			||||||
  // For a 2 bit table this is the prediction count.
 | 
					  // For a 2 bit table this is the prediction count.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  assign SelBPPredF = ((InstrClassF[0] & BPPredF[1] & BTBValidF) | 
 | 
					  assign SelBPPredF = ((BPInstrClassF[0] & BPPredF[1] & BTBValidF) | 
 | 
				
			||||||
		       InstrClassF[3] |
 | 
							       BPInstrClassF[3] |
 | 
				
			||||||
		       (InstrClassF[2] & BTBValidF) | 
 | 
							       (BPInstrClassF[2] & BTBValidF) | 
 | 
				
			||||||
		       InstrClassF[1] & BTBValidF) ;
 | 
							       BPInstrClassF[1] & BTBValidF) ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Part 3 Branch target address prediction
 | 
					  // Part 2 Branch target address prediction
 | 
				
			||||||
  // *** For now the BTB will house the direct and indirect targets
 | 
					  // *** For now the BTB will house the direct and indirect targets
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  BTBPredictor TargetPredictor(.clk(clk),
 | 
					  BTBPredictor TargetPredictor(.clk(clk),
 | 
				
			||||||
			       .reset(reset),
 | 
								       .reset(reset),
 | 
				
			||||||
			       .LookUpPC(PCNextF),
 | 
								       .LookUpPC(PCNextF),
 | 
				
			||||||
			       .TargetPC(BTBPredPCF),
 | 
								       .TargetPC(BTBPredPCF),
 | 
				
			||||||
 | 
								       .InstrClass(BPInstrClassF),
 | 
				
			||||||
			       .Valid(BTBValidF),
 | 
								       .Valid(BTBValidF),
 | 
				
			||||||
			       // update
 | 
								       // update
 | 
				
			||||||
			       .UpdateEN(InstrClassE[2] | InstrClassE[1] | InstrClassE[0]),
 | 
								       .UpdateEN(InstrClassE[2] | InstrClassE[1] | InstrClassE[0]),
 | 
				
			||||||
			       .UpdatePC(PCE),
 | 
								       .UpdatePC(PCE),
 | 
				
			||||||
			       .UpdateTarget(PCTargetE));
 | 
								       .UpdateTarget(PCTargetE),
 | 
				
			||||||
 | 
								       .UpdateInstrClass(InstrClassE));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // need to forward when updating to the same address as reading.
 | 
					  // need to forward when updating to the same address as reading.
 | 
				
			||||||
  //assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE;
 | 
					  //assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE;
 | 
				
			||||||
  //assign TargetPC = (PCE == PCNextF) ? CorrectPCE : BTBPredPCF;
 | 
					  //assign TargetPC = (PCE == PCNextF) ? CorrectPCE : BTBPredPCF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Part 4 RAS
 | 
					  // Part 3 RAS
 | 
				
			||||||
  // *** need to add the logic to restore RAS on flushes.  We will use incr for this.
 | 
					  // *** need to add the logic to restore RAS on flushes.  We will use incr for this.
 | 
				
			||||||
  RASPredictor RASPredictor(.clk(clk),
 | 
					  RASPredictor RASPredictor(.clk(clk),
 | 
				
			||||||
			    .reset(reset),
 | 
								    .reset(reset),
 | 
				
			||||||
			    .pop(InstrClassF[3]),
 | 
								    .pop(BPInstrClassF[3]),
 | 
				
			||||||
			    .popPC(RASPCF),
 | 
								    .popPC(RASPCF),
 | 
				
			||||||
			    .push(InstrClassE[3]),
 | 
								    .push(InstrClassE[3]),
 | 
				
			||||||
			    .incr(1'b0),
 | 
								    .incr(1'b0),
 | 
				
			||||||
			    .pushPC(PCLinkE));
 | 
								    .pushPC(PCLinkE));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  assign BPPredPCF = InstrClassF[3] ? RASPCF : BTBPredPCF;
 | 
					  assign BPPredPCF = BPInstrClassF[3] ? RASPCF : BTBPredPCF;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -150,15 +141,17 @@ module bpred
 | 
				
			|||||||
			       .reset(reset),
 | 
								       .reset(reset),
 | 
				
			||||||
			       .en(~StallF),
 | 
								       .en(~StallF),
 | 
				
			||||||
			       .clear(FlushF),
 | 
								       .clear(FlushF),
 | 
				
			||||||
			       .d(InstrClassF),
 | 
								       .d(BPInstrClassF),
 | 
				
			||||||
			       .q(InstrClassD));
 | 
								       .q(BPInstrClassD));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  flopenrc #(4) InstrClassRegE(.clk(clk),
 | 
					  flopenrc #(4) InstrClassRegE(.clk(clk),
 | 
				
			||||||
			       .reset(reset),
 | 
								       .reset(reset),
 | 
				
			||||||
			       .en(~StallD),
 | 
								       .en(~StallD),
 | 
				
			||||||
			       .clear(FlushD),
 | 
								       .clear(FlushD),
 | 
				
			||||||
			       .d(InstrClassD),
 | 
								       .d(BPInstrClassD),
 | 
				
			||||||
			       .q(InstrClassE));
 | 
								       .q(BPInstrClassE));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Check the prediction makes execution.
 | 
					  // Check the prediction makes execution.
 | 
				
			||||||
  assign TargetWrongE = PCTargetE != PCD;
 | 
					  assign TargetWrongE = PCTargetE != PCD;
 | 
				
			||||||
 | 
				
			|||||||
@ -67,6 +67,8 @@ module ifu (
 | 
				
			|||||||
  // branch predictor signals
 | 
					  // branch predictor signals
 | 
				
			||||||
  logic 	   SelBPPredF;
 | 
					  logic 	   SelBPPredF;
 | 
				
			||||||
  logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F;
 | 
					  logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F;
 | 
				
			||||||
 | 
					  logic [3:0] 	    InstrClassD, InstrClassE;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // *** put memory interface on here, InstrF becomes output
 | 
					  // *** put memory interface on here, InstrF becomes output
 | 
				
			||||||
@ -109,13 +111,12 @@ module ifu (
 | 
				
			|||||||
	      .PCNextF(PCNextF),
 | 
						      .PCNextF(PCNextF),
 | 
				
			||||||
	      .BPPredPCF(BPPredPCF),
 | 
						      .BPPredPCF(BPPredPCF),
 | 
				
			||||||
	      .SelBPPredF(SelBPPredF),
 | 
						      .SelBPPredF(SelBPPredF),
 | 
				
			||||||
	      .InstrF(InstrF), // *** this is flushed internally. The logic is redundant with some out here.
 | 
					 | 
				
			||||||
	      // Also I believe this port will be removed.
 | 
					 | 
				
			||||||
	      .PCE(PCE),
 | 
						      .PCE(PCE),
 | 
				
			||||||
	      .PCSrcE(PCSrcE),
 | 
						      .PCSrcE(PCSrcE),
 | 
				
			||||||
	      .PCTargetE(PCTargetE),
 | 
						      .PCTargetE(PCTargetE),
 | 
				
			||||||
	      .PCD(PCD),
 | 
						      .PCD(PCD),
 | 
				
			||||||
	      .PCLinkE(PCLinkE),
 | 
						      .PCLinkE(PCLinkE),
 | 
				
			||||||
 | 
						      .InstrClassE(InstrClassE),
 | 
				
			||||||
	      .BPPredWrongE(BPPredWrongE));
 | 
						      .BPPredWrongE(BPPredWrongE));
 | 
				
			||||||
  // The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE.
 | 
					  // The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE.
 | 
				
			||||||
  assign PCCorrectE =  PCSrcE ? PCTargetE : PCLinkE;
 | 
					  assign PCCorrectE =  PCSrcE ? PCTargetE : PCLinkE;
 | 
				
			||||||
@ -142,6 +143,14 @@ module ifu (
 | 
				
			|||||||
  assign IllegalIEUInstrFaultD = IllegalBaseInstrFaultD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr
 | 
					  assign IllegalIEUInstrFaultD = IllegalBaseInstrFaultD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr
 | 
				
			||||||
  // *** combine these with others in better way, including M, F
 | 
					  // *** combine these with others in better way, including M, F
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // the branch predictor needs a compact decoding of the instruction class.
 | 
				
			||||||
 | 
					  // *** consider adding in the alternate return address x5 for returns.
 | 
				
			||||||
 | 
					  assign InstrClassD[3] = InstrD[6:0] == 7'h67 && InstrD[19:15] == 5'h01; // return
 | 
				
			||||||
 | 
					  assign InstrClassD[2] = InstrD[6:0] == 7'h67 && InstrD[19:15] != 5'h01; // jump register, but not return
 | 
				
			||||||
 | 
					  assign InstrClassD[1] = InstrD[6:0] == 7'h6F; // jump
 | 
				
			||||||
 | 
					  assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Misaligned PC logic
 | 
					  // Misaligned PC logic
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  generate
 | 
					  generate
 | 
				
			||||||
@ -164,6 +173,13 @@ module ifu (
 | 
				
			|||||||
  flopr #(`XLEN) PCMReg(clk, reset, PCE, PCM);
 | 
					  flopr #(`XLEN) PCMReg(clk, reset, PCE, PCM);
 | 
				
			||||||
  flopr #(`XLEN) PCWReg(clk, reset, PCM, PCW); // *** probably not needed; delete later
 | 
					  flopr #(`XLEN) PCWReg(clk, reset, PCM, PCW); // *** probably not needed; delete later
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  flopenrc #(4) InstrClassRegE(.clk(clk),
 | 
				
			||||||
 | 
								       .reset(reset),
 | 
				
			||||||
 | 
								       .en(~StallD),
 | 
				
			||||||
 | 
								       .clear(FlushD),
 | 
				
			||||||
 | 
								       .d(InstrClassD),
 | 
				
			||||||
 | 
								       .q(InstrClassE));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.  
 | 
					  // seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.  
 | 
				
			||||||
  // either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
 | 
					  // either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
 | 
				
			||||||
  // have dedicated adder in Mem stage based on PCM + 2 or 4
 | 
					  // have dedicated adder in Mem stage based on PCM + 2 or 4
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user