mirror of
				https://github.com/openhwgroup/cvw
				synced 2025-02-11 06:05:49 +00:00 
			
		
		
		
	Modified the BTB to save power by not updating when the prediction is unchanged.
This commit is contained in:
		
							parent
							
								
									bc5aecf948
								
							
						
					
					
						commit
						544abe2819
					
				| @ -96,6 +96,7 @@ module bpred ( | |||||||
|   logic 		   BranchM, JumpM, ReturnM, CallM; |   logic 		   BranchM, JumpM, ReturnM, CallM; | ||||||
|   logic 		   BranchW, JumpW, ReturnW, CallW; |   logic 		   BranchW, JumpW, ReturnW, CallW; | ||||||
|   logic 		   WrongBPReturnD; |   logic 		   WrongBPReturnD; | ||||||
|  |   logic [`XLEN-1:0] BTAE; | ||||||
|    |    | ||||||
|   // Part 1 branch direction prediction
 |   // Part 1 branch direction prediction
 | ||||||
|   // look into the 2 port Sram model. something is wrong. 
 |   // look into the 2 port Sram model. something is wrong. 
 | ||||||
| @ -148,9 +149,9 @@ module bpred ( | |||||||
|   btb #(`BTB_SIZE)  |   btb #(`BTB_SIZE)  | ||||||
|     TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, |     TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, | ||||||
|           .PCNextF, .PCF, .PCD, .PCE, .PCM, |           .PCNextF, .PCF, .PCD, .PCE, .PCM, | ||||||
|           .BTAF, .BTAD, |           .BTAF, .BTAD, .BTAE, | ||||||
|           .BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}), |           .BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}), | ||||||
|           .PredictionInstrClassWrongM, |           .PredictionInstrClassWrongM, .AnyWrongPredInstrClassE, | ||||||
|           .IEUAdrE, .IEUAdrM, |           .IEUAdrE, .IEUAdrM, | ||||||
|           .InstrClassD({CallD, ReturnD, JumpD, BranchD}),  |           .InstrClassD({CallD, ReturnD, JumpD, BranchD}),  | ||||||
|           .InstrClassE({CallE, ReturnE, JumpE, BranchE}),  |           .InstrClassE({CallE, ReturnE, JumpE, BranchE}),  | ||||||
| @ -195,7 +196,7 @@ module bpred ( | |||||||
| 
 | 
 | ||||||
|   if(`ZICOUNTERS_SUPPORTED) begin |   if(`ZICOUNTERS_SUPPORTED) begin | ||||||
|     logic 					JumpOrTakenBranchE; |     logic 					JumpOrTakenBranchE; | ||||||
|     logic [`XLEN-1:0] 				BTAE, RASPCD, RASPCE; |     logic [`XLEN-1:0] 	    RASPCD, RASPCE; | ||||||
|     logic 					BTBPredPCWrongE, RASPredPCWrongE;	 |     logic 					BTBPredPCWrongE, RASPredPCWrongE;	 | ||||||
|     // performance counters
 |     // performance counters
 | ||||||
|     // 1. class         (class wrong / minstret) (PredictionInstrClassWrongM / csr)                    // Correct now
 |     // 1. class         (class wrong / minstret) (PredictionInstrClassWrongM / csr)                    // Correct now
 | ||||||
| @ -214,8 +215,6 @@ module bpred ( | |||||||
|      |      | ||||||
|     flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); |     flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); | ||||||
| 
 | 
 | ||||||
|     flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); |  | ||||||
| 
 |  | ||||||
|     flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); |     flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); | ||||||
|     flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); |     flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); | ||||||
|     flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM,  |     flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM,  | ||||||
|  | |||||||
| @ -37,9 +37,11 @@ module btb #(parameter Depth = 10 ) ( | |||||||
|   input  logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages
 |   input  logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages
 | ||||||
|   output logic [`XLEN-1:0] BTAF, // BTB's guess at PC
 |   output logic [`XLEN-1:0] BTAF, // BTB's guess at PC
 | ||||||
|   output logic [`XLEN-1:0] BTAD, |   output logic [`XLEN-1:0] BTAD, | ||||||
|  |   output logic [`XLEN-1:0] BTAE, | ||||||
|   output logic [3:0] 	   BTBIClassF, // BTB's guess at instruction class
 |   output logic [3:0] 	   BTBIClassF, // BTB's guess at instruction class
 | ||||||
|   // update
 |   // update
 | ||||||
|   input  logic 			   PredictionInstrClassWrongM, // BTB's instruction class guess was wrong
 |   input  logic 			   PredictionInstrClassWrongM, // BTB's instruction class guess was wrong
 | ||||||
|  |   input  logic             AnyWrongPredInstrClassE, | ||||||
|   input  logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb
 |   input  logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb
 | ||||||
|   input  logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb
 |   input  logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb
 | ||||||
|   input  logic [3:0] 	   InstrClassD, // Instruction class to insert into btb
 |   input  logic [3:0] 	   InstrClassD, // Instruction class to insert into btb
 | ||||||
| @ -53,9 +55,11 @@ module btb #(parameter Depth = 10 ) ( | |||||||
|   logic 		   MatchD, MatchE, MatchM, MatchW, MatchX; |   logic 		   MatchD, MatchE, MatchM, MatchW, MatchX; | ||||||
|   logic [`XLEN+3:0] 	   ForwardBTBPrediction, ForwardBTBPredictionF; |   logic [`XLEN+3:0] 	   ForwardBTBPrediction, ForwardBTBPredictionF; | ||||||
|   logic [`XLEN+3:0] 	   TableBTBPredF; |   logic [`XLEN+3:0] 	   TableBTBPredF; | ||||||
|   logic 		   UpdateEn; |  | ||||||
|   logic [`XLEN-1:0] 	   IEUAdrW; |   logic [`XLEN-1:0] 	   IEUAdrW; | ||||||
|   logic [`XLEN-1:0]        PCW; |   logic [`XLEN-1:0]        PCW; | ||||||
|  |   logic 				   BTAWrongE, BTBWrongE; | ||||||
|  |   logic 				   BTBWrongM; | ||||||
|  |    | ||||||
|    |    | ||||||
|   // hashing function for indexing the PC
 |   // hashing function for indexing the PC
 | ||||||
|   // We have Depth bits to index, but XLEN bits as the input.
 |   // We have Depth bits to index, but XLEN bits as the input.
 | ||||||
| @ -87,14 +91,22 @@ module btb #(parameter Depth = 10 ) ( | |||||||
| 
 | 
 | ||||||
|   assign {BTBIClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF}; |   assign {BTBIClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF}; | ||||||
| 
 | 
 | ||||||
|   assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; |  | ||||||
| 
 | 
 | ||||||
|   // An optimization may be using a PC relative address.
 |   // An optimization may be using a PC relative address.
 | ||||||
|   ram2p1r1wbe #(2**Depth, `XLEN+4) memory( |   ram2p1r1wbe #(2**Depth, `XLEN+4) memory( | ||||||
|     .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF), |     .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF), | ||||||
|      .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1)); |      .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1)); | ||||||
| 
 | 
 | ||||||
|   flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); |   flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); | ||||||
|  | 
 | ||||||
|  |   // BTAE is not strictly necessary.  However, it is used by two parts of Wally.
 | ||||||
|  |   // 1. It gates updates to the BTB when the prediction does not change.  This saves power.
 | ||||||
|  |   // 2. BTAWrongE is used by the performance counters to track when the BTB's BTA or instruction class is wrong.
 | ||||||
|  |   flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); | ||||||
|  |   assign BTAWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]); | ||||||
|  |   assign BTBWrongE = BTAWrongE | AnyWrongPredInstrClassE; | ||||||
|  |   flopenrc #(1) BTBWrongMReg(clk, reset, FlushM, ~StallM, BTBWrongE, BTBWrongM); | ||||||
|  | 
 | ||||||
|   flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); |   flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); | ||||||
|   flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); |   flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user