mirror of
				https://github.com/openhwgroup/cvw
				synced 2025-02-11 06:05:49 +00:00 
			
		
		
		
	I think ahead pipelining is working for local history.
This commit is contained in:
		
							parent
							
								
									c4d6724867
								
							
						
					
					
						commit
						35a59a1193
					
				@ -279,12 +279,13 @@ if(sys.argv[1] == '-b'):
 | 
			
		||||
                    dct[PredType] = (currSize, currPercent)
 | 
			
		||||
        print(dct)
 | 
			
		||||
        fig, axes = plt.subplots()
 | 
			
		||||
        marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*'}
 | 
			
		||||
        colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue'}
 | 
			
		||||
        marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*', 'tenlocal' : '.', 'eightlocal' : ',', 'fourlocal' : 'x'}
 | 
			
		||||
        colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue', 'tenlocal' : 'lightblue', 'eightlocal' : 'lightblue', 'fourlocal' : 'lightblue'}
 | 
			
		||||
        for cat in dct:
 | 
			
		||||
            (x, y) = dct[cat]
 | 
			
		||||
            x=[int(2**int(v)) for v in x]
 | 
			
		||||
            print(x, y)
 | 
			
		||||
            #print(x, y)
 | 
			
		||||
            print(cat)
 | 
			
		||||
            axes.plot(x,y, color=colors[cat])
 | 
			
		||||
            axes.scatter(x,y, label=cat, marker=marker[cat], color=colors[cat])
 | 
			
		||||
            #plt.scatter(x, y, label=cat)
 | 
			
		||||
 | 
			
		||||
@ -46,18 +46,33 @@ configs = [
 | 
			
		||||
    )
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
# bpdSize = [6, 8, 10, 12, 14, 16]
 | 
			
		||||
# bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic', 'local_basic']
 | 
			
		||||
# for CurrBPType in bpdType:
 | 
			
		||||
#     for CurrBPSize in bpdSize:
 | 
			
		||||
#         name = CurrBPType+str(CurrBPSize)
 | 
			
		||||
#         configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize)
 | 
			
		||||
#         tc = TestCase(
 | 
			
		||||
#             name=name,
 | 
			
		||||
#             variant="rv32gc",
 | 
			
		||||
#             cmd="vsim > {} -c <<!\ndo wally-batch.do  rv32gc configOptions " + name + " embench " + configOptions,
 | 
			
		||||
#             grepstr="")
 | 
			
		||||
#         configs.append(tc)
 | 
			
		||||
 | 
			
		||||
bpdSize = [6, 8, 10, 12, 14, 16]
 | 
			
		||||
bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic']
 | 
			
		||||
LHRSize = [4, 8, 10]
 | 
			
		||||
bpdType = ['local_basic']
 | 
			
		||||
for CurrBPType in bpdType:
 | 
			
		||||
    for CurrBPSize in bpdSize:
 | 
			
		||||
        name = CurrBPType+str(CurrBPSize)
 | 
			
		||||
        configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize)
 | 
			
		||||
        tc = TestCase(
 | 
			
		||||
            name=name,
 | 
			
		||||
            variant="rv32gc",
 | 
			
		||||
            cmd="vsim > {} -c <<!\ndo wally-batch.do  rv32gc configOptions " + name + " embench " + configOptions,
 | 
			
		||||
            grepstr="")
 | 
			
		||||
        configs.append(tc)
 | 
			
		||||
        for CurrLHRSize in  LHRSize:
 | 
			
		||||
            name = str(CurrLHRSize)+CurrBPType+str(CurrBPSize)
 | 
			
		||||
            configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize) + " +define+BPRED_NUM_LHR=" + str(CurrLHRSize) + " "
 | 
			
		||||
            tc = TestCase(
 | 
			
		||||
                name=name,
 | 
			
		||||
                variant="rv32gc",
 | 
			
		||||
                cmd="vsim > {} -c <<!\ndo wally-batch.do  rv32gc configOptions " + name + " embench " + configOptions,
 | 
			
		||||
                grepstr="")
 | 
			
		||||
            configs.append(tc)
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
from multiprocessing import Pool, TimeoutError
 | 
			
		||||
 | 
			
		||||
@ -128,8 +128,13 @@ module bpred (
 | 
			
		||||
      .BranchE, .BranchM, .PCSrcE);
 | 
			
		||||
  
 | 
			
		||||
  end else if (`BPRED_TYPE == "BP_LOCAL_BASIC") begin:Predictor
 | 
			
		||||
    localbpbasic #(`BPRED_NUM_LHR, `BPRED_SIZE)
 | 
			
		||||
DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
 | 
			
		||||
    localbpbasic #(`BPRED_NUM_LHR, `BPRED_SIZE) DirPredictor(.clk, .reset, 
 | 
			
		||||
      .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
 | 
			
		||||
      .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
 | 
			
		||||
      .BranchE, .BranchM, .PCSrcE);
 | 
			
		||||
  end else if (`BPRED_TYPE == "BP_LOCAL_AHEAD") begin:Predictor
 | 
			
		||||
    localaheadbp #(`BPRED_NUM_LHR, `BPRED_SIZE) DirPredictor(.clk, .reset, 
 | 
			
		||||
      .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
 | 
			
		||||
      .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
 | 
			
		||||
      .BranchE, .BranchM, .PCSrcE);
 | 
			
		||||
  end 
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										111
									
								
								src/ifu/bpred/localaheadbp.sv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								src/ifu/bpred/localaheadbp.sv
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,111 @@
 | 
			
		||||
///////////////////////////////////////////
 | 
			
		||||
// localaheadbp.sv
 | 
			
		||||
//
 | 
			
		||||
// Written: Ross Thompson
 | 
			
		||||
// Email: ross1728@gmail.com
 | 
			
		||||
// Created: 16 March 2021
 | 
			
		||||
// Adapted from ssanghai@hmc.edu (Shreya Sanghai) global history predictor implementation.
 | 
			
		||||
// Modified: 20 February 2023 
 | 
			
		||||
//
 | 
			
		||||
// Purpose: Local history branch predictor (ahead-pipelined variant) with parameterized number (2^m) and length (k) of local history registers
 | 
			
		||||
// 
 | 
			
		||||
// A component of the CORE-V-WALLY configurable RISC-V project.
 | 
			
		||||
// 
 | 
			
		||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
 | 
			
		||||
//
 | 
			
		||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
 | 
			
		||||
//
 | 
			
		||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
 | 
			
		||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
 | 
			
		||||
// may obtain a copy of the License at
 | 
			
		||||
//
 | 
			
		||||
// https://solderpad.org/licenses/SHL-2.1/
 | 
			
		||||
//
 | 
			
		||||
// Unless required by applicable law or agreed to in writing, any work distributed under the 
 | 
			
		||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
 | 
			
		||||
// either express or implied. See the License for the specific language governing permissions 
 | 
			
		||||
// and limitations under the License.
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
`include "wally-config.vh"
 | 
			
		||||
 | 
			
		||||
module localaheadbp #(parameter m = 6, // 2^m = number of local history branches 
 | 
			
		||||
                      parameter k = 10) ( // number of past branches stored
 | 
			
		||||
  input logic             clk,
 | 
			
		||||
  input logic             reset,
 | 
			
		||||
  input logic             StallF, StallD, StallE, StallM, StallW,
 | 
			
		||||
  input logic             FlushD, FlushE, FlushM, FlushW,
 | 
			
		||||
  output logic [1:0]      BPDirPredF, 
 | 
			
		||||
  output logic            BPDirPredWrongE,
 | 
			
		||||
  // update
 | 
			
		||||
  input logic [`XLEN-1:0] PCNextF, PCM,
 | 
			
		||||
  input logic             BranchE, BranchM, PCSrcE
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
  logic [k-1:0]           IndexNextF, IndexM;
 | 
			
		||||
  logic [1:0]             BPDirPredD, BPDirPredE;
 | 
			
		||||
  logic [1:0]             NewBPDirPredE, NewBPDirPredM;
 | 
			
		||||
 | 
			
		||||
  logic [k-1:0]           LHRF, LHRD, LHRE, LHRM, LHRNextF;
 | 
			
		||||
  logic [k-1:0]           LHRNextW;
 | 
			
		||||
  logic                   PCSrcM;
 | 
			
		||||
  logic [2**m-1:0][k-1:0] LHRArray;
 | 
			
		||||
  logic [m-1:0]           IndexLHRNextF, IndexLHRM;
 | 
			
		||||
  logic [`XLEN-1:0]       PCW;
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
  logic                    UpdateM;
 | 
			
		||||
 | 
			
		||||
  //assign IndexNextF = LHR;
 | 
			
		||||
  assign IndexM = LHRM;
 | 
			
		||||
  
 | 
			
		||||
  ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
 | 
			
		||||
    .ce1(~StallF), .ce2(~StallW & ~FlushW),
 | 
			
		||||
    .ra1(LHRNextF),
 | 
			
		||||
    .rd1(BPDirPredF),
 | 
			
		||||
    .wa2(IndexM),
 | 
			
		||||
    .wd2(NewBPDirPredM),
 | 
			
		||||
    .we2(BranchM),
 | 
			
		||||
    .bwe2(1'b1));
 | 
			
		||||
 | 
			
		||||
  flopenrc #(2) PredictionRegD(clk, reset,  FlushD, ~StallD, BPDirPredF, BPDirPredD);
 | 
			
		||||
  flopenrc #(2) PredictionRegE(clk, reset,  FlushE, ~StallE, BPDirPredD, BPDirPredE);
 | 
			
		||||
 | 
			
		||||
  satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE));
 | 
			
		||||
  flopenrc #(2) NewPredictionRegM(clk, reset,  FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM);
 | 
			
		||||
 | 
			
		||||
  assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE;
 | 
			
		||||
 | 
			
		||||
  // This is the main difference between global and local history basic implementations. In global, 
 | 
			
		||||
  // the ghr wraps back into itself directly without
 | 
			
		||||
  // being pipelined.  I.E. GHR is not read in F and then pipelined to M where it is updated.  Instead
 | 
			
		||||
  // GHR is both read and updated in M.  GHR is still pipelined so that the PHT is updated with the correct
 | 
			
		||||
  // GHR.  Local history in contrast must pipeline the specific history register read during F and then update
 | 
			
		||||
  // that same one in M.  This implementation does not forward if a branch matches in the D, E, or M stages.
 | 
			
		||||
  assign LHRNextW = BranchM ? {PCSrcM, LHRM[k-1:1]} : LHRM;
 | 
			
		||||
 | 
			
		||||
  // this is local history
 | 
			
		||||
  //genvar      index;
 | 
			
		||||
  //assign UpdateM = BranchM & ~StallW & ~FlushW;
 | 
			
		||||
  assign IndexLHRM = {PCW[m+1] ^ PCW[1], PCW[m:2]};
 | 
			
		||||
  assign IndexLHRNextF = {PCNextF[m+1] ^ PCNextF[1], PCNextF[m:2]};
 | 
			
		||||
 | 
			
		||||
  ram2p1r1wbe #(2**m, k) BHT(.clk(clk),
 | 
			
		||||
    .ce1(~StallF), .ce2(~StallW & ~FlushW),
 | 
			
		||||
    .ra1(IndexLHRNextF),
 | 
			
		||||
    .rd1(LHRNextF),
 | 
			
		||||
    .wa2(IndexLHRM),
 | 
			
		||||
    .wd2(LHRNextW),
 | 
			
		||||
    .we2(BranchM),
 | 
			
		||||
    .bwe2('1));  
 | 
			
		||||
 | 
			
		||||
  flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
 | 
			
		||||
    
 | 
			
		||||
  flopenrc #(k) LHRFReg(clk, reset, FlushD, ~StallF, LHRNextF, LHRF);
 | 
			
		||||
  flopenrc #(k) LHRDReg(clk, reset, FlushD, ~StallD, LHRF, LHRD);
 | 
			
		||||
  flopenrc #(k) LHREReg(clk, reset, FlushE, ~StallE, LHRD, LHRE);
 | 
			
		||||
  flopenrc #(k) LHRMReg(clk, reset, FlushM, ~StallM, LHRE, LHRM);
 | 
			
		||||
 | 
			
		||||
  flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
 | 
			
		||||
 | 
			
		||||
endmodule
 | 
			
		||||
@ -74,10 +74,10 @@ module localbpbasic #(parameter m = 6, // 2^m = number of local history branches
 | 
			
		||||
 | 
			
		||||
  assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE;
 | 
			
		||||
 | 
			
		||||
  // this is the main difference between global and local history basic implementations rather than
 | 
			
		||||
  // having multiple history registers.  In global, the ghr wraps back into itself directly without
 | 
			
		||||
  // being pipelined.  IE. GHR is not read in F and then pipelined to M where it is updated.  Instead
 | 
			
		||||
  // GHR is just read in M and updated.  GHR is still pipelined so that the PHT is updated with the correct
 | 
			
		||||
  // This is the main difference between global and local history basic implementations. In global, 
 | 
			
		||||
  // the ghr wraps back into itself directly without
 | 
			
		||||
  // being pipelined.  I.E. GHR is not read in F and then pipelined to M where it is updated.  Instead
 | 
			
		||||
  // GHR is both read and updated in M.  GHR is still pipelined so that the PHT is updated with the correct
 | 
			
		||||
  // GHR.  Local history in contrast must pipeline the specific history register read during F and then update
 | 
			
		||||
  // that same one in M.  This implementation does not forward if a branch matches in the D, E, or M stages.
 | 
			
		||||
  assign LHRNextW = BranchM ? {PCSrcM, LHRM[k-1:1]} : LHRM;
 | 
			
		||||
 | 
			
		||||
@ -28,7 +28,7 @@
 | 
			
		||||
`include "wally-config.vh"
 | 
			
		||||
`include "tests.vh"
 | 
			
		||||
 | 
			
		||||
`define PrintHPMCounters 0
 | 
			
		||||
`define PrintHPMCounters 1
 | 
			
		||||
`define BPRED_LOGGER 0
 | 
			
		||||
`define I_CACHE_ADDR_LOGGER 0
 | 
			
		||||
`define D_CACHE_ADDR_LOGGER 0
 | 
			
		||||
@ -536,6 +536,21 @@ module testbench;
 | 
			
		||||
  if (`BPRED_SUPPORTED) begin
 | 
			
		||||
    integer adrindex;
 | 
			
		||||
 | 
			
		||||
    // local history only
 | 
			
		||||
    if (`BPRED_TYPE == "BP_LOCAL_AHEAD") begin
 | 
			
		||||
      always @(*) begin
 | 
			
		||||
        if(reset) begin
 | 
			
		||||
          for(adrindex = 0; adrindex < 2**`BPRED_NUM_LHR; adrindex++) begin
 | 
			
		||||
            force dut.core.ifu.bpred.bpred.Predictor.DirPredictor.BHT.mem[adrindex] = 0;
 | 
			
		||||
          end
 | 
			
		||||
            #1;
 | 
			
		||||
          for(adrindex = 0; adrindex < 2**`BPRED_NUM_LHR; adrindex++) begin
 | 
			
		||||
            release dut.core.ifu.bpred.bpred.Predictor.DirPredictor.BHT.mem[adrindex];
 | 
			
		||||
          end
 | 
			
		||||
        end
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    always @(*) begin
 | 
			
		||||
      if(reset) begin
 | 
			
		||||
        for(adrindex = 0; adrindex < 2**`BTB_SIZE; adrindex++) begin
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user