I think ahead pipelining is working for local history.

This commit is contained in:
Ross Thompson 2023-05-03 12:52:32 -05:00
parent 414c79b923
commit 8b0791b6b5
6 changed files with 166 additions and 19 deletions

View File

@ -279,12 +279,13 @@ if(sys.argv[1] == '-b'):
dct[PredType] = (currSize, currPercent)
print(dct)
fig, axes = plt.subplots()
marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*'}
colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue'}
marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*', 'tenlocal' : '.', 'eightlocal' : ',', 'fourlocal' : 'x'}
colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue', 'tenlocal' : 'lightblue', 'eightlocal' : 'lightblue', 'fourlocal' : 'lightblue'}
for cat in dct:
(x, y) = dct[cat]
x=[int(2**int(v)) for v in x]
print(x, y)
#print(x, y)
print(cat)
axes.plot(x,y, color=colors[cat])
axes.scatter(x,y, label=cat, marker=marker[cat], color=colors[cat])
#plt.scatter(x, y, label=cat)

View File

@ -46,18 +46,33 @@ configs = [
)
]
# bpdSize = [6, 8, 10, 12, 14, 16]
# bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic', 'local_basic']
# for CurrBPType in bpdType:
# for CurrBPSize in bpdSize:
# name = CurrBPType+str(CurrBPSize)
# configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize)
# tc = TestCase(
# name=name,
# variant="rv32gc",
# cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
# grepstr="")
# configs.append(tc)
bpdSize = [6, 8, 10, 12, 14, 16]
bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic']
LHRSize = [4, 8, 10]
bpdType = ['local_basic']
for CurrBPType in bpdType:
for CurrBPSize in bpdSize:
name = CurrBPType+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize)
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
for CurrLHRSize in LHRSize:
name = str(CurrLHRSize)+CurrBPType+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize) + " +define+BPRED_NUM_LHR=" + str(CurrLHRSize) + " "
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
import os
from multiprocessing import Pool, TimeoutError

View File

@ -128,8 +128,13 @@ module bpred (
.BranchE, .BranchM, .PCSrcE);
end else if (`BPRED_TYPE == "BP_LOCAL_BASIC") begin:Predictor
localbpbasic #(`BPRED_NUM_LHR, `BPRED_SIZE)
DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
localbpbasic #(`BPRED_NUM_LHR, `BPRED_SIZE) DirPredictor(.clk, .reset,
.StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
.BranchE, .BranchM, .PCSrcE);
end else if (`BPRED_TYPE == "BP_LOCAL_AHEAD") begin:Predictor
localaheadbp #(`BPRED_NUM_LHR, `BPRED_SIZE) DirPredictor(.clk, .reset,
.StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
.BranchE, .BranchM, .PCSrcE);
end

View File

@ -0,0 +1,111 @@
///////////////////////////////////////////
// localaheadbp.sv
//
// Written: Ross Thompson
// Email: ross1728@gmail.com
// Created: 16 March 2021
// Adapted from ssanghai@hmc.edu (Shreya Sanghai) global history predictor implementation.
// Modified: 20 February 2023
//
// Purpose: Local history branch predictor (ahead-pipelined) with parameterized local history registers
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// localaheadbp: local-history branch direction predictor.
// Structure visible in this module:
//   * BHT - a ram2p1r1wbe instance parameterized #(2**m, k); it is read with an m-bit hash of
//           PCNextF and returns a k-bit local history register (LHR) value.
//   * PHT - a ram2p1r1wbe instance parameterized #(2**k, 2); it is read with the k-bit LHR
//           coming out of BHT and returns the 2-bit prediction BPDirPredF.
// The prediction and the LHR are carried down the pipeline in flops so that the same PHT entry
// and the same history value can be updated once the branch outcome (PCSrcE) is known.
// NOTE(review): the ram2p1r1wbe parameters are presumably (depth, width) and its read timing is
// not visible here - confirm against the RAM model before relying on stage names.
module localaheadbp #(parameter m = 6, // 2^m = number of local history branches
parameter k = 10) ( // number of past branches stored
input logic clk,
input logic reset,
// per-stage pipeline stall and flush controls
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushD, FlushE, FlushM, FlushW,
// 2-bit prediction read from the PHT; bit [1] is compared against the branch outcome below
output logic [1:0] BPDirPredF,
// asserted when a branch in E resolved opposite to the pipelined prediction
output logic BPDirPredWrongE,
// update
input logic [`XLEN-1:0] PCNextF, PCM,
input logic BranchE, BranchM, PCSrcE
);
// NOTE(review): IndexNextF, LHRArray and UpdateM are declared but are neither driven nor read
// by any active statement in this module (they appear only in commented-out code or not at all).
logic [k-1:0] IndexNextF, IndexM;
logic [1:0] BPDirPredD, BPDirPredE;
logic [1:0] NewBPDirPredE, NewBPDirPredM;
logic [k-1:0] LHRF, LHRD, LHRE, LHRM, LHRNextF;
logic [k-1:0] LHRNextW;
logic PCSrcM;
logic [2**m-1:0][k-1:0] LHRArray;
logic [m-1:0] IndexLHRNextF, IndexLHRM;
logic [`XLEN-1:0] PCW;
logic UpdateM;
//assign IndexNextF = LHR;
// PHT write address: the history value that was pipelined to M alongside the branch.
assign IndexM = LHRM;
// Pattern history table. Read port is addressed by the LHR read out of BHT (LHRNextF);
// write port stores the updated counter at IndexM whenever BranchM is set and W is neither
// stalled nor flushed.
ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
.ce1(~StallF), .ce2(~StallW & ~FlushW),
.ra1(LHRNextF),
.rd1(BPDirPredF),
.wa2(IndexM),
.wd2(NewBPDirPredM),
.we2(BranchM),
.bwe2(1'b1));
// Carry the prediction from F to E so it can be compared with the resolved direction.
// (flopenrc ports are positional; presumably clk, reset, clear, enable, d, q - confirm.)
flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD);
flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE);
// Compute the new 2-bit counter state from the old prediction and the resolved direction,
// then register it into M where it becomes the PHT write data.
satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE));
flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM);
// != binds tighter than &, so this is (PCSrcE != BPDirPredE[1]) & BranchE.
assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE;
// This is the main difference between global and local history basic implementations. In global,
// the GHR wraps back into itself directly without
// being pipelined, i.e., GHR is not read in F and then pipelined to M where it is updated. Instead
// GHR is both read and updated in M. GHR is still pipelined so that the PHT is updated with the correct
// GHR. Local history in contrast must pipeline the specific history register read during F and then update
// that same one in M. This implementation does not forward if a branch matches in the D, E, or M stages.
// On a branch, shift the resolved direction into the MSB and drop the oldest bit (LSB);
// otherwise write back the history unchanged.
assign LHRNextW = BranchM ? {PCSrcM, LHRM[k-1:1]} : LHRM;
// this is local history
//genvar index;
//assign UpdateM = BranchM & ~StallW & ~FlushW;
// m-bit BHT index: PC bits [m:2] with the top index bit formed as PC[m+1] ^ PC[1].
// NOTE(review): the write index is derived from PCW (PCM delayed by PCWReg below), while the
// write data (LHRNextW) and write enable (BranchM) come from M-stage signals - confirm this
// stage pairing is intended.
assign IndexLHRM = {PCW[m+1] ^ PCW[1], PCW[m:2]};
assign IndexLHRNextF = {PCNextF[m+1] ^ PCNextF[1], PCNextF[m:2]};
// Branch history table: one k-bit local history register per index. Read with the hash of
// PCNextF; written with the updated history under the same enable conditions as the PHT.
ram2p1r1wbe #(2**m, k) BHT(.clk(clk),
.ce1(~StallF), .ce2(~StallW & ~FlushW),
.ra1(IndexLHRNextF),
.rd1(LHRNextF),
.wa2(IndexLHRM),
.wd2(LHRNextW),
.we2(BranchM),
.bwe2('1));
// Resolved branch direction registered from E to M for the history update.
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
// Pipeline the history value read from BHT down to M, where it addresses the PHT write
// and forms the base of the updated history.
// NOTE(review): LHRFReg passes FlushD in the third position while being enabled by ~StallF;
// the later stages use their own stage's flush and stall - confirm this is intentional.
flopenrc #(k) LHRFReg(clk, reset, FlushD, ~StallF, LHRNextF, LHRF);
flopenrc #(k) LHRDReg(clk, reset, FlushD, ~StallD, LHRF, LHRD);
flopenrc #(k) LHREReg(clk, reset, FlushE, ~StallE, LHRD, LHRE);
flopenrc #(k) LHRMReg(clk, reset, FlushM, ~StallM, LHRE, LHRM);
// PCM delayed by one register (enabled by ~StallW); used only for the BHT write index above.
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
endmodule

View File

@ -74,10 +74,10 @@ module localbpbasic #(parameter m = 6, // 2^m = number of local history branches
assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE;
// this is the main difference between global and local history basic implementations rather than
// having multiple history registers. In global, the ghr wraps back into itself directly without
// being pipelined. IE. GHR is not read in F and then pipelined to M where it is updated. Instead
// GHR is just read in M and updated. GHR is still pipelined so that the PHT is updated with the correct
// This is the main difference between global and local history basic implementations. In global,
// the ghr wraps back into itself directly without
// being pipelined, i.e., GHR is not read in F and then pipelined to M where it is updated. Instead
// GHR is both read and updated in M. GHR is still pipelined so that the PHT is updated with the correct
// GHR. Local history in contrast must pipeline the specific history register read during F and then update
// that same one in M. This implementation does not forward if a branch matches in the D, E, or M stages.
assign LHRNextW = BranchM ? {PCSrcM, LHRM[k-1:1]} : LHRM;

View File

@ -28,7 +28,7 @@
`include "wally-config.vh"
`include "tests.vh"
`define PrintHPMCounters 0
`define PrintHPMCounters 1
`define BPRED_LOGGER 0
`define I_CACHE_ADDR_LOGGER 0
`define D_CACHE_ADDR_LOGGER 0
@ -536,6 +536,21 @@ module testbench;
if (`BPRED_SUPPORTED) begin
integer adrindex;
// local history only
if (`BPRED_TYPE == "BP_LOCAL_AHEAD") begin
always @(*) begin
if(reset) begin
for(adrindex = 0; adrindex < 2**`BPRED_NUM_LHR; adrindex++) begin
force dut.core.ifu.bpred.bpred.Predictor.DirPredictor.BHT.mem[adrindex] = 0;
end
#1;
for(adrindex = 0; adrindex < 2**`BPRED_NUM_LHR; adrindex++) begin
release dut.core.ifu.bpred.bpred.Predictor.DirPredictor.BHT.mem[adrindex];
end
end
end
end
always @(*) begin
if(reset) begin
for(adrindex = 0; adrindex < 2**`BTB_SIZE; adrindex++) begin