From 89aa57e25e67a7671a25863f3eda037501098c61 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Feb 2023 00:17:37 -0600 Subject: [PATCH] Possibly much better branch predictor implemention. The complexity is significantly reduced. --- config/rv32gc/wally-config.vh | 4 +- config/rv64gc/wally-config.vh | 2 +- src/ifu/bpred/bpred.sv | 6 ++ src/ifu/bpred/gshareForward.sv | 108 +++++++++++++++++++++++++++++++++ testbench/testbench.sv | 4 +- testbench/tests.vh | 2 +- 6 files changed, 120 insertions(+), 6 deletions(-) create mode 100644 src/ifu/bpred/gshareForward.sv diff --git a/config/rv32gc/wally-config.vh b/config/rv32gc/wally-config.vh index 089a9ada4..c40aadbc1 100644 --- a/config/rv32gc/wally-config.vh +++ b/config/rv32gc/wally-config.vh @@ -132,8 +132,8 @@ `define PLIC_UART_ID 10 `define BPRED_SUPPORTED 1 -`define BPRED_TYPE "BPSPECULATIVEGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE -`define BPRED_SIZE 10 +`define BPRED_TYPE "GSHARE_FORWARD" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE +`define BPRED_SIZE 16 `define HPTW_WRITES_SUPPORTED 0 diff --git a/config/rv64gc/wally-config.vh b/config/rv64gc/wally-config.vh index c3dd87295..e02f683c7 100644 --- a/config/rv64gc/wally-config.vh +++ b/config/rv64gc/wally-config.vh @@ -135,7 +135,7 @@ `define PLIC_UART_ID 10 `define BPRED_SUPPORTED 1 -`define BPRED_TYPE "BPSPECULATIVEGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE or BPSPECULATIVEGLOBAL or BPSPECULATIVEGSHARE or BPOLDGSHARE or BPOLDGSHARE2 +`define BPRED_TYPE "GSHARE_FORWARD" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE or BPSPECULATIVEGLOBAL or BPSPECULATIVEGSHARE or BPOLDGSHARE or BPOLDGSHARE2 `define BPRED_SIZE 10 `define HPTW_WRITES_SUPPORTED 0 diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 115db895c..c7a8d196e 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -119,6 +119,12 @@ module bpred ( .PCNextF, .PCF, .PCD, .PCE, .DirPredictionF, .DirPredictionWrongE, .PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE); + end else if (`BPRED_TYPE == "GSHARE_FORWARD") begin:Predictor + gshareForward #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, + .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, + .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), + .PCSrcE); + end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor // *** Fix me /* -----\/----- EXCLUDED -----\/----- diff --git a/src/ifu/bpred/gshareForward.sv b/src/ifu/bpred/gshareForward.sv new file mode 100644 index 000000000..77ab90fba --- /dev/null +++ b/src/ifu/bpred/gshareForward.sv @@ -0,0 +1,108 @@ +/////////////////////////////////////////// +// globalHistoryPredictor.sv +// +// Written: Shreya Sanghai +// Email: ssanghai@hmc.edu +// Created: March 16, 2021 +// Modified: +// +// Purpose: Global History Branch predictor with parameterized global history register +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module gshareForward #(parameter k = 10) ( + input logic clk, + input logic reset, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, + output logic [1:0] DirPredictionF, + output logic DirPredictionWrongE, + // update + input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, + input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, PCSrcE +); + + logic MatchF, MatchD, MatchE, MatchM; + logic MatchNextX, MatchXF; + + logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE, ForwardNewDirPrediction, ForwardDirPredictionF; + logic [1:0] NewDirPredictionE, NewDirPredictionM; + + + logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM; + + logic [k-1:0] GHRF, GHRD, GHRE, GHRM, GHR; + logic [k-1:0] GHRNext, GHRNextF; + logic PCSrcM; + + assign IndexNextF = GHRNextF ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; + + assign IndexF = GHRF ^ {PCF[k+1] ^ PCF[1], PCF[k:2]}; + assign IndexD = GHRD ^ {PCD[k+1] ^ PCD[1], PCD[k:2]}; + assign IndexE = GHRE ^ {PCE[k+1] ^ PCE[1], PCE[k:2]}; + assign IndexM = GHRM ^ {PCM[k+1] ^ PCM[1], PCM[k:2]}; + + assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF); + assign MatchD = BranchInstrD & ~FlushE & (IndexNextF == IndexD); + assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE); + assign MatchM = BranchInstrM & ~FlushW & (IndexNextF == IndexM); + assign MatchNextX = MatchF | MatchD | MatchE | MatchM; + + flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); + + assign ForwardNewDirPrediction = MatchF ? {2{DirPredictionF[1]}} : + MatchD ? {2{DirPredictionD[1]}} : + MatchE ? {NewDirPredictionE} : + NewDirPredictionM ; + + flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF); + + assign DirPredictionF = MatchXF ? ForwardDirPredictionF : TableDirPredictionF; + + ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), + .ce1(~StallF), .ce2(~StallM & ~FlushM), + .ra1(IndexNextF), + .rd1(TableDirPredictionF), + .wa2(IndexM), + .wd2(NewDirPredictionM), + .we2(BranchInstrM & ~StallW & ~FlushW), + .bwe2(1'b1)); + + flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); + flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); + + satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); + flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); + + assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + + assign GHRNextF = BranchInstrF ? {DirPredictionF[1], GHRF[k-1:1]} : GHRF; + assign GHRF = BranchInstrD ? {DirPredictionD[1], GHRD[k-1:1]} : GHRD; + assign GHRD = BranchInstrE ? {PCSrcE, GHRE[k-1:1]} : GHRE; + assign GHRE = BranchInstrM ? {PCSrcM, GHRM[k-1:1]} : GHRM; + + assign GHRNext = BranchInstrM ? {PCSrcM, GHR[k-1:1]} : GHR; + assign GHRM = GHR; + + flopenr #(k) GHRReg(clk, reset, ~StallM & ~FlushM & BranchInstrM, GHRNext, GHR); + flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); + +endmodule diff --git a/testbench/testbench.sv b/testbench/testbench.sv index 329d4c605..a85ffd65b 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -28,8 +28,8 @@ `include "wally-config.vh" `include "tests.vh" -`define PrintHPMCounters 0 -`define BPRED_LOGGER 0 +`define PrintHPMCounters 1 +`define BPRED_LOGGER 1 module testbench; parameter DEBUG=0; diff --git a/testbench/tests.vh b/testbench/tests.vh index ec6f04f43..6ab4533fa 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -51,6 +51,7 @@ string tvpaths[] = '{ string embench[] = '{ `EMBENCH, "bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches + "bd_speedopt_speed/src/nsichneu/nsichneu", "bd_speedopt_speed/src/aha-mont64/aha-mont64", "bd_speedopt_speed/src/crc32/crc32", "bd_speedopt_speed/src/edn/edn", @@ -61,7 +62,6 @@ string tvpaths[] = '{ "bd_speedopt_speed/src/nettle-aes/nettle-aes", "bd_speedopt_speed/src/nettle-sha256/nettle-sha256", "bd_speedopt_speed/src/nbody/nbody", - "bd_speedopt_speed/src/nsichneu/nsichneu", "bd_speedopt_speed/src/picojpeg/picojpeg", // "bd_speedopt_speed/src/primecount/primecount", "bd_speedopt_speed/src/qrduino/qrduino",