diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 40fb5bb15..330607af4 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -33,10 +33,10 @@ module RASPredictor #(parameter int StackSize = 16 )( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, - input logic [2:0] WrongPredInstrClassD, // Prediction class is wrong + input logic [3:0] WrongPredInstrClassD, // Prediction class is wrong input logic [3:0] InstrClassD, input logic [3:0] InstrClassE, // Instr class - input logic [2:0] PredInstrClassF, + input logic [3:0] PredInstrClassF, input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a jal output logic [`XLEN-1:0] RASPCF // Top of the stack ); @@ -95,6 +95,3 @@ module RASPredictor #(parameter int StackSize = 16 )( endmodule - - - diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 8a1eb801b..f50232527 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -72,11 +72,10 @@ module bpred ( logic PredValidF; logic [1:0] DirPredictionF; - logic [2:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD; + logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD, PredInstrClassE; logic [`XLEN-1:0] PredPCF, RASPCF; logic PredictionPCWrongE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; - logic [2:0] InstrClassF; logic [3:0] InstrClassD; logic [3:0] InstrClassE; logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE; @@ -85,7 +84,7 @@ module bpred ( logic [`XLEN-1:0] BPPredPCF; logic [`XLEN-1:0] PCNext0F; logic [`XLEN-1:0] PCCorrectE; - logic [2:0] WrongPredInstrClassD; + logic [3:0] WrongPredInstrClassD; logic BTBTargetWrongE; logic RASTargetWrongE; @@ -108,17 +107,17 @@ module bpred ( end else if (`BPRED_TYPE == "BPSPECULATIVEGLOBAL") begin:Predictor speculativeglobalhistory #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, 
.DirPredictionF, .DirPredictionWrongE, - .PredInstrClassF, .InstrClassD, .InstrClassE, .WrongPredInstrClassD, .PCSrcE); + .PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE); end else if (`BPRED_TYPE == "BPGSHARE") begin:Predictor - gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .PCNextF, .PCE, .DirPredictionF, .DirPredictionWrongE, + gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, + .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); end else if (`BPRED_TYPE == "BPSPECULATIVEGSHARE") begin:Predictor speculativegshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .DirPredictionF, .DirPredictionWrongE, - .PredInstrClassF, .InstrClassD, .InstrClassE, .WrongPredInstrClassD, .PCSrcE); + .PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE); end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor // *** Fix me @@ -151,7 +150,7 @@ module bpred ( // the branch predictor needs a compact decoding of the instruction class. 
if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode logic [4:0] CompressedOpcF; - logic [2:0] InstrClassF; + logic [3:0] InstrClassF; logic cjal, cj, cjr, cjalr, CJumpF, CBranchF; logic JumpF, BranchF; @@ -172,9 +171,8 @@ module bpred ( assign InstrClassF[2] = (JumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or r5 (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); - //assign InstrClassF[3] = (JumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 - // (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); - + assign InstrClassF[3] = (JumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 + (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); assign PredInstrClassF = InstrClassF; assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | @@ -196,8 +194,6 @@ module bpred ( assign InstrClassD[1] = JumpD ; assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 - - flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); @@ -209,8 +205,9 @@ module bpred ( {DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM}); // pipeline the class - flopenrc #(3) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); + flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); + flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE); // Check the prediction // if it is a CFI then check if the next 
instruction address (PCD) matches the branch's target or fallthrough address. @@ -221,11 +218,12 @@ module bpred ( assign PredictionPCWrongE = PCCorrectE != PCD; // branch class prediction wrong. - assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD[2:0]; + assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD[3:0]; assign AnyWrongPredInstrClassD = |WrongPredInstrClassD; // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. - assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; + assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE | (AnyWrongPredInstrClassE & ~|InstrClassE)); + //assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; // this does not work for cubic benchmark // Output the predicted PC or corrected PC on miss-predict. // Selects the BP or PC+2/4. @@ -250,7 +248,6 @@ module bpred ( // could be wrong or the fall through address selected for branch predict not taken. // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of // both without the above inaccuracies. 
- //assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] | InstrClassE[3]) & PCSrcE; assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]) & PCSrcE; assign RASPredPCWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE; diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 2100eb352..7893ace5f 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -36,7 +36,7 @@ module btb #(parameter int Depth = 10 ) ( input logic StallF, StallD, StallM, FlushD, FlushM, input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, // PC at various stages output logic [`XLEN-1:0] PredPCF, // BTB's guess at PC - output logic [2:0] BTBPredInstrClassF, // BTB's guess at instruction class + output logic [3:0] BTBPredInstrClassF, // BTB's guess at instruction class output logic PredValidF, // BTB's guess is valid // update input logic AnyWrongPredInstrClassE, // BTB's instruction class guess was wrong @@ -50,8 +50,8 @@ module btb #(parameter int Depth = 10 ) ( logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex; logic [`XLEN-1:0] ResetPC; logic MatchF, MatchD, MatchE, MatchNextX, MatchXF; - logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; - logic [`XLEN+2:0] TableBTBPredictionF; + logic [`XLEN+4:0] ForwardBTBPrediction, ForwardBTBPredictionF; + logic [`XLEN+3:0] TableBTBPredictionF; logic [`XLEN-1:0] PredPCD; logic UpdateEn; logic TablePredValidF, PredValidD; @@ -79,10 +79,10 @@ module btb #(parameter int Depth = 10 ) ( flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); assign ForwardBTBPrediction = MatchF ? {PredValidF, BTBPredInstrClassF, PredPCF} : - MatchD ? {PredValidD, InstrClassD[2:0], PredPCD} : - {1'b1, InstrClassE[2:0], IEUAdrE} ; + MatchD ? 
{PredValidD, InstrClassD, PredPCD} : + {1'b1, InstrClassE, IEUAdrE} ; - flopenr #(`XLEN+4) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF); + flopenr #(`XLEN+5) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF); assign {PredValidF, BTBPredInstrClassF, PredPCF} = MatchXF ? ForwardBTBPredictionF : {TablePredValidF, TableBTBPredictionF}; @@ -98,9 +98,9 @@ module btb #(parameter int Depth = 10 ) ( assign UpdateEn = |InstrClassE | AnyWrongPredInstrClassE; // An optimization may be using a PC relative address. - ram2p1r1wbe #(2**Depth, `XLEN+3) memory( + ram2p1r1wbe #(2**Depth, `XLEN+4) memory( .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredictionF), - .ce2(~StallM & ~FlushM), .wa2(PCEIndex), .wd2({InstrClassE[2:0], IEUAdrE}), .we2(UpdateEn), .bwe2('1)); + .ce2(~StallM & ~FlushM), .wa2(PCEIndex), .wd2({InstrClassE, IEUAdrE}), .we2(UpdateEn), .bwe2('1)); flopenrc #(`XLEN+1) BTBD(clk, reset, FlushD, ~StallD, {PredValidF, PredPCF}, {PredValidD, PredPCD}); diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index fde1a0825..35a73fb04 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -31,12 +31,12 @@ module gshare #(parameter k = 10) ( input logic clk, input logic reset, - input logic StallF, StallD, StallE, StallM, - input logic FlushD, FlushE, FlushM, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, // update - input logic [`XLEN-1:0] PCNextF, PCE, + input logic [`XLEN-1:0] PCNextF, PCM, input logic BranchInstrE, BranchInstrM, PCSrcE ); @@ -44,20 +44,20 @@ module gshare #(parameter k = 10) ( logic [1:0] DirPredictionD, DirPredictionE; logic [1:0] NewDirPredictionE, NewDirPredictionM; - logic [k-1:0] GHRF, GHRD, GHRE, GHR; + logic [k-1:0] GHRF, GHRD, GHRE, GHRM, GHR; logic [k-1:0] GHRNext; logic PCSrcM; - assign IndexNextF = 
GHR & {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; - assign IndexE = GHRE & {PCE[k+1] ^ PCE[1], PCE[k:2]}; + assign IndexNextF = GHR ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; + assign IndexE = GHRM ^ {PCM[k+1] ^ PCM[1], PCM[k:2]}; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallM & ~FlushM), .ra1(IndexNextF), .rd1(DirPredictionF), .wa2(IndexE), - .wd2(NewDirPredictionE), - .we2(BranchInstrE & ~StallM & ~FlushM), + .wd2(NewDirPredictionM), + .we2(BranchInstrM & ~StallW & ~FlushW), .bwe2(1'b1)); flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); @@ -75,6 +75,7 @@ module gshare #(parameter k = 10) ( flopenrc #(k) GHRFReg(clk, reset, FlushD, ~StallF, GHR, GHRF); flopenrc #(k) GHRDReg(clk, reset, FlushD, ~StallD, GHRF, GHRD); flopenrc #(k) GHREReg(clk, reset, FlushE, ~StallE, GHRD, GHRE); + flopenrc #(k) GHRMReg(clk, reset, FlushM, ~StallM, GHRE, GHRM); endmodule diff --git a/src/ifu/bpred/speculativeglobalhistory.sv b/src/ifu/bpred/speculativeglobalhistory.sv index 645ac99e6..8ebda61c5 100644 --- a/src/ifu/bpred/speculativeglobalhistory.sv +++ b/src/ifu/bpred/speculativeglobalhistory.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// speculativeglobalhistory.sv +// speculativeglobalhistory.sv // // Written: Shreya Sanghai // Email: ssanghai@hmc.edu @@ -36,8 +36,9 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, // update - input logic [3:0] InstrClassD, InstrClassE, - input logic [2:0] PredInstrClassF, WrongPredInstrClassD, + input logic [3:0] PredInstrClassF, + input logic [3:0] InstrClassD, InstrClassE, InstrClassM, + input logic [3:0] WrongPredInstrClassD, input logic PCSrcE ); @@ -47,9 +48,9 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; logic [1:0] NewDirPredictionE; - logic [k-1:0] GHRF, GHRD, GHRE; + logic [k-1:0] GHRF, GHRD, GHRE, GHRM; 
logic GHRLastF; - logic [k-1:0] GHRNextF, GHRNextD, GHRNextE; + logic [k-1:0] GHRNextF, GHRNextD, GHRNextE, GHRNextM; logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE; logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF; @@ -57,8 +58,8 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( assign IndexNextF = GHRNextF; assign IndexF = GHRF; - assign IndexD = GHRD[k-1:0]; - assign IndexE = GHRE[k-1:0]; + assign IndexD = GHRD; + assign IndexE = GHRE; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF | reset), .ce2(~StallM & ~FlushM), @@ -111,15 +112,18 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( // If it is wrong and branch does exist then shift right and insert the prediction. // If the branch does not exist then shift left and use GHRLastF to restore the LSB. logic [k-1:0] GHRClassWrong; - mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, InstrClassD[0], GHRClassWrong); + mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, ~InstrClassD[0], GHRClassWrong); // As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE. mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD); flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD); - mux3 #(k) GHREMux(GHRD, GHRE, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); + mux3 #(k) GHREMux(GHRD, GHRNextM, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); - flopenr #(k) GHREReg(clk, reset, ((InstrClassE[0] & ~FlushM) & ~StallE) | FlushE, GHRNextE, GHRE); + flopenr #(k) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, GHRE); + + assign GHRNextM = FlushM ? 
GHRM : GHRE; + flopenr #(k) GHRMReg(clk, reset, (InstrClassM[0] & ~StallM) | FlushM, GHRNextM, GHRM); assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0]; diff --git a/src/ifu/bpred/speculativegshare.sv b/src/ifu/bpred/speculativegshare.sv index 9d55dc874..1dd7d4cdb 100644 --- a/src/ifu/bpred/speculativegshare.sv +++ b/src/ifu/bpred/speculativegshare.sv @@ -37,9 +37,9 @@ module speculativegshare #(parameter int k = 10 ) ( output logic DirPredictionWrongE, // update input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, - input logic [2:0] PredInstrClassF, - input logic [3:0] InstrClassD, InstrClassE, - input logic [2:0] WrongPredInstrClassD, + input logic [3:0] PredInstrClassF, + input logic [3:0] InstrClassD, InstrClassE, InstrClassM, + input logic [3:0] WrongPredInstrClassD, input logic PCSrcE ); @@ -49,9 +49,9 @@ module speculativegshare #(parameter int k = 10 ) ( logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; logic [1:0] NewDirPredictionE; - logic [k-1:0] GHRF, GHRD, GHRE; + logic [k-1:0] GHRF, GHRD, GHRE, GHRM; logic GHRLastF; - logic [k-1:0] GHRNextF, GHRNextD, GHRNextE; + logic [k-1:0] GHRNextF, GHRNextD, GHRNextE, GHRNextM; logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE; logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF; @@ -113,15 +113,18 @@ module speculativegshare #(parameter int k = 10 ) ( // If it is wrong and branch does exist then shift right and insert the prediction. // If the branch does not exist then shift left and use GHRLastF to restore the LSB. logic [k-1:0] GHRClassWrong; - mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, InstrClassD[0], GHRClassWrong); + mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, ~InstrClassD[0], GHRClassWrong); // As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE. 
mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD); flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD); - mux3 #(k) GHREMux(GHRD, GHRE, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); + mux3 #(k) GHREMux(GHRD, GHRNextM, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); - flopenr #(k) GHREReg(clk, reset, ((InstrClassE[0] & ~FlushM) & ~StallE) | FlushE, GHRNextE, GHRE); + flopenr #(k) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, GHRE); + + assign GHRNextM = FlushM ? GHRM : GHRE; + flopenr #(k) GHRMReg(clk, reset, (InstrClassM[0] & ~StallM) | FlushM, GHRNextM, GHRM); assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0]; diff --git a/testbench/tests.vh b/testbench/tests.vh index 1e1065cf1..ec6f04f43 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -50,9 +50,9 @@ string tvpaths[] = '{ string embench[] = '{ `EMBENCH, + "bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to be removed when embench 2.0 launches "bd_speedopt_speed/src/aha-mont64/aha-mont64", "bd_speedopt_speed/src/crc32/crc32", - "bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches "bd_speedopt_speed/src/edn/edn", "bd_speedopt_speed/src/huffbench/huffbench", "bd_speedopt_speed/src/matmult-int/matmult-int",