diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 108b0bb1e..d66425343 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -63,6 +63,8 @@ module controller( output logic RegWriteM, // Instruction writes a register (needed for Hazard unit) output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$ output logic InstrValidD, InstrValidE, InstrValidM, // Instruction is valid + output logic BranchD, BranchE, + output logic JumpD, output logic FWriteIntM, // FPU controller writes integer register file // Writeback stage control signals @@ -85,8 +87,6 @@ module controller( logic RegWriteD, RegWriteE; // RegWrite (register will be written) logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; // Select which result to write back to register file logic [1:0] MemRWD, MemRWE; // Store (write to memory) - logic JumpD; // Jump instruction - logic BranchD, BranchE; // Branch instruction logic ALUOpD; // 0 for address generation, 1 for all other operations (must use Funct3) logic [2:0] ALUControlD; // Determines ALU operation logic ALUSrcAD, ALUSrcBD; // ALU inputs diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index 9df95040d..9d3a833e9 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -55,6 +55,8 @@ module ieu ( input logic [`XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp) output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$ output logic InstrValidD, InstrValidE, InstrValidM,// Instruction is valid + output logic BranchD, BranchE, + output logic JumpD, JumpE, // Writeback stage signals input logic [`XLEN-1:0] FIntDivResultW, // Integer divide result from FPU fdivsqrt) input logic [`XLEN-1:0] CSRReadValW, // CSR read value, @@ -87,7 +89,6 @@ module ieu ( logic [1:0] ForwardAE, ForwardBE; // Select signals for forwarding multiplexers logic RegWriteM, RegWriteW; // Register will be written in Memory, Writeback stages logic MemReadE, CSRReadE; // Load, CSRRead instruction - logic JumpE; // Jump instruction logic BranchSignedE; // Branch does signed comparison on operands logic MDUE; // Multiply/divide instruction @@ -95,7 +96,7 @@ module ieu ( .clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD, .IllegalIEUInstrFaultD, .IllegalBaseInstrFaultD, .StallE, .FlushE, .FlagsE, .FWriteIntE, .PCSrcE, .ALUControlE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .MemReadE, .CSRReadE, - .Funct3E, .IntDivE, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM, + .Funct3E, .IntDivE, .MDUE, .W64E, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM, .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .StoreStallD); diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 0a841ae11..330607af4 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -34,7 +34,9 @@ module RASPredictor #(parameter int StackSize = 16 )( input logic reset, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, input logic [3:0] WrongPredInstrClassD, // Prediction class is wrong - input logic [3:0] InstrClassD, InstrClassE, PredInstrClassF, // Instr class + input logic [3:0] InstrClassD, + input logic [3:0] InstrClassE, // Instr class + input logic [3:0] PredInstrClassF, input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a jal output logic [`XLEN-1:0] RASPCF // Top of the stack ); @@ -93,6 +95,3 @@ module RASPredictor #(parameter int StackSize = 16 )( endmodule - - - diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index c2ad9ac91..f50232527 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -52,6 +52,8 @@ module bpred ( // Branch and jump outcome input logic InstrValidD, InstrValidE, + input logic BranchD, BranchE, + input logic JumpD, JumpE, input logic PCSrcE, // Executation stage branch is taken input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) @@ -70,11 +72,12 @@ module bpred ( logic PredValidF; logic [1:0] DirPredictionF; - logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD; + logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD, PredInstrClassE; logic [`XLEN-1:0] PredPCF, RASPCF; logic PredictionPCWrongE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; - logic [3:0] InstrClassF, InstrClassD, InstrClassE, InstrClassW; + logic [3:0] InstrClassD; + logic [3:0] InstrClassE; logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE; logic SelBPPredF; @@ -83,9 +86,9 @@ module bpred ( logic [`XLEN-1:0] PCCorrectE; logic [3:0] WrongPredInstrClassD; - logic BTBTargetWrongE; - logic RASTargetWrongE; - logic JumpOrTakenBranchE; + logic BTBTargetWrongE; + logic RASTargetWrongE; + logic JumpOrTakenBranchE; logic [`XLEN-1:0] PredPCD, PredPCE, RASPCD, RASPCE; @@ -104,17 +107,17 @@ module bpred ( end else if (`BPRED_TYPE == "BPSPECULATIVEGLOBAL") begin:Predictor speculativeglobalhistory #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .DirPredictionF, .DirPredictionWrongE, - .PredInstrClassF, .InstrClassD, .InstrClassE, .WrongPredInstrClassD, .PCSrcE); + .PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE); end else if (`BPRED_TYPE == "BPGSHARE") begin:Predictor - gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .PCNextF, .PCE, .DirPredictionF, .DirPredictionWrongE, + gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, + .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); end else if (`BPRED_TYPE == "BPSPECULATIVEGSHARE") begin:Predictor speculativegshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .DirPredictionF, .DirPredictionWrongE, - .PredInstrClassF, .InstrClassD, .InstrClassE, .WrongPredInstrClassD, .PCSrcE); + .PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE); end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor // *** Fix me @@ -148,7 +151,8 @@ module bpred ( if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode logic [4:0] CompressedOpcF; logic [3:0] InstrClassF; - logic cjal, cj, cjr, cjalr; + logic cjal, cj, cjr, cjalr, CJumpF, CBranchF; + logic JumpF, BranchF; assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; @@ -156,30 +160,27 @@ module bpred ( assign cj = CompressedOpcF == 5'h0d; assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + assign CJumpF = cjal | cj | cjr | cjalr; + assign CBranchF = CompressedOpcF[4:1] == 4'h7; + + assign JumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; + assign BranchF = PostSpillInstrRawF[6:0] == 7'h63; - assign InstrClassF[0] = PostSpillInstrRawF[6:0] == 7'h63 | - (`C_SUPPORTED & CompressedOpcF[4:1] == 4'h7); - - assign InstrClassF[1] = (PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) != 5'h01 & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump register, but not return - (PostSpillInstrRawF[6:0] == 7'h6F & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump, RD != x1 or x5 - (`C_SUPPORTED & (cj | (cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01)) )); - - assign InstrClassF[2] = PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01 | // return must return to ra or r5 + assign InstrClassF[0] = BranchF | (`C_SUPPORTED & CBranchF); + assign InstrClassF[1] = JumpF | (`C_SUPPORTED & (cjal | cj | cj | cjalr)); + assign InstrClassF[2] = (JumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or r5 (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); - assign InstrClassF[3] = ((PostSpillInstrRawF[6:0] & 7'h77) == 7'h67 & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 + assign InstrClassF[3] = (JumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); + assign PredInstrClassF = InstrClassF; assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | - PredInstrClassF[2] | - PredInstrClassF[1] | - PredInstrClassF[3]; + PredInstrClassF[1]; end else begin assign PredInstrClassF = BTBPredInstrClassF; assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) | - PredInstrClassF[2] | - (PredInstrClassF[1] & PredValidF) | - (PredInstrClassF[3] & PredValidF); + PredInstrClassF[1] & PredValidF; end // Part 3 RAS @@ -189,15 +190,13 @@ module bpred ( assign BPPredPCF = PredInstrClassF[2] ? RASPCF : PredPCF; - assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 - assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 - assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return - (InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5 - assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch + assign InstrClassD[0] = BranchD; + assign InstrClassD[1] = JumpD ; + assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 + assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); - flopenrc #(4) InstrClassRegW(clk, reset, FlushW, ~StallW, InstrClassM, InstrClassW); flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); // branch predictor @@ -208,6 +207,7 @@ module bpred ( // pipeline the class flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); + flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE); // Check the prediction // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. @@ -218,11 +218,12 @@ module bpred ( assign PredictionPCWrongE = PCCorrectE != PCD; // branch class prediction wrong. - assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD; + assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD[3:0]; assign AnyWrongPredInstrClassD = |WrongPredInstrClassD; // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. - assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; + assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE | (AnyWrongPredInstrClassE & ~|InstrClassE)); + //assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; // this does not work for cubic benchmark // Output the predicted PC or corrected PC on miss-predict. // Selects the BP or PC+2/4. @@ -247,10 +248,10 @@ module bpred ( // could be wrong or the fall through address selected for branch predict not taken. // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of // both without the above inaccuracies. - assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] | InstrClassE[3]) & PCSrcE; + assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]) & PCSrcE; assign RASPredPCWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE; - assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1] | InstrClassE[3]; + assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1]; flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index c538636df..7893ace5f 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -53,7 +53,6 @@ module btb #(parameter int Depth = 10 ) ( logic [`XLEN+4:0] ForwardBTBPrediction, ForwardBTBPredictionF; logic [`XLEN+3:0] TableBTBPredictionF; logic [`XLEN-1:0] PredPCD; - logic [3:0] PredInstrClassD; // *** copy of reg outside module logic UpdateEn; logic TablePredValidF, PredValidD; @@ -96,8 +95,6 @@ module btb #(parameter int Depth = 10 ) ( if(~StallF | reset) TablePredValidF = ValidBits[PCNextFIndex]; end - //assign PredValidF = MatchXF ? 1'b1 : TablePredValidF; - assign UpdateEn = |InstrClassE | AnyWrongPredInstrClassE; // An optimization may be using a PC relative address. diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index fde1a0825..35a73fb04 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -31,12 +31,12 @@ module gshare #(parameter k = 10) ( input logic clk, input logic reset, - input logic StallF, StallD, StallE, StallM, - input logic FlushD, FlushE, FlushM, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, // update - input logic [`XLEN-1:0] PCNextF, PCE, + input logic [`XLEN-1:0] PCNextF, PCM, input logic BranchInstrE, BranchInstrM, PCSrcE ); @@ -44,20 +44,20 @@ module gshare #(parameter k = 10) ( logic [1:0] DirPredictionD, DirPredictionE; logic [1:0] NewDirPredictionE, NewDirPredictionM; - logic [k-1:0] GHRF, GHRD, GHRE, GHR; + logic [k-1:0] GHRF, GHRD, GHRE, GHRM, GHR; logic [k-1:0] GHRNext; logic PCSrcM; - assign IndexNextF = GHR & {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; - assign IndexE = GHRE & {PCE[k+1] ^ PCE[1], PCE[k:2]}; + assign IndexNextF = GHR ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; + assign IndexE = GHRM ^ {PCM[k+1] ^ PCM[1], PCM[k:2]}; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallM & ~FlushM), .ra1(IndexNextF), .rd1(DirPredictionF), .wa2(IndexE), - .wd2(NewDirPredictionE), - .we2(BranchInstrE & ~StallM & ~FlushM), + .wd2(NewDirPredictionM), + .we2(BranchInstrM & ~StallW & ~FlushW), .bwe2(1'b1)); flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); @@ -75,6 +75,7 @@ module gshare #(parameter k = 10) ( flopenrc #(k) GHRFReg(clk, reset, FlushD, ~StallF, GHR, GHRF); flopenrc #(k) GHRDReg(clk, reset, FlushD, ~StallD, GHRF, GHRD); flopenrc #(k) GHREReg(clk, reset, FlushE, ~StallE, GHRD, GHRE); + flopenrc #(k) GHRMReg(clk, reset, FlushM, ~StallM, GHRE, GHRM); endmodule diff --git a/src/ifu/bpred/speculativeglobalhistory.sv b/src/ifu/bpred/speculativeglobalhistory.sv index 51dbb422b..8ebda61c5 100644 --- a/src/ifu/bpred/speculativeglobalhistory.sv +++ b/src/ifu/bpred/speculativeglobalhistory.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// speculativeglobalhistory.sv +// gsharePredictor.sv // // Written: Shreya Sanghai // Email: ssanghai@hmc.edu @@ -36,7 +36,8 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, // update - input logic [3:0] PredInstrClassF, InstrClassD, InstrClassE, + input logic [3:0] PredInstrClassF, + input logic [3:0] InstrClassD, InstrClassE, InstrClassM, input logic [3:0] WrongPredInstrClassD, input logic PCSrcE ); @@ -47,9 +48,9 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; logic [1:0] NewDirPredictionE; - logic [k-1:0] GHRF, GHRD, GHRE; + logic [k-1:0] GHRF, GHRD, GHRE, GHRM; logic GHRLastF; - logic [k-1:0] GHRNextF, GHRNextD, GHRNextE; + logic [k-1:0] GHRNextF, GHRNextD, GHRNextE, GHRNextM; logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE; logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF; @@ -57,8 +58,8 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( assign IndexNextF = GHRNextF; assign IndexF = GHRF; - assign IndexD = GHRD[k-1:0]; - assign IndexE = GHRE[k-1:0]; + assign IndexD = GHRD; + assign IndexE = GHRE; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF | reset), .ce2(~StallM & ~FlushM), @@ -111,15 +112,18 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( // If it is wrong and branch does exist then shift right and insert the prediction. // If the branch does not exist then shift left and use GHRLastF to restore the LSB. logic [k-1:0] GHRClassWrong; - mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, InstrClassD[0], GHRClassWrong); + mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, ~InstrClassD[0], GHRClassWrong); // As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE. mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD); flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD); - mux3 #(k) GHREMux(GHRD, GHRE, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); + mux3 #(k) GHREMux(GHRD, GHRNextM, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); - flopenr #(k) GHREReg(clk, reset, ((InstrClassE[0] & ~FlushM) & ~StallE) | FlushE, GHRNextE, GHRE); + flopenr #(k) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, GHRE); + + assign GHRNextM = FlushM ? GHRM : GHRE; + flopenr #(k) GHRMReg(clk, reset, (InstrClassM[0] & ~StallM) | FlushM, GHRNextM, GHRM); assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0]; diff --git a/src/ifu/bpred/speculativegshare.sv b/src/ifu/bpred/speculativegshare.sv index 1eb888a90..1dd7d4cdb 100644 --- a/src/ifu/bpred/speculativegshare.sv +++ b/src/ifu/bpred/speculativegshare.sv @@ -37,7 +37,8 @@ module speculativegshare #(parameter int k = 10 ) ( output logic DirPredictionWrongE, // update input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, - input logic [3:0] PredInstrClassF, InstrClassD, InstrClassE, + input logic [3:0] PredInstrClassF, + input logic [3:0] InstrClassD, InstrClassE, InstrClassM, input logic [3:0] WrongPredInstrClassD, input logic PCSrcE ); @@ -48,9 +49,9 @@ module speculativegshare #(parameter int k = 10 ) ( logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; logic [1:0] NewDirPredictionE; - logic [k-1:0] GHRF, GHRD, GHRE; + logic [k-1:0] GHRF, GHRD, GHRE, GHRM; logic GHRLastF; - logic [k-1:0] GHRNextF, GHRNextD, GHRNextE; + logic [k-1:0] GHRNextF, GHRNextD, GHRNextE, GHRNextM; logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE; logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF; @@ -112,15 +113,18 @@ module speculativegshare #(parameter int k = 10 ) ( // If it is wrong and branch does exist then shift right and insert the prediction. // If the branch does not exist then shift left and use GHRLastF to restore the LSB. logic [k-1:0] GHRClassWrong; - mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, InstrClassD[0], GHRClassWrong); + mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, ~InstrClassD[0], GHRClassWrong); // As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE. mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD); flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD); - mux3 #(k) GHREMux(GHRD, GHRE, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); + mux3 #(k) GHREMux(GHRD, GHRNextM, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); - flopenr #(k) GHREReg(clk, reset, ((InstrClassE[0] & ~FlushM) & ~StallE) | FlushE, GHRNextE, GHRE); + flopenr #(k) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, GHRE); + + assign GHRNextM = FlushM ? GHRM : GHRE; + flopenr #(k) GHRMReg(clk, reset, (InstrClassM[0] & ~StallM) | FlushM, GHRNextM, GHRM); assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0]; diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 68350bac5..51317e0b0 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -36,6 +36,8 @@ module ifu ( input logic InvalidateICacheM, // Clears all instruction cache valid bits input logic CSRWriteFenceM, // CSR write or fence instruction, PCNextF = the next valid PC (typically PCE) input logic InstrValidD, InstrValidE, InstrValidM, + input logic BranchD, BranchE, + input logic JumpD, JumpE, // Bus interface output logic [`PA_BITS-1:0] IFUHADDR, // Bus address from IFU to EBU input logic [`XLEN-1:0] HRDATA, // Bus read data from IFU to EBU @@ -323,7 +325,8 @@ module ifu ( if (`BPRED_SUPPORTED) begin : bpred bpred bpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, - .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, + .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, + .BranchD, .BranchE, .JumpD, .JumpE, .InstrD, .PCNextF, .PCPlus2or4F, .PCNext1F, .PCE, .PCM, .PCSrcE, .IEUAdrE, .PCF, .NextValidPCE, .PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPPredWrongM, .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM); diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 3a57b9aed..a03caea15 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -162,11 +162,13 @@ module wallypipelinedcore ( logic FCvtIntE; logic CommittedF; logic JumpOrTakenBranchM; + logic BranchD, BranchE, JumpD, JumpE; // instruction fetch unit: PC, branch prediction, instruction cache ifu ifu(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidM, .InstrValidE, .InstrValidD, + .BranchD, .BranchE, .JumpD, .JumpE, // Fetch .HRDATA, .PCFSpill, .IFUHADDR, .PCNext2F, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE, @@ -200,6 +202,7 @@ module wallypipelinedcore ( .Funct3M, // size and signedness to LSU .SrcAM, // to privilege and fpu .RdE, .RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM, + .BranchD, .BranchE, .JumpD, .JumpE, // Writeback stage .CSRReadValW, .MDUResultW, .FIntDivResultW, .RdW, .ReadDataW(ReadDataW[`XLEN-1:0]), .InstrValidM, .InstrValidE, .InstrValidD, .FCvtIntResW, .FCvtIntW, diff --git a/testbench/tests.vh b/testbench/tests.vh index 1e1065cf1..ec6f04f43 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -50,9 +50,9 @@ string tvpaths[] = '{ string embench[] = '{ `EMBENCH, + "bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches "bd_speedopt_speed/src/aha-mont64/aha-mont64", "bd_speedopt_speed/src/crc32/crc32", - "bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches "bd_speedopt_speed/src/edn/edn", "bd_speedopt_speed/src/huffbench/huffbench", "bd_speedopt_speed/src/matmult-int/matmult-int",