Merge pull request #80 from ross144/main

Branch predictor acuracy fixes caused by last two weeks optimazations"
This commit is contained in:
David Harris 2023-02-15 09:39:26 -08:00 committed by GitHub
commit abf3fbbebf
11 changed files with 84 additions and 71 deletions

View File

@ -63,6 +63,8 @@ module controller(
output logic RegWriteM, // Instruction writes a register (needed for Hazard unit) output logic RegWriteM, // Instruction writes a register (needed for Hazard unit)
output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$ output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$
output logic InstrValidD, InstrValidE, InstrValidM, // Instruction is valid output logic InstrValidD, InstrValidE, InstrValidM, // Instruction is valid
output logic BranchD, BranchE,
output logic JumpD,
output logic FWriteIntM, // FPU controller writes integer register file output logic FWriteIntM, // FPU controller writes integer register file
// Writeback stage control signals // Writeback stage control signals
@ -85,8 +87,6 @@ module controller(
logic RegWriteD, RegWriteE; // RegWrite (register will be written) logic RegWriteD, RegWriteE; // RegWrite (register will be written)
logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; // Select which result to write back to register file logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; // Select which result to write back to register file
logic [1:0] MemRWD, MemRWE; // Store (write to memory) logic [1:0] MemRWD, MemRWE; // Store (write to memory)
logic JumpD; // Jump instruction
logic BranchD, BranchE; // Branch instruction
logic ALUOpD; // 0 for address generation, 1 for all other operations (must use Funct3) logic ALUOpD; // 0 for address generation, 1 for all other operations (must use Funct3)
logic [2:0] ALUControlD; // Determines ALU operation logic [2:0] ALUControlD; // Determines ALU operation
logic ALUSrcAD, ALUSrcBD; // ALU inputs logic ALUSrcAD, ALUSrcBD; // ALU inputs

View File

@ -55,6 +55,8 @@ module ieu (
input logic [`XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp) input logic [`XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp)
output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$ output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$
output logic InstrValidD, InstrValidE, InstrValidM,// Instruction is valid output logic InstrValidD, InstrValidE, InstrValidM,// Instruction is valid
output logic BranchD, BranchE,
output logic JumpD, JumpE,
// Writeback stage signals // Writeback stage signals
input logic [`XLEN-1:0] FIntDivResultW, // Integer divide result from FPU fdivsqrt) input logic [`XLEN-1:0] FIntDivResultW, // Integer divide result from FPU fdivsqrt)
input logic [`XLEN-1:0] CSRReadValW, // CSR read value, input logic [`XLEN-1:0] CSRReadValW, // CSR read value,
@ -87,7 +89,6 @@ module ieu (
logic [1:0] ForwardAE, ForwardBE; // Select signals for forwarding multiplexers logic [1:0] ForwardAE, ForwardBE; // Select signals for forwarding multiplexers
logic RegWriteM, RegWriteW; // Register will be written in Memory, Writeback stages logic RegWriteM, RegWriteW; // Register will be written in Memory, Writeback stages
logic MemReadE, CSRReadE; // Load, CSRRead instruction logic MemReadE, CSRReadE; // Load, CSRRead instruction
logic JumpE; // Jump instruction
logic BranchSignedE; // Branch does signed comparison on operands logic BranchSignedE; // Branch does signed comparison on operands
logic MDUE; // Multiply/divide instruction logic MDUE; // Multiply/divide instruction
@ -95,7 +96,7 @@ module ieu (
.clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD, .clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD,
.IllegalIEUInstrFaultD, .IllegalBaseInstrFaultD, .StallE, .FlushE, .FlagsE, .FWriteIntE, .IllegalIEUInstrFaultD, .IllegalBaseInstrFaultD, .StallE, .FlushE, .FlagsE, .FWriteIntE,
.PCSrcE, .ALUControlE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .MemReadE, .CSRReadE, .PCSrcE, .ALUControlE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .MemReadE, .CSRReadE,
.Funct3E, .IntDivE, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM, .Funct3E, .IntDivE, .MDUE, .W64E, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM,
.CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
.RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM, .RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM,
.StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .StoreStallD); .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .StoreStallD);

View File

@ -34,7 +34,9 @@ module RASPredictor #(parameter int StackSize = 16 )(
input logic reset, input logic reset,
input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM,
input logic [3:0] WrongPredInstrClassD, // Prediction class is wrong input logic [3:0] WrongPredInstrClassD, // Prediction class is wrong
input logic [3:0] InstrClassD, InstrClassE, PredInstrClassF, // Instr class input logic [3:0] InstrClassD,
input logic [3:0] InstrClassE, // Instr class
input logic [3:0] PredInstrClassF,
input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a jal input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a jal
output logic [`XLEN-1:0] RASPCF // Top of the stack output logic [`XLEN-1:0] RASPCF // Top of the stack
); );
@ -93,6 +95,3 @@ module RASPredictor #(parameter int StackSize = 16 )(
endmodule endmodule

View File

@ -52,6 +52,8 @@ module bpred (
// Branch and jump outcome // Branch and jump outcome
input logic InstrValidD, InstrValidE, input logic InstrValidD, InstrValidE,
input logic BranchD, BranchE,
input logic JumpD, JumpE,
input logic PCSrcE, // Executation stage branch is taken input logic PCSrcE, // Executation stage branch is taken
input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
@ -70,11 +72,12 @@ module bpred (
logic PredValidF; logic PredValidF;
logic [1:0] DirPredictionF; logic [1:0] DirPredictionF;
logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD; logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD, PredInstrClassE;
logic [`XLEN-1:0] PredPCF, RASPCF; logic [`XLEN-1:0] PredPCF, RASPCF;
logic PredictionPCWrongE; logic PredictionPCWrongE;
logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE;
logic [3:0] InstrClassF, InstrClassD, InstrClassE, InstrClassW; logic [3:0] InstrClassD;
logic [3:0] InstrClassE;
logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE; logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE;
logic SelBPPredF; logic SelBPPredF;
@ -83,9 +86,9 @@ module bpred (
logic [`XLEN-1:0] PCCorrectE; logic [`XLEN-1:0] PCCorrectE;
logic [3:0] WrongPredInstrClassD; logic [3:0] WrongPredInstrClassD;
logic BTBTargetWrongE; logic BTBTargetWrongE;
logic RASTargetWrongE; logic RASTargetWrongE;
logic JumpOrTakenBranchE; logic JumpOrTakenBranchE;
logic [`XLEN-1:0] PredPCD, PredPCE, RASPCD, RASPCE; logic [`XLEN-1:0] PredPCD, PredPCE, RASPCD, RASPCE;
@ -104,17 +107,17 @@ module bpred (
end else if (`BPRED_TYPE == "BPSPECULATIVEGLOBAL") begin:Predictor end else if (`BPRED_TYPE == "BPSPECULATIVEGLOBAL") begin:Predictor
speculativeglobalhistory #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, speculativeglobalhistory #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.DirPredictionF, .DirPredictionWrongE, .DirPredictionF, .DirPredictionWrongE,
.PredInstrClassF, .InstrClassD, .InstrClassE, .WrongPredInstrClassD, .PCSrcE); .PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE);
end else if (`BPRED_TYPE == "BPGSHARE") begin:Predictor end else if (`BPRED_TYPE == "BPGSHARE") begin:Predictor
gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCE, .DirPredictionF, .DirPredictionWrongE, .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE,
.BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE);
end else if (`BPRED_TYPE == "BPSPECULATIVEGSHARE") begin:Predictor end else if (`BPRED_TYPE == "BPSPECULATIVEGSHARE") begin:Predictor
speculativegshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, speculativegshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .DirPredictionF, .DirPredictionWrongE, .PCNextF, .PCF, .PCD, .PCE, .DirPredictionF, .DirPredictionWrongE,
.PredInstrClassF, .InstrClassD, .InstrClassE, .WrongPredInstrClassD, .PCSrcE); .PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE);
end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor
// *** Fix me // *** Fix me
@ -148,7 +151,8 @@ module bpred (
if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode
logic [4:0] CompressedOpcF; logic [4:0] CompressedOpcF;
logic [3:0] InstrClassF; logic [3:0] InstrClassF;
logic cjal, cj, cjr, cjalr; logic cjal, cj, cjr, cjalr, CJumpF, CBranchF;
logic JumpF, BranchF;
assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]};
@ -156,30 +160,27 @@ module bpred (
assign cj = CompressedOpcF == 5'h0d; assign cj = CompressedOpcF == 5'h0d;
assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
assign CJumpF = cjal | cj | cjr | cjalr;
assign CBranchF = CompressedOpcF[4:1] == 4'h7;
assign InstrClassF[0] = PostSpillInstrRawF[6:0] == 7'h63 | assign JumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F;
(`C_SUPPORTED & CompressedOpcF[4:1] == 4'h7); assign BranchF = PostSpillInstrRawF[6:0] == 7'h63;
assign InstrClassF[1] = (PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) != 5'h01 & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump register, but not return assign InstrClassF[0] = BranchF | (`C_SUPPORTED & CBranchF);
(PostSpillInstrRawF[6:0] == 7'h6F & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump, RD != x1 or x5 assign InstrClassF[1] = JumpF | (`C_SUPPORTED & (cjal | cj | cj | cjalr));
(`C_SUPPORTED & (cj | (cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01)) )); assign InstrClassF[2] = (JumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or r5
assign InstrClassF[2] = PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01 | // return must return to ra or r5
(`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
assign InstrClassF[3] = ((PostSpillInstrRawF[6:0] & 7'h77) == 7'h67 & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 assign InstrClassF[3] = (JumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5
(`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
assign PredInstrClassF = InstrClassF; assign PredInstrClassF = InstrClassF;
assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) |
PredInstrClassF[2] | PredInstrClassF[1];
PredInstrClassF[1] |
PredInstrClassF[3];
end else begin end else begin
assign PredInstrClassF = BTBPredInstrClassF; assign PredInstrClassF = BTBPredInstrClassF;
assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) | assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) |
PredInstrClassF[2] | PredInstrClassF[1] & PredValidF;
(PredInstrClassF[1] & PredValidF) |
(PredInstrClassF[3] & PredValidF);
end end
// Part 3 RAS // Part 3 RAS
@ -189,15 +190,13 @@ module bpred (
assign BPPredPCF = PredInstrClassF[2] ? RASPCF : PredPCF; assign BPPredPCF = PredInstrClassF[2] ? RASPCF : PredPCF;
assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 assign InstrClassD[0] = BranchD;
assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 assign InstrClassD[1] = JumpD ;
assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5
(InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5 assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE);
flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM);
flopenrc #(4) InstrClassRegW(clk, reset, FlushW, ~StallW, InstrClassM, InstrClassW);
flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM);
// branch predictor // branch predictor
@ -208,6 +207,7 @@ module bpred (
// pipeline the class // pipeline the class
flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD);
flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE);
flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE);
// Check the prediction // Check the prediction
// if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address.
@ -218,11 +218,12 @@ module bpred (
assign PredictionPCWrongE = PCCorrectE != PCD; assign PredictionPCWrongE = PCCorrectE != PCD;
// branch class prediction wrong. // branch class prediction wrong.
assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD; assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD[3:0];
assign AnyWrongPredInstrClassD = |WrongPredInstrClassD; assign AnyWrongPredInstrClassD = |WrongPredInstrClassD;
// branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions.
assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE | (AnyWrongPredInstrClassE & ~|InstrClassE));
//assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; // this does not work for cubic benchmark
// Output the predicted PC or corrected PC on miss-predict. // Output the predicted PC or corrected PC on miss-predict.
// Selects the BP or PC+2/4. // Selects the BP or PC+2/4.
@ -247,10 +248,10 @@ module bpred (
// could be wrong or the fall through address selected for branch predict not taken. // could be wrong or the fall through address selected for branch predict not taken.
// By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of
// both without the above inaccuracies. // both without the above inaccuracies.
assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] | InstrClassE[3]) & PCSrcE; assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]) & PCSrcE;
assign RASPredPCWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE; assign RASPredPCWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE;
assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1] | InstrClassE[3]; assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1];
flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM);

View File

@ -53,7 +53,6 @@ module btb #(parameter int Depth = 10 ) (
logic [`XLEN+4:0] ForwardBTBPrediction, ForwardBTBPredictionF; logic [`XLEN+4:0] ForwardBTBPrediction, ForwardBTBPredictionF;
logic [`XLEN+3:0] TableBTBPredictionF; logic [`XLEN+3:0] TableBTBPredictionF;
logic [`XLEN-1:0] PredPCD; logic [`XLEN-1:0] PredPCD;
logic [3:0] PredInstrClassD; // *** copy of reg outside module
logic UpdateEn; logic UpdateEn;
logic TablePredValidF, PredValidD; logic TablePredValidF, PredValidD;
@ -96,8 +95,6 @@ module btb #(parameter int Depth = 10 ) (
if(~StallF | reset) TablePredValidF = ValidBits[PCNextFIndex]; if(~StallF | reset) TablePredValidF = ValidBits[PCNextFIndex];
end end
//assign PredValidF = MatchXF ? 1'b1 : TablePredValidF;
assign UpdateEn = |InstrClassE | AnyWrongPredInstrClassE; assign UpdateEn = |InstrClassE | AnyWrongPredInstrClassE;
// An optimization may be using a PC relative address. // An optimization may be using a PC relative address.

View File

@ -31,12 +31,12 @@
module gshare #(parameter k = 10) ( module gshare #(parameter k = 10) (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic StallF, StallD, StallE, StallM, input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushD, FlushE, FlushM, input logic FlushD, FlushE, FlushM, FlushW,
output logic [1:0] DirPredictionF, output logic [1:0] DirPredictionF,
output logic DirPredictionWrongE, output logic DirPredictionWrongE,
// update // update
input logic [`XLEN-1:0] PCNextF, PCE, input logic [`XLEN-1:0] PCNextF, PCM,
input logic BranchInstrE, BranchInstrM, PCSrcE input logic BranchInstrE, BranchInstrM, PCSrcE
); );
@ -44,20 +44,20 @@ module gshare #(parameter k = 10) (
logic [1:0] DirPredictionD, DirPredictionE; logic [1:0] DirPredictionD, DirPredictionE;
logic [1:0] NewDirPredictionE, NewDirPredictionM; logic [1:0] NewDirPredictionE, NewDirPredictionM;
logic [k-1:0] GHRF, GHRD, GHRE, GHR; logic [k-1:0] GHRF, GHRD, GHRE, GHRM, GHR;
logic [k-1:0] GHRNext; logic [k-1:0] GHRNext;
logic PCSrcM; logic PCSrcM;
assign IndexNextF = GHR & {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; assign IndexNextF = GHR ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]};
assign IndexE = GHRE & {PCE[k+1] ^ PCE[1], PCE[k:2]}; assign IndexE = GHRM ^ {PCM[k+1] ^ PCM[1], PCM[k:2]};
ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
.ce1(~StallF), .ce2(~StallM & ~FlushM), .ce1(~StallF), .ce2(~StallM & ~FlushM),
.ra1(IndexNextF), .ra1(IndexNextF),
.rd1(DirPredictionF), .rd1(DirPredictionF),
.wa2(IndexE), .wa2(IndexE),
.wd2(NewDirPredictionE), .wd2(NewDirPredictionM),
.we2(BranchInstrE & ~StallM & ~FlushM), .we2(BranchInstrM & ~StallW & ~FlushW),
.bwe2(1'b1)); .bwe2(1'b1));
flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD);
@ -75,6 +75,7 @@ module gshare #(parameter k = 10) (
flopenrc #(k) GHRFReg(clk, reset, FlushD, ~StallF, GHR, GHRF); flopenrc #(k) GHRFReg(clk, reset, FlushD, ~StallF, GHR, GHRF);
flopenrc #(k) GHRDReg(clk, reset, FlushD, ~StallD, GHRF, GHRD); flopenrc #(k) GHRDReg(clk, reset, FlushD, ~StallD, GHRF, GHRD);
flopenrc #(k) GHREReg(clk, reset, FlushE, ~StallE, GHRD, GHRE); flopenrc #(k) GHREReg(clk, reset, FlushE, ~StallE, GHRD, GHRE);
flopenrc #(k) GHRMReg(clk, reset, FlushM, ~StallM, GHRE, GHRM);
endmodule endmodule

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// speculativeglobalhistory.sv // gsharePredictor.sv
// //
// Written: Shreya Sanghai // Written: Shreya Sanghai
// Email: ssanghai@hmc.edu // Email: ssanghai@hmc.edu
@ -36,7 +36,8 @@ module speculativeglobalhistory #(parameter int k = 10 ) (
output logic [1:0] DirPredictionF, output logic [1:0] DirPredictionF,
output logic DirPredictionWrongE, output logic DirPredictionWrongE,
// update // update
input logic [3:0] PredInstrClassF, InstrClassD, InstrClassE, input logic [3:0] PredInstrClassF,
input logic [3:0] InstrClassD, InstrClassE, InstrClassM,
input logic [3:0] WrongPredInstrClassD, input logic [3:0] WrongPredInstrClassD,
input logic PCSrcE input logic PCSrcE
); );
@ -47,9 +48,9 @@ module speculativeglobalhistory #(parameter int k = 10 ) (
logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE;
logic [1:0] NewDirPredictionE; logic [1:0] NewDirPredictionE;
logic [k-1:0] GHRF, GHRD, GHRE; logic [k-1:0] GHRF, GHRD, GHRE, GHRM;
logic GHRLastF; logic GHRLastF;
logic [k-1:0] GHRNextF, GHRNextD, GHRNextE; logic [k-1:0] GHRNextF, GHRNextD, GHRNextE, GHRNextM;
logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE; logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE;
logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF; logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF;
@ -57,8 +58,8 @@ module speculativeglobalhistory #(parameter int k = 10 ) (
assign IndexNextF = GHRNextF; assign IndexNextF = GHRNextF;
assign IndexF = GHRF; assign IndexF = GHRF;
assign IndexD = GHRD[k-1:0]; assign IndexD = GHRD;
assign IndexE = GHRE[k-1:0]; assign IndexE = GHRE;
ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
.ce1(~StallF | reset), .ce2(~StallM & ~FlushM), .ce1(~StallF | reset), .ce2(~StallM & ~FlushM),
@ -111,15 +112,18 @@ module speculativeglobalhistory #(parameter int k = 10 ) (
// If it is wrong and branch does exist then shift right and insert the prediction. // If it is wrong and branch does exist then shift right and insert the prediction.
// If the branch does not exist then shift left and use GHRLastF to restore the LSB. // If the branch does not exist then shift left and use GHRLastF to restore the LSB.
logic [k-1:0] GHRClassWrong; logic [k-1:0] GHRClassWrong;
mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, InstrClassD[0], GHRClassWrong); mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, ~InstrClassD[0], GHRClassWrong);
// As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE. // As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE.
mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD); mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD);
flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD); flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD);
mux3 #(k) GHREMux(GHRD, GHRE, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); mux3 #(k) GHREMux(GHRD, GHRNextM, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE);
flopenr #(k) GHREReg(clk, reset, ((InstrClassE[0] & ~FlushM) & ~StallE) | FlushE, GHRNextE, GHRE); flopenr #(k) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, GHRE);
assign GHRNextM = FlushM ? GHRM : GHRE;
flopenr #(k) GHRMReg(clk, reset, (InstrClassM[0] & ~StallM) | FlushM, GHRNextM, GHRM);
assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0]; assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0];

View File

@ -37,7 +37,8 @@ module speculativegshare #(parameter int k = 10 ) (
output logic DirPredictionWrongE, output logic DirPredictionWrongE,
// update // update
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE,
input logic [3:0] PredInstrClassF, InstrClassD, InstrClassE, input logic [3:0] PredInstrClassF,
input logic [3:0] InstrClassD, InstrClassE, InstrClassM,
input logic [3:0] WrongPredInstrClassD, input logic [3:0] WrongPredInstrClassD,
input logic PCSrcE input logic PCSrcE
); );
@ -48,9 +49,9 @@ module speculativegshare #(parameter int k = 10 ) (
logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE;
logic [1:0] NewDirPredictionE; logic [1:0] NewDirPredictionE;
logic [k-1:0] GHRF, GHRD, GHRE; logic [k-1:0] GHRF, GHRD, GHRE, GHRM;
logic GHRLastF; logic GHRLastF;
logic [k-1:0] GHRNextF, GHRNextD, GHRNextE; logic [k-1:0] GHRNextF, GHRNextD, GHRNextE, GHRNextM;
logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE; logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE;
logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF; logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF;
@ -112,15 +113,18 @@ module speculativegshare #(parameter int k = 10 ) (
// If it is wrong and branch does exist then shift right and insert the prediction. // If it is wrong and branch does exist then shift right and insert the prediction.
// If the branch does not exist then shift left and use GHRLastF to restore the LSB. // If the branch does not exist then shift left and use GHRLastF to restore the LSB.
logic [k-1:0] GHRClassWrong; logic [k-1:0] GHRClassWrong;
mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, InstrClassD[0], GHRClassWrong); mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, ~InstrClassD[0], GHRClassWrong);
// As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE. // As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE.
mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD); mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD);
flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD); flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD);
mux3 #(k) GHREMux(GHRD, GHRE, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); mux3 #(k) GHREMux(GHRD, GHRNextM, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE);
flopenr #(k) GHREReg(clk, reset, ((InstrClassE[0] & ~FlushM) & ~StallE) | FlushE, GHRNextE, GHRE); flopenr #(k) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, GHRE);
assign GHRNextM = FlushM ? GHRM : GHRE;
flopenr #(k) GHRMReg(clk, reset, (InstrClassM[0] & ~StallM) | FlushM, GHRNextM, GHRM);
assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0]; assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0];

View File

@ -36,6 +36,8 @@ module ifu (
input logic InvalidateICacheM, // Clears all instruction cache valid bits input logic InvalidateICacheM, // Clears all instruction cache valid bits
input logic CSRWriteFenceM, // CSR write or fence instruction, PCNextF = the next valid PC (typically PCE) input logic CSRWriteFenceM, // CSR write or fence instruction, PCNextF = the next valid PC (typically PCE)
input logic InstrValidD, InstrValidE, InstrValidM, input logic InstrValidD, InstrValidE, InstrValidM,
input logic BranchD, BranchE,
input logic JumpD, JumpE,
// Bus interface // Bus interface
output logic [`PA_BITS-1:0] IFUHADDR, // Bus address from IFU to EBU output logic [`PA_BITS-1:0] IFUHADDR, // Bus address from IFU to EBU
input logic [`XLEN-1:0] HRDATA, // Bus read data from IFU to EBU input logic [`XLEN-1:0] HRDATA, // Bus read data from IFU to EBU
@ -324,6 +326,7 @@ module ifu (
bpred bpred(.clk, .reset, bpred bpred(.clk, .reset,
.StallF, .StallD, .StallE, .StallM, .StallW, .StallF, .StallD, .StallE, .StallM, .StallW,
.FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE,
.BranchD, .BranchE, .JumpD, .JumpE,
.InstrD, .PCNextF, .PCPlus2or4F, .PCNext1F, .PCE, .PCM, .PCSrcE, .IEUAdrE, .PCF, .NextValidPCE, .InstrD, .PCNextF, .PCPlus2or4F, .PCNext1F, .PCE, .PCM, .PCSrcE, .IEUAdrE, .PCF, .NextValidPCE,
.PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPPredWrongM, .PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPPredWrongM,
.DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM); .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM);

View File

@ -162,11 +162,13 @@ module wallypipelinedcore (
logic FCvtIntE; logic FCvtIntE;
logic CommittedF; logic CommittedF;
logic JumpOrTakenBranchM; logic JumpOrTakenBranchM;
logic BranchD, BranchE, JumpD, JumpE;
// instruction fetch unit: PC, branch prediction, instruction cache // instruction fetch unit: PC, branch prediction, instruction cache
ifu ifu(.clk, .reset, ifu ifu(.clk, .reset,
.StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.InstrValidM, .InstrValidE, .InstrValidD, .InstrValidM, .InstrValidE, .InstrValidD,
.BranchD, .BranchE, .JumpD, .JumpE,
// Fetch // Fetch
.HRDATA, .PCFSpill, .IFUHADDR, .PCNext2F, .HRDATA, .PCFSpill, .IFUHADDR, .PCNext2F,
.IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE,
@ -200,6 +202,7 @@ module wallypipelinedcore (
.Funct3M, // size and signedness to LSU .Funct3M, // size and signedness to LSU
.SrcAM, // to privilege and fpu .SrcAM, // to privilege and fpu
.RdE, .RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM, .RdE, .RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM,
.BranchD, .BranchE, .JumpD, .JumpE,
// Writeback stage // Writeback stage
.CSRReadValW, .MDUResultW, .FIntDivResultW, .RdW, .ReadDataW(ReadDataW[`XLEN-1:0]), .CSRReadValW, .MDUResultW, .FIntDivResultW, .RdW, .ReadDataW(ReadDataW[`XLEN-1:0]),
.InstrValidM, .InstrValidE, .InstrValidD, .FCvtIntResW, .FCvtIntW, .InstrValidM, .InstrValidE, .InstrValidD, .FCvtIntResW, .FCvtIntW,

View File

@ -50,9 +50,9 @@ string tvpaths[] = '{
string embench[] = '{ string embench[] = '{
`EMBENCH, `EMBENCH,
"bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches
"bd_speedopt_speed/src/aha-mont64/aha-mont64", "bd_speedopt_speed/src/aha-mont64/aha-mont64",
"bd_speedopt_speed/src/crc32/crc32", "bd_speedopt_speed/src/crc32/crc32",
"bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches
"bd_speedopt_speed/src/edn/edn", "bd_speedopt_speed/src/edn/edn",
"bd_speedopt_speed/src/huffbench/huffbench", "bd_speedopt_speed/src/huffbench/huffbench",
"bd_speedopt_speed/src/matmult-int/matmult-int", "bd_speedopt_speed/src/matmult-int/matmult-int",