From 9c4da7381f4e2370b57d51c2f02f9cb1ed12cfc3 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 10 Feb 2023 15:45:56 -0600 Subject: [PATCH 01/21] Experimental branch prediction optimization. --- src/ieu/controller.sv | 4 ++-- src/ieu/ieu.sv | 5 +++-- src/ifu/bpred/bpred.sv | 23 ++++++++++++++++------- src/ifu/ifu.sv | 5 ++++- src/wally/wallypipelinedcore.sv | 3 +++ 5 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 108b0bb1..d6642534 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -63,6 +63,8 @@ module controller( output logic RegWriteM, // Instruction writes a register (needed for Hazard unit) output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$ output logic InstrValidD, InstrValidE, InstrValidM, // Instruction is valid + output logic BranchD, BranchE, + output logic JumpD, output logic FWriteIntM, // FPU controller writes integer register file // Writeback stage control signals @@ -85,8 +87,6 @@ module controller( logic RegWriteD, RegWriteE; // RegWrite (register will be written) logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; // Select which result to write back to register file logic [1:0] MemRWD, MemRWE; // Store (write to memory) - logic JumpD; // Jump instruction - logic BranchD, BranchE; // Branch instruction logic ALUOpD; // 0 for address generation, 1 for all other operations (must use Funct3) logic [2:0] ALUControlD; // Determines ALU operation logic ALUSrcAD, ALUSrcBD; // ALU inputs diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index 9df95040..9d3a833e 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -55,6 +55,8 @@ module ieu ( input logic [`XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp) output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$ output logic InstrValidD, InstrValidE, InstrValidM,// Instruction is valid + output logic BranchD, BranchE, + output logic JumpD, JumpE, // Writeback stage signals input logic [`XLEN-1:0] FIntDivResultW, // Integer divide result from FPU fdivsqrt) input logic [`XLEN-1:0] CSRReadValW, // CSR read value, @@ -87,7 +89,6 @@ module ieu ( logic [1:0] ForwardAE, ForwardBE; // Select signals for forwarding multiplexers logic RegWriteM, RegWriteW; // Register will be written in Memory, Writeback stages logic MemReadE, CSRReadE; // Load, CSRRead instruction - logic JumpE; // Jump instruction logic BranchSignedE; // Branch does signed comparison on operands logic MDUE; // Multiply/divide instruction @@ -95,7 +96,7 @@ module ieu ( .clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD, .IllegalIEUInstrFaultD, .IllegalBaseInstrFaultD, .StallE, .FlushE, .FlagsE, .FWriteIntE, .PCSrcE, .ALUControlE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .MemReadE, .CSRReadE, - .Funct3E, .IntDivE, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM, + .Funct3E, .IntDivE, .MDUE, .W64E, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM, .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .StoreStallD); diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index c2ad9ac9..82bc5323 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -52,6 +52,8 @@ module bpred ( // Branch and jump outcome input logic InstrValidD, InstrValidE, + input logic BranchD, BranchE, + input logic JumpD, JumpE, input logic PCSrcE, // Executation stage branch is taken input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) @@ -189,11 +191,17 @@ module bpred ( assign BPPredPCF = PredInstrClassF[2] ? RASPCF : PredPCF; - assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 - assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 - assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return - (InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5 - assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch + //assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 + //assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 + //assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return + // (InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5 + //assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch + assign InstrClassD[0] = BranchD; + assign InstrClassD[1] = JumpD ; + assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 + assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 + + flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); @@ -247,10 +255,11 @@ module bpred ( // could be wrong or the fall through address selected for branch predict not taken. // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of // both without the above inaccuracies. - assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] | InstrClassE[3]) & PCSrcE; + //assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] | InstrClassE[3]) & PCSrcE; + assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]) & PCSrcE; assign RASPredPCWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE; - assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1] | InstrClassE[3]; + assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1]; flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 68350bac..51317e0b 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -36,6 +36,8 @@ module ifu ( input logic InvalidateICacheM, // Clears all instruction cache valid bits input logic CSRWriteFenceM, // CSR write or fence instruction, PCNextF = the next valid PC (typically PCE) input logic InstrValidD, InstrValidE, InstrValidM, + input logic BranchD, BranchE, + input logic JumpD, JumpE, // Bus interface output logic [`PA_BITS-1:0] IFUHADDR, // Bus address from IFU to EBU input logic [`XLEN-1:0] HRDATA, // Bus read data from IFU to EBU @@ -323,7 +325,8 @@ module ifu ( if (`BPRED_SUPPORTED) begin : bpred bpred bpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, - .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, + .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, + .BranchD, .BranchE, .JumpD, .JumpE, .InstrD, .PCNextF, .PCPlus2or4F, .PCNext1F, .PCE, .PCM, .PCSrcE, .IEUAdrE, .PCF, .NextValidPCE, .PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPPredWrongM, .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM); diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 3a57b9ae..a03caea1 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -162,11 +162,13 @@ module wallypipelinedcore ( logic FCvtIntE; logic CommittedF; logic JumpOrTakenBranchM; + logic BranchD, BranchE, JumpD, JumpE; // instruction fetch unit: PC, branch prediction, instruction cache ifu ifu(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidM, .InstrValidE, .InstrValidD, + .BranchD, .BranchE, .JumpD, .JumpE, // Fetch .HRDATA, .PCFSpill, .IFUHADDR, .PCNext2F, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE, @@ -200,6 +202,7 @@ module wallypipelinedcore ( .Funct3M, // size and signedness to LSU .SrcAM, // to privilege and fpu .RdE, .RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM, + .BranchD, .BranchE, .JumpD, .JumpE, // Writeback stage .CSRReadValW, .MDUResultW, .FIntDivResultW, .RdW, .ReadDataW(ReadDataW[`XLEN-1:0]), .InstrValidM, .InstrValidE, .InstrValidD, .FCvtIntResW, .FCvtIntW, From 1e0667db1db8b4d45a611fdc002dc8ed0a388816 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 10 Feb 2023 17:09:35 -0600 Subject: [PATCH 02/21] More simplifications to the BP. --- src/ifu/bpred/bpred.sv | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 82bc5323..0a01469d 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -76,7 +76,7 @@ module bpred ( logic [`XLEN-1:0] PredPCF, RASPCF; logic PredictionPCWrongE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; - logic [3:0] InstrClassF, InstrClassD, InstrClassE, InstrClassW; + logic [3:0] InstrClassF, InstrClassD, InstrClassE; logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE; logic SelBPPredF; @@ -154,6 +154,7 @@ module bpred ( assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; +// *** still need to update to use inclusive jump assign cjal = CompressedOpcF == 5'h09 & `XLEN == 32; assign cj = CompressedOpcF == 5'h0d; assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; @@ -162,9 +163,10 @@ module bpred ( assign InstrClassF[0] = PostSpillInstrRawF[6:0] == 7'h63 | (`C_SUPPORTED & CompressedOpcF[4:1] == 4'h7); - assign InstrClassF[1] = (PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) != 5'h01 & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump register, but not return - (PostSpillInstrRawF[6:0] == 7'h6F & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump, RD != x1 or x5 - (`C_SUPPORTED & (cj | (cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01)) )); + //assign InstrClassF[1] = (PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) != 5'h01 & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump register, but not return + // (PostSpillInstrRawF[6:0] == 7'h6F & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump, RD != x1 or x5 + // (`C_SUPPORTED & (cj | (cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01)) )); + assign InstrClassF[1] = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F | (`C_SUPPORTED & (cjal | cj | cj | cjalr)); assign InstrClassF[2] = PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01 | // return must return to ra or r5 (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); @@ -173,15 +175,11 @@ module bpred ( (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); assign PredInstrClassF = InstrClassF; assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | - PredInstrClassF[2] | - PredInstrClassF[1] | - PredInstrClassF[3]; + PredInstrClassF[1]; end else begin assign PredInstrClassF = BTBPredInstrClassF; assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) | - PredInstrClassF[2] | - (PredInstrClassF[1] & PredValidF) | - (PredInstrClassF[3] & PredValidF); + PredInstrClassF[1] & PredValidF; end // Part 3 RAS @@ -191,11 +189,6 @@ module bpred ( assign BPPredPCF = PredInstrClassF[2] ? RASPCF : PredPCF; - //assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 - //assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 - //assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return - // (InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5 - //assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch assign InstrClassD[0] = BranchD; assign InstrClassD[1] = JumpD ; assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 @@ -205,7 +198,6 @@ module bpred ( flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); - flopenrc #(4) InstrClassRegW(clk, reset, FlushW, ~StallW, InstrClassM, InstrClassW); flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); // branch predictor From c59dfc1e30e3816191d96f7169306141f1a6a9d6 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sat, 11 Feb 2023 19:59:03 -0800 Subject: [PATCH 03/21] fixed typo in LZC --- src/generic/lzc.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/generic/lzc.sv b/src/generic/lzc.sv index 60719c49..ecfd6796 100644 --- a/src/generic/lzc.sv +++ b/src/generic/lzc.sv @@ -33,6 +33,6 @@ module lzc #(parameter WIDTH = 1) ( always_comb begin i = 0; while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one - ZeroCnt = i[$clog2(WIDTH)-1:0]; + ZeroCnt = i[$clog2(WIDTH+1)-1:0]; end endmodule From 58749a8c5745a6df52264ebe4569679140b09209 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 12 Feb 2023 11:33:43 -0600 Subject: [PATCH 04/21] Removed another bit from btb class. --- src/ifu/bpred/RASPredictor.sv | 6 ++- src/ifu/bpred/bpred.sv | 47 ++++++++++++----------- src/ifu/bpred/btb.sv | 19 ++++----- src/ifu/bpred/speculativeglobalhistory.sv | 4 +- src/ifu/bpred/speculativegshare.sv | 5 ++- 5 files changed, 42 insertions(+), 39 deletions(-) diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 0a841ae1..40fb5bb1 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -33,8 +33,10 @@ module RASPredictor #(parameter int StackSize = 16 )( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, - input logic [3:0] WrongPredInstrClassD, // Prediction class is wrong - input logic [3:0] InstrClassD, InstrClassE, PredInstrClassF, // Instr class + input logic [2:0] WrongPredInstrClassD, // Prediction class is wrong + input logic [3:0] InstrClassD, + input logic [3:0] InstrClassE, // Instr class + input logic [2:0] PredInstrClassF, input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a jal output logic [`XLEN-1:0] RASPCF // Top of the stack ); diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 0a01469d..8a1eb801 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -72,22 +72,24 @@ module bpred ( logic PredValidF; logic [1:0] DirPredictionF; - logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD; + logic [2:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD; logic [`XLEN-1:0] PredPCF, RASPCF; logic PredictionPCWrongE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; - logic [3:0] InstrClassF, InstrClassD, InstrClassE; + logic [2:0] InstrClassF; + logic [3:0] InstrClassD; + logic [3:0] InstrClassE; logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE; logic SelBPPredF; logic [`XLEN-1:0] BPPredPCF; logic [`XLEN-1:0] PCNext0F; logic [`XLEN-1:0] PCCorrectE; - logic [3:0] WrongPredInstrClassD; + logic [2:0] WrongPredInstrClassD; - logic BTBTargetWrongE; - logic RASTargetWrongE; - logic JumpOrTakenBranchE; + logic BTBTargetWrongE; + logic RASTargetWrongE; + logic JumpOrTakenBranchE; logic [`XLEN-1:0] PredPCD, PredPCE, RASPCD, RASPCE; @@ -149,30 +151,31 @@ module bpred ( // the branch predictor needs a compact decoding of the instruction class. if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode logic [4:0] CompressedOpcF; - logic [3:0] InstrClassF; - logic cjal, cj, cjr, cjalr; + logic [2:0] InstrClassF; + logic cjal, cj, cjr, cjalr, CJumpF, CBranchF; + logic JumpF, BranchF; assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; -// *** still need to update to use inclusive jump assign cjal = CompressedOpcF == 5'h09 & `XLEN == 32; assign cj = CompressedOpcF == 5'h0d; assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + assign CJumpF = cjal | cj | cjr | cjalr; + assign CBranchF = CompressedOpcF[4:1] == 4'h7; + + assign JumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; + assign BranchF = PostSpillInstrRawF[6:0] == 7'h63; - assign InstrClassF[0] = PostSpillInstrRawF[6:0] == 7'h63 | - (`C_SUPPORTED & CompressedOpcF[4:1] == 4'h7); - - //assign InstrClassF[1] = (PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) != 5'h01 & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump register, but not return - // (PostSpillInstrRawF[6:0] == 7'h6F & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump, RD != x1 or x5 - // (`C_SUPPORTED & (cj | (cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01)) )); - assign InstrClassF[1] = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F | (`C_SUPPORTED & (cjal | cj | cj | cjalr)); - - assign InstrClassF[2] = PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01 | // return must return to ra or r5 + assign InstrClassF[0] = BranchF | (`C_SUPPORTED & CBranchF); + assign InstrClassF[1] = JumpF | (`C_SUPPORTED & (cjal | cj | cj | cjalr)); + assign InstrClassF[2] = (JumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or r5 (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); - assign InstrClassF[3] = ((PostSpillInstrRawF[6:0] & 7'h77) == 7'h67 & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 - (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); + //assign InstrClassF[3] = (JumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 + // (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); + + assign PredInstrClassF = InstrClassF; assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | PredInstrClassF[1]; @@ -206,7 +209,7 @@ module bpred ( {DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM}); // pipeline the class - flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); + flopenrc #(3) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); // Check the prediction @@ -218,7 +221,7 @@ module bpred ( assign PredictionPCWrongE = PCCorrectE != PCD; // branch class prediction wrong. - assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD; + assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD[2:0]; assign AnyWrongPredInstrClassD = |WrongPredInstrClassD; // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index c538636d..2100eb35 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -36,7 +36,7 @@ module btb #(parameter int Depth = 10 ) ( input logic StallF, StallD, StallM, FlushD, FlushM, input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, // PC at various stages output logic [`XLEN-1:0] PredPCF, // BTB's guess at PC - output logic [3:0] BTBPredInstrClassF, // BTB's guess at instruction class + output logic [2:0] BTBPredInstrClassF, // BTB's guess at instruction class output logic PredValidF, // BTB's guess is valid // update input logic AnyWrongPredInstrClassE, // BTB's instruction class guess was wrong @@ -50,10 +50,9 @@ module btb #(parameter int Depth = 10 ) ( logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex; logic [`XLEN-1:0] ResetPC; logic MatchF, MatchD, MatchE, MatchNextX, MatchXF; - logic [`XLEN+4:0] ForwardBTBPrediction, ForwardBTBPredictionF; - logic [`XLEN+3:0] TableBTBPredictionF; + logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; + logic [`XLEN+2:0] TableBTBPredictionF; logic [`XLEN-1:0] PredPCD; - logic [3:0] PredInstrClassD; // *** copy of reg outside module logic UpdateEn; logic TablePredValidF, PredValidD; @@ -80,10 +79,10 @@ module btb #(parameter int Depth = 10 ) ( flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); assign ForwardBTBPrediction = MatchF ? {PredValidF, BTBPredInstrClassF, PredPCF} : - MatchD ? {PredValidD, InstrClassD, PredPCD} : - {1'b1, InstrClassE, IEUAdrE} ; + MatchD ? {PredValidD, InstrClassD[2:0], PredPCD} : + {1'b1, InstrClassE[2:0], IEUAdrE} ; - flopenr #(`XLEN+5) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF); + flopenr #(`XLEN+4) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF); assign {PredValidF, BTBPredInstrClassF, PredPCF} = MatchXF ? ForwardBTBPredictionF : {TablePredValidF, TableBTBPredictionF}; @@ -96,14 +95,12 @@ module btb #(parameter int Depth = 10 ) ( if(~StallF | reset) TablePredValidF = ValidBits[PCNextFIndex]; end - //assign PredValidF = MatchXF ? 1'b1 : TablePredValidF; - assign UpdateEn = |InstrClassE | AnyWrongPredInstrClassE; // An optimization may be using a PC relative address. - ram2p1r1wbe #(2**Depth, `XLEN+4) memory( + ram2p1r1wbe #(2**Depth, `XLEN+3) memory( .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredictionF), - .ce2(~StallM & ~FlushM), .wa2(PCEIndex), .wd2({InstrClassE, IEUAdrE}), .we2(UpdateEn), .bwe2('1)); + .ce2(~StallM & ~FlushM), .wa2(PCEIndex), .wd2({InstrClassE[2:0], IEUAdrE}), .we2(UpdateEn), .bwe2('1)); flopenrc #(`XLEN+1) BTBD(clk, reset, FlushD, ~StallD, {PredValidF, PredPCF}, {PredValidD, PredPCD}); diff --git a/src/ifu/bpred/speculativeglobalhistory.sv b/src/ifu/bpred/speculativeglobalhistory.sv index 51dbb422..645ac99e 100644 --- a/src/ifu/bpred/speculativeglobalhistory.sv +++ b/src/ifu/bpred/speculativeglobalhistory.sv @@ -36,8 +36,8 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, // update - input logic [3:0] PredInstrClassF, InstrClassD, InstrClassE, - input logic [3:0] WrongPredInstrClassD, + input logic [3:0] InstrClassD, InstrClassE, + input logic [2:0] PredInstrClassF, WrongPredInstrClassD, input logic PCSrcE ); diff --git a/src/ifu/bpred/speculativegshare.sv b/src/ifu/bpred/speculativegshare.sv index 1eb888a9..9d55dc87 100644 --- a/src/ifu/bpred/speculativegshare.sv +++ b/src/ifu/bpred/speculativegshare.sv @@ -37,8 +37,9 @@ module speculativegshare #(parameter int k = 10 ) ( output logic DirPredictionWrongE, // update input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, - input logic [3:0] PredInstrClassF, InstrClassD, InstrClassE, - input logic [3:0] WrongPredInstrClassD, + input logic [2:0] PredInstrClassF, + input logic [3:0] InstrClassD, InstrClassE, + input logic [2:0] WrongPredInstrClassD, input logic PCSrcE ); From 1d74663f423cf85b946dc1d0c978f36f0bd17fb3 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 13 Feb 2023 11:57:25 -0600 Subject: [PATCH 05/21] Partial fix for gshare bugs from the last two weeks. --- src/ifu/bpred/bpred.sv | 2 +- src/ifu/bpred/speculativegshare.sv | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 8a1eb801..0976275b 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -118,7 +118,7 @@ module bpred ( end else if (`BPRED_TYPE == "BPSPECULATIVEGSHARE") begin:Predictor speculativegshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .DirPredictionF, .DirPredictionWrongE, - .PredInstrClassF, .InstrClassD, .InstrClassE, .WrongPredInstrClassD, .PCSrcE); + .PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE); end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor // *** Fix me diff --git a/src/ifu/bpred/speculativegshare.sv b/src/ifu/bpred/speculativegshare.sv index 9d55dc87..bfbd1bbd 100644 --- a/src/ifu/bpred/speculativegshare.sv +++ b/src/ifu/bpred/speculativegshare.sv @@ -38,7 +38,7 @@ module speculativegshare #(parameter int k = 10 ) ( // update input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, input logic [2:0] PredInstrClassF, - input logic [3:0] InstrClassD, InstrClassE, + input logic [3:0] InstrClassD, InstrClassE, InstrClassM, input logic [2:0] WrongPredInstrClassD, input logic PCSrcE ); @@ -49,9 +49,9 @@ module speculativegshare #(parameter int k = 10 ) ( logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; logic [1:0] NewDirPredictionE; - logic [k-1:0] GHRF, GHRD, GHRE; + logic [k-1:0] GHRF, GHRD, GHRE, GHRM; logic GHRLastF; - logic [k-1:0] GHRNextF, GHRNextD, GHRNextE; + logic [k-1:0] GHRNextF, GHRNextD, GHRNextE, GHRNextM; logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE; logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF; @@ -113,15 +113,18 @@ module speculativegshare #(parameter int k = 10 ) ( // If it is wrong and branch does exist then shift right and insert the prediction. // If the branch does not exist then shift left and use GHRLastF to restore the LSB. logic [k-1:0] GHRClassWrong; - mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, InstrClassD[0], GHRClassWrong); + mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, ~InstrClassD[0], GHRClassWrong); // As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE. mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD); flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD); - mux3 #(k) GHREMux(GHRD, GHRE, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); + mux3 #(k) GHREMux(GHRD, GHRNextM, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); - flopenr #(k) GHREReg(clk, reset, ((InstrClassE[0] & ~FlushM) & ~StallE) | FlushE, GHRNextE, GHRE); + flopenr #(k) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, GHRE); + + assign GHRNextM = FlushM ? GHRM : GHRE; + flopenr #(k) GHRMReg(clk, reset, (InstrClassM[0] & ~StallM) | FlushM, GHRNextM, GHRM); assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0]; From f4af38a0044ce8b20d7c9d004dcb4196bc5c772f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 13 Feb 2023 16:14:17 -0600 Subject: [PATCH 06/21] Hacked commit. Fixes the gshare bugs introduced last week. Need to recover the good changes in the next commit. --- src/ifu/bpred/RASPredictor.sv | 7 +- src/ifu/bpred/bpred.sv | 363 ++++++++++++++++++++++++++++- src/ifu/bpred/btb.sv | 16 +- src/ifu/bpred/speculativegshare.sv | 4 +- 4 files changed, 368 insertions(+), 22 deletions(-) diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 40fb5bb1..330607af 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -33,10 +33,10 @@ module RASPredictor #(parameter int StackSize = 16 )( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, - input logic [2:0] WrongPredInstrClassD, // Prediction class is wrong + input logic [3:0] WrongPredInstrClassD, // Prediction class is wrong input logic [3:0] InstrClassD, input logic [3:0] InstrClassE, // Instr class - input logic [2:0] PredInstrClassF, + input logic [3:0] PredInstrClassF, input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a jal output logic [`XLEN-1:0] RASPCF // Top of the stack ); @@ -95,6 +95,3 @@ module RASPredictor #(parameter int StackSize = 16 )( endmodule - - - diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 0976275b..b833f99e 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -72,11 +72,11 @@ module bpred ( logic PredValidF; logic [1:0] DirPredictionF; - logic [2:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD; + logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD, PredInstrClassE; logic [`XLEN-1:0] PredPCF, RASPCF; logic PredictionPCWrongE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; - logic [2:0] InstrClassF; + logic [3:0] InstrClassF; logic [3:0] InstrClassD; logic [3:0] InstrClassE; logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE; @@ -85,7 +85,7 @@ module bpred ( logic [`XLEN-1:0] BPPredPCF; logic [`XLEN-1:0] PCNext0F; logic [`XLEN-1:0] PCCorrectE; - logic [2:0] WrongPredInstrClassD; + logic [3:0] WrongPredInstrClassD; logic BTBTargetWrongE; logic RASTargetWrongE; @@ -149,6 +149,8 @@ module bpred ( .InstrClassE); // the branch predictor needs a compact decoding of the instruction class. + ///// ********* THIS IS NOT THE ISSUE. +/* -----\/----- EXCLUDED -----\/----- if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode logic [4:0] CompressedOpcF; logic [2:0] InstrClassF; @@ -184,6 +186,44 @@ module bpred ( assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) | PredInstrClassF[1] & PredValidF; end + -----/\----- EXCLUDED -----/\----- */ + // the branch predictor needs a compact decoding of the instruction class. + if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode + logic [4:0] CompressedOpcF; + logic [3:0] InstrClassF; + logic cjal, cj, cjr, cjalr; + + assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; + + assign cjal = CompressedOpcF == 5'h09 & `XLEN == 32; + assign cj = CompressedOpcF == 5'h0d; + assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + + assign InstrClassF[0] = PostSpillInstrRawF[6:0] == 7'h63 | + (`C_SUPPORTED & CompressedOpcF[4:1] == 4'h7); + + assign InstrClassF[1] = (PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) != 5'h01 & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump register, but not return + (PostSpillInstrRawF[6:0] == 7'h6F & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump, RD != x1 or x5 + (`C_SUPPORTED & (cj | (cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01)) )); + + assign InstrClassF[2] = PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01 | // return must return to ra or r5 + (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); + + assign InstrClassF[3] = ((PostSpillInstrRawF[6:0] & 7'h77) == 7'h67 & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 + (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); + assign PredInstrClassF = InstrClassF; + assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | + PredInstrClassF[2] | + PredInstrClassF[1] | + PredInstrClassF[3]; + end else begin + assign PredInstrClassF = BTBPredInstrClassF; + assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) | + PredInstrClassF[2] | + (PredInstrClassF[1] & PredValidF) | + (PredInstrClassF[3] & PredValidF); + end // Part 3 RAS RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, @@ -192,10 +232,17 @@ module bpred ( assign BPPredPCF = PredInstrClassF[2] ? RASPCF : PredPCF; +/* -----\/----- EXCLUDED -----\/----- assign InstrClassD[0] = BranchD; assign InstrClassD[1] = JumpD ; assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 + -----/\----- EXCLUDED -----/\----- */ + assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 + assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 + assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return + (InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5 + assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch @@ -209,8 +256,10 @@ module bpred ( {DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM}); // pipeline the class - flopenrc #(3) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); - flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); + flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); + //flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); + flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE); + assign AnyWrongPredInstrClassE = InstrClassE != PredInstrClassE; // Check the prediction // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. @@ -221,11 +270,12 @@ module bpred ( assign PredictionPCWrongE = PCCorrectE != PCD; // branch class prediction wrong. - assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD[2:0]; + assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD[3:0]; assign AnyWrongPredInstrClassD = |WrongPredInstrClassD; // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. - assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; + assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | (AnyWrongPredInstrClassE & ~|InstrClassE); + //assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; // Output the predicted PC or corrected PC on miss-predict. // Selects the BP or PC+2/4. @@ -265,3 +315,302 @@ module bpred ( flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); endmodule +/* -----\/----- EXCLUDED -----\/----- +/////////////////////////////////////////// +// bpred.sv +// +// Written: Ross Thomposn ross1728@gmail.com +// Created: 12 February 2021 +// Modified: 19 January 2023 +// +// Purpose: Branch direction prediction and jump/branch target prediction. +// Prediction made during the fetch stage and corrected in the execution stage. +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +`define INSTR_CLASS_PRED 1 + +module bpred ( + input logic clk, reset, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, + // Fetch stage + // the prediction + input logic InstrValidD, InstrValidE, + input logic BranchD, BranchE, + input logic JumpD, JumpE, + input logic [31:0] InstrD, // Decompressed decode stage instruction. Used to decode instruction class + input logic [`XLEN-1:0] PCNextF, // Next Fetch Address + input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4 + output logic [`XLEN-1:0] PCNext1F, // Branch Predictor predicted or corrected fetch address on miss prediction + output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage + + // Update Predictor + input logic [`XLEN-1:0] PCF, // Fetch stage instruction address + input logic [`XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took + input logic [`XLEN-1:0] PCE, // Execution stage instruction address + input logic [`XLEN-1:0] PCM, // Memory stage instruction address + + input logic [31:0] PostSpillInstrRawF, // Instruction + + // Branch and jump outcome + input logic PCSrcE, // Executation stage branch is taken + input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address + input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) + output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br + output logic JumpOrTakenBranchM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br + + // Report branch prediction status + output logic BPPredWrongE, // Prediction is wrong + output logic BPPredWrongM, // Prediction is wrong + output logic DirPredictionWrongM, // Prediction direction is wrong + output logic BTBPredPCWrongM, // Prediction target wrong + output logic RASPredPCWrongM, // RAS prediction is wrong + output logic PredictionInstrClassWrongM // Class prediction is wrong + ); + + logic PredValidF; + logic [1:0] DirPredictionF; + + logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD, PredInstrClassE; + logic [`XLEN-1:0] PredPCF, RASPCF; + logic TargetWrongE; + logic FallThroughWrongE; + logic PredictionPCWrongE; + logic PredictionInstrClassWrongE; + logic [3:0] InstrClassF, InstrClassD, InstrClassE, InstrClassW; + logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE; + + logic SelBPPredF; + logic [`XLEN-1:0] BPPredPCF; + logic [`XLEN-1:0] PCNext0F; + logic [`XLEN-1:0] PCCorrectE; + logic [3:0] WrongPredInstrClassD; + + + logic BTBTargetWrongE; + logic RASTargetWrongE; + logic JumpOrTakenBranchE; + + logic [`XLEN-1:0] PredPCD, PredPCE, RASPCD, RASPCE; + + // Part 1 branch direction prediction + // look into the 2 port Sram model. something is wrong. + if (`BPRED_TYPE == "BPTWOBIT") begin:Predictor + twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, + .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, + .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); + + end else if (`BPRED_TYPE == "BPGLOBAL") begin:Predictor + globalhistory #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, + .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, + .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); + +/-* -----\/----- EXCLUDED -----\/----- + end else if (`BPRED_TYPE == "BPSPECULATIVEGLOBAL") begin:Predictor + speculativeglobalhistory #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, + .DirPredictionF, .DirPredictionWrongE, + .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), + .BranchInstrW(InstrClassW[0]), .WrongPredInstrClassD, .PCSrcE); + + end else if (`BPRED_TYPE == "BPGSHARE") begin:Predictor + gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, + .PCNextF, .PCE, .DirPredictionF, .DirPredictionWrongE, + .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); + -----/\----- EXCLUDED -----/\----- *-/ + + end else if (`BPRED_TYPE == "BPSPECULATIVEGSHARE") begin:Predictor + speculativegshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, + .PCNextF, .PCF, .PCD, .PCE, .DirPredictionF, .DirPredictionWrongE, + .PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE); + + end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor + // *** Fix me +/-* -----\/----- EXCLUDED -----\/----- + localHistoryPredictor DirPredictor(.clk, + .reset, .StallF, .StallE, + .LookUpPC(PCNextF), + .Prediction(DirPredictionF), + // update + .UpdatePC(PCE), + .UpdateEN(InstrClassE[0] & ~StallE), + .PCSrcE, + .UpdatePrediction(InstrClassE[0])); + -----/\----- EXCLUDED -----/\----- *-/ + end + + // this predictor will have two pieces of data, + // 1) A direction (1 = Taken, 0 = Not Taken) + // 2) Any information which is necessary for the predictor to build its next state. + // For a 2 bit table this is the prediction count. + + // Part 2 Branch target address prediction + // *** For now the BTB will house the direct and indirect targets + + btb TargetPredictor(.clk, .reset, .StallF, .StallD, .StallM, .FlushD, .FlushM, + .PCNextF, .PCF, .PCD, .PCE, + .PredPCF, + .BTBPredInstrClassF, + .PredValidF, + .AnyWrongPredInstrClassE(PredictionInstrClassWrongE), + .IEUAdrE, + .InstrClassD, + .InstrClassE); + + // the branch predictor needs a compact decoding of the instruction class. + if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode + logic [4:0] CompressedOpcF; + logic [3:0] InstrClassF; + logic cjal, cj, cjr, cjalr; + + assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; + + assign cjal = CompressedOpcF == 5'h09 & `XLEN == 32; + assign cj = CompressedOpcF == 5'h0d; + assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + + assign InstrClassF[0] = PostSpillInstrRawF[6:0] == 7'h63 | + (`C_SUPPORTED & CompressedOpcF[4:1] == 4'h7); + + assign InstrClassF[1] = (PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) != 5'h01 & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump register, but not return + (PostSpillInstrRawF[6:0] == 7'h6F & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump, RD != x1 or x5 + (`C_SUPPORTED & (cj | (cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01)) )); + + assign InstrClassF[2] = PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01 | // return must return to ra or r5 + (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); + + assign InstrClassF[3] = ((PostSpillInstrRawF[6:0] & 7'h77) == 7'h67 & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 + (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); + assign PredInstrClassF = InstrClassF; + assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | + PredInstrClassF[2] | + PredInstrClassF[1] | + PredInstrClassF[3]; + end else begin + assign PredInstrClassF = BTBPredInstrClassF; + assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) | + PredInstrClassF[2] | + (PredInstrClassF[1] & PredValidF) | + (PredInstrClassF[3] & PredValidF); + end + + // Part 3 RAS + RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, + .PredInstrClassF, .InstrClassD, .InstrClassE, + .WrongPredInstrClassD, .RASPCF, .PCLinkE); + + assign BPPredPCF = PredInstrClassF[2] ? RASPCF : PredPCF; + + assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 + assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 + assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return + (InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5 + assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch + + flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); + flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); + flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); + flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); + + // branch predictor + flopenrc #(4) BPPredWrongRegM(clk, reset, FlushM, ~StallM, + {DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE, PredictionInstrClassWrongE}, + {DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM}); + + // pipeline the class + flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); + flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE); + + // Check the prediction + // first check if the target or fallthrough address matches what was predicted. + assign TargetWrongE = IEUAdrE != PCD; + assign FallThroughWrongE = PCLinkE != PCD; + // If the target is taken check the target rather than fallthrough. The instruction needs to be a branch if PCSrcE is selected + // Remember the bpred can incorrectly predict a non cfi instruction as a branch taken. If the real instruction is non cfi + // it must have selected the fall through. + assign PredictionPCWrongE = PCCorrectE != PCD; + +// assign PredictionPCWrongE = (PCSrcE & (|InstrClassE) ? TargetWrongE : FallThroughWrongE); + + // The branch direction also need to checked. + // However if the direction is wrong then the pc will be wrong. This is only relavent to checking the + // accuracy of the direciton prediction. + //assign DirPredictionWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0]; + + // Finally we need to check if the class is wrong. When the class is wrong the BTB needs to be updated. + // Also we want to track this in a performance counter. + assign PredictionInstrClassWrongE = InstrClassE != PredInstrClassE; + + // We want to output to the instruction fetch if the PC fetched was wrong. If by chance the predictor was wrong about + // the direction or class, but correct about the target we don't have the flush the pipeline. However we still + // need this information to verify the accuracy of the predictors. + assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | BPPredClassNonCFIWrongE; +// assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; + + // If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter. + //assign BTBPredPCWrongE = (InstrClassE[3] | InstrClassE[1] | InstrClassE[0]) & PredictionPCWrongE; + //assign BTBPredPCWrongE = TargetWrongE & (InstrClassE[3] | InstrClassE[1] | InstrClassE[0]) & PCSrcE; + assign BTBPredPCWrongE = BTBTargetWrongE; + + // similar with RAS. Over counts ras if the class prediction was wrong. + //assign RASPredPCWrongE = TargetWrongE & InstrClassE[2] & PCSrcE; + assign RASPredPCWrongE = RASTargetWrongE; + // Finally if the real instruction class is non CFI but the predictor said it was we need to count. + assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE; + + // branch class prediction wrong. + assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD; + + // Selects the BP or PC+2/4. + mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, SelBPPredF, PCNext0F); + // If the prediction is wrong select the correct address. + mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F); + // Correct branch/jump target. + mux2 #(`XLEN) pccorrectemux(PCLinkE, IEUAdrE, PCSrcE, PCCorrectE); + + // If the fence/csrw was predicted as a taken branch then we select PCF, rather PCE. + // Effectively this is PCM+4 or the non-existant PCLinkM + // if(`BPCLASS) begin + mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPPredWrongM, NextValidPCE); + // end else begin + // assign NextValidPCE = PCE; + // end + + // performance counters + // 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now + // 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal) + // 3. target ras (ras target wrong / class[2]) + // 4. direction (br dir wrong / class[0]) + + assign BTBTargetWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] | InstrClassE[3]) & PCSrcE; + assign RASTargetWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE; + + assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1] | InstrClassE[3]; + + flopenrc #(`XLEN) BTBTargetDReg(clk, reset, FlushD, ~StallD, PredPCF, PredPCD); + flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, PredPCD, PredPCE); + + flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); + flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); + +endmodule + -----/\----- EXCLUDED -----/\----- */ diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 2100eb35..7893ace5 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -36,7 +36,7 @@ module btb #(parameter int Depth = 10 ) ( input logic StallF, StallD, StallM, FlushD, FlushM, input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, // PC at various stages output logic [`XLEN-1:0] PredPCF, // BTB's guess at PC - output logic [2:0] BTBPredInstrClassF, // BTB's guess at instruction class + output logic [3:0] BTBPredInstrClassF, // BTB's guess at instruction class output logic PredValidF, // BTB's guess is valid // update input logic AnyWrongPredInstrClassE, // BTB's instruction class guess was wrong @@ -50,8 +50,8 @@ module btb #(parameter int Depth = 10 ) ( logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex; logic [`XLEN-1:0] ResetPC; logic MatchF, MatchD, MatchE, MatchNextX, MatchXF; - logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; - logic [`XLEN+2:0] TableBTBPredictionF; + logic [`XLEN+4:0] ForwardBTBPrediction, ForwardBTBPredictionF; + logic [`XLEN+3:0] TableBTBPredictionF; logic [`XLEN-1:0] PredPCD; logic UpdateEn; logic TablePredValidF, PredValidD; @@ -79,10 +79,10 @@ module btb #(parameter int Depth = 10 ) ( flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); assign ForwardBTBPrediction = MatchF ? {PredValidF, BTBPredInstrClassF, PredPCF} : - MatchD ? {PredValidD, InstrClassD[2:0], PredPCD} : - {1'b1, InstrClassE[2:0], IEUAdrE} ; + MatchD ? {PredValidD, InstrClassD, PredPCD} : + {1'b1, InstrClassE, IEUAdrE} ; - flopenr #(`XLEN+4) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF); + flopenr #(`XLEN+5) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF); assign {PredValidF, BTBPredInstrClassF, PredPCF} = MatchXF ? ForwardBTBPredictionF : {TablePredValidF, TableBTBPredictionF}; @@ -98,9 +98,9 @@ module btb #(parameter int Depth = 10 ) ( assign UpdateEn = |InstrClassE | AnyWrongPredInstrClassE; // An optimization may be using a PC relative address. - ram2p1r1wbe #(2**Depth, `XLEN+3) memory( + ram2p1r1wbe #(2**Depth, `XLEN+4) memory( .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredictionF), - .ce2(~StallM & ~FlushM), .wa2(PCEIndex), .wd2({InstrClassE[2:0], IEUAdrE}), .we2(UpdateEn), .bwe2('1)); + .ce2(~StallM & ~FlushM), .wa2(PCEIndex), .wd2({InstrClassE, IEUAdrE}), .we2(UpdateEn), .bwe2('1)); flopenrc #(`XLEN+1) BTBD(clk, reset, FlushD, ~StallD, {PredValidF, PredPCF}, {PredValidD, PredPCD}); diff --git a/src/ifu/bpred/speculativegshare.sv b/src/ifu/bpred/speculativegshare.sv index bfbd1bbd..1dd7d4cd 100644 --- a/src/ifu/bpred/speculativegshare.sv +++ b/src/ifu/bpred/speculativegshare.sv @@ -37,9 +37,9 @@ module speculativegshare #(parameter int k = 10 ) ( output logic DirPredictionWrongE, // update input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, - input logic [2:0] PredInstrClassF, + input logic [3:0] PredInstrClassF, input logic [3:0] InstrClassD, InstrClassE, InstrClassM, - input logic [2:0] WrongPredInstrClassD, + input logic [3:0] WrongPredInstrClassD, input logic PCSrcE ); From 717cba270c81889de9ae4622d6b69d318d99044d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 13 Feb 2023 17:10:24 -0600 Subject: [PATCH 07/21] Partial improvement. --- src/ifu/bpred/bpred.sv | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index b833f99e..a6cb0ccb 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -220,9 +220,7 @@ module bpred ( end else begin assign PredInstrClassF = BTBPredInstrClassF; assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) | - PredInstrClassF[2] | - (PredInstrClassF[1] & PredValidF) | - (PredInstrClassF[3] & PredValidF); + PredInstrClassF[1] & PredValidF; end // Part 3 RAS @@ -232,17 +230,18 @@ module bpred ( assign BPPredPCF = PredInstrClassF[2] ? RASPCF : PredPCF; -/* -----\/----- EXCLUDED -----\/----- assign InstrClassD[0] = BranchD; assign InstrClassD[1] = JumpD ; assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 - -----/\----- EXCLUDED -----/\----- */ + +/* -----\/----- EXCLUDED -----\/----- assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return (InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5 assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch + -----/\----- EXCLUDED -----/\----- */ @@ -274,7 +273,7 @@ module bpred ( assign AnyWrongPredInstrClassD = |WrongPredInstrClassD; // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. - assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | (AnyWrongPredInstrClassE & ~|InstrClassE); + assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE | (AnyWrongPredInstrClassE & ~|InstrClassE)); //assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; // Output the predicted PC or corrected PC on miss-predict. From a80dbd3aec06deb87eab65534ff4fdb7b797d551 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 13 Feb 2023 17:23:56 -0600 Subject: [PATCH 08/21] Further branch predictor improvements. --- src/ifu/bpred/bpred.sv | 59 +++--------------------------------------- 1 file changed, 4 insertions(+), 55 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index a6cb0ccb..c86c0979 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -149,11 +149,9 @@ module bpred ( .InstrClassE); // the branch predictor needs a compact decoding of the instruction class. - ///// ********* THIS IS NOT THE ISSUE. -/* -----\/----- EXCLUDED -----\/----- if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode logic [4:0] CompressedOpcF; - logic [2:0] InstrClassF; + logic [3:0] InstrClassF; logic cjal, cj, cjr, cjalr, CJumpF, CBranchF; logic JumpF, BranchF; @@ -174,49 +172,12 @@ module bpred ( assign InstrClassF[2] = (JumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or r5 (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); - //assign InstrClassF[3] = (JumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 - // (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); - - - assign PredInstrClassF = InstrClassF; - assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | - PredInstrClassF[1]; - end else begin - assign PredInstrClassF = BTBPredInstrClassF; - assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) | - PredInstrClassF[1] & PredValidF; - end - -----/\----- EXCLUDED -----/\----- */ - // the branch predictor needs a compact decoding of the instruction class. - if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode - logic [4:0] CompressedOpcF; - logic [3:0] InstrClassF; - logic cjal, cj, cjr, cjalr; - - assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; - - assign cjal = CompressedOpcF == 5'h09 & `XLEN == 32; - assign cj = CompressedOpcF == 5'h0d; - assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; - assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; - - assign InstrClassF[0] = PostSpillInstrRawF[6:0] == 7'h63 | - (`C_SUPPORTED & CompressedOpcF[4:1] == 4'h7); - - assign InstrClassF[1] = (PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) != 5'h01 & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump register, but not return - (PostSpillInstrRawF[6:0] == 7'h6F & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump, RD != x1 or x5 - (`C_SUPPORTED & (cj | (cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01)) )); - - assign InstrClassF[2] = PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01 | // return must return to ra or r5 - (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); - - assign InstrClassF[3] = ((PostSpillInstrRawF[6:0] & 7'h77) == 7'h67 & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 + assign InstrClassF[3] = (JumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); + assign PredInstrClassF = InstrClassF; assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | - PredInstrClassF[2] | PredInstrClassF[1] | - PredInstrClassF[3]; end else begin assign PredInstrClassF = BTBPredInstrClassF; assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) | @@ -235,16 +196,6 @@ module bpred ( assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 -/* -----\/----- EXCLUDED -----\/----- - assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 - assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 - assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return - (InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5 - assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch - -----/\----- EXCLUDED -----/\----- */ - - - flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); @@ -256,9 +207,8 @@ module bpred ( // pipeline the class flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); - //flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); + flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE); - assign AnyWrongPredInstrClassE = InstrClassE != PredInstrClassE; // Check the prediction // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. @@ -299,7 +249,6 @@ module bpred ( // could be wrong or the fall through address selected for branch predict not taken. // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of // both without the above inaccuracies. - //assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] | InstrClassE[3]) & PCSrcE; assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]) & PCSrcE; assign RASPredPCWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE; From b298a8afc56f4daa200df9b012f06f320ff8cba7 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 13 Feb 2023 17:29:51 -0600 Subject: [PATCH 09/21] Created copy of gshare. I think there may be a simpler implementation. --- src/ifu/bpred/bpred.sv | 304 +---------------------------------- src/ifu/bpred/gshare_copy.sv | 80 +++++++++ testbench/tests.vh | 2 +- 3 files changed, 83 insertions(+), 303 deletions(-) create mode 100644 src/ifu/bpred/gshare_copy.sv diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index c86c0979..69bec3e7 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -76,7 +76,6 @@ module bpred ( logic [`XLEN-1:0] PredPCF, RASPCF; logic PredictionPCWrongE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; - logic [3:0] InstrClassF; logic [3:0] InstrClassD; logic [3:0] InstrClassE; logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE; @@ -177,7 +176,7 @@ module bpred ( assign PredInstrClassF = InstrClassF; assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | - PredInstrClassF[1] | + PredInstrClassF[1]; end else begin assign PredInstrClassF = BTBPredInstrClassF; assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) | @@ -224,7 +223,7 @@ module bpred ( // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE | (AnyWrongPredInstrClassE & ~|InstrClassE)); - //assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; + //assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; // this does not work for cubic benchmark // Output the predicted PC or corrected PC on miss-predict. // Selects the BP or PC+2/4. @@ -263,302 +262,3 @@ module bpred ( flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); endmodule -/* -----\/----- EXCLUDED -----\/----- -/////////////////////////////////////////// -// bpred.sv -// -// Written: Ross Thomposn ross1728@gmail.com -// Created: 12 February 2021 -// Modified: 19 January 2023 -// -// Purpose: Branch direction prediction and jump/branch target prediction. -// Prediction made during the fetch stage and corrected in the execution stage. -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -`define INSTR_CLASS_PRED 1 - -module bpred ( - input logic clk, reset, - input logic StallF, StallD, StallE, StallM, StallW, - input logic FlushD, FlushE, FlushM, FlushW, - // Fetch stage - // the prediction - input logic InstrValidD, InstrValidE, - input logic BranchD, BranchE, - input logic JumpD, JumpE, - input logic [31:0] InstrD, // Decompressed decode stage instruction. Used to decode instruction class - input logic [`XLEN-1:0] PCNextF, // Next Fetch Address - input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4 - output logic [`XLEN-1:0] PCNext1F, // Branch Predictor predicted or corrected fetch address on miss prediction - output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage - - // Update Predictor - input logic [`XLEN-1:0] PCF, // Fetch stage instruction address - input logic [`XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took - input logic [`XLEN-1:0] PCE, // Execution stage instruction address - input logic [`XLEN-1:0] PCM, // Memory stage instruction address - - input logic [31:0] PostSpillInstrRawF, // Instruction - - // Branch and jump outcome - input logic PCSrcE, // Executation stage branch is taken - input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address - input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) - output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br - output logic JumpOrTakenBranchM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br - - // Report branch prediction status - output logic BPPredWrongE, // Prediction is wrong - output logic BPPredWrongM, // Prediction is wrong - output logic DirPredictionWrongM, // Prediction direction is wrong - output logic BTBPredPCWrongM, // Prediction target wrong - output logic RASPredPCWrongM, // RAS prediction is wrong - output logic PredictionInstrClassWrongM // Class prediction is wrong - ); - - logic PredValidF; - logic [1:0] DirPredictionF; - - logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD, PredInstrClassE; - logic [`XLEN-1:0] PredPCF, RASPCF; - logic TargetWrongE; - logic FallThroughWrongE; - logic PredictionPCWrongE; - logic PredictionInstrClassWrongE; - logic [3:0] InstrClassF, InstrClassD, InstrClassE, InstrClassW; - logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE; - - logic SelBPPredF; - logic [`XLEN-1:0] BPPredPCF; - logic [`XLEN-1:0] PCNext0F; - logic [`XLEN-1:0] PCCorrectE; - logic [3:0] WrongPredInstrClassD; - - - logic BTBTargetWrongE; - logic RASTargetWrongE; - logic JumpOrTakenBranchE; - - logic [`XLEN-1:0] PredPCD, PredPCE, RASPCD, RASPCE; - - // Part 1 branch direction prediction - // look into the 2 port Sram model. something is wrong. - if (`BPRED_TYPE == "BPTWOBIT") begin:Predictor - twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); - - end else if (`BPRED_TYPE == "BPGLOBAL") begin:Predictor - globalhistory #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); - -/-* -----\/----- EXCLUDED -----\/----- - end else if (`BPRED_TYPE == "BPSPECULATIVEGLOBAL") begin:Predictor - speculativeglobalhistory #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .DirPredictionF, .DirPredictionWrongE, - .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), - .BranchInstrW(InstrClassW[0]), .WrongPredInstrClassD, .PCSrcE); - - end else if (`BPRED_TYPE == "BPGSHARE") begin:Predictor - gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .PCNextF, .PCE, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); - -----/\----- EXCLUDED -----/\----- *-/ - - end else if (`BPRED_TYPE == "BPSPECULATIVEGSHARE") begin:Predictor - speculativegshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .DirPredictionF, .DirPredictionWrongE, - .PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE); - - end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor - // *** Fix me -/-* -----\/----- EXCLUDED -----\/----- - localHistoryPredictor DirPredictor(.clk, - .reset, .StallF, .StallE, - .LookUpPC(PCNextF), - .Prediction(DirPredictionF), - // update - .UpdatePC(PCE), - .UpdateEN(InstrClassE[0] & ~StallE), - .PCSrcE, - .UpdatePrediction(InstrClassE[0])); - -----/\----- EXCLUDED -----/\----- *-/ - end - - // this predictor will have two pieces of data, - // 1) A direction (1 = Taken, 0 = Not Taken) - // 2) Any information which is necessary for the predictor to build its next state. - // For a 2 bit table this is the prediction count. - - // Part 2 Branch target address prediction - // *** For now the BTB will house the direct and indirect targets - - btb TargetPredictor(.clk, .reset, .StallF, .StallD, .StallM, .FlushD, .FlushM, - .PCNextF, .PCF, .PCD, .PCE, - .PredPCF, - .BTBPredInstrClassF, - .PredValidF, - .AnyWrongPredInstrClassE(PredictionInstrClassWrongE), - .IEUAdrE, - .InstrClassD, - .InstrClassE); - - // the branch predictor needs a compact decoding of the instruction class. - if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode - logic [4:0] CompressedOpcF; - logic [3:0] InstrClassF; - logic cjal, cj, cjr, cjalr; - - assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; - - assign cjal = CompressedOpcF == 5'h09 & `XLEN == 32; - assign cj = CompressedOpcF == 5'h0d; - assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; - assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; - - assign InstrClassF[0] = PostSpillInstrRawF[6:0] == 7'h63 | - (`C_SUPPORTED & CompressedOpcF[4:1] == 4'h7); - - assign InstrClassF[1] = (PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) != 5'h01 & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump register, but not return - (PostSpillInstrRawF[6:0] == 7'h6F & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump, RD != x1 or x5 - (`C_SUPPORTED & (cj | (cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01)) )); - - assign InstrClassF[2] = PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01 | // return must return to ra or r5 - (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); - - assign InstrClassF[3] = ((PostSpillInstrRawF[6:0] & 7'h77) == 7'h67 & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 - (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); - assign PredInstrClassF = InstrClassF; - assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | - PredInstrClassF[2] | - PredInstrClassF[1] | - PredInstrClassF[3]; - end else begin - assign PredInstrClassF = BTBPredInstrClassF; - assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) | - PredInstrClassF[2] | - (PredInstrClassF[1] & PredValidF) | - (PredInstrClassF[3] & PredValidF); - end - - // Part 3 RAS - RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .PredInstrClassF, .InstrClassD, .InstrClassE, - .WrongPredInstrClassD, .RASPCF, .PCLinkE); - - assign BPPredPCF = PredInstrClassF[2] ? RASPCF : PredPCF; - - assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 - assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 - assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return - (InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5 - assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch - - flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); - flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); - flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); - flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); - - // branch predictor - flopenrc #(4) BPPredWrongRegM(clk, reset, FlushM, ~StallM, - {DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE, PredictionInstrClassWrongE}, - {DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM}); - - // pipeline the class - flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); - flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE); - - // Check the prediction - // first check if the target or fallthrough address matches what was predicted. - assign TargetWrongE = IEUAdrE != PCD; - assign FallThroughWrongE = PCLinkE != PCD; - // If the target is taken check the target rather than fallthrough. The instruction needs to be a branch if PCSrcE is selected - // Remember the bpred can incorrectly predict a non cfi instruction as a branch taken. If the real instruction is non cfi - // it must have selected the fall through. - assign PredictionPCWrongE = PCCorrectE != PCD; - -// assign PredictionPCWrongE = (PCSrcE & (|InstrClassE) ? TargetWrongE : FallThroughWrongE); - - // The branch direction also need to checked. - // However if the direction is wrong then the pc will be wrong. This is only relavent to checking the - // accuracy of the direciton prediction. - //assign DirPredictionWrongE = (BPPredE[1] ^ PCSrcE) & InstrClassE[0]; - - // Finally we need to check if the class is wrong. When the class is wrong the BTB needs to be updated. - // Also we want to track this in a performance counter. - assign PredictionInstrClassWrongE = InstrClassE != PredInstrClassE; - - // We want to output to the instruction fetch if the PC fetched was wrong. If by chance the predictor was wrong about - // the direction or class, but correct about the target we don't have the flush the pipeline. However we still - // need this information to verify the accuracy of the predictors. - assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | BPPredClassNonCFIWrongE; -// assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; - - // If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter. - //assign BTBPredPCWrongE = (InstrClassE[3] | InstrClassE[1] | InstrClassE[0]) & PredictionPCWrongE; - //assign BTBPredPCWrongE = TargetWrongE & (InstrClassE[3] | InstrClassE[1] | InstrClassE[0]) & PCSrcE; - assign BTBPredPCWrongE = BTBTargetWrongE; - - // similar with RAS. Over counts ras if the class prediction was wrong. - //assign RASPredPCWrongE = TargetWrongE & InstrClassE[2] & PCSrcE; - assign RASPredPCWrongE = RASTargetWrongE; - // Finally if the real instruction class is non CFI but the predictor said it was we need to count. - assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE; - - // branch class prediction wrong. - assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD; - - // Selects the BP or PC+2/4. - mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, SelBPPredF, PCNext0F); - // If the prediction is wrong select the correct address. - mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F); - // Correct branch/jump target. - mux2 #(`XLEN) pccorrectemux(PCLinkE, IEUAdrE, PCSrcE, PCCorrectE); - - // If the fence/csrw was predicted as a taken branch then we select PCF, rather PCE. - // Effectively this is PCM+4 or the non-existant PCLinkM - // if(`BPCLASS) begin - mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPPredWrongM, NextValidPCE); - // end else begin - // assign NextValidPCE = PCE; - // end - - // performance counters - // 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now - // 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal) - // 3. target ras (ras target wrong / class[2]) - // 4. direction (br dir wrong / class[0]) - - assign BTBTargetWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] | InstrClassE[3]) & PCSrcE; - assign RASTargetWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE; - - assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1] | InstrClassE[3]; - - flopenrc #(`XLEN) BTBTargetDReg(clk, reset, FlushD, ~StallD, PredPCF, PredPCD); - flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, PredPCD, PredPCE); - - flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); - flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); - -endmodule - -----/\----- EXCLUDED -----/\----- */ diff --git a/src/ifu/bpred/gshare_copy.sv b/src/ifu/bpred/gshare_copy.sv new file mode 100644 index 00000000..119056c8 --- /dev/null +++ b/src/ifu/bpred/gshare_copy.sv @@ -0,0 +1,80 @@ +/////////////////////////////////////////// +// globalHistoryPredictor.sv +// +// Written: Shreya Sanghai +// Email: ssanghai@hmc.edu +// Created: March 16, 2021 +// Modified: +// +// Purpose: Global History Branch predictor with parameterized global history register +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module gshare_copy #(parameter k = 10) ( + input logic clk, + input logic reset, + input logic StallF, StallD, StallE, StallM, + input logic FlushD, FlushE, FlushM, + output logic [1:0] DirPredictionF, + output logic DirPredictionWrongE, + // update + input logic [`XLEN-1:0] PCNextF, PCE, + input logic BranchInstrE, BranchInstrM, PCSrcE +); + + logic [k-1:0] IndexNextF, IndexE; + logic [1:0] DirPredictionD, DirPredictionE; + logic [1:0] NewDirPredictionE, NewDirPredictionM; + + logic [k-1:0] GHRF, GHRD, GHRE, GHR; + logic [k-1:0] GHRNext; + logic PCSrcM; + + assign IndexNextF = GHR & {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; + assign IndexE = GHRE & {PCE[k+1] ^ PCE[1], PCE[k:2]}; + + ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), + .ce1(~StallF), .ce2(~StallM & ~FlushM), + .ra1(IndexNextF), + .rd1(DirPredictionF), + .wa2(IndexE), + .wd2(NewDirPredictionE), + .we2(BranchInstrE & ~StallM & ~FlushM), + .bwe2(1'b1)); + + flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); + flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); + + satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); + flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); + + assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + + assign GHRNext = BranchInstrM ? {PCSrcM, GHR[k-1:1]} : GHR; + flopenr #(k) GHRReg(clk, reset, ~StallM & ~FlushM & BranchInstrM, GHRNext, GHR); + flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); + + flopenrc #(k) GHRFReg(clk, reset, FlushD, ~StallF, GHR, GHRF); + flopenrc #(k) GHRDReg(clk, reset, FlushD, ~StallD, GHRF, GHRD); + flopenrc #(k) GHREReg(clk, reset, FlushE, ~StallE, GHRD, GHRE); + + +endmodule diff --git a/testbench/tests.vh b/testbench/tests.vh index 1e1065cf..ec6f04f4 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -50,9 +50,9 @@ string tvpaths[] = '{ string embench[] = '{ `EMBENCH, + "bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches "bd_speedopt_speed/src/aha-mont64/aha-mont64", "bd_speedopt_speed/src/crc32/crc32", - "bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches "bd_speedopt_speed/src/edn/edn", "bd_speedopt_speed/src/huffbench/huffbench", "bd_speedopt_speed/src/matmult-int/matmult-int", From 9f25b53b362fa8870633a503e09383f2e0abf3da Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 13 Feb 2023 17:57:05 -0600 Subject: [PATCH 10/21] Fixed bug in basic gshare implementation. Should be a better comparison to the speculative versions now. --- src/ifu/bpred/gshare.sv | 4 ++-- src/ifu/bpred/gshare_copy.sv | 20 +++++++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index fde1a082..4cffbf93 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -48,8 +48,8 @@ module gshare #(parameter k = 10) ( logic [k-1:0] GHRNext; logic PCSrcM; - assign IndexNextF = GHR & {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; - assign IndexE = GHRE & {PCE[k+1] ^ PCE[1], PCE[k:2]}; + assign IndexNextF = GHR ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; + assign IndexE = GHRE ^ {PCE[k+1] ^ PCE[1], PCE[k:2]}; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallM & ~FlushM), diff --git a/src/ifu/bpred/gshare_copy.sv b/src/ifu/bpred/gshare_copy.sv index 119056c8..d73bce20 100644 --- a/src/ifu/bpred/gshare_copy.sv +++ b/src/ifu/bpred/gshare_copy.sv @@ -31,16 +31,16 @@ module gshare_copy #(parameter k = 10) ( input logic clk, input logic reset, - input logic StallF, StallD, StallE, StallM, - input logic FlushD, FlushE, FlushM, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, // update - input logic [`XLEN-1:0] PCNextF, PCE, + input logic [`XLEN-1:0] PCNextF, PCM, input logic BranchInstrE, BranchInstrM, PCSrcE ); - logic [k-1:0] IndexNextF, IndexE; + logic [k-1:0] IndexNextF, IndexM; logic [1:0] DirPredictionD, DirPredictionE; logic [1:0] NewDirPredictionE, NewDirPredictionM; @@ -48,16 +48,16 @@ module gshare_copy #(parameter k = 10) ( logic [k-1:0] GHRNext; logic PCSrcM; - assign IndexNextF = GHR & {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; - assign IndexE = GHRE & {PCE[k+1] ^ PCE[1], PCE[k:2]}; + assign IndexNextF = GHRNext ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; + assign IndexM = GHR ^ {PCM[k+1] ^ PCM[1], PCM[k:2]}; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallM & ~FlushM), .ra1(IndexNextF), .rd1(DirPredictionF), - .wa2(IndexE), - .wd2(NewDirPredictionE), - .we2(BranchInstrE & ~StallM & ~FlushM), + .wa2(IndexM), + .wd2(NewDirPredictionM), + .we2(BranchInstrM & ~StallW & ~FlushW), .bwe2(1'b1)); flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); @@ -72,9 +72,11 @@ module gshare_copy #(parameter k = 10) ( flopenr #(k) GHRReg(clk, reset, ~StallM & ~FlushM & BranchInstrM, GHRNext, GHR); flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); +/* -----\/----- EXCLUDED -----\/----- flopenrc #(k) GHRFReg(clk, reset, FlushD, ~StallF, GHR, GHRF); flopenrc #(k) GHRDReg(clk, reset, FlushD, ~StallD, GHRF, GHRD); flopenrc #(k) GHREReg(clk, reset, FlushE, ~StallE, GHRD, GHRE); + -----/\----- EXCLUDED -----/\----- */ endmodule From bbc60952608ea690b8f8636d4c6f898ff4840ed1 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 13 Feb 2023 18:07:32 -0600 Subject: [PATCH 11/21] Updated global history predictor. --- src/ifu/bpred/speculativeglobalhistory.sv | 25 ++++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/ifu/bpred/speculativeglobalhistory.sv b/src/ifu/bpred/speculativeglobalhistory.sv index 645ac99e..1ddd3dfe 100644 --- a/src/ifu/bpred/speculativeglobalhistory.sv +++ b/src/ifu/bpred/speculativeglobalhistory.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// speculativeglobalhistory.sv +// gsharePredictor.sv // // Written: Shreya Sanghai // Email: ssanghai@hmc.edu @@ -36,8 +36,10 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, // update - input logic [3:0] InstrClassD, InstrClassE, - input logic [2:0] PredInstrClassF, WrongPredInstrClassD, + input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, + input logic [3:0] PredInstrClassF, + input logic [3:0] InstrClassD, InstrClassE, InstrClassM, + input logic [3:0] WrongPredInstrClassD, input logic PCSrcE ); @@ -47,9 +49,9 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; logic [1:0] NewDirPredictionE; - logic [k-1:0] GHRF, GHRD, GHRE; + logic [k-1:0] GHRF, GHRD, GHRE, GHRM; logic GHRLastF; - logic [k-1:0] GHRNextF, GHRNextD, GHRNextE; + logic [k-1:0] GHRNextF, GHRNextD, GHRNextE, GHRNextM; logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE; logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF; @@ -57,8 +59,8 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( assign IndexNextF = GHRNextF; assign IndexF = GHRF; - assign IndexD = GHRD[k-1:0]; - assign IndexE = GHRE[k-1:0]; + assign IndexD = GHRD; + assign IndexE = GHRE; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF | reset), .ce2(~StallM & ~FlushM), @@ -111,15 +113,18 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( // If it is wrong and branch does exist then shift right and insert the prediction. // If the branch does not exist then shift left and use GHRLastF to restore the LSB. logic [k-1:0] GHRClassWrong; - mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, InstrClassD[0], GHRClassWrong); + mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, ~InstrClassD[0], GHRClassWrong); // As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE. mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD); flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD); - mux3 #(k) GHREMux(GHRD, GHRE, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); + mux3 #(k) GHREMux(GHRD, GHRNextM, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE); - flopenr #(k) GHREReg(clk, reset, ((InstrClassE[0] & ~FlushM) & ~StallE) | FlushE, GHRNextE, GHRE); + flopenr #(k) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, GHRE); + + assign GHRNextM = FlushM ? GHRM : GHRE; + flopenr #(k) GHRMReg(clk, reset, (InstrClassM[0] & ~StallM) | FlushM, GHRNextM, GHRM); assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0]; From a579bbcdd184534d4be47ec7a5dace348a019112 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 13 Feb 2023 18:08:13 -0600 Subject: [PATCH 12/21] Fixed global history predictor. --- src/ifu/bpred/speculativeglobalhistory.sv | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ifu/bpred/speculativeglobalhistory.sv b/src/ifu/bpred/speculativeglobalhistory.sv index 1ddd3dfe..8ebda61c 100644 --- a/src/ifu/bpred/speculativeglobalhistory.sv +++ b/src/ifu/bpred/speculativeglobalhistory.sv @@ -36,7 +36,6 @@ module speculativeglobalhistory #(parameter int k = 10 ) ( output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, // update - input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, input logic [3:0] PredInstrClassF, input logic [3:0] InstrClassD, InstrClassE, InstrClassM, input logic [3:0] WrongPredInstrClassD, From 33d2bf84f8f2376cfb9b7a46e98d8fd49c7240ef Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 13 Feb 2023 18:08:51 -0600 Subject: [PATCH 13/21] More fixeds to global history. --- src/ifu/bpred/bpred.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 69bec3e7..a88845ef 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -107,7 +107,7 @@ module bpred ( end else if (`BPRED_TYPE == "BPSPECULATIVEGLOBAL") begin:Predictor speculativeglobalhistory #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .DirPredictionF, .DirPredictionWrongE, - .PredInstrClassF, .InstrClassD, .InstrClassE, .WrongPredInstrClassD, .PCSrcE); + .PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE); end else if (`BPRED_TYPE == "BPGSHARE") begin:Predictor gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, From 9c9acc0055c8256e2c0d1d23c70e5077341fd7b2 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 13 Feb 2023 18:52:52 -0600 Subject: [PATCH 14/21] Updated gshare (no speculation) to have better performance. --- src/ifu/bpred/bpred.sv | 4 +- src/ifu/bpred/gshare.sv | 17 ++++---- src/ifu/bpred/gshare_copy.sv | 80 ------------------------------------ 3 files changed, 11 insertions(+), 90 deletions(-) delete mode 100644 src/ifu/bpred/gshare_copy.sv diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 69bec3e7..6ed6c8c8 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -110,8 +110,8 @@ module bpred ( .PredInstrClassF, .InstrClassD, .InstrClassE, .WrongPredInstrClassD, .PCSrcE); end else if (`BPRED_TYPE == "BPGSHARE") begin:Predictor - gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .PCNextF, .PCE, .DirPredictionF, .DirPredictionWrongE, + gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, + .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); end else if (`BPRED_TYPE == "BPSPECULATIVEGSHARE") begin:Predictor diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index fde1a082..35a73fb0 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -31,12 +31,12 @@ module gshare #(parameter k = 10) ( input logic clk, input logic reset, - input logic StallF, StallD, StallE, StallM, - input logic FlushD, FlushE, FlushM, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, // update - input logic [`XLEN-1:0] PCNextF, PCE, + input logic [`XLEN-1:0] PCNextF, PCM, input logic BranchInstrE, BranchInstrM, PCSrcE ); @@ -44,20 +44,20 @@ module gshare #(parameter k = 10) ( logic [1:0] DirPredictionD, DirPredictionE; logic [1:0] NewDirPredictionE, NewDirPredictionM; - logic [k-1:0] GHRF, GHRD, GHRE, GHR; + logic [k-1:0] GHRF, GHRD, GHRE, GHRM, GHR; logic [k-1:0] GHRNext; logic PCSrcM; - assign IndexNextF = GHR & {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; - assign IndexE = GHRE & {PCE[k+1] ^ PCE[1], PCE[k:2]}; + assign IndexNextF = GHR ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; + assign IndexE = GHRM ^ {PCM[k+1] ^ PCM[1], PCM[k:2]}; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallM & ~FlushM), .ra1(IndexNextF), .rd1(DirPredictionF), .wa2(IndexE), - .wd2(NewDirPredictionE), - .we2(BranchInstrE & ~StallM & ~FlushM), + .wd2(NewDirPredictionM), + .we2(BranchInstrM & ~StallW & ~FlushW), .bwe2(1'b1)); flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); @@ -75,6 +75,7 @@ module gshare #(parameter k = 10) ( flopenrc #(k) GHRFReg(clk, reset, FlushD, ~StallF, GHR, GHRF); flopenrc #(k) GHRDReg(clk, reset, FlushD, ~StallD, GHRF, GHRD); flopenrc #(k) GHREReg(clk, reset, FlushE, ~StallE, GHRD, GHRE); + flopenrc #(k) GHRMReg(clk, reset, FlushM, ~StallM, GHRE, GHRM); endmodule diff --git a/src/ifu/bpred/gshare_copy.sv b/src/ifu/bpred/gshare_copy.sv deleted file mode 100644 index 119056c8..00000000 --- a/src/ifu/bpred/gshare_copy.sv +++ /dev/null @@ -1,80 +0,0 @@ -/////////////////////////////////////////// -// globalHistoryPredictor.sv -// -// Written: Shreya Sanghai -// Email: ssanghai@hmc.edu -// Created: March 16, 2021 -// Modified: -// -// Purpose: Global History Branch predictor with parameterized global history register -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module gshare_copy #(parameter k = 10) ( - input logic clk, - input logic reset, - input logic StallF, StallD, StallE, StallM, - input logic FlushD, FlushE, FlushM, - output logic [1:0] DirPredictionF, - output logic DirPredictionWrongE, - // update - input logic [`XLEN-1:0] PCNextF, PCE, - input logic BranchInstrE, BranchInstrM, PCSrcE -); - - logic [k-1:0] IndexNextF, IndexE; - logic [1:0] DirPredictionD, DirPredictionE; - logic [1:0] NewDirPredictionE, NewDirPredictionM; - - logic [k-1:0] GHRF, GHRD, GHRE, GHR; - logic [k-1:0] GHRNext; - logic PCSrcM; - - assign IndexNextF = GHR & {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]}; - assign IndexE = GHRE & {PCE[k+1] ^ PCE[1], PCE[k:2]}; - - ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), - .ce1(~StallF), .ce2(~StallM & ~FlushM), - .ra1(IndexNextF), - .rd1(DirPredictionF), - .wa2(IndexE), - .wd2(NewDirPredictionE), - .we2(BranchInstrE & ~StallM & ~FlushM), - .bwe2(1'b1)); - - flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); - flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); - - satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); - flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); - - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; - - assign GHRNext = BranchInstrM ? {PCSrcM, GHR[k-1:1]} : GHR; - flopenr #(k) GHRReg(clk, reset, ~StallM & ~FlushM & BranchInstrM, GHRNext, GHR); - flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); - - flopenrc #(k) GHRFReg(clk, reset, FlushD, ~StallF, GHR, GHRF); - flopenrc #(k) GHRDReg(clk, reset, FlushD, ~StallD, GHRF, GHRD); - flopenrc #(k) GHREReg(clk, reset, FlushE, ~StallE, GHRD, GHRE); - - -endmodule From 0ac99d22334a078b1199861810f38547eba0ddab Mon Sep 17 00:00:00 2001 From: eroom1966 Date: Wed, 15 Feb 2023 11:12:30 +0000 Subject: [PATCH 15/21] add files to support coverage --- sim/imperas.ic | 1 - sim/wally-imperas-cov.do | 68 ++++++++++++++++++++++++++++++++++ sim/wally-imperas.do | 1 + testbench/testbench_imperas.sv | 1 + 4 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 sim/wally-imperas-cov.do diff --git a/sim/imperas.ic b/sim/imperas.ic index 302773d0..d28234bb 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -1,6 +1,5 @@ --override cpu/show_c_prefix=T --override cpu/unaligned=F ---override cpu/mstatus_FS=1 --override cpu/ignore_non_leaf_DAU=1 # Enable the Imperas instruction coverage diff --git a/sim/wally-imperas-cov.do b/sim/wally-imperas-cov.do new file mode 100644 index 00000000..9ac7a39e --- /dev/null +++ b/sim/wally-imperas-cov.do @@ -0,0 +1,68 @@ +# wally.do +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +# +# Modification by Oklahoma State University & Harvey Mudd College +# Use with Testbench +# James Stine, 2008; David Harris 2021 +# Go Cowboys!!!!!! +# +# Takes 1:10 to run RV64IC tests using gui + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +# suppress spurious warnngs about +# "Extra checking for conflicts with always_comb done at vopt time" +# because vsim will run vopt + +# start and run simulation +# remove +acc flag for faster sim during regressions if there is no need to access internal signals + # *** modelsim won't take `PA_BITS, but will take other defines for the lengths of DTIM_RANGE and IROM_LEN. For now just live with the warnings. +vlog +incdir+../config/$1 \ + +incdir+../config/shared \ + +define+USE_IMPERAS_DV \ + +incdir+$env(IMPERAS_HOME)/ImpPublic/include/host \ + +incdir+$env(IMPERAS_HOME)/ImpProprietary/include/host \ + $env(IMPERAS_HOME)/ImpPublic/source/host/rvvi/rvvi-api-pkg.sv \ + $env(IMPERAS_HOME)/ImpPublic/source/host/rvvi/rvvi-trace.sv \ + $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/rvvi-pkg.sv \ + $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2api.sv \ + $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2log.sv \ + \ + +define+INCLUDE_TRACE2COV +define+COVER_BASE_RV64I +define+COVER_LEVEL_DV_PR_EXT \ + +define+COVER_RV64I \ + +define+COVER_RV64C \ + +define+COVER_RV64M \ + +incdir+$env(IMPERAS_HOME)/ImpProprietary/source/host/riscvISACOV/source \ + $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2cov.sv \ + \ + ../testbench/testbench_imperas.sv \ + ../testbench/common/*.sv \ + ../src/*/*.sv \ + ../src/*/*/*.sv \ + -suppress 2583 \ + -suppress 7063 \ + +acc +vopt +acc work.testbench -G DEBUG=1 -o workopt +vsim workopt +nowarn3829 -fatal 7 \ + -sv_lib $env(IMPERAS_HOME)/lib/Linux64/ImperasLib/imperas.com/verification/riscv/1.0/model \ + +testDir=$env(TESTDIR) $env(OTHERFLAGS) +TRACE2COV_ENABLE=1 \ + -do "coverage save -onexit ./riscv.ucdb" + +view wave +#-- display input and output signals as hexidecimal values +# add log -recursive /* +# do wave.do + +run -all + +noview ../testbench/testbench_imperas.sv +view wave + +quit -f diff --git a/sim/wally-imperas.do b/sim/wally-imperas.do index 4164b7bd..d10c57d5 100644 --- a/sim/wally-imperas.do +++ b/sim/wally-imperas.do @@ -34,6 +34,7 @@ vlog +incdir+../config/$1 \ $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/rvvi-pkg.sv \ $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2api.sv \ $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2log.sv \ + $env(IMPERAS_HOME)/ImpProprietary/source/host/rvvi/trace2cov.sv \ ../testbench/testbench_imperas.sv \ ../testbench/common/*.sv \ ../src/*/*.sv \ diff --git a/testbench/testbench_imperas.sv b/testbench/testbench_imperas.sv index e52a79c2..c7d987b8 100644 --- a/testbench/testbench_imperas.sv +++ b/testbench/testbench_imperas.sv @@ -125,6 +125,7 @@ module testbench; `ifdef USE_IMPERAS_DV trace2log idv_trace2log(rvvi); + trace2cov idv_trace2cov(rvvi); // enabling of comparison types trace2api #(.CMP_PC (1), From 744991bd5a98b689300823fae3e0df3a8a23d092 Mon Sep 17 00:00:00 2001 From: James Stine Date: Wed, 15 Feb 2023 18:12:12 -0600 Subject: [PATCH 16/21] Update if-then-else for ram items --- src/generic/mem/ram1p1rwbe.sv | 14 +++++++------- src/generic/mem/ram2p1r1wbe.sv | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/generic/mem/ram1p1rwbe.sv b/src/generic/mem/ram1p1rwbe.sv index e33e708a..f1e784c1 100644 --- a/src/generic/mem/ram1p1rwbe.sv +++ b/src/generic/mem/ram1p1rwbe.sv @@ -55,10 +55,10 @@ module ram1p1rwbe #(parameter DEPTH=128, WIDTH=256) ( logic [WIDTH-1:0] BitWriteMask; for (index=0; index < WIDTH; index++) assign BitWriteMask[index] = bwe[index/8]; - TS1N28HPCPSVTB64X128M4SW sram1A (.CLK(clk), .CEB(~ce), .WEB(~we), + ram1p1rwbe_64x128 sram1A (.CLK(clk), .CEB(~ce), .WEB(~we), .A(addr), .D(din), .BWEB(~BitWriteMask), .Q(dout)); - + end else if (`USE_SRAM == 1 && WIDTH == 44 && DEPTH == 64) begin genvar index; // 64 x 44-bit SRAM @@ -69,15 +69,15 @@ module ram1p1rwbe #(parameter DEPTH=128, WIDTH=256) ( .A(addr), .D(din), .BWEB(~BitWriteMask), .Q(dout)); - end if (`USE_SRAM == 1 && WIDTH == 128 && DEPTH == 32) begin + end else if (`USE_SRAM == 1 && WIDTH == 128 && DEPTH == 32) begin genvar index; // 64 x 128-bit SRAM logic [WIDTH-1:0] BitWriteMask; for (index=0; index < WIDTH; index++) assign BitWriteMask[index] = bwe[index/8]; - TS1N28HPCPSVTB64X128M4SW sram1A (.CLK(clk), .CEB(~ce), .WEB(~we), - .A(addr), .D(din), - .BWEB(~BitWriteMask), .Q(dout)); + ram1p1rwbe_128x128 sram1A (.CLK(clk), .CEB(~ce), .WEB(~we), + .A(addr), .D(din), + .BWEB(~BitWriteMask), .Q(dout)); end else if (`USE_SRAM == 1 && WIDTH == 22 && DEPTH == 32) begin genvar index; @@ -85,7 +85,7 @@ module ram1p1rwbe #(parameter DEPTH=128, WIDTH=256) ( logic [WIDTH-1:0] BitWriteMask; for (index=0; index < WIDTH; index++) assign BitWriteMask[index] = bwe[index/8]; - ram1p1rwbe_64x44 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we), + ram1p1rwbe_64x22 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we), .A(addr), .D(din), .BWEB(~BitWriteMask), .Q(dout)); diff --git a/src/generic/mem/ram2p1r1wbe.sv b/src/generic/mem/ram2p1r1wbe.sv index 08c232bb..da099cdc 100644 --- a/src/generic/mem/ram2p1r1wbe.sv +++ b/src/generic/mem/ram2p1r1wbe.sv @@ -64,7 +64,7 @@ module ram2p1r1wbe #(parameter DEPTH=128, WIDTH=256) ( .QA(rd1), .QB()); - end if (`USE_SRAM == 1 && WIDTH == 36 && DEPTH == 1024) begin + end else if (`USE_SRAM == 1 && WIDTH == 36 && DEPTH == 1024) begin ram2p1r1wbe_1024x36 memory1(.CLKA(clk), .CLKB(clk), .CEBA(~ce1), .CEBB(~ce2), From 64826a1ec94711372e00f5e64919a1fbfc13c23e Mon Sep 17 00:00:00 2001 From: James Stine Date: Thu, 16 Feb 2023 08:07:17 -0600 Subject: [PATCH 17/21] Update topo psyn stuff --- synthDC/.synopsys_dc.setup | 20 ++++++++++++++++++++ synthDC/Makefile | 4 +++- synthDC/scripts/synth.tcl | 30 ++++++++++++++++++++++++++++++ synthDC/wallySynth.py | 4 ++-- 4 files changed, 55 insertions(+), 3 deletions(-) diff --git a/synthDC/.synopsys_dc.setup b/synthDC/.synopsys_dc.setup index 2fdc11c2..1310c4dd 100755 --- a/synthDC/.synopsys_dc.setup +++ b/synthDC/.synopsys_dc.setup @@ -21,6 +21,21 @@ if {$tech == "sky130"} { set s10lib /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a lappend search_path $s10lib } +elseif {$tech == "tsmc28psyn"} { + set TLU /home/jstine/TLU+ + set s10lib /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a + lappend search_path $s10lib + set TLUPLUS true + set mw_logic1_net VDD + set mw_logic0_net VSS + set TLU_PDK /import/yukari1/jstine/TLU+/ + set CAPTABLE $TLU/1p8m/ + set MW_REFERENCE_LIBRARY /home/jstine/MW + set MW_TECH_FILE tcbn28hpcplusbwp7t30p140 + set MAX_TLU_FILE $CAPTABLE/crn28hpc+_1p08m+ut-alrdl_5x1z1u_typical.tluplus + set MIN_TLU_FILE $CAPTABLE/crn28hpc+_1p08m+ut-alrdl_5x1z1u_rcworst.tluplus + set PRS_MAP_FILE $MW_REFERENCE_LIBRARY/astro_layername.map +} # Synthetic libraries set synthetic_library [list dw_foundation.sldb] @@ -36,6 +51,11 @@ if {$tech == "sky130"} { } elseif {$tech == "tsmc28"} { lappend target_library $s10lib/tcbn28hpcplusbwp30p140tt0p9v25c.db } +} elseif {$tech == "tsmc28psyn"} { + set mw_reference_library [list ] + lappend target_library $s10lib/tcbn28hpcplusbwp30p140tt0p9v25c.db + lappend mw_reference_library $MW_REFERENCE_LIBRARY/tcbn28hpcplusbwp7t30p140 +} # Set Link Library set link_library "$target_library $synthetic_library" diff --git a/synthDC/Makefile b/synthDC/Makefile index 76531d95..d7fb2b91 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -122,6 +122,8 @@ synth: mkdirecs configs rundc clean rundc: dc_shell-xg-t -64bit -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out +rundctopo: + dc_shell-xg-t -64bit -topographical_mode -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out clean: rm -rf $(OUTPUTDIR)/hdl @@ -132,4 +134,4 @@ clean: rm -f filenames*.log rm -f power.saif rm -f Synopsys_stack_trace_*.txt - rm -f crte_*.txt \ No newline at end of file + rm -f crte_*.txt diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index 1040f35c..83020352 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -49,6 +49,36 @@ set report_default_significant_digits 6 set verilogout_show_unconnected_pins "true" set vhdlout_show_unconnected_pins "true" +# Set up MW List +set MY_LIB_NAME $my_toplevel +# Create MW +if { [shell_is_in_topographical_mode] } { + echo "In Topographical Mode...processing\n" + if {[file isdirectory $MY_LIB_NAME]} { + echo "MW directory already here, deleting/readdding." + [exec rm -rf $my_toplevel] + create_mw_lib -technology $MW_REFERENCE_LIBRARY/$MW_TECH_FILE.tf \ + -mw_reference_library $mw_reference_library $MY_LIB_NAME + } else { + create_mw_lib -technology $MW_REFERENCE_LIBRARY/$MW_TECH_FILE.tf \ + -mw_reference_library $mw_reference_library $MY_LIB_NAME + } + + # Open MW + open_mw_lib $MY_LIB_NAME + + # TLU+ + set_tlu_plus_files -max_tluplus $MAX_TLU_FILE -min_tluplus $MIN_TLU_FILE \ + -tech2itf_map $PRS_MAP_FILE + +} else { + if {[file isdirectory $MY_LIB_NAME]} { + [exec rm -rf $my_toplevel] + echo "MW directory already here, deleting." + } + echo "In normal DC mode...processing\n" +} + # Due to parameterized Verilog must use analyze/elaborate and not # read_verilog/vhdl (change to pull in Verilog and/or VHDL) # diff --git a/synthDC/wallySynth.py b/synthDC/wallySynth.py index 3253607d..cfcfde78 100755 --- a/synthDC/wallySynth.py +++ b/synthDC/wallySynth.py @@ -16,7 +16,7 @@ def mask(command): if __name__ == '__main__': - techs = ['sky90', 'tsmc28'] + techs = ['sky90', 'tsmc28', 'tsmc28psyn'] allConfigs = ['rv32gc', 'rv32imc', 'rv64gc', 'rv64imc', 'rv32e', 'rv32i', 'rv64i'] freqVaryPct = [-20, -12, -8, -6, -4, -2, 0, 2, 4, 6, 8, 12, 20] @@ -61,4 +61,4 @@ if __name__ == '__main__': defaultfreq = 500 if tech == 'sky90' else 1500 freq = args.targetfreq if args.targetfreq else defaultfreq config = args.version if args.version else 'rv64gc' - runSynth(config, mod, tech, freq, maxopt, usesram) \ No newline at end of file + runSynth(config, mod, tech, freq, maxopt, usesram) From 004f8a970e5791b4dba2628ec513f26abc965a69 Mon Sep 17 00:00:00 2001 From: James Stine Date: Thu, 16 Feb 2023 13:38:26 -0600 Subject: [PATCH 18/21] Fix bugs in scripts for synthesis and tsmc28 psyn --- synthDC/.synopsys_dc.setup | 13 +++++++------ synthDC/wallySynth.py | 14 ++++++++------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/synthDC/.synopsys_dc.setup b/synthDC/.synopsys_dc.setup index 1310c4dd..6942e58e 100755 --- a/synthDC/.synopsys_dc.setup +++ b/synthDC/.synopsys_dc.setup @@ -18,20 +18,22 @@ if {$tech == "sky130"} { set s9lib $timing_lib/sky90/sky90_sc/V1.7.4/lib lappend search_path $s9lib } elseif {$tech == "tsmc28"} { - set s10lib /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a + set pdk /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/ + set s10lib $pdk/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a lappend search_path $s10lib } elseif {$tech == "tsmc28psyn"} { set TLU /home/jstine/TLU+ - set s10lib /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a + set pdk /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/ + set s10lib $pdk/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a lappend search_path $s10lib set TLUPLUS true set mw_logic1_net VDD set mw_logic0_net VSS - set TLU_PDK /import/yukari1/jstine/TLU+/ + set TLU_PDK /home/jstine/TLU+/ set CAPTABLE $TLU/1p8m/ set MW_REFERENCE_LIBRARY /home/jstine/MW - set MW_TECH_FILE tcbn28hpcplusbwp7t30p140 + set MW_TECH_FILE tcbn28hpcplusbwp30p140 set MAX_TLU_FILE $CAPTABLE/crn28hpc+_1p08m+ut-alrdl_5x1z1u_typical.tluplus set MIN_TLU_FILE $CAPTABLE/crn28hpc+_1p08m+ut-alrdl_5x1z1u_rcworst.tluplus set PRS_MAP_FILE $MW_REFERENCE_LIBRARY/astro_layername.map @@ -50,11 +52,10 @@ if {$tech == "sky130"} { lappend target_library $s9lib/scc9gena_tt_1.2v_25C.db } elseif {$tech == "tsmc28"} { lappend target_library $s10lib/tcbn28hpcplusbwp30p140tt0p9v25c.db -} } elseif {$tech == "tsmc28psyn"} { set mw_reference_library [list ] lappend target_library $s10lib/tcbn28hpcplusbwp30p140tt0p9v25c.db - lappend mw_reference_library $MW_REFERENCE_LIBRARY/tcbn28hpcplusbwp7t30p140 + lappend mw_reference_library $MW_REFERENCE_LIBRARY/tcbn28hpcplusbwp30p140 } # Set Link Library diff --git a/synthDC/wallySynth.py b/synthDC/wallySynth.py index cfcfde78..876a482d 100755 --- a/synthDC/wallySynth.py +++ b/synthDC/wallySynth.py @@ -5,9 +5,9 @@ import subprocess from multiprocessing import Pool import argparse -def runSynth(config, mod, tech, freq, maxopt, usesram): +def runSynth(config, mod, tech, freq, maxopt, usesram, usetopo): global pool - command = "make synth DESIGN=wallypipelinedcore CONFIG={} MOD={} TECH={} DRIVE=FLOP FREQ={} MAXOPT={} USESRAM={} MAXCORES=1".format(config, mod, tech, freq, maxopt, usesram) + command = "make synth DESIGN=wallypipelinedcore CONFIG={} MOD={} TECH={} DRIVE=FLOP FREQ={} MAXOPT={} USESRAM={} USETOPO={} MAXCORES=1".format(config, mod, tech, freq, maxopt, usesram, usetopo) pool.map(mask, [command]) def mask(command): @@ -33,32 +33,34 @@ if __name__ == '__main__': parser.add_argument("-e", "--tech", choices=techs, help = "Technology") parser.add_argument("-o", "--maxopt", action='store_true', help = "Turn on MAXOPT") parser.add_argument("-r", "--usesram", action='store_true', help = "Use SRAM modules") + parser.add_argument("-topo", "--usetopo", action='store_true', help = "Run physical synthesis") args = parser.parse_args() tech = args.tech if args.tech else 'sky90' maxopt = int(args.maxopt) usesram = int(args.usesram) + usetopo = int(args.usetopop) mod = 'orig' if args.freqsweep: sc = args.freqsweep config = args.version if args.version else 'rv32e' for freq in [round(sc+sc*x/100) for x in freqVaryPct]: # rv32e freq sweep - runSynth(config, mod, tech, freq, maxopt, usesram) + runSynth(config, mod, tech, freq, maxopt, usesram, usetopo) if args.configsweep: defaultfreq = 1500 if tech == 'sky90' else 5000 freq = args.targetfreq if args.targetfreq else defaultfreq for config in ['rv32i', 'rv64gc', 'rv64i', 'rv32gc', 'rv32imc', 'rv32e']: #configs - runSynth(config, mod, tech, freq, maxopt, usesram) + runSynth(config, mod, tech, freq, maxopt, usesram, usetopo) if args.featuresweep: defaultfreq = 500 if tech == 'sky90' else 1500 freq = args.targetfreq if args.targetfreq else defaultfreq config = args.version if args.version else 'rv64gc' for mod in ['noFPU', 'noMulDiv', 'noPriv', 'PMP0', 'orig']: - runSynth(config, mod, tech, freq, maxopt, usesram) + runSynth(config, mod, tech, freq, maxopt, usesram, usetopo) else: defaultfreq = 500 if tech == 'sky90' else 1500 freq = args.targetfreq if args.targetfreq else defaultfreq config = args.version if args.version else 'rv64gc' - runSynth(config, mod, tech, freq, maxopt, usesram) + runSynth(config, mod, tech, freq, maxopt, usesram, usetopo) From fedbc1a43b3faf2398da9f7d6063c713d09e6db5 Mon Sep 17 00:00:00 2001 From: James Stine Date: Thu, 16 Feb 2023 15:01:52 -0600 Subject: [PATCH 19/21] Get rid of extra CR/LF in .synopsys_dc.setup file --- synthDC/.synopsys_dc.setup | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synthDC/.synopsys_dc.setup b/synthDC/.synopsys_dc.setup index 6942e58e..ac93f659 100755 --- a/synthDC/.synopsys_dc.setup +++ b/synthDC/.synopsys_dc.setup @@ -21,8 +21,7 @@ if {$tech == "sky130"} { set pdk /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/ set s10lib $pdk/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a lappend search_path $s10lib -} -elseif {$tech == "tsmc28psyn"} { +} elseif {$tech == "tsmc28psyn"} { set TLU /home/jstine/TLU+ set pdk /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/ set s10lib $pdk/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a From c8307dffc19900f82debaaaf8677d0677871eb5a Mon Sep 17 00:00:00 2001 From: James Stine Date: Thu, 16 Feb 2023 15:16:32 -0600 Subject: [PATCH 20/21] Update bug in Makefile --- synthDC/Makefile | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index d7fb2b91..9dd2b0ab 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -21,6 +21,7 @@ export MAXCORES ?= 1 export MAXOPT ?= 0 export DRIVE ?= FLOP export USESRAM ?= 1 +export USETOPO ?= 0 time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) @@ -30,8 +31,6 @@ export SAIFPOWER ?= 0 OLDCONFIGDIR ?= ${WALLY}/config export CONFIGDIR ?= $(OUTPUTDIR)/config - - default: @echo " Basic synthesis procedure for Wally:" @echo " Invoke with make synth" @@ -51,6 +50,18 @@ DIRS = $(DIRS32) $(DIRS64) configs: $(CONFIG) $(CONFIG): +ifeq ($(USETOPO), 0)) +ifeq ($(TECH), tsmc28psyn)) + @echo "Bad configuration: you cannot use physical synthesis without tsmc28psyn and usetopo (i.e., -topo)" + exit 1 +endif +endif +ifeq ($(USETOPO), 1)) +ifneq ($(TECH), tsmc28psyn)) + @echo "Bad configuration: you cannot use usetopo (i.e., -topo) without tsmc28psyn" + exit 1 +endif +endif @echo $(CONFIG) cp -r $(OLDCONFIGDIR)/shared/*.vh $(CONFIGDIR) cp -r $(OLDCONFIGDIR)/$(CONFIG)/* $(CONFIGDIR) @@ -116,15 +127,17 @@ mkdirecs: @mkdir -p $(OUTPUTDIR)/mapped @mkdir -p $(OUTPUTDIR)/unmapped - synth: mkdirecs configs rundc clean rundc: +ifeq ($(USETOPO), 0) dc_shell-xg-t -64bit -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out +else -rundctopo: dc_shell-xg-t -64bit -topographical_mode -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out - +endif + + clean: rm -rf $(OUTPUTDIR)/hdl rm -rf $(OUTPUTDIR)/WORK From 8d94273a7ad65cb9acbe290d01237eaea17d2e9f Mon Sep 17 00:00:00 2001 From: James Stine Date: Thu, 16 Feb 2023 15:50:31 -0600 Subject: [PATCH 21/21] fix typo - remove extra p at end of script --- synthDC/.synopsys_dc.setup | 1 - synthDC/wallySynth.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/synthDC/.synopsys_dc.setup b/synthDC/.synopsys_dc.setup index ac93f659..ccc0c67c 100755 --- a/synthDC/.synopsys_dc.setup +++ b/synthDC/.synopsys_dc.setup @@ -29,7 +29,6 @@ if {$tech == "sky130"} { set TLUPLUS true set mw_logic1_net VDD set mw_logic0_net VSS - set TLU_PDK /home/jstine/TLU+/ set CAPTABLE $TLU/1p8m/ set MW_REFERENCE_LIBRARY /home/jstine/MW set MW_TECH_FILE tcbn28hpcplusbwp30p140 diff --git a/synthDC/wallySynth.py b/synthDC/wallySynth.py index 876a482d..d89964c1 100755 --- a/synthDC/wallySynth.py +++ b/synthDC/wallySynth.py @@ -40,7 +40,7 @@ if __name__ == '__main__': tech = args.tech if args.tech else 'sky90' maxopt = int(args.maxopt) usesram = int(args.usesram) - usetopo = int(args.usetopop) + usetopo = int(args.usetopo) mod = 'orig' if args.freqsweep: