From d880720b7eeff213747b311f9d6c6386ece046dc Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 23 Feb 2023 15:55:34 -0600 Subject: [PATCH 01/55] Partial replacement of InstrClassX with {JalX, RetX, JumpX, and BranchX}. --- src/ifu/bpred/RASPredictor.sv | 4 +- src/ifu/bpred/bpred.sv | 92 +++++++++++++++++--------------- src/ifu/bpred/twoBitPredictor.sv | 8 +-- 3 files changed, 56 insertions(+), 48 deletions(-) diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 330607af4..3f72c4fab 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -33,7 +33,7 @@ module RASPredictor #(parameter int StackSize = 16 )( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, - input logic [3:0] WrongPredInstrClassD, // Prediction class is wrong + input logic WrongBPRetD, // Prediction class is wrong input logic [3:0] InstrClassD, input logic [3:0] InstrClassE, // Instr class input logic [3:0] PredInstrClassF, @@ -61,7 +61,7 @@ module RASPredictor #(parameter int StackSize = 16 )( assign PopF = PredInstrClassF[2] & ~StallD & ~FlushD; assign PushE = InstrClassE[3] & ~StallM & ~FlushM; - assign WrongPredRetD = (WrongPredInstrClassD[2]) & ~StallE & ~FlushE; + assign WrongPredRetD = (WrongBPRetD) & ~StallE & ~FlushE; assign FlushedRetDE = (~StallE & FlushE & InstrClassD[2]) | (~StallM & FlushM & InstrClassE[2]); // flushed ret assign RepairD = WrongPredRetD | FlushedRetDE ; diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index ec974d14c..27ee6bc10 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -72,12 +72,9 @@ module bpred ( logic [1:0] DirPredictionF; - logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD; logic [`XLEN-1:0] BTAF, RASPCF; logic PredictionPCWrongE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; - logic [3:0] InstrClassD; - logic [3:0] InstrClassE; logic DirPredictionWrongE; logic SelBPPredF; @@ -91,34 +88,44 
@@ module bpred ( logic [`XLEN-1:0] BTAD; + logic BTBJalF, BTBRetF, BTBJumpF, BTBBranchF; + logic BPBranchF, BPJumpF, BPRetF, BPJalF; + logic BPBranchD, BPJumpD, BPRetD, BPJalD; + logic RetD, JalD; + logic RetE, JalE; + logic BranchM, JumpM, RetM, JalM; + logic WrongBPRetD; + + // Part 1 branch direction prediction // look into the 2 port Sram model. something is wrong. if (`BPRED_TYPE == "BP_TWOBIT") begin:Predictor - twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, + twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, + .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); + .BranchInstrE(BranchE), .BranchInstrM(BranchM), .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE") begin:Predictor gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), + .BranchInstrF(BPBranchF), .BranchInstrD(BranchD), .BranchInstrE(BranchE), .BranchInstrM(BranchM), .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL") begin:Predictor gshare #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), + .BranchInstrF(BPBranchF), .BranchInstrD(BranchD), .BranchInstrE(BranchE), .BranchInstrM(BranchM), .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE_BASIC") begin:Predictor gsharebasic #(`BPRED_SIZE) DirPredictor(.clk, .reset, 
.StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); + .BranchInstrE(BranchE), .BranchInstrM(BranchM), .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL_BASIC") begin:Predictor gsharebasic #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); + .BranchInstrE(BranchE), .BranchInstrM(BranchM), .PCSrcE); end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor // *** Fix me @@ -142,16 +149,16 @@ module bpred ( TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, .BTAF, .BTAD, - .BTBPredInstrClassF, + .BTBPredInstrClassF({BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}), .PredictionInstrClassWrongM, .IEUAdrE, .IEUAdrM, - .InstrClassD, .InstrClassE, .InstrClassM); + .InstrClassD({JalD, RetD, JumpD, BranchD}), .InstrClassE({JalE, RetE, JumpE, BranchE}), .InstrClassM({JalM, RetM, JumpM, BranchM})); // the branch predictor needs a compact decoding of the instruction class. 
if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode logic [3:0] InstrClassF; logic cjal, cj, cjr, cjalr, CJumpF, CBranchF; - logic JumpF, BranchF; + logic NCJumpF, NCBranchF; if(`C_SUPPORTED) begin logic [4:0] CompressedOpcF; @@ -166,48 +173,46 @@ module bpred ( assign {cjal, cj, cjr, cjalr, CJumpF, CBranchF} = '0; end - assign JumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; - assign BranchF = PostSpillInstrRawF[6:0] == 7'h63; + assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; + assign NCBranchF = PostSpillInstrRawF[6:0] == 7'h63; - assign InstrClassF[0] = BranchF | (`C_SUPPORTED & CBranchF); - assign InstrClassF[1] = JumpF | (`C_SUPPORTED & (CJumpF)); - assign InstrClassF[2] = (JumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or r5 - (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); + assign BPBranchF = NCBranchF | (`C_SUPPORTED & CBranchF); + assign BPJumpF = NCJumpF | (`C_SUPPORTED & (CJumpF)); + assign BPRetF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or r5 + (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); - assign InstrClassF[3] = (JumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 + assign BPJalF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); - assign PredInstrClassF = InstrClassF; - assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | - PredInstrClassF[1]; end else begin - assign PredInstrClassF = BTBPredInstrClassF; - assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | - PredInstrClassF[1]; + assign {BPJalF, BPRetF, BPJumpF, BPBranchF} = {BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}; end + assign SelBPPredF = (BPBranchF & DirPredictionF[1]) | BPJumpF; // Part 3 RAS 
RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .PredInstrClassF, .InstrClassD, .InstrClassE, - .WrongPredInstrClassD, .RASPCF, .PCLinkE); + .PredInstrClassF({BPJalF, BPRetF, BPJumpF, BPBranchF}), .InstrClassD({JalD, RetD, JumpD, BranchD}), .InstrClassE({JalE, RetE, JumpE, BranchE}), + .WrongBPRetD, .RASPCF, .PCLinkE); - assign BPPredPCF = PredInstrClassF[2] ? RASPCF : BTAF; + assign BPPredPCF = BPRetF ? RASPCF : BTAF; - assign InstrClassD[0] = BranchD; - assign InstrClassD[1] = JumpD ; - assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 - assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 + //assign InstrClassD[0] = BranchD; + //assign InstrClassD[1] = JumpD ; + //assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 + assign RetD = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 + //assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 + assign JalD = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 - flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); - flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); + flopenrc #(2) InstrClassRegE(clk, reset, FlushE, ~StallE, {JalD, RetD}, {JalE, RetE}); + flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, {JalE, RetE, JumpE, BranchE}, {JalM, RetM, JumpM, BranchM}); flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); // branch predictor flopenrc #(1) BPClassWrongRegM(clk, reset, FlushM, ~StallM, AnyWrongPredInstrClassE, PredictionInstrClassWrongM); - - // pipeline the class - flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, 
~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); + + // pipeline the predicted class + flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, {BPJalF, BPRetF, BPJumpF, BPBranchF}, {BPJalD, BPRetD, BPJumpD, BPBranchD}); // Check the prediction // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. @@ -218,8 +223,8 @@ module bpred ( assign PredictionPCWrongE = PCCorrectE != PCD; // branch class prediction wrong. - assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD[3:0]; - assign AnyWrongPredInstrClassD = |WrongPredInstrClassD; + assign AnyWrongPredInstrClassD = |({BPJalD, BPRetD, BPJumpD, BPBranchD} ^ {JalD, RetD, JumpD, BranchD}); + assign WrongBPRetD = BPRetD ^ RetD; // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. //assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE | (AnyWrongPredInstrClassE & ~|InstrClassE)); @@ -257,10 +262,10 @@ module bpred ( // could be wrong or the fall through address selected for branch predict not taken. // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of // both without the above inaccuracies. 
- assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]) & PCSrcE; - assign RASPredPCWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE; + assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~RetE) & PCSrcE; + assign RASPredPCWrongE = (RASPCE != IEUAdrE) & RetE & PCSrcE; - assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1]; + assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE; flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); @@ -275,5 +280,8 @@ module bpred ( end else begin assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0; end + + // **** Fix me + assign InstrClassM = {JalM, RetM, JumpM, BranchM}; endmodule diff --git a/src/ifu/bpred/twoBitPredictor.sv b/src/ifu/bpred/twoBitPredictor.sv index 4a7be674d..37405ab42 100644 --- a/src/ifu/bpred/twoBitPredictor.sv +++ b/src/ifu/bpred/twoBitPredictor.sv @@ -31,8 +31,8 @@ module twoBitPredictor #(parameter k = 10) ( input logic clk, input logic reset, - input logic StallF, StallD, StallE, StallM, - input logic FlushD, FlushE, FlushM, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, input logic [`XLEN-1:0] PCNextF, PCM, output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, @@ -55,12 +55,12 @@ module twoBitPredictor #(parameter k = 10) ( ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), - .ce1(~StallF), .ce2(~StallM & ~FlushM), + .ce1(~StallF), .ce2(~StallW & ~FlushW), .ra1(IndexNextF), .rd1(DirPredictionF), .wa2(IndexM), .wd2(NewDirPredictionM), - .we2(BranchInstrM & ~StallM & ~FlushM), + .we2(BranchInstrM), .bwe2(1'b1)); flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); From 6e8791a0a543072921d6dee3973bbc1d99d5ee66 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 23 Feb 2023 16:19:03 -0600 Subject: [PATCH 02/55] Major cleanup of bp. 
--- src/ifu/bpred/RASPredictor.sv | 16 ++++++++-------- src/ifu/bpred/bpred.sv | 12 ++++++------ src/ifu/bpred/gshare.sv | 24 ++++++++++++------------ src/ifu/bpred/gsharebasic.sv | 10 +++++----- src/ifu/bpred/twoBitPredictor.sv | 6 +++--- 5 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 3f72c4fab..5f14a028e 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -34,9 +34,9 @@ module RASPredictor #(parameter int StackSize = 16 )( input logic reset, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, input logic WrongBPRetD, // Prediction class is wrong - input logic [3:0] InstrClassD, - input logic [3:0] InstrClassE, // Instr class - input logic [3:0] PredInstrClassF, + input logic RetD, + input logic RetE, JalE, // Instr class + input logic BPRetF, input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a jal output logic [`XLEN-1:0] RASPCF // Top of the stack ); @@ -58,17 +58,17 @@ module RASPredictor #(parameter int StackSize = 16 )( logic WrongPredRetD; - assign PopF = PredInstrClassF[2] & ~StallD & ~FlushD; - assign PushE = InstrClassE[3] & ~StallM & ~FlushM; + assign PopF = BPRetF & ~StallD & ~FlushD; + assign PushE = JalE & ~StallM & ~FlushM; assign WrongPredRetD = (WrongBPRetD) & ~StallE & ~FlushE; - assign FlushedRetDE = (~StallE & FlushE & InstrClassD[2]) | (~StallM & FlushM & InstrClassE[2]); // flushed ret + assign FlushedRetDE = (~StallE & FlushE & RetD) | (~StallM & FlushM & RetE); // flushed ret assign RepairD = WrongPredRetD | FlushedRetDE ; - assign IncrRepairD = FlushedRetDE | (WrongPredRetD & ~InstrClassD[2]); // Guessed it was a ret, but its not + assign IncrRepairD = FlushedRetDE | (WrongPredRetD & ~RetD); // Guessed it was a ret, but its not - assign DecRepairD = WrongPredRetD & InstrClassD[2]; // Guessed non ret but is a ret. + assign DecRepairD = WrongPredRetD & RetD; // Guessed non ret but is a ret. 
assign CounterEn = PopF | PushE | RepairD; diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 27ee6bc10..fc6721ce5 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -103,29 +103,29 @@ module bpred ( twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(BranchE), .BranchInstrM(BranchM), .PCSrcE); + .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE") begin:Predictor gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrF(BPBranchF), .BranchInstrD(BranchD), .BranchInstrE(BranchE), .BranchInstrM(BranchM), + .BPBranchF, .BranchD, .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL") begin:Predictor gshare #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrF(BPBranchF), .BranchInstrD(BranchD), .BranchInstrE(BranchE), .BranchInstrM(BranchM), + .BPBranchF, .BranchD, .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE_BASIC") begin:Predictor gsharebasic #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(BranchE), .BranchInstrM(BranchM), .PCSrcE); + .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL_BASIC") begin:Predictor gsharebasic #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(BranchE), 
.BranchInstrM(BranchM), .PCSrcE); + .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor // *** Fix me @@ -191,7 +191,7 @@ module bpred ( // Part 3 RAS RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .PredInstrClassF({BPJalF, BPRetF, BPJumpF, BPBranchF}), .InstrClassD({JalD, RetD, JumpD, BranchD}), .InstrClassE({JalE, RetE, JumpE, BranchE}), + .BPRetF, .RetD, .RetE, .JalE, .WrongBPRetD, .RASPCF, .PCLinkE); assign BPPredPCF = BPRetF ? RASPCF : BTAF; diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index 5332ce5cd..2523693cf 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -39,7 +39,7 @@ module gshare #(parameter k = 10, output logic DirPredictionWrongE, // update input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, - input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, PCSrcE + input logic BPBranchF, BranchD, BranchE, BranchM, PCSrcE ); logic MatchF, MatchD, MatchE, MatchM; @@ -68,10 +68,10 @@ module gshare #(parameter k = 10, assign IndexM = GHRM; end - assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF); - assign MatchD = BranchInstrD & ~FlushE & (IndexNextF == IndexD); - assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE); - assign MatchM = BranchInstrM & ~FlushW & (IndexNextF == IndexM); + assign MatchF = BPBranchF & ~FlushD & (IndexNextF == IndexF); + assign MatchD = BranchD & ~FlushE & (IndexNextF == IndexD); + assign MatchE = BranchE & ~FlushM & (IndexNextF == IndexE); + assign MatchM = BranchM & ~FlushW & (IndexNextF == IndexM); assign MatchNextX = MatchF | MatchD | MatchE | MatchM; flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); @@ -91,7 +91,7 @@ module gshare #(parameter k = 10, .rd1(TableDirPredictionF), .wa2(IndexM), .wd2(NewDirPredictionM), - .we2(BranchInstrM), + .we2(BranchM), .bwe2(1'b1)); flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, 
DirPredictionD); @@ -100,16 +100,16 @@ module gshare #(parameter k = 10, satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; - assign GHRNextF = BranchInstrF ? {DirPredictionF[1], GHRF[k-1:1]} : GHRF; - assign GHRF = BranchInstrD ? {DirPredictionD[1], GHRD[k-1:1]} : GHRD; - assign GHRD = BranchInstrE ? {PCSrcE, GHRE[k-1:1]} : GHRE; - assign GHRE = BranchInstrM ? {PCSrcM, GHRM[k-1:1]} : GHRM; + assign GHRNextF = BPBranchF ? {DirPredictionF[1], GHRF[k-1:1]} : GHRF; + assign GHRF = BranchD ? {DirPredictionD[1], GHRD[k-1:1]} : GHRD; + assign GHRD = BranchE ? {PCSrcE, GHRE[k-1:1]} : GHRE; + assign GHRE = BranchM ? {PCSrcM, GHRM[k-1:1]} : GHRM; assign GHRNextM = {PCSrcM, GHRM[k-1:1]}; - flopenr #(k) GHRReg(clk, reset, ~StallW & ~FlushW & BranchInstrM, GHRNextM, GHRM); + flopenr #(k) GHRReg(clk, reset, ~StallW & ~FlushW & BranchM, GHRNextM, GHRM); flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); endmodule diff --git a/src/ifu/bpred/gsharebasic.sv b/src/ifu/bpred/gsharebasic.sv index cb0bbe9eb..e793e7ac6 100644 --- a/src/ifu/bpred/gsharebasic.sv +++ b/src/ifu/bpred/gsharebasic.sv @@ -39,7 +39,7 @@ module gsharebasic #(parameter k = 10, output logic DirPredictionWrongE, // update input logic [`XLEN-1:0] PCNextF, PCM, - input logic BranchInstrE, BranchInstrM, PCSrcE + input logic BranchE, BranchM, PCSrcE ); logic [k-1:0] IndexNextF, IndexM; @@ -64,7 +64,7 @@ module gsharebasic #(parameter k = 10, .rd1(DirPredictionF), .wa2(IndexM), .wd2(NewDirPredictionM), - .we2(BranchInstrM), + .we2(BranchM), .bwe2(1'b1)); flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); @@ -73,10 +73,10 @@ module gsharebasic #(parameter k = 10, satCounter2 
BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; - assign GHRNext = BranchInstrM ? {PCSrcM, GHR[k-1:1]} : GHR; - flopenr #(k) GHRReg(clk, reset, ~StallM & ~FlushM & BranchInstrM, GHRNext, GHR); + assign GHRNext = BranchM ? {PCSrcM, GHR[k-1:1]} : GHR; + flopenr #(k) GHRReg(clk, reset, ~StallM & ~FlushM & BranchM, GHRNext, GHR); flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); flopenrc #(k) GHRFReg(clk, reset, FlushD, ~StallF, GHR, GHRF); diff --git a/src/ifu/bpred/twoBitPredictor.sv b/src/ifu/bpred/twoBitPredictor.sv index 37405ab42..58bf1c6bd 100644 --- a/src/ifu/bpred/twoBitPredictor.sv +++ b/src/ifu/bpred/twoBitPredictor.sv @@ -36,7 +36,7 @@ module twoBitPredictor #(parameter k = 10) ( input logic [`XLEN-1:0] PCNextF, PCM, output logic [1:0] DirPredictionF, output logic DirPredictionWrongE, - input logic BranchInstrE, BranchInstrM, + input logic BranchE, BranchM, input logic PCSrcE ); @@ -60,13 +60,13 @@ module twoBitPredictor #(parameter k = 10) ( .rd1(DirPredictionF), .wa2(IndexM), .wd2(NewDirPredictionM), - .we2(BranchInstrM), + .we2(BranchM), .bwe2(1'b1)); flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); From 1bed1c1869aa996ef169265895b4d7b267516e60 Mon Sep 17 00:00:00 2001 From: David Harris 
Date: Thu, 23 Feb 2023 14:19:10 -0800 Subject: [PATCH 03/55] Switched to sky90 for default synthesis --- synthDC/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index 136e610d2..d29c11332 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -12,7 +12,7 @@ export MOD ?= orig # title to add a note in the synth's directory name TITLE = # tsmc28, sky130, and sky90 presently supported -export TECH ?= tsmc28 +export TECH ?= sky90 # MAXCORES allows parallel compilation, which is faster but less CPU-efficient # Avoid when doing sweeps of many optimization points in parallel export MAXCORES ?= 1 From 5243d1e1d466afd732be8cd509633c0b7c4e75fd Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 23 Feb 2023 20:44:47 -0600 Subject: [PATCH 04/55] Improved branch predictor graph generation. --- bin/parseHPMC.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 0156dc9fd..7b695d34d 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -247,13 +247,25 @@ if(sys.argv[1] == '-b'): currPercent.append(percent) dct[PredType] = (currSize, currPercent) print(dct) + fig, axes = plt.subplots() + marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x'} + colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue'} for cat in dct: (x, y) = dct[cat] - plt.scatter(x, y, label='k') - plt.plot(x, y) - plt.ylabel('Prediction Accuracy') - plt.xlabel('Size (b or k)') - plt.legend(loc='upper left') + x=[int(2**int(v)/4) for v in x] + print(x, y) + axes.plot(x,y, color=colors[cat]) + axes.scatter(x,y, label=cat, marker=marker[cat], color=colors[cat]) + #plt.scatter(x, y, label=cat) + #plt.plot(x, y) + #axes.set_xticks([4, 6, 8, 10, 12, 14]) + axes.legend(loc='upper left') + axes.set_xscale("log") + axes.set_ylabel('Prediction Accuracy') + 
axes.set_xlabel('Size (bytes)') + axes.set_xticks([16, 64, 256, 1024, 4096, 16384]) + axes.set_xticklabels([16, 64, 256, 1024, 4096, 16384]) + axes.grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5) plt.show() From 60752fe51c1dd1460977834477c73a4735ccca57 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 24 Feb 2023 07:52:46 -0800 Subject: [PATCH 05/55] Fixed special cases of address decoder and documented better --- src/mmu/adrdecs.sv | 4 ++-- src/mmu/pmachecker.sv | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/mmu/adrdecs.sv b/src/mmu/adrdecs.sv index 679bd185d..b1832d8c9 100644 --- a/src/mmu/adrdecs.sv +++ b/src/mmu/adrdecs.sv @@ -38,8 +38,8 @@ module adrdecs ( localparam logic [3:0] SUPPORTED_SIZE = (`LLEN == 32 ? 4'b0111 : 4'b1111); // Determine which region of physical memory (if any) is being accessed - adrdec dtimdec(PhysicalAddress, `DTIM_BASE, `DTIM_RANGE, `DTIM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[10]); - adrdec iromdec(PhysicalAddress, `IROM_BASE, `IROM_RANGE, `IROM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[9]); + adrdec dtimdec(PhysicalAddress, `DTIM_BASE, `DTIM_RANGE, `DTIM_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[10]); + adrdec iromdec(PhysicalAddress, `IROM_BASE, `IROM_RANGE, `IROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[9]); adrdec ddr4dec(PhysicalAddress, `EXT_MEM_BASE, `EXT_MEM_RANGE, `EXT_MEM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[8]); adrdec bootromdec(PhysicalAddress, `BOOTROM_BASE, `BOOTROM_RANGE, `BOOTROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[7]); adrdec uncoreramdec(PhysicalAddress, `UNCORE_RAM_BASE, `UNCORE_RAM_RANGE, `UNCORE_RAM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[6]); diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index e46cfe8b9..909cc564d 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -58,8 +58,12 @@ module pmachecker ( // Only non-core RAM/ROM memory 
regions are cacheable assign Cacheable = SelRegions[8] | SelRegions[7] | SelRegions[6]; - assign Idempotent = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[6]; - assign AtomicAllowed = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[6]; + // Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly + // I/O is nonidempotent. + assign Idempotent = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[7] | SelRegions[6]; + // Atomic operations are only allowed on RAM + assign AtomicAllowed = SelRegions[10] | SelRegions[8] | SelRegions[6]; + // Check if tightly integrated memories are selected assign SelTIM = SelRegions[10] | SelRegions[9]; // Detect access faults From 4ffaa75c2a1ba3055bd153f57d246294bfaa2e07 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Feb 2023 17:47:43 -0600 Subject: [PATCH 06/55] Modified btb forwarding logic to reduce critical path. --- src/ifu/bpred/bpred.sv | 24 ++++++++++++------------ src/ifu/bpred/btb.sv | 32 ++++++++++++++++++++------------ 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index fc6721ce5..aee8c4912 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -77,7 +77,7 @@ module bpred ( logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; logic DirPredictionWrongE; - logic SelBPPredF; + logic BPPCSrcF; logic [`XLEN-1:0] BPPredPCF; logic [`XLEN-1:0] PCNext0F; logic [`XLEN-1:0] PCCorrectE; @@ -96,6 +96,7 @@ module bpred ( logic BranchM, JumpM, RetM, JalM; logic WrongBPRetD; + logic [`XLEN-1:0] PCW; // Part 1 branch direction prediction // look into the 2 port Sram model. something is wrong. 
@@ -147,16 +148,18 @@ module bpred ( btb #(`BTB_SIZE) TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, + .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .BTAF, .BTAD, .BTBPredInstrClassF({BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}), .PredictionInstrClassWrongM, .IEUAdrE, .IEUAdrM, .InstrClassD({JalD, RetD, JumpD, BranchD}), .InstrClassE({JalE, RetE, JumpE, BranchE}), .InstrClassM({JalM, RetM, JumpM, BranchM})); - // the branch predictor needs a compact decoding of the instruction class. - if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode - logic [3:0] InstrClassF; + if (!`INSTR_CLASS_PRED) begin : DirectClassDecode + // This section is mainly for testing, verification, and PPA comparison. + // An alternative to using the BTB to store the instruction class is to partially decode + // the instructions in the Fetch stage into, Jal, Ret, Jump, and Branch instructions. + // This logic is not described in the text book as of 23 February 2023. logic cjal, cj, cjr, cjalr, CJumpF, CBranchF; logic NCJumpF, NCBranchF; @@ -185,9 +188,10 @@ module bpred ( (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); end else begin + // This section connects the BTB's instruction class prediction. assign {BPJalF, BPRetF, BPJumpF, BPBranchF} = {BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}; end - assign SelBPPredF = (BPBranchF & DirPredictionF[1]) | BPJumpF; + assign BPPCSrcF = (BPBranchF & DirPredictionF[1]) | BPJumpF; // Part 3 RAS RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, @@ -196,11 +200,7 @@ module bpred ( assign BPPredPCF = BPRetF ? 
RASPCF : BTAF; - //assign InstrClassD[0] = BranchD; - //assign InstrClassD[1] = JumpD ; - //assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 assign RetD = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 - //assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 assign JalD = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 flopenrc #(2) InstrClassRegE(clk, reset, FlushE, ~StallE, {JalD, RetD}, {JalE, RetE}); @@ -227,7 +227,6 @@ module bpred ( assign WrongBPRetD = BPRetD ^ RetD; // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. - //assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE | (AnyWrongPredInstrClassE & ~|InstrClassE)); assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; logic BPPredWrongEAlt; @@ -237,7 +236,7 @@ module bpred ( // Output the predicted PC or corrected PC on miss-predict. // Selects the BP or PC+2/4. - mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, SelBPPredF, PCNext0F); + mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, BPPCSrcF, PCNext0F); // If the prediction is wrong select the correct address. mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F); // Correct branch/jump target. 
@@ -283,5 +282,6 @@ module bpred ( // **** Fix me assign InstrClassM = {JalM, RetM, JumpM, BranchM}; + flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); endmodule diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 2bb006714..888287bcf 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -34,7 +34,7 @@ module btb #(parameter Depth = 10 ) ( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW, - input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, // PC at various stages + input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW,// PC at various stages output logic [`XLEN-1:0] BTAF, // BTB's guess at PC output logic [`XLEN-1:0] BTAD, output logic [3:0] BTBPredInstrClassF, // BTB's guess at instruction class @@ -47,12 +47,14 @@ module btb #(parameter Depth = 10 ) ( input logic [3:0] InstrClassM // Instruction class to insert into btb ); - logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex; + logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex; logic [`XLEN-1:0] ResetPC; - logic MatchF, MatchD, MatchE, MatchM, MatchNextX, MatchXF; + logic MatchF, MatchD, MatchE, MatchM, MatchW, MatchNextX, MatchX; logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; logic [`XLEN+3:0] TableBTBPredictionF; logic UpdateEn; + logic [3:0] InstrClassW; + logic [`XLEN-1:0] IEUAdrW; // hashing function for indexing the PC // We have Depth bits to index, but XLEN bits as the input. @@ -62,6 +64,7 @@ module btb #(parameter Depth = 10 ) ( assign PCDIndex = {PCD[Depth+1] ^ PCD[1], PCD[Depth:2]}; assign PCEIndex = {PCE[Depth+1] ^ PCE[1], PCE[Depth:2]}; assign PCMIndex = {PCM[Depth+1] ^ PCM[1], PCM[Depth:2]}; + assign PCWIndex = {PCW[Depth+1] ^ PCW[1], PCW[Depth:2]}; // must output a valid PC and valid bit during reset. Because only PCF, not PCNextF is reset, PCNextF is invalid // during reset. 
The BTB must produce a non X PC1NextF to allow the simulation to run. @@ -71,21 +74,24 @@ module btb #(parameter Depth = 10 ) ( assign PCNextFIndex = reset ? ResetPC[Depth+1:2] : {PCNextF[Depth+1] ^ PCNextF[1], PCNextF[Depth:2]}; assign MatchF = PCNextFIndex == PCFIndex; - assign MatchD = PCNextFIndex == PCDIndex; - assign MatchE = PCNextFIndex == PCEIndex; - assign MatchM = PCNextFIndex == PCMIndex; - assign MatchNextX = MatchF | MatchD | MatchE | MatchM; + assign MatchD = PCFIndex == PCDIndex; + assign MatchE = PCFIndex == PCEIndex; + assign MatchM = PCFIndex == PCMIndex; + assign MatchW = PCFIndex == PCWIndex; + assign MatchX = MatchD | MatchE | MatchM | MatchW; - flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); +// flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); - assign ForwardBTBPrediction = MatchF ? {BTBPredInstrClassF, BTAF} : - MatchD ? {InstrClassD, BTAD} : + assign ForwardBTBPredictionF = MatchD ? {InstrClassD, BTAD} : MatchE ? {InstrClassE, IEUAdrE} : - {InstrClassM, IEUAdrM} ; + MatchM ? {InstrClassM, IEUAdrM} : + {InstrClassW, IEUAdrW} ; +/* -----\/----- EXCLUDED -----\/----- flopenr #(`XLEN+4) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF); + -----/\----- EXCLUDED -----/\----- */ - assign {BTBPredInstrClassF, BTAF} = MatchXF ? ForwardBTBPredictionF : {TableBTBPredictionF}; + assign {BTBPredInstrClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredictionF}; assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; @@ -97,4 +103,6 @@ module btb #(parameter Depth = 10 ) ( flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); + flopenrc #(`XLEN+4) IEUAdrWReg(clk, reset, FlushW, ~StallW, {InstrClassM, IEUAdrM}, {InstrClassW, IEUAdrW}); + endmodule From c2021927cebd38fc0509251eeb78d3ab65a4757b Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Feb 2023 17:54:48 -0600 Subject: [PATCH 07/55] Prep to fix gshare critical path. 
--- src/ifu/bpred/bpred.sv | 12 ++++++++---- src/ifu/bpred/btb.sv | 8 +++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index aee8c4912..39ced3d92 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -94,9 +94,9 @@ module bpred ( logic RetD, JalD; logic RetE, JalE; logic BranchM, JumpM, RetM, JalM; + logic BranchW, JumpW, RetW, JalW; logic WrongBPRetD; - - logic [`XLEN-1:0] PCW; + logic [`XLEN-1:0] PCW, IEUAdrW; // Part 1 branch direction prediction // look into the 2 port Sram model. something is wrong. @@ -152,8 +152,9 @@ module bpred ( .BTAF, .BTAD, .BTBPredInstrClassF({BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}), .PredictionInstrClassWrongM, - .IEUAdrE, .IEUAdrM, - .InstrClassD({JalD, RetD, JumpD, BranchD}), .InstrClassE({JalE, RetE, JumpE, BranchE}), .InstrClassM({JalM, RetM, JumpM, BranchM})); + .IEUAdrE, .IEUAdrM, .IEUAdrW, + .InstrClassD({JalD, RetD, JumpD, BranchD}), .InstrClassE({JalE, RetE, JumpE, BranchE}), .InstrClassM({JalM, RetM, JumpM, BranchM}), + .InstrClassW({JalW, RetW, JumpW, BranchW})); if (!`INSTR_CLASS_PRED) begin : DirectClassDecode // This section is mainly for testing, verification, and PPA comparison. 
@@ -205,6 +206,7 @@ module bpred ( flopenrc #(2) InstrClassRegE(clk, reset, FlushE, ~StallE, {JalD, RetD}, {JalE, RetE}); flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, {JalE, RetE, JumpE, BranchE}, {JalM, RetM, JumpM, BranchM}); + flopenrc #(4) InstrClassRegW(clk, reset, FlushM, ~StallW, {JalM, RetM, JumpM, BranchM}, {JalW, RetW, JumpW, BranchW}); flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); // branch predictor @@ -283,5 +285,7 @@ module bpred ( // **** Fix me assign InstrClassM = {JalM, RetM, JumpM, BranchM}; flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); + flopenrc #(`XLEN) IEUAdrWReg(clk, reset, FlushW, ~StallW, IEUAdrM, IEUAdrW); + endmodule diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 888287bcf..362e416b4 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -42,9 +42,11 @@ module btb #(parameter Depth = 10 ) ( input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb + input logic [`XLEN-1:0] IEUAdrW, input logic [3:0] InstrClassD, // Instruction class to insert into btb input logic [3:0] InstrClassE, // Instruction class to insert into btb - input logic [3:0] InstrClassM // Instruction class to insert into btb + input logic [3:0] InstrClassM, // Instruction class to insert into btb + input logic [3:0] InstrClassW ); logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex; @@ -53,8 +55,6 @@ module btb #(parameter Depth = 10 ) ( logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; logic [`XLEN+3:0] TableBTBPredictionF; logic UpdateEn; - logic [3:0] InstrClassW; - logic [`XLEN-1:0] IEUAdrW; // hashing function for indexing the PC // We have Depth bits to index, but XLEN bits as the input. 
@@ -103,6 +103,4 @@ module btb #(parameter Depth = 10 ) ( flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); - flopenrc #(`XLEN+4) IEUAdrWReg(clk, reset, FlushW, ~StallW, {InstrClassM, IEUAdrM}, {InstrClassW, IEUAdrW}); - endmodule From d030d323fdeb11147e6e90769b2e00dc57e68a09 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Feb 2023 18:02:00 -0600 Subject: [PATCH 08/55] Completed critical path gshare fix. --- src/ifu/bpred/bpred.sv | 8 ++++---- src/ifu/bpred/gshare.sv | 40 ++++++++++++++++++++++------------------ 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 39ced3d92..599b8853f 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -108,14 +108,14 @@ module bpred ( end else if (`BPRED_TYPE == "BP_GSHARE") begin:Predictor gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BPBranchF, .BranchD, .BranchE, .BranchM, + .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .DirPredictionF, .DirPredictionWrongE, + .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL") begin:Predictor gshare #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BPBranchF, .BranchD, .BranchE, .BranchM, + .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .DirPredictionF, .DirPredictionWrongE, + .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE_BASIC") begin:Predictor diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index 2523693cf..e14f960f3 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -38,17 +38,17 @@ module gshare #(parameter k = 10, output logic [1:0] DirPredictionF, 
output logic DirPredictionWrongE, // update - input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, - input logic BPBranchF, BranchD, BranchE, BranchM, PCSrcE + input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW, + input logic BPBranchF, BranchD, BranchE, BranchM, BranchW, PCSrcE ); - logic MatchF, MatchD, MatchE, MatchM; - logic MatchNextX, MatchXF; + logic MatchF, MatchD, MatchE, MatchM, MatchW; + logic MatchX, MatchXF; - logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE, ForwardNewDirPrediction, ForwardDirPredictionF; - logic [1:0] NewDirPredictionE, NewDirPredictionM; + logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE, ForwardNewDirPredictionF, ForwardDirPredictionF; + logic [1:0] NewDirPredictionE, NewDirPredictionM, NewDirPredictionW; - logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM; + logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM, IndexW; logic [k-1:0] GHRF, GHRD, GHRE, GHRM; logic [k-1:0] GHRNextM, GHRNextF; @@ -68,22 +68,25 @@ module gshare #(parameter k = 10, assign IndexM = GHRM; end - assign MatchF = BPBranchF & ~FlushD & (IndexNextF == IndexF); - assign MatchD = BranchD & ~FlushE & (IndexNextF == IndexD); - assign MatchE = BranchE & ~FlushM & (IndexNextF == IndexE); - assign MatchM = BranchM & ~FlushW & (IndexNextF == IndexM); - assign MatchNextX = MatchF | MatchD | MatchE | MatchM; + flopenrc #(k) IndexWReg(clk, reset, FlushW, ~StallW, IndexM, IndexW); - flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); + //assign MatchF = BPBranchF & ~FlushD & (IndexNextF == IndexF); + assign MatchD = BranchD & ~FlushE & (IndexF == IndexD); + assign MatchE = BranchE & ~FlushM & (IndexF == IndexE); + assign MatchM = BranchM & ~FlushW & (IndexF == IndexM); + assign MatchW = BranchW & ~FlushW & (IndexF == IndexW); + assign MatchX = MatchD | MatchE | MatchM | MatchW; - assign ForwardNewDirPrediction = MatchF ? {2{DirPredictionF[1]}} : - MatchD ? 
{2{DirPredictionD[1]}} : +// flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); + + assign ForwardNewDirPredictionF = MatchD ? {2{DirPredictionD[1]}} : MatchE ? {NewDirPredictionE} : - NewDirPredictionM ; + MatchM ? {NewDirPredictionM} : + NewDirPredictionW ; - flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF); + //flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF); - assign DirPredictionF = MatchXF ? ForwardDirPredictionF : TableDirPredictionF; + assign DirPredictionF = MatchX ? ForwardNewDirPredictionF : TableDirPredictionF; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallM & ~FlushM), @@ -99,6 +102,7 @@ module gshare #(parameter k = 10, satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); + flopenrc #(2) NewPredictionRegW(clk, reset, FlushW, ~StallW, NewDirPredictionM, NewDirPredictionW); assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; From 5c52827f51a36b5f21e1f9937604b1d63265b430 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Feb 2023 18:20:42 -0600 Subject: [PATCH 09/55] Cleanup. 
--- src/ifu/bpred/btb.sv | 9 +-------- src/ifu/bpred/gshare.sv | 9 ++------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 362e416b4..e70716144 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -51,7 +51,7 @@ module btb #(parameter Depth = 10 ) ( logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex; logic [`XLEN-1:0] ResetPC; - logic MatchF, MatchD, MatchE, MatchM, MatchW, MatchNextX, MatchX; + logic MatchF, MatchD, MatchE, MatchM, MatchW, MatchX; logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; logic [`XLEN+3:0] TableBTBPredictionF; logic UpdateEn; @@ -79,21 +79,14 @@ module btb #(parameter Depth = 10 ) ( assign MatchM = PCFIndex == PCMIndex; assign MatchW = PCFIndex == PCWIndex; assign MatchX = MatchD | MatchE | MatchM | MatchW; - -// flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); assign ForwardBTBPredictionF = MatchD ? {InstrClassD, BTAD} : MatchE ? {InstrClassE, IEUAdrE} : MatchM ? {InstrClassM, IEUAdrM} : {InstrClassW, IEUAdrW} ; -/* -----\/----- EXCLUDED -----\/----- - flopenr #(`XLEN+4) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF); - -----/\----- EXCLUDED -----/\----- */ - assign {BTBPredInstrClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredictionF}; - assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; // An optimization may be using a PC relative address. 
diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index e14f960f3..70c03afb0 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -43,9 +43,9 @@ module gshare #(parameter k = 10, ); logic MatchF, MatchD, MatchE, MatchM, MatchW; - logic MatchX, MatchXF; + logic MatchX; - logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE, ForwardNewDirPredictionF, ForwardDirPredictionF; + logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE, ForwardNewDirPredictionF; logic [1:0] NewDirPredictionE, NewDirPredictionM, NewDirPredictionW; logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM, IndexW; @@ -70,22 +70,17 @@ module gshare #(parameter k = 10, flopenrc #(k) IndexWReg(clk, reset, FlushW, ~StallW, IndexM, IndexW); - //assign MatchF = BPBranchF & ~FlushD & (IndexNextF == IndexF); assign MatchD = BranchD & ~FlushE & (IndexF == IndexD); assign MatchE = BranchE & ~FlushM & (IndexF == IndexE); assign MatchM = BranchM & ~FlushW & (IndexF == IndexM); assign MatchW = BranchW & ~FlushW & (IndexF == IndexW); assign MatchX = MatchD | MatchE | MatchM | MatchW; -// flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); - assign ForwardNewDirPredictionF = MatchD ? {2{DirPredictionD[1]}} : MatchE ? {NewDirPredictionE} : MatchM ? {NewDirPredictionM} : NewDirPredictionW ; - //flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF); - assign DirPredictionF = MatchX ? ForwardNewDirPredictionF : TableDirPredictionF; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), From 4058a4998534c457702b981a58d52abdb7c30835 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Feb 2023 18:36:41 -0600 Subject: [PATCH 10/55] Possible fix to btb performance issue. 
--- src/ifu/bpred/bpred.sv | 2 +- src/ifu/bpred/btb.sv | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 599b8853f..c802ffb21 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -285,7 +285,7 @@ module bpred ( // **** Fix me assign InstrClassM = {JalM, RetM, JumpM, BranchM}; flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); - flopenrc #(`XLEN) IEUAdrWReg(clk, reset, FlushW, ~StallW, IEUAdrM, IEUAdrW); + flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); endmodule diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index e70716144..d2f0cb77b 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -51,7 +51,7 @@ module btb #(parameter Depth = 10 ) ( logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex; logic [`XLEN-1:0] ResetPC; - logic MatchF, MatchD, MatchE, MatchM, MatchW, MatchX; + logic MatchD, MatchE, MatchM, MatchW, MatchX; logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; logic [`XLEN+3:0] TableBTBPredictionF; logic UpdateEn; @@ -73,7 +73,6 @@ module btb #(parameter Depth = 10 ) ( assign ResetPC = `RESET_VECTOR; assign PCNextFIndex = reset ? ResetPC[Depth+1:2] : {PCNextF[Depth+1] ^ PCNextF[1], PCNextF[Depth:2]}; - assign MatchF = PCNextFIndex == PCFIndex; assign MatchD = PCFIndex == PCDIndex; assign MatchE = PCFIndex == PCEIndex; assign MatchM = PCFIndex == PCMIndex; @@ -81,9 +80,9 @@ module btb #(parameter Depth = 10 ) ( assign MatchX = MatchD | MatchE | MatchM | MatchW; assign ForwardBTBPredictionF = MatchD ? {InstrClassD, BTAD} : - MatchE ? {InstrClassE, IEUAdrE} : - MatchM ? {InstrClassM, IEUAdrM} : - {InstrClassW, IEUAdrW} ; + MatchE ? {InstrClassE, IEUAdrE} : + MatchM ? {InstrClassM, IEUAdrM} : + {InstrClassW, IEUAdrW} ; assign {BTBPredInstrClassF, BTAF} = MatchX ? 
ForwardBTBPredictionF : {TableBTBPredictionF}; From 6ff524d843936c0f77ec3e21f244da61a47b84a4 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Feb 2023 19:22:14 -0600 Subject: [PATCH 11/55] Renamed signals to match figure 10.18. --- src/ifu/bpred/bpred.sv | 2 +- src/ifu/bpred/btb.sv | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index c802ffb21..ddff3a04c 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -150,7 +150,7 @@ module bpred ( TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .BTAF, .BTAD, - .BTBPredInstrClassF({BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}), + .BTBIClassF({BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}), .PredictionInstrClassWrongM, .IEUAdrE, .IEUAdrM, .IEUAdrW, .InstrClassD({JalD, RetD, JumpD, BranchD}), .InstrClassE({JalE, RetE, JumpE, BranchE}), .InstrClassM({JalM, RetM, JumpM, BranchM}), diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index d2f0cb77b..5ad92517d 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -37,7 +37,7 @@ module btb #(parameter Depth = 10 ) ( input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW,// PC at various stages output logic [`XLEN-1:0] BTAF, // BTB's guess at PC output logic [`XLEN-1:0] BTAD, - output logic [3:0] BTBPredInstrClassF, // BTB's guess at instruction class + output logic [3:0] BTBIClassF, // BTB's guess at instruction class // update input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb @@ -53,7 +53,7 @@ module btb #(parameter Depth = 10 ) ( logic [`XLEN-1:0] ResetPC; logic MatchD, MatchE, MatchM, MatchW, MatchX; logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; - logic [`XLEN+3:0] TableBTBPredictionF; + logic [`XLEN+3:0] TableBTBPredF; logic UpdateEn; // 
hashing function for indexing the PC @@ -84,13 +84,13 @@ module btb #(parameter Depth = 10 ) ( MatchM ? {InstrClassM, IEUAdrM} : {InstrClassW, IEUAdrW} ; - assign {BTBPredInstrClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredictionF}; + assign {BTBIClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF}; assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; // An optimization may be using a PC relative address. ram2p1r1wbe #(2**Depth, `XLEN+4) memory( - .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredictionF), + .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF), .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1)); flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); From e549bec060a22cc9245737b46136fce9d7488332 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Feb 2023 19:51:47 -0600 Subject: [PATCH 12/55] Renamed signals to match new figures. --- src/ifu/bpred/bpred.sv | 24 ++++++++++---------- src/ifu/bpred/gshare.sv | 38 ++++++++++++++++---------------- src/ifu/bpred/gsharebasic.sv | 22 +++++++++--------- src/ifu/bpred/twoBitPredictor.sv | 22 +++++++++--------- src/ifu/ifu.sv | 6 ++--- src/privileged/csr.sv | 4 ++-- src/privileged/csrc.sv | 4 ++-- src/privileged/privileged.sv | 4 ++-- src/wally/wallypipelinedcore.sv | 6 ++--- 9 files changed, 65 insertions(+), 65 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index ddff3a04c..015d4c908 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -64,18 +64,18 @@ module bpred ( // Report branch prediction status output logic BPPredWrongE, // Prediction is wrong output logic BPPredWrongM, // Prediction is wrong - output logic DirPredictionWrongM, // Prediction direction is wrong + output logic BPDirPredWrongM, // Prediction direction is wrong output logic BTBPredPCWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is 
wrong output logic PredictionInstrClassWrongM // Class prediction is wrong ); - logic [1:0] DirPredictionF; + logic [1:0] BPDirPredF; logic [`XLEN-1:0] BTAF, RASPCF; logic PredictionPCWrongE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; - logic DirPredictionWrongE; + logic BPDirPredWrongE; logic BPPCSrcF; logic [`XLEN-1:0] BPPredPCF; @@ -103,29 +103,29 @@ module bpred ( if (`BPRED_TYPE == "BP_TWOBIT") begin:Predictor twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, + .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE, .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE") begin:Predictor gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .DirPredictionF, .DirPredictionWrongE, + .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .BPDirPredF, .BPDirPredWrongE, .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL") begin:Predictor gshare #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .DirPredictionF, .DirPredictionWrongE, + .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .BPDirPredF, .BPDirPredWrongE, .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE_BASIC") begin:Predictor gsharebasic #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, + .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE, .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL_BASIC") begin:Predictor gsharebasic #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, 
.StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, + .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE, .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor @@ -134,7 +134,7 @@ module bpred ( localHistoryPredictor DirPredictor(.clk, .reset, .StallF, .StallE, .LookUpPC(PCNextF), - .Prediction(DirPredictionF), + .Prediction(BPDirPredF), // update .UpdatePC(PCE), .UpdateEN(InstrClassE[0] & ~StallE), @@ -192,7 +192,7 @@ module bpred ( // This section connects the BTB's instruction class prediction. assign {BPJalF, BPRetF, BPJumpF, BPBranchF} = {BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}; end - assign BPPCSrcF = (BPBranchF & DirPredictionF[1]) | BPJumpF; + assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF; // Part 3 RAS RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, @@ -275,8 +275,8 @@ module bpred ( flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, - {DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE}, - {DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM}); + {BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE}, + {BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM}); end else begin assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0; diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index 70c03afb0..596e587c8 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -35,8 +35,8 @@ module gshare #(parameter k = 10, input logic reset, input logic StallF, StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, - output logic [1:0] DirPredictionF, - output logic DirPredictionWrongE, + output logic [1:0] BPDirPredF, + output logic BPDirPredWrongE, // update input logic [`XLEN-1:0] PCNextF, PCF, PCD, 
PCE, PCM, PCW, input logic BPBranchF, BranchD, BranchE, BranchM, BranchW, PCSrcE @@ -45,8 +45,8 @@ module gshare #(parameter k = 10, logic MatchF, MatchD, MatchE, MatchM, MatchW; logic MatchX; - logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE, ForwardNewDirPredictionF; - logic [1:0] NewDirPredictionE, NewDirPredictionM, NewDirPredictionW; + logic [1:0] TableBPDirPredF, BPDirPredD, BPDirPredE, ForwardNewBPDirPredF; + logic [1:0] NewBPDirPredE, NewBPDirPredM, NewBPDirPredW; logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM, IndexW; @@ -76,33 +76,33 @@ module gshare #(parameter k = 10, assign MatchW = BranchW & ~FlushW & (IndexF == IndexW); assign MatchX = MatchD | MatchE | MatchM | MatchW; - assign ForwardNewDirPredictionF = MatchD ? {2{DirPredictionD[1]}} : - MatchE ? {NewDirPredictionE} : - MatchM ? {NewDirPredictionM} : - NewDirPredictionW ; + assign ForwardNewBPDirPredF = MatchD ? {2{BPDirPredD[1]}} : + MatchE ? {NewBPDirPredE} : + MatchM ? {NewBPDirPredM} : + NewBPDirPredW ; - assign DirPredictionF = MatchX ? ForwardNewDirPredictionF : TableDirPredictionF; + assign BPDirPredF = MatchX ? 
ForwardNewBPDirPredF : TableBPDirPredF; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallM & ~FlushM), .ra1(IndexNextF), - .rd1(TableDirPredictionF), + .rd1(TableBPDirPredF), .wa2(IndexM), - .wd2(NewDirPredictionM), + .wd2(NewBPDirPredM), .we2(BranchM), .bwe2(1'b1)); - flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); - flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); + flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD); + flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE); - satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); - flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); - flopenrc #(2) NewPredictionRegW(clk, reset, FlushW, ~StallW, NewDirPredictionM, NewDirPredictionW); + satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE)); + flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM); + flopenrc #(2) NewPredictionRegW(clk, reset, FlushW, ~StallW, NewBPDirPredM, NewBPDirPredW); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; + assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE; - assign GHRNextF = BPBranchF ? {DirPredictionF[1], GHRF[k-1:1]} : GHRF; - assign GHRF = BranchD ? {DirPredictionD[1], GHRD[k-1:1]} : GHRD; + assign GHRNextF = BPBranchF ? {BPDirPredF[1], GHRF[k-1:1]} : GHRF; + assign GHRF = BranchD ? {BPDirPredD[1], GHRD[k-1:1]} : GHRD; assign GHRD = BranchE ? {PCSrcE, GHRE[k-1:1]} : GHRE; assign GHRE = BranchM ? 
{PCSrcM, GHRM[k-1:1]} : GHRM; diff --git a/src/ifu/bpred/gsharebasic.sv b/src/ifu/bpred/gsharebasic.sv index e793e7ac6..130f17328 100644 --- a/src/ifu/bpred/gsharebasic.sv +++ b/src/ifu/bpred/gsharebasic.sv @@ -35,16 +35,16 @@ module gsharebasic #(parameter k = 10, input logic reset, input logic StallF, StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, - output logic [1:0] DirPredictionF, - output logic DirPredictionWrongE, + output logic [1:0] BPDirPredF, + output logic BPDirPredWrongE, // update input logic [`XLEN-1:0] PCNextF, PCM, input logic BranchE, BranchM, PCSrcE ); logic [k-1:0] IndexNextF, IndexM; - logic [1:0] DirPredictionD, DirPredictionE; - logic [1:0] NewDirPredictionE, NewDirPredictionM; + logic [1:0] BPDirPredD, BPDirPredE; + logic [1:0] NewBPDirPredE, NewBPDirPredM; logic [k-1:0] GHRF, GHRD, GHRE, GHRM, GHR; logic [k-1:0] GHRNext; @@ -61,19 +61,19 @@ module gsharebasic #(parameter k = 10, ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallW & ~FlushW), .ra1(IndexNextF), - .rd1(DirPredictionF), + .rd1(BPDirPredF), .wa2(IndexM), - .wd2(NewDirPredictionM), + .wd2(NewBPDirPredM), .we2(BranchM), .bwe2(1'b1)); - flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); - flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); + flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD); + flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE); - satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); - flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); + satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE)); + flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; + 
assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE; assign GHRNext = BranchM ? {PCSrcM, GHR[k-1:1]} : GHR; flopenr #(k) GHRReg(clk, reset, ~StallM & ~FlushM & BranchM, GHRNext, GHR); diff --git a/src/ifu/bpred/twoBitPredictor.sv b/src/ifu/bpred/twoBitPredictor.sv index 58bf1c6bd..7011a0580 100644 --- a/src/ifu/bpred/twoBitPredictor.sv +++ b/src/ifu/bpred/twoBitPredictor.sv @@ -34,8 +34,8 @@ module twoBitPredictor #(parameter k = 10) ( input logic StallF, StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, input logic [`XLEN-1:0] PCNextF, PCM, - output logic [1:0] DirPredictionF, - output logic DirPredictionWrongE, + output logic [1:0] BPDirPredF, + output logic BPDirPredWrongE, input logic BranchE, BranchM, input logic PCSrcE ); @@ -43,8 +43,8 @@ module twoBitPredictor #(parameter k = 10) ( logic [k-1:0] IndexNextF, IndexM; logic [1:0] PredictionMemory; logic DoForwarding, DoForwardingF; - logic [1:0] DirPredictionD, DirPredictionE; - logic [1:0] NewDirPredictionE, NewDirPredictionM; + logic [1:0] BPDirPredD, BPDirPredE; + logic [1:0] NewBPDirPredE, NewBPDirPredM; // hashing function for indexing the PC // We have k bits to index, but XLEN bits as the input. 
@@ -57,19 +57,19 @@ module twoBitPredictor #(parameter k = 10) ( ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallW & ~FlushW), .ra1(IndexNextF), - .rd1(DirPredictionF), + .rd1(BPDirPredF), .wa2(IndexM), - .wd2(NewDirPredictionM), + .wd2(NewBPDirPredM), .we2(BranchM), .bwe2(1'b1)); - flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); - flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); + flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD); + flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; + assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE; - satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); - flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); + satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE)); + flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM); endmodule diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 71221ef63..e13a08bf9 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -66,7 +66,7 @@ module ifu ( // branch predictor output logic [3:0] InstrClassM, // The valid instruction class. 
1-hot encoded as jalr, ret, jr (not ret), j, br output logic JumpOrTakenBranchM, - output logic DirPredictionWrongM, // Prediction direction is wrong + output logic BPDirPredWrongM, // Prediction direction is wrong output logic BTBPredPCWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong output logic PredictionInstrClassWrongM, // Class prediction is wrong @@ -332,12 +332,12 @@ module ifu ( .BranchD, .BranchE, .JumpD, .JumpE, .InstrD, .PCNextF, .PCPlus2or4F, .PCNext1F, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, .PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPPredWrongM, - .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM); + .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM); end else begin : bpred mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PCNext1F)); assign BPPredWrongE = PCSrcE; - assign {InstrClassM, DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM} = '0; + assign {InstrClassM, BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM} = '0; assign NextValidPCE = PCE; end diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 7b765bae0..ee3d947fd 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -57,7 +57,7 @@ module csr #(parameter input logic SelHPTW, // hardware page table walker active, so base endianness on supervisor mode // inputs for performance counters input logic LoadStallD, - input logic DirPredictionWrongM, + input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, input logic PredictionInstrClassWrongM, @@ -259,7 +259,7 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, .InstrValidNotFlushedM, .LoadStallD, .CSRMWriteM, - .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, 
.PredictionInstrClassWrongM, .JumpOrTakenBranchM, .BPPredWrongM, + .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM, .JumpOrTakenBranchM, .BPPredWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index d61835826..5cc58ce34 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -44,7 +44,7 @@ module csrc #(parameter input logic StallE, StallM, input logic FlushM, input logic InstrValidNotFlushedM, LoadStallD, CSRMWriteM, - input logic DirPredictionWrongM, + input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, input logic PredictionInstrClassWrongM, @@ -86,7 +86,7 @@ module csrc #(parameter assign CounterEvent[`COUNTERS-1:3] = 0; end else begin: cevent // User-defined counters assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. 
- assign CounterEvent[4] = DirPredictionWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction + assign CounterEvent[4] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; // branch instruction assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; // branch predictor wrong target assign CounterEvent[7] = JumpOrTakenBranchM & InstrValidNotFlushedM; // jump or taken branch instructions diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 300da8a65..93b7f9729 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -46,7 +46,7 @@ module privileged ( // processor events for performance counter logging input logic FRegWriteM, // instruction will write floating-point registers input logic LoadStallD, // load instruction is stalling - input logic DirPredictionWrongM, // branch predictor guessed wrong directoin + input logic BPDirPredWrongM, // branch predictor guessed wrong directoin input logic BTBPredPCWrongM, // branch predictor guessed wrong target input logic RASPredPCWrongM, // return adddress stack guessed wrong target input logic PredictionInstrClassWrongM, // branch predictor guessed wrong instruction class @@ -125,7 +125,7 @@ module privileged ( .CSRReadM, .CSRWriteM, .TrapM, .mretM, .sretM, .wfiM, .IntPendingM, .InterruptM, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, - .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPPredWrongM, + .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPPredWrongM, .PredictionInstrClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .JumpOrTakenBranchM, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 02074f973..c4b83386d 100644 
--- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -141,7 +141,7 @@ module wallypipelinedcore ( logic LSUHREADY; logic BPPredWrongE, BPPredWrongM; - logic DirPredictionWrongM; + logic BPDirPredWrongM; logic BTBPredPCWrongM; logic RASPredPCWrongM; logic PredictionInstrClassWrongM; @@ -176,7 +176,7 @@ module wallypipelinedcore ( .PCLinkE, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCE, .BPPredWrongE, .BPPredWrongM, // Mem .CommittedF, .UnalignedPCNextF, .InvalidateICacheM, .CSRWriteFenceM, - .InstrD, .InstrM, .PCM, .InstrClassM, .DirPredictionWrongM, .JumpOrTakenBranchM, + .InstrD, .InstrM, .PCM, .InstrClassM, .BPDirPredWrongM, .JumpOrTakenBranchM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM, // Faults out .IllegalBaseInstrD, .IllegalFPUInstrD, .InstrPageFaultF, .IllegalIEUFPUInstrD, .InstrMisalignedFaultM, @@ -289,7 +289,7 @@ module wallypipelinedcore ( .RetM, .TrapM, .sfencevmaM, .InstrValidM, .CommittedM, .CommittedF, .FRegWriteM, .LoadStallD, - .DirPredictionWrongM, .BTBPredPCWrongM, .BPPredWrongM, + .BPDirPredWrongM, .BTBPredPCWrongM, .BPPredWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM, .InstrClassM, .JumpOrTakenBranchM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, .InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM, From ed7ab402adff68230d7253f28c29d84bb2efde67 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Feb 2023 19:56:55 -0600 Subject: [PATCH 13/55] More signal renames. 
--- src/ifu/bpred/gshare.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index 596e587c8..30af1c4cf 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -45,7 +45,7 @@ module gshare #(parameter k = 10, logic MatchF, MatchD, MatchE, MatchM, MatchW; logic MatchX; - logic [1:0] TableBPDirPredF, BPDirPredD, BPDirPredE, ForwardNewBPDirPredF; + logic [1:0] TableBPDirPredF, BPDirPredD, BPDirPredE, FwdNewDirPredF; logic [1:0] NewBPDirPredE, NewBPDirPredM, NewBPDirPredW; logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM, IndexW; @@ -76,12 +76,12 @@ module gshare #(parameter k = 10, assign MatchW = BranchW & ~FlushW & (IndexF == IndexW); assign MatchX = MatchD | MatchE | MatchM | MatchW; - assign ForwardNewBPDirPredF = MatchD ? {2{BPDirPredD[1]}} : + assign FwdNewDirPredF = MatchD ? {2{BPDirPredD[1]}} : MatchE ? {NewBPDirPredE} : MatchM ? {NewBPDirPredM} : NewBPDirPredW ; - assign BPDirPredF = MatchX ? ForwardNewBPDirPredF : TableBPDirPredF; + assign BPDirPredF = MatchX ? FwdNewDirPredF : TableBPDirPredF; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallM & ~FlushM), From 63b9f9ca3d0b184ea9ce190330eb426117b6f975 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Feb 2023 22:55:51 -0600 Subject: [PATCH 14/55] gshare cleanup. 
--- src/ifu/bpred/bpred.sv | 4 ++-- src/ifu/bpred/gshare.sv | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 015d4c908..02e3e1aa2 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -108,13 +108,13 @@ module bpred ( end else if (`BPRED_TYPE == "BP_GSHARE") begin:Predictor gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .BPDirPredF, .BPDirPredWrongE, + .PCNextF, .PCF, .PCD, .PCE, .PCM, .BPDirPredF, .BPDirPredWrongE, .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL") begin:Predictor gshare #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .BPDirPredF, .BPDirPredWrongE, + .PCNextF, .PCF, .PCD, .PCE, .PCM, .BPDirPredF, .BPDirPredWrongE, .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, .PCSrcE); diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index 30af1c4cf..40cd94e86 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -38,7 +38,7 @@ module gshare #(parameter k = 10, output logic [1:0] BPDirPredF, output logic BPDirPredWrongE, // update - input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW, + input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, input logic BPBranchF, BranchD, BranchE, BranchM, BranchW, PCSrcE ); From 7500bb75c627b56fed55eac4fe04d299e980beba Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Feb 2023 22:57:32 -0600 Subject: [PATCH 15/55] PHT was enabled using the wrong ~flush and ~stall. 
--- src/ifu/bpred/gshare.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index 40cd94e86..66b2b4842 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -84,7 +84,7 @@ module gshare #(parameter k = 10, assign BPDirPredF = MatchX ? FwdNewDirPredF : TableBPDirPredF; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), - .ce1(~StallF), .ce2(~StallM & ~FlushM), + .ce1(~StallF), .ce2(~StallW & ~FlushW), .ra1(IndexNextF), .rd1(TableBPDirPredF), .wa2(IndexM), From 27acb9021740d584a9794224a7c7b70c5ebb892a Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 06:30:43 -0800 Subject: [PATCH 16/55] Fixed SSTC being unusable in M-MODE without Status.TM. Disable STIMECMP registers when SSTC_SUPPORTED = 0 --- src/mmu/pmpadrdec.sv | 4 ++++ src/privileged/csrs.sv | 22 ++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/mmu/pmpadrdec.sv b/src/mmu/pmpadrdec.sv index 4aeea116a..5f666eeaf 100644 --- a/src/mmu/pmpadrdec.sv +++ b/src/mmu/pmpadrdec.sv @@ -85,5 +85,9 @@ module pmpadrdec ( assign W = PMPCfg[1]; assign R = PMPCfg[0]; assign Active = |PMPCfg[4:3]; + + // known bug: The size of the access is not yet checked. For example, if an NA4 entry matches 0xC-0xF and the system + // attempts an 8-byte access to 0x8, the access should fail (see page 60 of privileged specification 20211203). This + // implementation will not detect the failure. 
endmodule diff --git a/src/privileged/csrs.sv b/src/privileged/csrs.sv index 99c56ad1b..286ff7907 100644 --- a/src/privileged/csrs.sv +++ b/src/privileged/csrs.sv @@ -86,8 +86,8 @@ module csrs #(parameter assign WriteSTVALM = STrapM | (CSRSWriteM & (CSRAdrM == STVAL)) & InstrValidNotFlushedM; assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == `M_MODE | ~STATUS_TVM) & InstrValidNotFlushedM; assign WriteSCOUNTERENM = CSRSWriteM & (CSRAdrM == SCOUNTEREN) & InstrValidNotFlushedM; - assign WriteSTIMECMPM = CSRSWriteM & (CSRAdrM == STIMECMP) & MCOUNTEREN_TM & InstrValidNotFlushedM; - assign WriteSTIMECMPHM = CSRSWriteM & (CSRAdrM == STIMECMPH) & MCOUNTEREN_TM & (`XLEN == 32) & InstrValidNotFlushedM; + assign WriteSTIMECMPM = CSRSWriteM & (CSRAdrM == STIMECMP) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM) & InstrValidNotFlushedM; + assign WriteSTIMECMPHM = CSRSWriteM & (CSRAdrM == STIMECMPH) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM) & (`XLEN == 32) & InstrValidNotFlushedM; // CSRs flopenr #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, STVEC_REGW); @@ -100,12 +100,14 @@ module csrs #(parameter else assign SATP_REGW = 0; // hardwire to zero if virtual memory not supported flopens #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], SCOUNTEREN_REGW); - if (`XLEN == 64) - flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW); - else begin - flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW[31:0]); - flopenr #(`XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, STIMECMP_REGW[63:32]); - end + if (`SSTC_SUPPORTED) begin + if (`XLEN == 64) + flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW); + else begin + flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW[31:0]); + flopenr #(`XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, 
STIMECMP_REGW[63:32]); + end + end else STIMECMP_REGW = 0; // Supervisor timer interrupt logic // Spec is a bit peculiar - Machine timer interrupts are produced in CLINT, while Supervisor timer interrupts are in CSRs @@ -132,12 +134,12 @@ module csrs #(parameter if (PrivilegeModeW == `S_MODE & STATUS_TVM) IllegalCSRSAccessM = 1; end SCOUNTEREN:CSRSReadValM = {{(`XLEN-32){1'b0}}, SCOUNTEREN_REGW}; - STIMECMP: if (MCOUNTEREN_TM) CSRSReadValM = STIMECMP_REGW[`XLEN-1:0]; + STIMECMP: if (`SSTC_SUPPORTED & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM)) CSRSReadValM = STIMECMP_REGW[`XLEN-1:0]; else begin CSRSReadValM = 0; IllegalCSRSAccessM = 1; end - STIMECMPH: if (MCOUNTEREN_TM & (`XLEN == 32)) CSRSReadValM[31:0] = STIMECMP_REGW[63:32]; + STIMECMPH: if (`SSTC_SUPPORTED & (`XLEN == 32) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM)) CSRSReadValM[31:0] = STIMECMP_REGW[63:32]; else begin // not supported for RV64 CSRSReadValM = 0; IllegalCSRSAccessM = 1; From d50658addf1d7692d9bda4c05ecbf31a0b033ac5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 07:12:13 -0800 Subject: [PATCH 17/55] Fixed missing assign when SSTC is not supported --- src/privileged/csrs.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/privileged/csrs.sv b/src/privileged/csrs.sv index 286ff7907..253d02457 100644 --- a/src/privileged/csrs.sv +++ b/src/privileged/csrs.sv @@ -107,7 +107,7 @@ module csrs #(parameter flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW[31:0]); flopenr #(`XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, STIMECMP_REGW[63:32]); end - end else STIMECMP_REGW = 0; + end else assign STIMECMP_REGW = 0; // Supervisor timer interrupt logic // Spec is a bit peculiar - Machine timer interrupts are produced in CLINT, while Supervisor timer interrupts are in CSRs From e3e5100f8db39be67e44f71f9aca321c45bd5196 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 07:12:43 -0800 Subject: [PATCH 
18/55] Renamed DAPageFault to HPTWDAPageFault in hptw to avoid name conflict with DAPageFault from tlbcontrol --- src/mmu/hptw.sv | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index c0b7ad933..e62373468 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -87,7 +87,7 @@ module hptw ( logic [`XLEN-1:0] TranslationVAdr; logic [`XLEN-1:0] NextPTE; logic UpdatePTE; - logic DAPageFault; + logic HPTWDAPageFault; logic [`PA_BITS-1:0] HPTWReadAdr; logic SelHPTWAdr; logic [`XLEN+1:0] HPTWAdrExt; @@ -167,14 +167,14 @@ module hptw ( // memory access. If there is the PTE needs to be updated seting Access // and possibly also Dirty. Dirty is set if the operation is a store/amo. // However any other fault should not cause the update. - assign DAPageFault = ValidLeafPTE & (~Accessed | SetDirty) & ~OtherPageFault; + assign HPTWDAPageFault = ValidLeafPTE & (~Accessed | SetDirty) & ~OtherPageFault; assign HPTWRW[0] = (WalkerState == UPDATE_PTE); - assign UpdatePTE = (WalkerState == LEAF) & DAPageFault; + assign UpdatePTE = (WalkerState == LEAF) & HPTWDAPageFault; end else begin // block: hptwwrites assign NextPTE = ReadDataM; assign HPTWAdr = HPTWReadAdr; - assign DAPageFault = '0; + assign HPTWDAPageFault = '0; assign UpdatePTE = '0; assign HPTWRW[0] = '0; end @@ -182,8 +182,8 @@ module hptw ( // Enable and select signals based on states assign StartWalk = (WalkerState == IDLE) & TLBMiss; assign HPTWRW[1] = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD); - assign DTLBWriteM = (WalkerState == LEAF & ~DAPageFault) & DTLBWalk; - assign ITLBWriteF = (WalkerState == LEAF & ~DAPageFault) & ~DTLBWalk; + assign DTLBWriteM = (WalkerState == LEAF & ~HPTWDAPageFault) & DTLBWalk; + assign ITLBWriteF = (WalkerState == LEAF & ~HPTWDAPageFault) & ~DTLBWalk; // FSM to track PageType based on the levels of the page table traversed flopr #(2) PageTypeReg(clk, reset, NextPageType, 
PageType); @@ -262,7 +262,7 @@ module hptw ( else NextWalkerState = LEAF; L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; else NextWalkerState = LEAF; - LEAF: if (`HPTW_WRITES_SUPPORTED & DAPageFault) NextWalkerState = UPDATE_PTE; + LEAF: if (`HPTW_WRITES_SUPPORTED & HPTWDAPageFault) NextWalkerState = UPDATE_PTE; else NextWalkerState = IDLE; UPDATE_PTE: if(DCacheStallM) NextWalkerState = UPDATE_PTE; else NextWalkerState = LEAF; From 4579a9d0c2a82eb4c9feed70a89ccae0bcdc7434 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 09:38:32 -0800 Subject: [PATCH 19/55] Renamed HPTW_WRITES_SUPPORTED to SVADU_SUPPORTED --- config/buildroot/wally-config.vh | 2 +- config/fpga/wally-config.vh | 2 +- config/rv32e/wally-config.vh | 2 +- config/rv32gc/wally-config.vh | 2 +- config/rv32i/wally-config.vh | 2 +- config/rv32imc/wally-config.vh | 2 +- config/rv64fpquad/wally-config.vh | 2 +- config/rv64gc/wally-config.vh | 2 +- config/rv64i/wally-config.vh | 2 +- src/ifu/spill.sv | 2 +- src/mmu/hptw.sv | 10 +++++----- src/mmu/tlbcontrol.sv | 4 ++-- src/wally/cvw.sv | 2 +- 13 files changed, 18 insertions(+), 18 deletions(-) diff --git a/config/buildroot/wally-config.vh b/config/buildroot/wally-config.vh index bfe69e84d..fbb5799d5 100644 --- a/config/buildroot/wally-config.vh +++ b/config/buildroot/wally-config.vh @@ -135,7 +135,7 @@ `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 1 +`define SVADU_SUPPORTED 1 // FPU division architecture `define RADIX 32'h4 diff --git a/config/fpga/wally-config.vh b/config/fpga/wally-config.vh index 3ae91e3a7..03bc3f755 100644 --- a/config/fpga/wally-config.vh +++ b/config/fpga/wally-config.vh @@ -144,7 +144,7 @@ `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 1 +`define SVADU_SUPPORTED 1 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32e/wally-config.vh b/config/rv32e/wally-config.vh index 6e0de3347..b000b7911 100644 --- a/config/rv32e/wally-config.vh +++ b/config/rv32e/wally-config.vh @@ -138,7 +138,7 @@ 
`define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32gc/wally-config.vh b/config/rv32gc/wally-config.vh index 57857f3be..d1571067b 100644 --- a/config/rv32gc/wally-config.vh +++ b/config/rv32gc/wally-config.vh @@ -137,7 +137,7 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32i/wally-config.vh b/config/rv32i/wally-config.vh index efbf6e7c0..0f2e91c95 100644 --- a/config/rv32i/wally-config.vh +++ b/config/rv32i/wally-config.vh @@ -138,7 +138,7 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32imc/wally-config.vh b/config/rv32imc/wally-config.vh index 8fb29a678..f6b29895d 100644 --- a/config/rv32imc/wally-config.vh +++ b/config/rv32imc/wally-config.vh @@ -137,7 +137,7 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv64fpquad/wally-config.vh b/config/rv64fpquad/wally-config.vh index dd8058c28..3e4b91600 100644 --- a/config/rv64fpquad/wally-config.vh +++ b/config/rv64fpquad/wally-config.vh @@ -140,7 +140,7 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv64gc/wally-config.vh b/config/rv64gc/wally-config.vh index 4100f4c08..f0dad93b4 100644 --- a/config/rv64gc/wally-config.vh +++ b/config/rv64gc/wally-config.vh @@ -140,7 +140,7 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv64i/wally-config.vh b/config/rv64i/wally-config.vh 
index a3702c3fd..f485c667d 100644 --- a/config/rv64i/wally-config.vh +++ b/config/rv64i/wally-config.vh @@ -140,7 +140,7 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/src/ifu/spill.sv b/src/ifu/spill.sv index 4bb677cab..a5b274633 100644 --- a/src/ifu/spill.sv +++ b/src/ifu/spill.sv @@ -77,7 +77,7 @@ module spill #( //////////////////////////////////////////////////////////////////////////////////////////////////// assign SpillF = &PCF[$clog2(SPILLTHRESHOLD)+1:1]; - assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF)); + assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`SVADU_SUPPORTED & InstrDAPageFaultF)); always_ff @(posedge clk) if (reset | FlushD) CurrState <= #1 STATE_READY; diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index e62373468..19a3aca79 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -125,7 +125,7 @@ module hptw ( assign ValidLeafPTE = ValidPTE & LeafPTE; assign ValidNonLeafPTE = ValidPTE & ~LeafPTE; - if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites + if(`SVADU_SUPPORTED) begin : hptwwrites logic ReadAccess, WriteAccess; logic InvalidRead, InvalidWrite; logic UpperBitsUnequalPageFault; @@ -262,7 +262,7 @@ module hptw ( else NextWalkerState = LEAF; L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; else NextWalkerState = LEAF; - LEAF: if (`HPTW_WRITES_SUPPORTED & HPTWDAPageFault) NextWalkerState = UPDATE_PTE; + LEAF: if (`SVADU_SUPPORTED & HPTWDAPageFault) NextWalkerState = UPDATE_PTE; else NextWalkerState = IDLE; UPDATE_PTE: if(DCacheStallM) NextWalkerState = UPDATE_PTE; else NextWalkerState = LEAF; @@ -273,8 +273,8 @@ module hptw ( assign SelHPTW = WalkerState != IDLE; assign HPTWStall = (WalkerState != IDLE) | (WalkerState == IDLE & TLBMiss); - assign ITLBMissOrDAFaultF = ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF); - assign 
DTLBMissOrDAFaultM = DTLBMissM | (`HPTW_WRITES_SUPPORTED & DataDAPageFaultM); + assign ITLBMissOrDAFaultF = ITLBMissF | (`SVADU_SUPPORTED & InstrDAPageFaultF); + assign DTLBMissOrDAFaultM = DTLBMissM | (`SVADU_SUPPORTED & DataDAPageFaultM); // HTPW address/data/control muxing @@ -291,7 +291,7 @@ module hptw ( mux2 #(7) funct7mux(Funct7M, 7'b0, SelHPTW, LSUFunct7M); mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LSUAtomicM); mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, IHAdrM); - if(`HPTW_WRITES_SUPPORTED) + if(`SVADU_SUPPORTED) mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IHWriteDataM); else assign IHWriteDataM = WriteDataM; diff --git a/src/mmu/tlbcontrol.sv b/src/mmu/tlbcontrol.sv index abbdba8f6..4007b6a08 100644 --- a/src/mmu/tlbcontrol.sv +++ b/src/mmu/tlbcontrol.sv @@ -76,7 +76,7 @@ module tlbcontrol #(parameter ITLB = 0) ( // only execute non-user mode pages. assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | ((EffectivePrivilegeMode == `S_MODE) & PTE_U); - if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites + if(`SVADU_SUPPORTED) begin : hptwwrites assign DAPageFault = Translate & TLBHit & ~PTE_A & ~TLBPageFault; assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); end else begin @@ -98,7 +98,7 @@ module tlbcontrol #(parameter ITLB = 0) ( // Check for write error. Writes are invalid when the page's write bit is // low. 
assign InvalidWrite = WriteAccess & ~PTE_W; - if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites + if(`SVADU_SUPPORTED) begin : hptwwrites assign DAPageFault = Translate & TLBHit & (~PTE_A | WriteAccess & ~PTE_D) & ~TLBPageFault; assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); end else begin diff --git a/src/wally/cvw.sv b/src/wally/cvw.sv index c4541b698..eb6146283 100644 --- a/src/wally/cvw.sv +++ b/src/wally/cvw.sv @@ -101,7 +101,7 @@ package cvw; parameter BPRED_SUPPORTED = `BPRED_SUPPORTED; parameter BPRED_TYPE = `BPRED_TYPE; parameter BPRED_SIZE = `BPRED_SIZE; - parameter HPTW_WRITES_SUPPORTED = `HPTW_WRITES_SUPPORTED; + parameter SVADU_SUPPORTED = `SVADU_SUPPORTED; // parameter = `; From 2203c057240b525fe592ab348fe36c06f9cf050b Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 09:58:34 -0800 Subject: [PATCH 20/55] Access faults are gated by ~TLBMiss rather than ~(Translate & ~TLBHit) --- src/mmu/mmu.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 5b5248161..0193a5478 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -120,9 +120,9 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( // Access faults // If TLB miss and translating we want to not have faults from the PMA and PMP checkers.
- assign InstrAccessFaultF = (PMAInstrAccessFaultF | PMPInstrAccessFaultF) & ~(Translate & ~TLBHit); - assign LoadAccessFaultM = (PMALoadAccessFaultM | PMPLoadAccessFaultM) & ~(Translate & ~TLBHit); - assign StoreAmoAccessFaultM = (PMAStoreAmoAccessFaultM | PMPStoreAmoAccessFaultM) & ~(Translate & ~TLBHit); + assign InstrAccessFaultF = (PMAInstrAccessFaultF | PMPInstrAccessFaultF) & ~TLBMiss; + assign LoadAccessFaultM = (PMALoadAccessFaultM | PMPLoadAccessFaultM) & ~TLBMiss; + assign StoreAmoAccessFaultM = (PMAStoreAmoAccessFaultM | PMPStoreAmoAccessFaultM) & ~TLBMiss; // Misaligned faults always_comb From 2ad62ea31faf4b08b2b5c87450a966ec767ea60d Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 10:04:16 -0800 Subject: [PATCH 21/55] Removed unneeded TLBFlush from TLBMiss --- src/mmu/tlbcontrol.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmu/tlbcontrol.sv b/src/mmu/tlbcontrol.sv index 4007b6a08..04af0f2a9 100644 --- a/src/mmu/tlbcontrol.sv +++ b/src/mmu/tlbcontrol.sv @@ -109,5 +109,5 @@ module tlbcontrol #(parameter ITLB = 0) ( end assign TLBHit = CAMHit & TLBAccess; - assign TLBMiss = (~CAMHit | TLBFlush) & Translate & TLBAccess; + assign TLBMiss = ~CAMHit & TLBAccess & Translate ; endmodule From a9e884acc803c5db4bf54b558d5a70b5cab9551a Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 17:28:05 -0800 Subject: [PATCH 22/55] Moved TLB into subdirectory of MMU --- src/mmu/tlb/tlb.sv | 121 +++++++++++++++++++++++++++++++++ src/mmu/tlb/tlbcam.sv | 62 +++++++++++++++++ src/mmu/tlb/tlbcamline.sv | 106 +++++++++++++++++++++++++++++ src/mmu/tlb/tlbcontrol.sv | 113 ++++++++++++++++++++++++++++++ src/mmu/tlb/tlblru.sv | 56 +++++++++++++++ src/mmu/tlb/tlbmixer.sv | 69 +++++++++++++++++++ src/mmu/tlb/tlbram.sv | 54 +++++++++++++++ src/mmu/tlb/tlbramline.sv | 43 ++++++++++++ src/mmu/{ => tlb}/vm64check.sv | 6 +- 9 files changed, 627 insertions(+), 3 deletions(-) create mode 100644 src/mmu/tlb/tlb.sv create mode 
100644 src/mmu/tlb/tlbcam.sv create mode 100644 src/mmu/tlb/tlbcamline.sv create mode 100644 src/mmu/tlb/tlbcontrol.sv create mode 100644 src/mmu/tlb/tlblru.sv create mode 100644 src/mmu/tlb/tlbmixer.sv create mode 100644 src/mmu/tlb/tlbram.sv create mode 100644 src/mmu/tlb/tlbramline.sv rename src/mmu/{ => tlb}/vm64check.sv (90%) diff --git a/src/mmu/tlb/tlb.sv b/src/mmu/tlb/tlb.sv new file mode 100644 index 000000000..f8bf0d178 --- /dev/null +++ b/src/mmu/tlb/tlb.sv @@ -0,0 +1,121 @@ +/////////////////////////////////////////// +// tlb.sv +// +// Written: jtorrey@hmc.edu 16 February 2021 +// Modified: kmacsaigoren@hmc.edu 1 June 2021 +// Implemented SV48 on top of SV39. This included adding the SvMode signal, +// and using it to decide the translate signal and get the virtual page number +// +// Purpose: Translation lookaside buffer +// Cache of virtural-to-physical address translations +// +// Documentation: RISC-V System on Chip Design Chapter 8 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * SV32 specs + * ---------- + * Virtual address [31:0] (32 bits) + * [________________________________] + * |--VPN1--||--VPN0--||----OFF---| + * 10 10 12 + * + * Physical address [33:0] (34 bits) + * [__________________________________] + * |---PPN1---||--PPN0--||----OFF---| + * 12 10 12 + * + * Page Table Entry [31:0] (32 bits) + * [________________________________] + * |---PPN1---||--PPN0--|||DAGUXWRV + * 12 10 ^^ + * RSW(2) -- for OS + */ + +`include "wally-config.vh" + +// The TLB will have 2**ENTRY_BITS total entries +module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) ( + input logic clk, reset, + input logic [`SVMODE_BITS-1:0] SATP_MODE, // Current address translation mode + input logic [`ASID_BITS-1:0] SATP_ASID, + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, + input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor + input logic ReadAccess, + input logic WriteAccess, + input logic DisableTranslation, + input logic [`XLEN-1:0] VAdr, // address input before translation (could be physical or virtual) + input logic [`XLEN-1:0] PTE, + input logic [1:0] PageTypeWriteVal, + input logic TLBWrite, + input logic TLBFlush, + output logic [`PA_BITS-1:0] TLBPAdr, + output logic TLBMiss, + output logic TLBHit, + output logic Translate, + output logic TLBPageFault, + output logic DAPageFault +); + + logic [TLB_ENTRIES-1:0] Matches, WriteEnables, PTE_Gs; // used as the one-hot encoding of WriteIndex + // Sections of the virtual and physical addresses + logic [`VPN_BITS-1:0] VPN; + logic [`PPN_BITS-1:0] PPN; + // Sections of the page table entry + logic [7:0] PTEAccessBits; + logic [1:0] HitPageType; + logic CAMHit; + logic SV39Mode; + logic Misaligned; + logic MegapageMisaligned; + + if(`XLEN == 32) begin + assign MegapageMisaligned = |(PPN[9:0]); // must have zero PPN0 + assign Misaligned = (HitPageType == 
2'b01) & MegapageMisaligned; + end else begin // 64-bit + logic GigapageMisaligned, TerapageMisaligned; + assign TerapageMisaligned = |(PPN[26:0]); // must have zero PPN2, PPN1, PPN0 + assign GigapageMisaligned = |(PPN[17:0]); // must have zero PPN1 and PPN0 + assign MegapageMisaligned = |(PPN[8:0]); // must have zero PPN0 + assign Misaligned = ((HitPageType == 2'b11) & TerapageMisaligned) | + ((HitPageType == 2'b10) & GigapageMisaligned) | + ((HitPageType == 2'b01) & MegapageMisaligned); + end + + assign VPN = VAdr[`VPN_BITS+11:12]; + + tlbcontrol #(ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .TLBFlush, + .PTEAccessBits, .CAMHit, .Misaligned, .TLBMiss, .TLBHit, .TLBPageFault, + .DAPageFault, .SV39Mode, .Translate); + + tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables); + tlbcam #(TLB_ENTRIES, `VPN_BITS + `ASID_BITS, `VPN_SEGMENT_BITS) + tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs, + .SATP_ASID, .Matches, .HitPageType, .CAMHit); + tlbram #(TLB_ENTRIES) tlbram(.clk, .reset, .PTE, .Matches, .WriteEnables, .PPN, .PTEAccessBits, .PTE_Gs); + + // Replace segments of the virtual page number with segments of the physical + // page number. For 4 KB pages, the entire virtual page number is replaced. + // For superpages, some segments are considered offsets into a larger page. + tlbmixer Mixer(.VPN, .PPN, .HitPageType, .Offset(VAdr[11:0]), .TLBHit, .TLBPAdr); + +endmodule diff --git a/src/mmu/tlb/tlbcam.sv b/src/mmu/tlb/tlbcam.sv new file mode 100644 index 000000000..449411e20 --- /dev/null +++ b/src/mmu/tlb/tlbcam.sv @@ -0,0 +1,62 @@ +/////////////////////////////////////////// +// tlbcam.sv +// +// Written: jtorrey@hmc.edu 16 February 2021 +// Modified: kmacsaigoren@hmc.edu 1 June 2021 +// Implemented SV48 on top of SV39. 
This included adding the SvMode signal input and wally constants +// Mostly this was to make the cam_lines work. +// +// Purpose: Stores virtual page numbers with cached translations. +// Determines whether a given virtual page number is in the TLB. +// +// Documentation: RISC-V System on Chip Design Chapter 8 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module tlbcam #(parameter TLB_ENTRIES = 8, KEY_BITS = 20, SEGMENT_BITS = 10) ( + input logic clk, reset, + input logic [`VPN_BITS-1:0] VPN, + input logic [1:0] PageTypeWriteVal, + input logic SV39Mode, + input logic TLBFlush, + input logic [TLB_ENTRIES-1:0] WriteEnables, + input logic [TLB_ENTRIES-1:0] PTE_Gs, + input logic [`ASID_BITS-1:0] SATP_ASID, + output logic [TLB_ENTRIES-1:0] Matches, + output logic [1:0] HitPageType, + output logic CAMHit +); + + logic [1:0] PageTypeRead [TLB_ENTRIES-1:0]; + + // TLB_ENTRIES CAM lines, each of which will independently consider + // whether the requested virtual address is a match. 
Each line stores the + // original virtual page number from when the address was written, regardless + // of page type. However, matches are determined based on a subset of the + // page number segments. + + tlbcamline #(KEY_BITS, SEGMENT_BITS) camlines[TLB_ENTRIES-1:0]( + .clk, .reset, .VPN, .SATP_ASID, .SV39Mode, .PTE_G(PTE_Gs), .PageTypeWriteVal, .TLBFlush, + .WriteEnable(WriteEnables), .PageTypeRead, .Match(Matches)); + assign CAMHit = |Matches & ~TLBFlush; + or_rows #(TLB_ENTRIES,2) PageTypeOr(PageTypeRead, HitPageType); +endmodule + diff --git a/src/mmu/tlb/tlbcamline.sv b/src/mmu/tlb/tlbcamline.sv new file mode 100644 index 000000000..9f7a68e81 --- /dev/null +++ b/src/mmu/tlb/tlbcamline.sv @@ -0,0 +1,106 @@ +/////////////////////////////////////////// +// tlbcamline.sv +// +// Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021 +// Modified: kmacsaigoren@hmc.edu 1 June 2021 +// Implemented SV48 on top of SV39. This included adding SvMode input signal and the wally constants +// Mostly this was done to make the PageNumberMixer work. +// +// Purpose: CAM line for the translation lookaside buffer (TLB) +// Determines whether a virtual page number matches the stored key. +// +// Documentation: RISC-V System on Chip Design Chapter 8 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. 
See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module tlbcamline #(parameter KEY_BITS = 20, SEGMENT_BITS = 10) ( + input logic clk, reset, + input logic [`VPN_BITS-1:0] VPN, // The requested page number to compare against the key + input logic [`ASID_BITS-1:0] SATP_ASID, + input logic SV39Mode, + input logic WriteEnable, // Write a new entry to this line + input logic PTE_G, + input logic [1:0] PageTypeWriteVal, + input logic TLBFlush, // Flush this line (set valid to 0) + output logic [1:0] PageTypeRead, // *** should this be the stored version or the always updated one? + output logic Match +); + + // PageTypeRead is a key for a tera, giga, mega, or kilopage. + // PageType == 2'b00 --> kilopage + // PageType == 2'b01 --> megapage + // PageType == 2'b10 --> gigapage + // PageType == 2'b11 --> terapage + + // This entry has KEY_BITS for the key plus one valid bit. + logic Valid; + logic [KEY_BITS-1:0] Key; + logic [1:0] PageType; + + // Split up key and query into sections for each page table level. + logic [`ASID_BITS-1:0] Key_ASID; + logic [SEGMENT_BITS-1:0] Key0, Key1, Query0, Query1; + logic MatchASID, Match0, Match1; + + assign MatchASID = (SATP_ASID == Key_ASID) | PTE_G; + + if (`XLEN == 32) begin: match + + assign {Key_ASID, Key1, Key0} = Key; + assign {Query1, Query0} = VPN; + + // Calculate the actual match value based on the input vpn and the page type. + // For example, a megapage in SV32 only cares about VPN[1], so VPN[0] + // should automatically match. 
+ assign Match0 = (Query0 == Key0) | (PageType[0]); // least signifcant section + assign Match1 = (Query1 == Key1); + + assign Match = Match0 & Match1 & MatchASID & Valid; + end else begin: match + + logic [SEGMENT_BITS-1:0] Key2, Key3, Query2, Query3; + logic Match2, Match3; + + assign {Query3, Query2, Query1, Query0} = VPN; + assign {Key_ASID, Key3, Key2, Key1, Key0} = Key; + + // Calculate the actual match value based on the input vpn and the page type. + // For example, a gigapage in SV39 only cares about VPN[2], so VPN[0] and VPN[1] + // should automatically match. + assign Match0 = (Query0 == Key0) | (PageType > 2'd0); // least signifcant section + assign Match1 = (Query1 == Key1) | (PageType > 2'd1); + assign Match2 = (Query2 == Key2) | (PageType > 2'd2); + assign Match3 = (Query3 == Key3) | SV39Mode; // this should always match in sv39 because they aren't used + + assign Match = Match0 & Match1 & Match2 & Match3 & MatchASID & Valid; + end + + // On a write, update the type of the page referred to by this line. + flopenr #(2) pagetypeflop(clk, reset, WriteEnable, PageTypeWriteVal, PageType); + assign PageTypeRead = PageType & {2{Match}}; + + // On a write, set the valid bit high and update the stored key. + // On a flush, zero the valid bit and leave the key unchanged. + // *** Might we want to update stored key right away to output match on the + // write cycle? 
(using a mux) + flopenr #(1) validbitflop(clk, reset, WriteEnable | TLBFlush, ~TLBFlush, Valid); + flopenr #(KEY_BITS) keyflop(clk, reset, WriteEnable, {SATP_ASID, VPN}, Key); +endmodule diff --git a/src/mmu/tlb/tlbcontrol.sv b/src/mmu/tlb/tlbcontrol.sv new file mode 100644 index 000000000..45c56f1b7 --- /dev/null +++ b/src/mmu/tlb/tlbcontrol.sv @@ -0,0 +1,113 @@ +/////////////////////////////////////////// +// tlbcontrol.sv +// +// Written: David_Harris@hmc.edu 5 July 2021 +// Modified: +// +// Purpose: Control signals for TLB +// +// Documentation: RISC-V System on Chip Design Chapter 8 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module tlbcontrol #(parameter ITLB = 0) ( + input logic [`SVMODE_BITS-1:0] SATP_MODE, + input logic [`XLEN-1:0] VAdr, + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, + input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor + input logic ReadAccess, WriteAccess, + input logic DisableTranslation, + input logic TLBFlush, // Invalidate all TLB entries + input logic [7:0] PTEAccessBits, + input logic CAMHit, + input logic Misaligned, + output logic TLBMiss, + output logic TLBHit, + output logic TLBPageFault, + output logic DAPageFault, + output logic SV39Mode, + output logic Translate +); + + // Sections of the page table entry + logic [1:0] EffectivePrivilegeMode; + + logic PTE_D, PTE_A, PTE_U, PTE_X, PTE_W, PTE_R, PTE_V; // Useful PTE Control Bits + logic UpperBitsUnequal; + logic TLBAccess; + logic ImproperPrivilege; + + // Grab the sv mode from SATP and determine whether translation should occur + assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1 + assign Translate = (SATP_MODE != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~DisableTranslation; + + // Determine whether TLB is being used + assign TLBAccess = ReadAccess | WriteAccess; + + // Check that upper bits are legal (all 0s or all 1s) + vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequal); + + // unswizzle useful PTE bits + assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; + assign {PTE_U, PTE_X, PTE_W, PTE_R, PTE_V} = PTEAccessBits[4:0]; + + // Check whether the access is allowed, page faulting if not. + if (ITLB == 1) begin:itlb // Instruction TLB fault checking + // User mode may only execute user mode pages, and supervisor mode may + // only execute non-user mode pages. 
+ assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | + ((EffectivePrivilegeMode == `S_MODE) & PTE_U); + if(`SVADU_SUPPORTED) begin : hptwwrites + assign DAPageFault = Translate & TLBHit & ~PTE_A & ~TLBPageFault; + assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequal | Misaligned | ~PTE_V)); + end else begin + // fault for software handling if access bit is off + assign DAPageFault = ~PTE_A; + assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | DAPageFault | UpperBitsUnequal | Misaligned | ~PTE_V)); + end + end else begin:dtlb // Data TLB fault checking + logic InvalidRead, InvalidWrite; + + // User mode may only load/store from user mode pages, and supervisor mode + // may only access user mode pages when STATUS_SUM is low. + assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | + ((EffectivePrivilegeMode == `S_MODE) & PTE_U & ~STATUS_SUM); + // Check for read error. Reads are invalid when the page is not readable + // (and executable pages are not readable) or when the page is neither + // readable nor executable (and executable pages are readable). + assign InvalidRead = ReadAccess & ~PTE_R & (~STATUS_MXR | ~PTE_X); + // Check for write error. Writes are invalid when the page's write bit is + // low. 
+ assign InvalidWrite = WriteAccess & ~PTE_W; + if(`SVADU_SUPPORTED) begin : hptwwrites + assign DAPageFault = Translate & TLBHit & (~PTE_A | WriteAccess & ~PTE_D) & ~TLBPageFault; + assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequal | Misaligned | ~PTE_V)); + end else begin + // Fault for software handling if access bit is off or writing a page with dirty bit off + assign DAPageFault = ~PTE_A | WriteAccess & ~PTE_D; + assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | DAPageFault | UpperBitsUnequal | Misaligned | ~PTE_V)); + end + end + + assign TLBHit = CAMHit & TLBAccess; + assign TLBMiss = ~CAMHit & TLBAccess & Translate ; +endmodule diff --git a/src/mmu/tlb/tlblru.sv b/src/mmu/tlb/tlblru.sv new file mode 100644 index 000000000..6cdb475e3 --- /dev/null +++ b/src/mmu/tlb/tlblru.sv @@ -0,0 +1,56 @@ +/////////////////////////////////////////// +// tlblru.sv +// +// Written: tfleming@hmc.edu & jtorrey@hmc.edu 16 February 2021 +// Modified: +// +// Purpose: Implementation of bit pseudo least-recently-used algorithm for +// cache evictions. Outputs the index of the next entry to be written. +// +// Documentation: RISC-V System on Chip Design Chapter 8 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. 
See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module tlblru #(parameter TLB_ENTRIES = 8) ( + input logic clk, reset, + input logic TLBWrite, + input logic TLBFlush, + input logic [TLB_ENTRIES-1:0] Matches, + input logic CAMHit, + output logic [TLB_ENTRIES-1:0] WriteEnables +); + + logic [TLB_ENTRIES-1:0] RUBits, RUBitsNext, RUBitsAccessed; + logic [TLB_ENTRIES-1:0] WriteLines; + logic [TLB_ENTRIES-1:0] AccessLines; // One-hot encodings of which line is being accessed + logic AllUsed; // High if the next access causes all RU bits to be 1 + + // Find the first line not recently used + priorityonehot #(TLB_ENTRIES) nru(.a(~RUBits), .y(WriteLines)); + + // Track recently used lines, updating on a CAM Hit or TLB write + assign WriteEnables = WriteLines & {(TLB_ENTRIES){TLBWrite}}; + assign AccessLines = TLBWrite ? WriteLines : Matches; + assign RUBitsAccessed = AccessLines | RUBits; + assign AllUsed = &RUBitsAccessed; // if all recently used, then clear to none + assign RUBitsNext = AllUsed ? 0 : RUBitsAccessed; + + // enable must be ORd with TLBFlush to ensure flop fires on a flush. DH 7/8/21 + flopenrc #(TLB_ENTRIES) lrustate(clk, reset, TLBFlush, (CAMHit | TLBWrite), RUBitsNext, RUBits); +endmodule diff --git a/src/mmu/tlb/tlbmixer.sv b/src/mmu/tlb/tlbmixer.sv new file mode 100644 index 000000000..f5555c2a1 --- /dev/null +++ b/src/mmu/tlb/tlbmixer.sv @@ -0,0 +1,69 @@ +/////////////////////////////////////////// +// tlbmixer.sv +// +// Written: David Harris and kmacsaigoren@hmc.edu 7 June 2021 +// Modified: +// +// +// Purpose: Takes two page numbers and replaces segments of the first page +// number with segments from the second, based on the page type. +// NOTE: this DOES NOT include the 12 bit offset, which is the same no matter the translation mode or page type. 
+// +// Documentation: RISC-V System on Chip Design Chapter 8 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module tlbmixer ( + input logic [`VPN_BITS-1:0] VPN, + input logic [`PPN_BITS-1:0] PPN, + input logic [1:0] HitPageType, + input logic [11:0] Offset, + input logic TLBHit, + output logic [`PA_BITS-1:0] TLBPAdr +); + + localparam EXTRA_BITS = `PPN_BITS - `VPN_BITS; + logic [`PPN_BITS-1:0] ZeroExtendedVPN; + logic [`PPN_BITS-1:0] PageNumberMask; + logic [`PPN_BITS-1:0] PPNMixed; + + // produce PageNumberMask with 1s where virtual page number bits should be untranslaetd for superpages + if (`XLEN == 32) + // kilopage: 22 bits of PPN, 0 bits of VPN + // megapage: 12 bits of PPN, 10 bits of VPN + mux2 #(22) pnm(22'h000000, 22'h0003FF, HitPageType[0], PageNumberMask); + else + // kilopage: 44 bits of PPN, 0 bits of VPN + // megapage: 35 bits of PPN, 9 bits of VPN + // gigapage: 26 bits of PPN, 18 bits of VPN + // terapage: 17 bits of PPN, 27 bits of VPN + mux4 #(44) pnm(44'h00000000000, 44'h000000001FF, 44'h0000003FFFF, 44'h00007FFFFFF, HitPageType, PageNumberMask); + + // 
merge low segments of VPN with high segments of PPN decided by the pagetype. + assign ZeroExtendedVPN = {{EXTRA_BITS{1'b0}}, VPN}; // forces the VPN to be the same width as PPN. + assign PPNMixed = PPN | ZeroExtendedVPN & PageNumberMask; // + //mux2 #(1) mixmux[`PPN_BITS-1:0](ZeroExtendedVPN, PPN, PageNumberMask, PPNMixed); + //assign PPNMixed = (ZeroExtendedVPN & ~PageNumberMask) | (PPN & PageNumberMask); + // Output the hit physical address if translation is currently on. + // Provide physical address of zero if not TLBHits, to cause segmentation error if miss somehow percolated through signal + mux2 #(`PA_BITS) hitmux('0, {PPNMixed, Offset}, TLBHit, TLBPAdr); // set PA to 0 if TLB misses, to cause segementation error if this miss somehow passes through system + +endmodule diff --git a/src/mmu/tlb/tlbram.sv b/src/mmu/tlb/tlbram.sv new file mode 100644 index 000000000..febb8b6f6 --- /dev/null +++ b/src/mmu/tlb/tlbram.sv @@ -0,0 +1,54 @@ +/////////////////////////////////////////// +// tlbram.sv +// +// Written: jtorrey@hmc.edu & tfleming@hmc.edu 16 February 2021 +// Modified: +// +// Purpose: Stores page table entries of cached address translations. +// Outputs the physical page number and access bits of the current +// virtual address on a TLB hit. +// +// Documentation: RISC-V System on Chip Design Chapter 8 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module tlbram #(parameter TLB_ENTRIES = 8) ( + input logic clk, reset, + input logic [`XLEN-1:0] PTE, + input logic [TLB_ENTRIES-1:0] Matches, WriteEnables, + output logic [`PPN_BITS-1:0] PPN, + output logic [7:0] PTEAccessBits, + output logic [TLB_ENTRIES-1:0] PTE_Gs +); + + logic [`PPN_BITS+9:0] RamRead[TLB_ENTRIES-1:0]; + logic [`PPN_BITS+9:0] PageTableEntry; + + // RAM implemented with array of flops and AND/OR read logic + tlbramline #(`PPN_BITS+10) tlbramline[TLB_ENTRIES-1:0] + (.clk, .reset, .re(Matches), .we(WriteEnables), + .d(PTE[`PPN_BITS+9:0]), .q(RamRead), .PTE_G(PTE_Gs)); + or_rows #(TLB_ENTRIES, `PPN_BITS+10) PTEOr(RamRead, PageTableEntry); + + // Rename the bits read from the TLB RAM + assign PTEAccessBits = PageTableEntry[7:0]; + assign PPN = PageTableEntry[`PPN_BITS+9:10]; +endmodule diff --git a/src/mmu/tlb/tlbramline.sv b/src/mmu/tlb/tlbramline.sv new file mode 100644 index 000000000..035c58d58 --- /dev/null +++ b/src/mmu/tlb/tlbramline.sv @@ -0,0 +1,43 @@ +/////////////////////////////////////////// +// tlbramline.sv +// +// Written: David_Harris@hmc.edu 4 July 2021 +// Modified: +// +// Purpose: One line of the RAM, with enabled flip-flop and logic for reading into distributed OR +// +// Documentation: RISC-V System on Chip Design Chapter 8 +// +// A component of the CORE-V-WALLY configurable RISC-V project. 
+// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module tlbramline #(parameter WIDTH = 22) + (input logic clk, reset, + input logic re, we, + input logic [WIDTH-1:0] d, + output logic [WIDTH-1:0] q, + output logic PTE_G); + + logic [WIDTH-1:0] line; + + flopenr #(WIDTH) pteflop(clk, reset, we, d, line); + assign q = re ? line : 0; + assign PTE_G = line[5]; // send global bit to CAM as part of ASID matching +endmodule diff --git a/src/mmu/vm64check.sv b/src/mmu/tlb/vm64check.sv similarity index 90% rename from src/mmu/vm64check.sv rename to src/mmu/tlb/vm64check.sv index a78b853e1..5f12eef7a 100644 --- a/src/mmu/vm64check.sv +++ b/src/mmu/tlb/vm64check.sv @@ -32,7 +32,7 @@ module vm64check ( input logic [`SVMODE_BITS-1:0] SATP_MODE, input logic [`XLEN-1:0] VAdr, output logic SV39Mode, - output logic UpperBitsUnequalPageFault + output logic UpperBitsUnequal ); if (`XLEN == 64) begin @@ -42,9 +42,9 @@ module vm64check ( logic eq_63_47, eq_46_38; assign eq_46_38 = &(VAdr[46:38]) | ~|(VAdr[46:38]); assign eq_63_47 = &(VAdr[63:47]) | ~|(VAdr[63:47]); - assign UpperBitsUnequalPageFault = SV39Mode ? 
~(eq_63_47 & eq_46_38) : ~eq_63_47; + assign UpperBitsUnequal = SV39Mode ? ~(eq_63_47 & eq_46_38) : ~eq_63_47; end else begin assign SV39Mode = 0; - assign UpperBitsUnequalPageFault = 0; + assign UpperBitsUnequal = 0; end endmodule From 099267ffcea29b7ada4c18eb307ba0667617cd7e Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 17:31:03 -0800 Subject: [PATCH 23/55] moved tlb to subdirectory --- src/mmu/tlb.sv | 121 ------------------------------------------ src/mmu/tlbcam.sv | 62 ---------------------- src/mmu/tlbcamline.sv | 106 ------------------------------------ src/mmu/tlbcontrol.sv | 113 --------------------------------------- src/mmu/tlblru.sv | 56 ------------------- src/mmu/tlbmixer.sv | 69 ------------------------ src/mmu/tlbram.sv | 54 ------------------- src/mmu/tlbramline.sv | 43 --------------- 8 files changed, 624 deletions(-) delete mode 100644 src/mmu/tlb.sv delete mode 100644 src/mmu/tlbcam.sv delete mode 100644 src/mmu/tlbcamline.sv delete mode 100644 src/mmu/tlbcontrol.sv delete mode 100644 src/mmu/tlblru.sv delete mode 100644 src/mmu/tlbmixer.sv delete mode 100644 src/mmu/tlbram.sv delete mode 100644 src/mmu/tlbramline.sv diff --git a/src/mmu/tlb.sv b/src/mmu/tlb.sv deleted file mode 100644 index f8bf0d178..000000000 --- a/src/mmu/tlb.sv +++ /dev/null @@ -1,121 +0,0 @@ -/////////////////////////////////////////// -// tlb.sv -// -// Written: jtorrey@hmc.edu 16 February 2021 -// Modified: kmacsaigoren@hmc.edu 1 June 2021 -// Implemented SV48 on top of SV39. This included adding the SvMode signal, -// and using it to decide the translate signal and get the virtual page number -// -// Purpose: Translation lookaside buffer -// Cache of virtural-to-physical address translations -// -// Documentation: RISC-V System on Chip Design Chapter 8 -// -// A component of the CORE-V-WALLY configurable RISC-V project. 
-// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -/** - * SV32 specs - * ---------- - * Virtual address [31:0] (32 bits) - * [________________________________] - * |--VPN1--||--VPN0--||----OFF---| - * 10 10 12 - * - * Physical address [33:0] (34 bits) - * [__________________________________] - * |---PPN1---||--PPN0--||----OFF---| - * 12 10 12 - * - * Page Table Entry [31:0] (32 bits) - * [________________________________] - * |---PPN1---||--PPN0--|||DAGUXWRV - * 12 10 ^^ - * RSW(2) -- for OS - */ - -`include "wally-config.vh" - -// The TLB will have 2**ENTRY_BITS total entries -module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) ( - input logic clk, reset, - input logic [`SVMODE_BITS-1:0] SATP_MODE, // Current address translation mode - input logic [`ASID_BITS-1:0] SATP_ASID, - input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, - input logic [1:0] STATUS_MPP, - input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor - input logic ReadAccess, - input logic WriteAccess, - input logic DisableTranslation, - input logic [`XLEN-1:0] VAdr, // address input before translation (could be physical or virtual) - input logic [`XLEN-1:0] PTE, - input logic [1:0] 
PageTypeWriteVal, - input logic TLBWrite, - input logic TLBFlush, - output logic [`PA_BITS-1:0] TLBPAdr, - output logic TLBMiss, - output logic TLBHit, - output logic Translate, - output logic TLBPageFault, - output logic DAPageFault -); - - logic [TLB_ENTRIES-1:0] Matches, WriteEnables, PTE_Gs; // used as the one-hot encoding of WriteIndex - // Sections of the virtual and physical addresses - logic [`VPN_BITS-1:0] VPN; - logic [`PPN_BITS-1:0] PPN; - // Sections of the page table entry - logic [7:0] PTEAccessBits; - logic [1:0] HitPageType; - logic CAMHit; - logic SV39Mode; - logic Misaligned; - logic MegapageMisaligned; - - if(`XLEN == 32) begin - assign MegapageMisaligned = |(PPN[9:0]); // must have zero PPN0 - assign Misaligned = (HitPageType == 2'b01) & MegapageMisaligned; - end else begin // 64-bit - logic GigapageMisaligned, TerapageMisaligned; - assign TerapageMisaligned = |(PPN[26:0]); // must have zero PPN2, PPN1, PPN0 - assign GigapageMisaligned = |(PPN[17:0]); // must have zero PPN1 and PPN0 - assign MegapageMisaligned = |(PPN[8:0]); // must have zero PPN0 - assign Misaligned = ((HitPageType == 2'b11) & TerapageMisaligned) | - ((HitPageType == 2'b10) & GigapageMisaligned) | - ((HitPageType == 2'b01) & MegapageMisaligned); - end - - assign VPN = VAdr[`VPN_BITS+11:12]; - - tlbcontrol #(ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, - .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .TLBFlush, - .PTEAccessBits, .CAMHit, .Misaligned, .TLBMiss, .TLBHit, .TLBPageFault, - .DAPageFault, .SV39Mode, .Translate); - - tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables); - tlbcam #(TLB_ENTRIES, `VPN_BITS + `ASID_BITS, `VPN_SEGMENT_BITS) - tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs, - .SATP_ASID, .Matches, .HitPageType, .CAMHit); - tlbram #(TLB_ENTRIES) tlbram(.clk, .reset, .PTE, .Matches, .WriteEnables, .PPN, 
.PTEAccessBits, .PTE_Gs); - - // Replace segments of the virtual page number with segments of the physical - // page number. For 4 KB pages, the entire virtual page number is replaced. - // For superpages, some segments are considered offsets into a larger page. - tlbmixer Mixer(.VPN, .PPN, .HitPageType, .Offset(VAdr[11:0]), .TLBHit, .TLBPAdr); - -endmodule diff --git a/src/mmu/tlbcam.sv b/src/mmu/tlbcam.sv deleted file mode 100644 index 449411e20..000000000 --- a/src/mmu/tlbcam.sv +++ /dev/null @@ -1,62 +0,0 @@ -/////////////////////////////////////////// -// tlbcam.sv -// -// Written: jtorrey@hmc.edu 16 February 2021 -// Modified: kmacsaigoren@hmc.edu 1 June 2021 -// Implemented SV48 on top of SV39. This included adding the SvMode signal input and wally constants -// Mostly this was to make the cam_lines work. -// -// Purpose: Stores virtual page numbers with cached translations. -// Determines whether a given virtual page number is in the TLB. -// -// Documentation: RISC-V System on Chip Design Chapter 8 -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module tlbcam #(parameter TLB_ENTRIES = 8, KEY_BITS = 20, SEGMENT_BITS = 10) ( - input logic clk, reset, - input logic [`VPN_BITS-1:0] VPN, - input logic [1:0] PageTypeWriteVal, - input logic SV39Mode, - input logic TLBFlush, - input logic [TLB_ENTRIES-1:0] WriteEnables, - input logic [TLB_ENTRIES-1:0] PTE_Gs, - input logic [`ASID_BITS-1:0] SATP_ASID, - output logic [TLB_ENTRIES-1:0] Matches, - output logic [1:0] HitPageType, - output logic CAMHit -); - - logic [1:0] PageTypeRead [TLB_ENTRIES-1:0]; - - // TLB_ENTRIES CAM lines, each of which will independently consider - // whether the requested virtual address is a match. Each line stores the - // original virtual page number from when the address was written, regardless - // of page type. However, matches are determined based on a subset of the - // page number segments. - - tlbcamline #(KEY_BITS, SEGMENT_BITS) camlines[TLB_ENTRIES-1:0]( - .clk, .reset, .VPN, .SATP_ASID, .SV39Mode, .PTE_G(PTE_Gs), .PageTypeWriteVal, .TLBFlush, - .WriteEnable(WriteEnables), .PageTypeRead, .Match(Matches)); - assign CAMHit = |Matches & ~TLBFlush; - or_rows #(TLB_ENTRIES,2) PageTypeOr(PageTypeRead, HitPageType); -endmodule - diff --git a/src/mmu/tlbcamline.sv b/src/mmu/tlbcamline.sv deleted file mode 100644 index 9f7a68e81..000000000 --- a/src/mmu/tlbcamline.sv +++ /dev/null @@ -1,106 +0,0 @@ -/////////////////////////////////////////// -// tlbcamline.sv -// -// Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021 -// Modified: kmacsaigoren@hmc.edu 1 June 2021 -// Implemented SV48 on top of SV39. This included adding SvMode input signal and the wally constants -// Mostly this was done to make the PageNumberMixer work. -// -// Purpose: CAM line for the translation lookaside buffer (TLB) -// Determines whether a virtual page number matches the stored key. 
-// -// Documentation: RISC-V System on Chip Design Chapter 8 -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module tlbcamline #(parameter KEY_BITS = 20, SEGMENT_BITS = 10) ( - input logic clk, reset, - input logic [`VPN_BITS-1:0] VPN, // The requested page number to compare against the key - input logic [`ASID_BITS-1:0] SATP_ASID, - input logic SV39Mode, - input logic WriteEnable, // Write a new entry to this line - input logic PTE_G, - input logic [1:0] PageTypeWriteVal, - input logic TLBFlush, // Flush this line (set valid to 0) - output logic [1:0] PageTypeRead, // *** should this be the stored version or the always updated one? - output logic Match -); - - // PageTypeRead is a key for a tera, giga, mega, or kilopage. - // PageType == 2'b00 --> kilopage - // PageType == 2'b01 --> megapage - // PageType == 2'b10 --> gigapage - // PageType == 2'b11 --> terapage - - // This entry has KEY_BITS for the key plus one valid bit. - logic Valid; - logic [KEY_BITS-1:0] Key; - logic [1:0] PageType; - - // Split up key and query into sections for each page table level. 
- logic [`ASID_BITS-1:0] Key_ASID; - logic [SEGMENT_BITS-1:0] Key0, Key1, Query0, Query1; - logic MatchASID, Match0, Match1; - - assign MatchASID = (SATP_ASID == Key_ASID) | PTE_G; - - if (`XLEN == 32) begin: match - - assign {Key_ASID, Key1, Key0} = Key; - assign {Query1, Query0} = VPN; - - // Calculate the actual match value based on the input vpn and the page type. - // For example, a megapage in SV32 only cares about VPN[1], so VPN[0] - // should automatically match. - assign Match0 = (Query0 == Key0) | (PageType[0]); // least signifcant section - assign Match1 = (Query1 == Key1); - - assign Match = Match0 & Match1 & MatchASID & Valid; - end else begin: match - - logic [SEGMENT_BITS-1:0] Key2, Key3, Query2, Query3; - logic Match2, Match3; - - assign {Query3, Query2, Query1, Query0} = VPN; - assign {Key_ASID, Key3, Key2, Key1, Key0} = Key; - - // Calculate the actual match value based on the input vpn and the page type. - // For example, a gigapage in SV39 only cares about VPN[2], so VPN[0] and VPN[1] - // should automatically match. - assign Match0 = (Query0 == Key0) | (PageType > 2'd0); // least signifcant section - assign Match1 = (Query1 == Key1) | (PageType > 2'd1); - assign Match2 = (Query2 == Key2) | (PageType > 2'd2); - assign Match3 = (Query3 == Key3) | SV39Mode; // this should always match in sv39 because they aren't used - - assign Match = Match0 & Match1 & Match2 & Match3 & MatchASID & Valid; - end - - // On a write, update the type of the page referred to by this line. - flopenr #(2) pagetypeflop(clk, reset, WriteEnable, PageTypeWriteVal, PageType); - assign PageTypeRead = PageType & {2{Match}}; - - // On a write, set the valid bit high and update the stored key. - // On a flush, zero the valid bit and leave the key unchanged. - // *** Might we want to update stored key right away to output match on the - // write cycle? 
(using a mux) - flopenr #(1) validbitflop(clk, reset, WriteEnable | TLBFlush, ~TLBFlush, Valid); - flopenr #(KEY_BITS) keyflop(clk, reset, WriteEnable, {SATP_ASID, VPN}, Key); -endmodule diff --git a/src/mmu/tlbcontrol.sv b/src/mmu/tlbcontrol.sv deleted file mode 100644 index 04af0f2a9..000000000 --- a/src/mmu/tlbcontrol.sv +++ /dev/null @@ -1,113 +0,0 @@ -/////////////////////////////////////////// -// tlbcontrol.sv -// -// Written: David_Harris@hmc.edu 5 July 2021 -// Modified: -// -// Purpose: Control signals for TLB -// -// Documentation: RISC-V System on Chip Design Chapter 8 -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module tlbcontrol #(parameter ITLB = 0) ( - input logic [`SVMODE_BITS-1:0] SATP_MODE, - input logic [`XLEN-1:0] VAdr, - input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, - input logic [1:0] STATUS_MPP, - input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor - input logic ReadAccess, WriteAccess, - input logic DisableTranslation, - input logic TLBFlush, // Invalidate all TLB entries - input logic [7:0] PTEAccessBits, - input logic CAMHit, - input logic Misaligned, - output logic TLBMiss, - output logic TLBHit, - output logic TLBPageFault, - output logic DAPageFault, - output logic SV39Mode, - output logic Translate -); - - // Sections of the page table entry - logic [1:0] EffectivePrivilegeMode; - - logic PTE_D, PTE_A, PTE_U, PTE_X, PTE_W, PTE_R, PTE_V; // Useful PTE Control Bits - logic UpperBitsUnequalPageFault; - logic TLBAccess; - logic ImproperPrivilege; - - // Grab the sv mode from SATP and determine whether translation should occur - assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1 - assign Translate = (SATP_MODE != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~DisableTranslation; - - // Determine whether TLB is being used - assign TLBAccess = ReadAccess | WriteAccess; - - // Check that upper bits are legal (all 0s or all 1s) - vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequalPageFault); - - // unswizzle useful PTE bits - assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; - assign {PTE_U, PTE_X, PTE_W, PTE_R, PTE_V} = PTEAccessBits[4:0]; - - // Check whether the access is allowed, page faulting if not. - if (ITLB == 1) begin:itlb // Instruction TLB fault checking - // User mode may only execute user mode pages, and supervisor mode may - // only execute non-user mode pages. 
- assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | - ((EffectivePrivilegeMode == `S_MODE) & PTE_U); - if(`SVADU_SUPPORTED) begin : hptwwrites - assign DAPageFault = Translate & TLBHit & ~PTE_A & ~TLBPageFault; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); - end else begin - // fault for software handling if access bit is off - assign DAPageFault = ~PTE_A; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); - end - end else begin:dtlb // Data TLB fault checking - logic InvalidRead, InvalidWrite; - - // User mode may only load/store from user mode pages, and supervisor mode - // may only access user mode pages when STATUS_SUM is low. - assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | - ((EffectivePrivilegeMode == `S_MODE) & PTE_U & ~STATUS_SUM); - // Check for read error. Reads are invalid when the page is not readable - // (and executable pages are not readable) or when the page is neither - // readable nor executable (and executable pages are readable). - assign InvalidRead = ReadAccess & ~PTE_R & (~STATUS_MXR | ~PTE_X); - // Check for write error. Writes are invalid when the page's write bit is - // low. 
- assign InvalidWrite = WriteAccess & ~PTE_W; - if(`SVADU_SUPPORTED) begin : hptwwrites - assign DAPageFault = Translate & TLBHit & (~PTE_A | WriteAccess & ~PTE_D) & ~TLBPageFault; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); - end else begin - // Fault for software handling if access bit is off or writing a page with dirty bit off - assign DAPageFault = ~PTE_A | WriteAccess & ~PTE_D; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); - end - end - - assign TLBHit = CAMHit & TLBAccess; - assign TLBMiss = ~CAMHit & TLBAccess & Translate ; -endmodule diff --git a/src/mmu/tlblru.sv b/src/mmu/tlblru.sv deleted file mode 100644 index 6cdb475e3..000000000 --- a/src/mmu/tlblru.sv +++ /dev/null @@ -1,56 +0,0 @@ -/////////////////////////////////////////// -// tlblru.sv -// -// Written: tfleming@hmc.edu & jtorrey@hmc.edu 16 February 2021 -// Modified: -// -// Purpose: Implementation of bit pseudo least-recently-used algorithm for -// cache evictions. Outputs the index of the next entry to be written. -// -// Documentation: RISC-V System on Chip Design Chapter 8 -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. 
See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module tlblru #(parameter TLB_ENTRIES = 8) ( - input logic clk, reset, - input logic TLBWrite, - input logic TLBFlush, - input logic [TLB_ENTRIES-1:0] Matches, - input logic CAMHit, - output logic [TLB_ENTRIES-1:0] WriteEnables -); - - logic [TLB_ENTRIES-1:0] RUBits, RUBitsNext, RUBitsAccessed; - logic [TLB_ENTRIES-1:0] WriteLines; - logic [TLB_ENTRIES-1:0] AccessLines; // One-hot encodings of which line is being accessed - logic AllUsed; // High if the next access causes all RU bits to be 1 - - // Find the first line not recently used - priorityonehot #(TLB_ENTRIES) nru(.a(~RUBits), .y(WriteLines)); - - // Track recently used lines, updating on a CAM Hit or TLB write - assign WriteEnables = WriteLines & {(TLB_ENTRIES){TLBWrite}}; - assign AccessLines = TLBWrite ? WriteLines : Matches; - assign RUBitsAccessed = AccessLines | RUBits; - assign AllUsed = &RUBitsAccessed; // if all recently used, then clear to none - assign RUBitsNext = AllUsed ? 0 : RUBitsAccessed; - - // enable must be ORd with TLBFlush to ensure flop fires on a flush. DH 7/8/21 - flopenrc #(TLB_ENTRIES) lrustate(clk, reset, TLBFlush, (CAMHit | TLBWrite), RUBitsNext, RUBits); -endmodule diff --git a/src/mmu/tlbmixer.sv b/src/mmu/tlbmixer.sv deleted file mode 100644 index f5555c2a1..000000000 --- a/src/mmu/tlbmixer.sv +++ /dev/null @@ -1,69 +0,0 @@ -/////////////////////////////////////////// -// tlbmixer.sv -// -// Written: David Harris and kmacsaigoren@hmc.edu 7 June 2021 -// Modified: -// -// -// Purpose: Takes two page numbers and replaces segments of the first page -// number with segments from the second, based on the page type. -// NOTE: this DOES NOT include the 12 bit offset, which is the same no matter the translation mode or page type. 
-// -// Documentation: RISC-V System on Chip Design Chapter 8 -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module tlbmixer ( - input logic [`VPN_BITS-1:0] VPN, - input logic [`PPN_BITS-1:0] PPN, - input logic [1:0] HitPageType, - input logic [11:0] Offset, - input logic TLBHit, - output logic [`PA_BITS-1:0] TLBPAdr -); - - localparam EXTRA_BITS = `PPN_BITS - `VPN_BITS; - logic [`PPN_BITS-1:0] ZeroExtendedVPN; - logic [`PPN_BITS-1:0] PageNumberMask; - logic [`PPN_BITS-1:0] PPNMixed; - - // produce PageNumberMask with 1s where virtual page number bits should be untranslaetd for superpages - if (`XLEN == 32) - // kilopage: 22 bits of PPN, 0 bits of VPN - // megapage: 12 bits of PPN, 10 bits of VPN - mux2 #(22) pnm(22'h000000, 22'h0003FF, HitPageType[0], PageNumberMask); - else - // kilopage: 44 bits of PPN, 0 bits of VPN - // megapage: 35 bits of PPN, 9 bits of VPN - // gigapage: 26 bits of PPN, 18 bits of VPN - // terapage: 17 bits of PPN, 27 bits of VPN - mux4 #(44) pnm(44'h00000000000, 44'h000000001FF, 44'h0000003FFFF, 44'h00007FFFFFF, HitPageType, PageNumberMask); - - // 
merge low segments of VPN with high segments of PPN decided by the pagetype. - assign ZeroExtendedVPN = {{EXTRA_BITS{1'b0}}, VPN}; // forces the VPN to be the same width as PPN. - assign PPNMixed = PPN | ZeroExtendedVPN & PageNumberMask; // - //mux2 #(1) mixmux[`PPN_BITS-1:0](ZeroExtendedVPN, PPN, PageNumberMask, PPNMixed); - //assign PPNMixed = (ZeroExtendedVPN & ~PageNumberMask) | (PPN & PageNumberMask); - // Output the hit physical address if translation is currently on. - // Provide physical address of zero if not TLBHits, to cause segmentation error if miss somehow percolated through signal - mux2 #(`PA_BITS) hitmux('0, {PPNMixed, Offset}, TLBHit, TLBPAdr); // set PA to 0 if TLB misses, to cause segementation error if this miss somehow passes through system - -endmodule diff --git a/src/mmu/tlbram.sv b/src/mmu/tlbram.sv deleted file mode 100644 index febb8b6f6..000000000 --- a/src/mmu/tlbram.sv +++ /dev/null @@ -1,54 +0,0 @@ -/////////////////////////////////////////// -// tlbram.sv -// -// Written: jtorrey@hmc.edu & tfleming@hmc.edu 16 February 2021 -// Modified: -// -// Purpose: Stores page table entries of cached address translations. -// Outputs the physical page number and access bits of the current -// virtual address on a TLB hit. -// -// Documentation: RISC-V System on Chip Design Chapter 8 -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module tlbram #(parameter TLB_ENTRIES = 8) ( - input logic clk, reset, - input logic [`XLEN-1:0] PTE, - input logic [TLB_ENTRIES-1:0] Matches, WriteEnables, - output logic [`PPN_BITS-1:0] PPN, - output logic [7:0] PTEAccessBits, - output logic [TLB_ENTRIES-1:0] PTE_Gs -); - - logic [`PPN_BITS+9:0] RamRead[TLB_ENTRIES-1:0]; - logic [`PPN_BITS+9:0] PageTableEntry; - - // RAM implemented with array of flops and AND/OR read logic - tlbramline #(`PPN_BITS+10) tlbramline[TLB_ENTRIES-1:0] - (.clk, .reset, .re(Matches), .we(WriteEnables), - .d(PTE[`PPN_BITS+9:0]), .q(RamRead), .PTE_G(PTE_Gs)); - or_rows #(TLB_ENTRIES, `PPN_BITS+10) PTEOr(RamRead, PageTableEntry); - - // Rename the bits read from the TLB RAM - assign PTEAccessBits = PageTableEntry[7:0]; - assign PPN = PageTableEntry[`PPN_BITS+9:10]; -endmodule diff --git a/src/mmu/tlbramline.sv b/src/mmu/tlbramline.sv deleted file mode 100644 index 035c58d58..000000000 --- a/src/mmu/tlbramline.sv +++ /dev/null @@ -1,43 +0,0 @@ -/////////////////////////////////////////// -// tlbramline.sv -// -// Written: David_Harris@hmc.edu 4 July 2021 -// Modified: -// -// Purpose: One line of the RAM, with enabled flip-flop and logic for reading into distributed OR -// -// Documentation: RISC-V System on Chip Design Chapter 8 -// -// A component of the CORE-V-WALLY configurable RISC-V project. 
-// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module tlbramline #(parameter WIDTH = 22) - (input logic clk, reset, - input logic re, we, - input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q, - output logic PTE_G); - - logic [WIDTH-1:0] line; - - flopenr #(WIDTH) pteflop(clk, reset, we, d, line); - assign q = re ? 
line : 0; - assign PTE_G = line[5]; // send global bit to CAM as part of ASID matching -endmodule From 246deeda8298324b78462a46a90ea1296fea218e Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 17:32:34 -0800 Subject: [PATCH 24/55] renamed UpperBitsUnequalPageFault to UpperBitsUnequal --- src/mmu/hptw.sv | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index 19a3aca79..f77ca0c98 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -127,8 +127,8 @@ module hptw ( if(`SVADU_SUPPORTED) begin : hptwwrites logic ReadAccess, WriteAccess; - logic InvalidRead, InvalidWrite; - logic UpperBitsUnequalPageFault; + logic InvalidRead, InvalidWrite, InvalidOp; + logic UpperBitsUnequal; logic OtherPageFault; logic [1:0] EffectivePrivilegeMode; logic ImproperPrivilege; @@ -147,7 +147,7 @@ module hptw ( mux2 #(`PA_BITS) HPTWWriteAdrMux(HPTWReadAdr, HPTWWriteAdr, SelHPTWWriteAdr, HPTWAdr); assign {Dirty, Accessed} = PTE[7:6]; - assign WriteAccess = MemRWM[0] | (|AtomicM); + assign WriteAccess = MemRWM[0]; // implies | (|AtomicM); assign SetDirty = ~Dirty & DTLBWalk & WriteAccess; assign ReadAccess = MemRWM[1]; @@ -157,11 +157,11 @@ module hptw ( // Check for page faults vm64check vm64check(.SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), .VAdr(TranslationVAdr), - .SV39Mode(), .UpperBitsUnequalPageFault); + .SV39Mode(), .UpperBitsUnequal); assign InvalidRead = ReadAccess & ~Readable & (~STATUS_MXR | ~Executable); assign InvalidWrite = WriteAccess & ~Writable; - assign OtherPageFault = DTLBWalk? ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequalPageFault | Misaligned | ~Valid : - ImproperPrivilege | ~Executable | UpperBitsUnequalPageFault | Misaligned | ~Valid; + assign InvalidOp = DTLBWalk ? 
(InvalidRead | InvalidWrite) : ~Executable; + assign OtherPageFault = ImproperPrivilege | InvalidOp | UpperBitsUnequal | Misaligned | ~Valid; // hptw needs to know if there is a Dirty or Access fault occuring on this // memory access. If there is the PTE needs to be updated seting Access From d2fd34efe6c3a85150c3493614067135ba3372ef Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 17:51:45 -0800 Subject: [PATCH 25/55] Renamed DAPageFault to UpdateDA --- src/ifu/ifu.sv | 8 ++++---- src/ifu/spill.sv | 4 ++-- src/lsu/lsu.sv | 8 ++++---- src/mmu/hptw.sv | 22 +++++++++++----------- src/mmu/mmu.sv | 4 ++-- src/mmu/tlb/tlb.sv | 4 ++-- src/mmu/tlb/tlbcontrol.sv | 16 ++++++++-------- src/wally/wallypipelinedcore.sv | 6 +++--- 8 files changed, 36 insertions(+), 36 deletions(-) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 71221ef63..887d1f45c 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -88,7 +88,7 @@ module ifu ( input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries output logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk - output logic InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits + output logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP address from privileged unit output logic InstrAccessFaultF, // Instruction access fault @@ -145,7 +145,7 @@ module ifu ( if(`C_SUPPORTED) begin : Spill spill #(`ICACHE_SUPPORTED) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF, - .InstrDAPageFaultF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); + .InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, 
.SelNextSpillF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpill assign PCNextFSpill = PCNextF; assign PCFSpill = PCF; @@ -185,12 +185,12 @@ module ifu ( .InstrAccessFaultF, .LoadAccessFaultM(), .StoreAmoAccessFaultM(), .InstrPageFaultF, .LoadPageFaultM(), .StoreAmoPageFaultM(), .LoadMisalignedFaultM(), .StoreAmoMisalignedFaultM(), - .DAPageFault(InstrDAPageFaultF), + .UpdateDA(InstrUpdateDAF), .AtomicAccessM(1'b0),.ExecuteAccessF(1'b1), .WriteAccessM(1'b0), .ReadAccessM(1'b0), .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW); end else begin - assign {ITLBMissF, InstrAccessFaultF, InstrPageFaultF, InstrDAPageFaultF} = '0; + assign {ITLBMissF, InstrAccessFaultF, InstrPageFaultF, InstrUpdateDAF} = '0; assign PCPF = PCFExt[`PA_BITS-1:0]; assign CacheableF = '1; assign SelIROM = '0; diff --git a/src/ifu/spill.sv b/src/ifu/spill.sv index a5b274633..4b89a3cef 100644 --- a/src/ifu/spill.sv +++ b/src/ifu/spill.sv @@ -42,7 +42,7 @@ module spill #( input logic [31:0] InstrRawF, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic IFUCacheBusStallD, // I$ or bus are stalled. 
Transition to second fetch of spill after the first is fetched input logic ITLBMissF, // ITLB miss, ignore memory request - input logic InstrDAPageFaultF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active) + input logic InstrUpdateDAF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active) output logic [`XLEN-1:0] PCNextFSpill, // The next PCF for one of the two memory addresses of the spill output logic [`XLEN-1:0] PCFSpill, // PCF for one of the two memory addresses of the spill output logic SelNextSpillF, // During the transition between the two spill operations, the IFU should stall the pipeline @@ -77,7 +77,7 @@ module spill #( //////////////////////////////////////////////////////////////////////////////////////////////////// assign SpillF = &PCF[$clog2(SPILLTHRESHOLD)+1:1]; - assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`SVADU_SUPPORTED & InstrDAPageFaultF)); + assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`SVADU_SUPPORTED & InstrUpdateDAF)); always_ff @(posedge clk) if (reset | FlushD) CurrState <= #1 STATE_READY; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index e01de3128..18383e0dd 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -81,7 +81,7 @@ module lsu ( input logic [1:0] STATUS_MPP, // Machine previous privilege mode input logic [`XLEN-1:0] PCFSpill, // Fetch PC input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk - input logic InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits + input logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits output logic [`XLEN-1:0] PTE, // Page table entry write to ITLB output logic [1:0] PageType, // Type of page table entry to write to ITLB output logic ITLBWriteF, // Write PTE to ITLB @@ -127,7 +127,7 @@ module lsu ( logic DTLBMissM; // DTLB miss causes HPTW walk logic DTLBWriteM; // Writes PTE and PageType to DTLB - 
logic DataDAPageFaultM; // DTLB hit needs to update dirty or access bits + logic DataUpdateDAM; // DTLB hit needs to update dirty or access bits logic LSULoadAccessFaultM; // Load acces fault logic LSUStoreAmoAccessFaultM; // Store access fault logic IgnoreRequestTLB; // On either ITLB or DTLB miss, ignore miss so HPTW can handle @@ -151,7 +151,7 @@ module lsu ( if(`VIRTMEM_SUPPORTED) begin : VIRTMEM_SUPPORTED hptw hptw(.clk, .reset, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, - .DTLBMissM, .DTLBWriteM, .InstrDAPageFaultF, .DataDAPageFaultM, + .DTLBMissM, .DTLBWriteM, .InstrUpdateDAF, .DataUpdateDAM, .FlushW, .DCacheStallM, .SATP_REGW, .PCFSpill, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadDataM(ReadDataM[`XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN @@ -196,7 +196,7 @@ module lsu ( .StoreAmoAccessFaultM(LSUStoreAmoAccessFaultM), .InstrPageFaultF(), .LoadPageFaultM, .StoreAmoPageFaultM, .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, // *** these faults need to be supressed during hptw. 
- .DAPageFault(DataDAPageFaultM), + .UpdateDA(DataUpdateDAM), .AtomicAccessM(|LSUAtomicM), .ExecuteAccessF(1'b0), .WriteAccessM(PreLSURWM[0]), .ReadAccessM(PreLSURWM[1]), .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW); diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index f77ca0c98..d7f8ccdcc 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -49,8 +49,8 @@ module hptw ( input logic ITLBMissF, input logic DTLBMissM, input logic FlushW, - input logic InstrDAPageFaultF, - input logic DataDAPageFaultM, + input logic InstrUpdateDAF, + input logic DataUpdateDAM, output logic [`XLEN-1:0] PTE, // page table entry to TLBs output logic [1:0] PageType, // page type to TLBs output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry @@ -87,7 +87,7 @@ module hptw ( logic [`XLEN-1:0] TranslationVAdr; logic [`XLEN-1:0] NextPTE; logic UpdatePTE; - logic HPTWDAPageFault; + logic HPTWUpdateDA; logic [`PA_BITS-1:0] HPTWReadAdr; logic SelHPTWAdr; logic [`XLEN+1:0] HPTWAdrExt; @@ -167,14 +167,14 @@ module hptw ( // memory access. If there is the PTE needs to be updated seting Access // and possibly also Dirty. Dirty is set if the operation is a store/amo. // However any other fault should not cause the update. 
- assign HPTWDAPageFault = ValidLeafPTE & (~Accessed | SetDirty) & ~OtherPageFault; + assign HPTWUpdateDA = ValidLeafPTE & (~Accessed | SetDirty) & ~OtherPageFault; assign HPTWRW[0] = (WalkerState == UPDATE_PTE); - assign UpdatePTE = (WalkerState == LEAF) & HPTWDAPageFault; + assign UpdatePTE = (WalkerState == LEAF) & HPTWUpdateDA; end else begin // block: hptwwrites assign NextPTE = ReadDataM; assign HPTWAdr = HPTWReadAdr; - assign HPTWDAPageFault = '0; + assign HPTWUpdateDA = '0; assign UpdatePTE = '0; assign HPTWRW[0] = '0; end @@ -182,8 +182,8 @@ module hptw ( // Enable and select signals based on states assign StartWalk = (WalkerState == IDLE) & TLBMiss; assign HPTWRW[1] = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD); - assign DTLBWriteM = (WalkerState == LEAF & ~HPTWDAPageFault) & DTLBWalk; - assign ITLBWriteF = (WalkerState == LEAF & ~HPTWDAPageFault) & ~DTLBWalk; + assign DTLBWriteM = (WalkerState == LEAF & ~HPTWUpdateDA) & DTLBWalk; + assign ITLBWriteF = (WalkerState == LEAF & ~HPTWUpdateDA) & ~DTLBWalk; // FSM to track PageType based on the levels of the page table traversed flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType); @@ -262,7 +262,7 @@ module hptw ( else NextWalkerState = LEAF; L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; else NextWalkerState = LEAF; - LEAF: if (`SVADU_SUPPORTED & HPTWDAPageFault) NextWalkerState = UPDATE_PTE; + LEAF: if (`SVADU_SUPPORTED & HPTWUpdateDA) NextWalkerState = UPDATE_PTE; else NextWalkerState = IDLE; UPDATE_PTE: if(DCacheStallM) NextWalkerState = UPDATE_PTE; else NextWalkerState = LEAF; @@ -273,8 +273,8 @@ module hptw ( assign SelHPTW = WalkerState != IDLE; assign HPTWStall = (WalkerState != IDLE) | (WalkerState == IDLE & TLBMiss); - assign ITLBMissOrDAFaultF = ITLBMissF | (`SVADU_SUPPORTED & InstrDAPageFaultF); - assign DTLBMissOrDAFaultM = DTLBMissM | (`SVADU_SUPPORTED & DataDAPageFaultM); + assign ITLBMissOrDAFaultF = ITLBMissF | (`SVADU_SUPPORTED & 
InstrUpdateDAF); + assign DTLBMissOrDAFaultM = DTLBMissM | (`SVADU_SUPPORTED & DataUpdateDAM); // HTPW address/data/control muxing diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 0193a5478..728b6b80b 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -51,7 +51,7 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( // Faults output logic InstrAccessFaultF, LoadAccessFaultM, StoreAmoAccessFaultM, // access fault sources output logic InstrPageFaultF, LoadPageFaultM, StoreAmoPageFaultM, // page fault sources - output logic DAPageFault, // page fault due to setting dirty or access bit + output logic UpdateDA, // page fault due to setting dirty or access bit output logic LoadMisalignedFaultM, StoreAmoMisalignedFaultM, // misaligned fault sources // PMA checker signals input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // access type @@ -84,7 +84,7 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .PTE, .PageTypeWriteVal, .TLBWrite, .TLBFlush, .TLBPAdr, .TLBMiss, .TLBHit, - .Translate, .TLBPageFault, .DAPageFault); + .Translate, .TLBPageFault, .UpdateDA); end else begin:tlb// just pass address through as physical assign Translate = 0; assign TLBMiss = 0; diff --git a/src/mmu/tlb/tlb.sv b/src/mmu/tlb/tlb.sv index f8bf0d178..7d6cd317f 100644 --- a/src/mmu/tlb/tlb.sv +++ b/src/mmu/tlb/tlb.sv @@ -72,7 +72,7 @@ module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) ( output logic TLBHit, output logic Translate, output logic TLBPageFault, - output logic DAPageFault + output logic UpdateDA ); logic [TLB_ENTRIES-1:0] Matches, WriteEnables, PTE_Gs; // used as the one-hot encoding of WriteIndex @@ -105,7 +105,7 @@ module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) ( tlbcontrol #(ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .TLBFlush, .PTEAccessBits, .CAMHit, .Misaligned, .TLBMiss, 
.TLBHit, .TLBPageFault, - .DAPageFault, .SV39Mode, .Translate); + .UpdateDA, .SV39Mode, .Translate); tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables); tlbcam #(TLB_ENTRIES, `VPN_BITS + `ASID_BITS, `VPN_SEGMENT_BITS) diff --git a/src/mmu/tlb/tlbcontrol.sv b/src/mmu/tlb/tlbcontrol.sv index 45c56f1b7..9754124da 100644 --- a/src/mmu/tlb/tlbcontrol.sv +++ b/src/mmu/tlb/tlbcontrol.sv @@ -43,7 +43,7 @@ module tlbcontrol #(parameter ITLB = 0) ( output logic TLBMiss, output logic TLBHit, output logic TLBPageFault, - output logic DAPageFault, + output logic UpdateDA, output logic SV39Mode, output logic Translate ); @@ -77,12 +77,12 @@ module tlbcontrol #(parameter ITLB = 0) ( assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | ((EffectivePrivilegeMode == `S_MODE) & PTE_U); if(`SVADU_SUPPORTED) begin : hptwwrites - assign DAPageFault = Translate & TLBHit & ~PTE_A & ~TLBPageFault; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequal | Misaligned | ~PTE_V)); + assign UpdateDA = Translate & TLBHit & ~PTE_A & ~TLBPageFault; + assign TLBPageFault = Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequal | Misaligned | ~PTE_V); end else begin // fault for software handling if access bit is off - assign DAPageFault = ~PTE_A; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | DAPageFault | UpperBitsUnequal | Misaligned | ~PTE_V)); + assign UpdateDA = ~PTE_A; + assign TLBPageFault = Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpdateDA | UpperBitsUnequal | Misaligned | ~PTE_V); end end else begin:dtlb // Data TLB fault checking logic InvalidRead, InvalidWrite; @@ -99,12 +99,12 @@ module tlbcontrol #(parameter ITLB = 0) ( // low. 
assign InvalidWrite = WriteAccess & ~PTE_W; if(`SVADU_SUPPORTED) begin : hptwwrites - assign DAPageFault = Translate & TLBHit & (~PTE_A | WriteAccess & ~PTE_D) & ~TLBPageFault; + assign UpdateDA = Translate & TLBHit & (~PTE_A | WriteAccess & ~PTE_D) & ~TLBPageFault; assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequal | Misaligned | ~PTE_V)); end else begin // Fault for software handling if access bit is off or writing a page with dirty bit off - assign DAPageFault = ~PTE_A | WriteAccess & ~PTE_D; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | DAPageFault | UpperBitsUnequal | Misaligned | ~PTE_V)); + assign UpdateDA = ~PTE_A | WriteAccess & ~PTE_D; + assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | UpdateDA | UpperBitsUnequal | Misaligned | ~PTE_V)); end end diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 02074f973..f94986349 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -156,7 +156,7 @@ module wallypipelinedcore ( logic ICacheMiss; logic ICacheAccess; logic BreakpointFaultM, EcallFaultM; - logic InstrDAPageFaultF; + logic InstrUpdateDAF; logic BigEndianM; logic FCvtIntE; logic CommittedF; @@ -184,7 +184,7 @@ module wallypipelinedcore ( .PrivilegeModeW, .PTE, .PageType, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ITLBWriteF, .sfencevmaM, .ITLBMissF, // pmp/pma (inside mmu) signals. 
- .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .InstrAccessFaultF, .InstrDAPageFaultF); + .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .InstrAccessFaultF, .InstrUpdateDAF); // integer execution unit: integer register file, datapath and controller ieu ieu(.clk, .reset, @@ -238,7 +238,7 @@ module wallypipelinedcore ( .HPTWInstrAccessFaultM, // connects to privilege .StoreAmoMisalignedFaultM, // connects to privilege .StoreAmoAccessFaultM, // connects to privilege - .InstrDAPageFaultF, + .InstrUpdateDAF, .PCFSpill, .ITLBMissF, .PTE, .PageType, .ITLBWriteF, .SelHPTW, .LSUStallM); From 3804626166711c931d97d36b614513639c082b81 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 26 Feb 2023 20:20:30 -0600 Subject: [PATCH 26/55] Create module for instruction class prediction and decoding. --- src/ifu/bpred/RASPredictor.sv | 28 ++++----- src/ifu/bpred/bpred.sv | 105 +++++++++------------------------ src/ifu/bpred/icpred.sv | 107 ++++++++++++++++++++++++++++++++++ 3 files changed, 150 insertions(+), 90 deletions(-) create mode 100644 src/ifu/bpred/icpred.sv diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 5f14a028e..72c594556 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -33,11 +33,11 @@ module RASPredictor #(parameter int StackSize = 16 )( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, - input logic WrongBPRetD, // Prediction class is wrong - input logic RetD, - input logic RetE, JalE, // Instr class - input logic BPRetF, - input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a jal + input logic WrongBPReturnD, // Prediction class is wrong + input logic ReturnD, + input logic ReturnE, CallE, // Instr class + input logic BPReturnF, + input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a call output logic [`XLEN-1:0] RASPCF // Top of the stack ); @@ -54,21 +54,21 @@ module RASPredictor #(parameter int StackSize = 16 )( logic IncrRepairD, 
DecRepairD; logic DecrementPtr; - logic FlushedRetDE; - logic WrongPredRetD; + logic FlushedReturnDE; + logic WrongPredReturnD; - assign PopF = BPRetF & ~StallD & ~FlushD; - assign PushE = JalE & ~StallM & ~FlushM; + assign PopF = BPReturnF & ~StallD & ~FlushD; + assign PushE = CallE & ~StallM & ~FlushM; - assign WrongPredRetD = (WrongBPRetD) & ~StallE & ~FlushE; - assign FlushedRetDE = (~StallE & FlushE & RetD) | (~StallM & FlushM & RetE); // flushed ret + assign WrongPredReturnD = (WrongBPReturnD) & ~StallE & ~FlushE; + assign FlushedReturnDE = (~StallE & FlushE & ReturnD) | (~StallM & FlushM & ReturnE); // flushed return - assign RepairD = WrongPredRetD | FlushedRetDE ; + assign RepairD = WrongPredReturnD | FlushedReturnDE ; - assign IncrRepairD = FlushedRetDE | (WrongPredRetD & ~RetD); // Guessed it was a ret, but its not + assign IncrRepairD = FlushedReturnDE | (WrongPredReturnD & ~ReturnD); // Guessed it was a return, but its not - assign DecRepairD = WrongPredRetD & RetD; // Guessed non ret but is a ret. + assign DecRepairD = WrongPredReturnD & ReturnD; // Guessed non return but is a return. assign CounterEn = PopF | PushE | RepairD; diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 02e3e1aa2..0aa1bc038 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -58,8 +58,8 @@ module bpred ( input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) - output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br - output logic JumpOrTakenBranchM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br + output logic [3:0] InstrClassM, // The valid instruction class. 
1-hot encoded as call, return, jr (not return), j, br + output logic JumpOrTakenBranchM, // The valid instruction class. 1-hot encoded as call, return, jr (not return), j, br // Report branch prediction status output logic BPPredWrongE, // Prediction is wrong @@ -88,14 +88,14 @@ module bpred ( logic [`XLEN-1:0] BTAD; - logic BTBJalF, BTBRetF, BTBJumpF, BTBBranchF; - logic BPBranchF, BPJumpF, BPRetF, BPJalF; - logic BPBranchD, BPJumpD, BPRetD, BPJalD; - logic RetD, JalD; - logic RetE, JalE; - logic BranchM, JumpM, RetM, JalM; - logic BranchW, JumpW, RetW, JalW; - logic WrongBPRetD; + logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF; + logic BPBranchF, BPJumpF, BPReturnF, BPCallF; + logic BPBranchD, BPJumpD, BPReturnD, BPCallD; + logic ReturnD, CallD; + logic ReturnE, CallE; + logic BranchM, JumpM, ReturnM, CallM; + logic BranchW, JumpW, ReturnW, CallW; + logic WrongBPReturnD; logic [`XLEN-1:0] PCW, IEUAdrW; // Part 1 branch direction prediction @@ -150,72 +150,28 @@ module bpred ( TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .BTAF, .BTAD, - .BTBIClassF({BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}), + .BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}), .PredictionInstrClassWrongM, .IEUAdrE, .IEUAdrM, .IEUAdrW, - .InstrClassD({JalD, RetD, JumpD, BranchD}), .InstrClassE({JalE, RetE, JumpE, BranchE}), .InstrClassM({JalM, RetM, JumpM, BranchM}), - .InstrClassW({JalW, RetW, JumpW, BranchW})); + .InstrClassD({CallD, ReturnD, JumpD, BranchD}), .InstrClassE({CallE, ReturnE, JumpE, BranchE}), .InstrClassM({CallM, ReturnM, JumpM, BranchM}), + .InstrClassW({CallW, ReturnW, JumpW, BranchW})); - if (!`INSTR_CLASS_PRED) begin : DirectClassDecode - // This section is mainly for testing, verification, and PPA comparison. 
- // An alternative to using the BTB to store the instruction class is to partially decode - // the instructions in the Fetch stage into, Jal, Ret, Jump, and Branch instructions. - // This logic is not described in the text book as of 23 February 2023. - logic cjal, cj, cjr, cjalr, CJumpF, CBranchF; - logic NCJumpF, NCBranchF; - if(`C_SUPPORTED) begin - logic [4:0] CompressedOpcF; - assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; - assign cjal = CompressedOpcF == 5'h09 & `XLEN == 32; - assign cj = CompressedOpcF == 5'h0d; - assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; - assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; - assign CJumpF = cjal | cj | cjr | cjalr; - assign CBranchF = CompressedOpcF[4:1] == 4'h7; - end else begin - assign {cjal, cj, cjr, cjalr, CJumpF, CBranchF} = '0; - end + icpred icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, + .PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW, + .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF, .BTBBranchF, + .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .PredictionInstrClassWrongM, .WrongBPReturnD); - assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; - assign NCBranchF = PostSpillInstrRawF[6:0] == 7'h63; - - assign BPBranchF = NCBranchF | (`C_SUPPORTED & CBranchF); - assign BPJumpF = NCJumpF | (`C_SUPPORTED & (CJumpF)); - assign BPRetF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or r5 - (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); - - assign BPJalF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 - 
(`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); - - end else begin - // This section connects the BTB's instruction class prediction. - assign {BPJalF, BPRetF, BPJumpF, BPBranchF} = {BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}; - end - assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF; - - // Part 3 RAS - RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .BPRetF, .RetD, .RetE, .JalE, - .WrongBPRetD, .RASPCF, .PCLinkE); - - assign BPPredPCF = BPRetF ? RASPCF : BTAF; - - assign RetD = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 - assign JalD = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 - - flopenrc #(2) InstrClassRegE(clk, reset, FlushE, ~StallE, {JalD, RetD}, {JalE, RetE}); - flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, {JalE, RetE, JumpE, BranchE}, {JalM, RetM, JumpM, BranchM}); - flopenrc #(4) InstrClassRegW(clk, reset, FlushM, ~StallW, {JalM, RetM, JumpM, BranchM}, {JalW, RetW, JumpW, BranchW}); + assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF; flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); - // branch predictor - flopenrc #(1) BPClassWrongRegM(clk, reset, FlushM, ~StallM, AnyWrongPredInstrClassE, PredictionInstrClassWrongM); - flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); + // Part 3 RAS + RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, + .BPReturnF, .ReturnD, .ReturnE, .CallE, + .WrongBPReturnD, .RASPCF, .PCLinkE); + + assign BPPredPCF = BPReturnF ? 
RASPCF : BTAF; - // pipeline the predicted class - flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, {BPJalF, BPRetF, BPJumpF, BPBranchF}, {BPJalD, BPRetD, BPJumpD, BPBranchD}); - // Check the prediction // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. // if the class prediction is wrong a regular instruction may have been predicted as a taken branch @@ -223,17 +179,14 @@ module bpred ( // The next instruction is always valid as no other flush would occur at the same time as the branch and not // also flush the branch. This will change in a superscaler cpu. assign PredictionPCWrongE = PCCorrectE != PCD; - - // branch class prediction wrong. - assign AnyWrongPredInstrClassD = |({BPJalD, BPRetD, BPJumpD, BPBranchD} ^ {JalD, RetD, JumpD, BranchD}); - assign WrongBPRetD = BPRetD ^ RetD; // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; + // *** clean up old signal names for testing. logic BPPredWrongEAlt; logic NotMatch; - assign BPPredWrongEAlt = PredictionPCWrongE & InstrValidE & InstrValidD; // this does not work for cubic benchmark + assign BPPredWrongEAlt = PredictionPCWrongE & InstrValidE & InstrValidD; assign NotMatch = BPPredWrongE != BPPredWrongEAlt; // Output the predicted PC or corrected PC on miss-predict. @@ -263,8 +216,8 @@ module bpred ( // could be wrong or the fall through address selected for branch predict not taken. // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of // both without the above inaccuracies. 
- assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~RetE) & PCSrcE; - assign RASPredPCWrongE = (RASPCE != IEUAdrE) & RetE & PCSrcE; + assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE; + assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE; assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE; @@ -283,7 +236,7 @@ module bpred ( end // **** Fix me - assign InstrClassM = {JalM, RetM, JumpM, BranchM}; + assign InstrClassM = {CallM, ReturnM, JumpM, BranchM}; flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); diff --git a/src/ifu/bpred/icpred.sv b/src/ifu/bpred/icpred.sv new file mode 100644 index 000000000..cd6772ba2 --- /dev/null +++ b/src/ifu/bpred/icpred.sv @@ -0,0 +1,107 @@ +/////////////////////////////////////////// +// icpred.sv +// +// Written: Ross Thomposn ross1728@gmail.com +// Created: February 26, 2023 +// Modified: February 26, 2023 +// +// Purpose: Partial decode of instructions into control flow instructions (cfi)P +// Call, Return, Jump, and Branch +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +`define INSTR_CLASS_PRED 1 + +module icpred ( + input logic clk, reset, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, + input logic [31:0] PostSpillInstrRawF, InstrD, // Instruction + input logic BranchD, BranchE, + input logic JumpD, JumpE, + output logic BranchM, BranchW, + output logic JumpM, JumpW, + output logic CallD, CallE, CallM, CallW, + output logic ReturnD, ReturnE, ReturnM, ReturnW, + input logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF, + output logic BPCallF, BPReturnF, BPJumpF, BPBranchF, + output logic PredictionInstrClassWrongM, WrongBPReturnD +); + + logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; + logic BPBranchD, BPJumpD, BPReturnD, BPCallD; + + if (!`INSTR_CLASS_PRED) begin : DirectClassDecode + // This section is mainly for testing, verification, and PPA comparison. + // An alternative to using the BTB to store the instruction class is to partially decode + // the instructions in the Fetch stage into Call, Return, Jump, and Branch instructions. + // This logic is not described in the text book as of 23 February 2023.
+ logic ccall, cj, cjr, ccallr, CJumpF, CBranchF; + logic NCJumpF, NCBranchF; + + if(`C_SUPPORTED) begin + logic [4:0] CompressedOpcF; + assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; + assign ccall = CompressedOpcF == 5'h09 & `XLEN == 32; + assign cj = CompressedOpcF == 5'h0d; + assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + assign ccallr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + assign CJumpF = ccall | cj | cjr | ccallr; + assign CBranchF = CompressedOpcF[4:1] == 4'h7; + end else begin + assign {ccall, cj, cjr, ccallr, CJumpF, CBranchF} = '0; + end + + assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; + assign NCBranchF = PostSpillInstrRawF[6:0] == 7'h63; + + assign BPBranchF = NCBranchF | (`C_SUPPORTED & CBranchF); + assign BPJumpF = NCJumpF | (`C_SUPPORTED & (CJumpF)); + assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or x5 + (`C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); + + assign BPCallF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // call(r) must link to ra or x5 + (`C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); + + end else begin + // This section connects the BTB's instruction class prediction.
+ assign {BPCallF, BPReturnF, BPJumpF, BPBranchF} = {BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}; + end + + assign ReturnD = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 + assign CallD = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // call(r) must link to ra or x5 + + flopenrc #(2) InstrClassRegE(clk, reset, FlushE, ~StallE, {CallD, ReturnD}, {CallE, ReturnE}); + flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, {CallE, ReturnE, JumpE, BranchE}, {CallM, ReturnM, JumpM, BranchM}); + flopenrc #(4) InstrClassRegW(clk, reset, FlushM, ~StallW, {CallM, ReturnM, JumpM, BranchM}, {CallW, ReturnW, JumpW, BranchW}); + + // branch predictor + flopenrc #(1) BPClassWrongRegM(clk, reset, FlushM, ~StallM, AnyWrongPredInstrClassE, PredictionInstrClassWrongM); + flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); + + // pipeline the predicted class + flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, {BPCallF, BPReturnF, BPJumpF, BPBranchF}, {BPCallD, BPReturnD, BPJumpD, BPBranchD}); + + // branch class prediction wrong.
+ assign AnyWrongPredInstrClassD = |({BPCallD, BPReturnD, BPJumpD, BPBranchD} ^ {CallD, ReturnD, JumpD, BranchD}); + assign WrongBPReturnD = BPReturnD ^ ReturnD; + +endmodule From d3f5708ded113d79927664c334866d3fdc26f0f2 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 18:35:10 -0800 Subject: [PATCH 27/55] StoreAmo faults are generated instead of load faults on AMO operations --- src/mmu/mmu.sv | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 728b6b80b..e3cd8031e 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -70,6 +70,7 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( logic Translate; // Translation occurs when virtual memory is active and DisableTranslation is off logic TLBHit; // Hit in TLB logic TLBPageFault; // Page fault from TLB + logic ReadNoAmoAccessM; // Read that is not part of atomic operation causes Load faults. Otherwise StoreAmo faults // only instantiate TLB if Virtual Memory is supported if (`VIRTMEM_SUPPORTED) begin:tlb @@ -118,6 +119,8 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( assign PMPLoadAccessFaultM = 0; end + assign ReadNoAmoAccessM = ReadAccessM & ~WriteAccessM;// AMO causes StoreAmo rather than Load fault + // Access faults // If TLB miss and translating we want to not have faults from the PMA and PMP checkers.
assign InstrAccessFaultF = (PMAInstrAccessFaultF | PMPInstrAccessFaultF) & ~TLBMiss; @@ -132,11 +135,11 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( 2'b10: DataMisalignedM = VAdr[1] | VAdr[0]; // lw, sw, flw, fsw, lwu 2'b11: DataMisalignedM = |VAdr[2:0]; // ld, sd, fld, fsd endcase - assign LoadMisalignedFaultM = DataMisalignedM & ReadAccessM; - assign StoreAmoMisalignedFaultM = DataMisalignedM & (WriteAccessM | AtomicAccessM); + assign LoadMisalignedFaultM = DataMisalignedM & ReadNoAmoAccessM; + assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM; // Specify which type of page fault is occurring assign InstrPageFaultF = TLBPageFault & ExecuteAccessF; - assign LoadPageFaultM = TLBPageFault & ReadAccessM; - assign StoreAmoPageFaultM = TLBPageFault & (WriteAccessM | AtomicAccessM); + assign LoadPageFaultM = TLBPageFault & ReadNoAmoAccessM; + assign StoreAmoPageFaultM = TLBPageFault & WriteAccessM; endmodule From 907fbfec382a3b8c5b3882b27a13de80b3e68203 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 18:50:37 -0800 Subject: [PATCH 28/55] Simplified Access fault logic in HPTW --- src/mmu/hptw.sv | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index d7f8ccdcc..248bceeb0 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -93,15 +93,17 @@ module hptw ( logic [`XLEN+1:0] HPTWAdrExt; logic ITLBMissOrDAFaultF; logic DTLBMissOrDAFaultM; + logic LSUAccessFaultM; logic [`PA_BITS-1:0] HPTWAdr; logic [1:0] HPTWRW; logic [2:0] HPTWSize; // 32 or 64 bit access statetype WalkerState, NextWalkerState, InitialWalkerState; // map hptw access faults onto either the original LSU load/store fault or instruction access fault - assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : (LSULoadAccessFaultM | LSUStoreAmoAccessFaultM) & DTLBWalk & MemRWM[1] & ~MemRWM[0]; - assign StoreAmoAccessFaultM = WalkerState == IDLE ? 
LSUStoreAmoAccessFaultM : (LSULoadAccessFaultM | LSUStoreAmoAccessFaultM) & DTLBWalk & MemRWM[0]; - assign HPTWInstrAccessFaultM = WalkerState == IDLE ? 1'b0: (LSUStoreAmoAccessFaultM | LSULoadAccessFaultM) & ~DTLBWalk; + assign LSUAccessFaultM = LSULoadAccessFaultM | LSUStoreAmoAccessFaultM; + assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[1] & ~MemRWM[0]; + assign StoreAmoAccessFaultM = WalkerState == IDLE ? LSUStoreAmoAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[0]; + assign HPTWInstrAccessFaultM = WalkerState == IDLE ? 1'b0: LSUAccessFaultM & ~DTLBWalk; // Extract bits from CSRs and inputs assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; From 447f6b144348381090a51185ad24c03b5ac89355 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 26 Feb 2023 21:28:36 -0600 Subject: [PATCH 29/55] Branch predictor cleanup. --- src/ifu/bpred/bpred.sv | 127 ++++++++++++++++++---------------------- src/ifu/bpred/btb.sv | 22 ++++--- src/ifu/bpred/icpred.sv | 2 +- 3 files changed, 72 insertions(+), 79 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 0aa1bc038..35fb3f5b6 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -70,33 +70,32 @@ module bpred ( output logic PredictionInstrClassWrongM // Class prediction is wrong ); - logic [1:0] BPDirPredF; + logic [1:0] BPDirPredF; - logic [`XLEN-1:0] BTAF, RASPCF; - logic PredictionPCWrongE; - logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; - logic BPDirPredWrongE; + logic [`XLEN-1:0] BTAF, RASPCF; + logic PredictionPCWrongE; + logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; + logic BPDirPredWrongE; - logic BPPCSrcF; - logic [`XLEN-1:0] BPPredPCF; - logic [`XLEN-1:0] PCNext0F; - logic [`XLEN-1:0] PCCorrectE; - logic [3:0] WrongPredInstrClassD; + logic BPPCSrcF; + logic [`XLEN-1:0] BPPCF; + logic [`XLEN-1:0] PCNext0F; + logic [`XLEN-1:0] PCCorrectE; + logic [3:0] WrongPredInstrClassD; - logic
BTBTargetWrongE; - logic RASTargetWrongE; + logic BTBTargetWrongE; + logic RASTargetWrongE; - logic [`XLEN-1:0] BTAD; + logic [`XLEN-1:0] BTAD; - logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF; - logic BPBranchF, BPJumpF, BPReturnF, BPCallF; - logic BPBranchD, BPJumpD, BPReturnD, BPCallD; - logic ReturnD, CallD; - logic ReturnE, CallE; - logic BranchM, JumpM, ReturnM, CallM; - logic BranchW, JumpW, ReturnW, CallW; - logic WrongBPReturnD; - logic [`XLEN-1:0] PCW, IEUAdrW; + logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF; + logic BPBranchF, BPJumpF, BPReturnF, BPCallF; + logic BPBranchD, BPJumpD, BPReturnD, BPCallD; + logic ReturnD, CallD; + logic ReturnE, CallE; + logic BranchM, JumpM, ReturnM, CallM; + logic BranchW, JumpW, ReturnW, CallW; + logic WrongBPReturnD; // Part 1 branch direction prediction // look into the 2 port Sram model. something is wrong. @@ -148,30 +147,27 @@ module bpred ( btb #(`BTB_SIZE) TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, + .PCNextF, .PCF, .PCD, .PCE, .PCM, .BTAF, .BTAD, .BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}), .PredictionInstrClassWrongM, - .IEUAdrE, .IEUAdrM, .IEUAdrW, - .InstrClassD({CallD, ReturnD, JumpD, BranchD}), .InstrClassE({CallE, ReturnE, JumpE, BranchE}), .InstrClassM({CallM, ReturnM, JumpM, BranchM}), + .IEUAdrE, .IEUAdrM, + .InstrClassD({CallD, ReturnD, JumpD, BranchD}), + .InstrClassE({CallE, ReturnE, JumpE, BranchE}), + .InstrClassM({CallM, ReturnM, JumpM, BranchM}), .InstrClassW({CallW, ReturnW, JumpW, BranchW})); icpred icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW, - .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF, .BTBBranchF, - .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, 
.PredictionInstrClassWrongM, .WrongBPReturnD); - - assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF; - flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); + .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF, + .BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .PredictionInstrClassWrongM, .WrongBPReturnD); // Part 3 RAS RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, .BPReturnF, .ReturnD, .ReturnE, .CallE, .WrongBPReturnD, .RASPCF, .PCLinkE); - assign BPPredPCF = BPReturnF ? RASPCF : BTAF; - // Check the prediction // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. // if the class prediction is wrong a regular instruction may have been predicted as a taken branch @@ -179,19 +175,15 @@ module bpred ( // The next instruction is always valid as no other flush would occur at the same time as the branch and not // also flush the branch. This will change in a superscaler cpu. assign PredictionPCWrongE = PCCorrectE != PCD; - // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; - - // *** clean up old signal names for testing. - logic BPPredWrongEAlt; - logic NotMatch; - assign BPPredWrongEAlt = PredictionPCWrongE & InstrValidE & InstrValidD; - assign NotMatch = BPPredWrongE != BPPredWrongEAlt; - + flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); + // Output the predicted PC or corrected PC on miss-predict. + assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF; + mux2 #(`XLEN) pcmuxbp(BTAF, RASPCF, BPReturnF, BPPCF); // Selects the BP or PC+2/4. 
- mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, BPPCSrcF, PCNext0F); + mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PCNext0F); // If the prediction is wrong select the correct address. mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F); // Correct branch/jump target. @@ -203,42 +195,39 @@ module bpred ( else assign NextValidPCE = PCE; if(`ZICOUNTERS_SUPPORTED) begin - logic JumpOrTakenBranchE; - logic [`XLEN-1:0] BTAE, RASPCD, RASPCE; - logic BTBPredPCWrongE, RASPredPCWrongE; - // performance counters - // 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now - // 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal) - // 3. target ras (ras target wrong / class[2]) - // 4. direction (br dir wrong / class[0]) + logic JumpOrTakenBranchE; + logic [`XLEN-1:0] BTAE, RASPCD, RASPCE; + logic BTBPredPCWrongE, RASPredPCWrongE; + // performance counters + // 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now + // 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal) + // 3. target ras (ras target wrong / class[2]) + // 4. direction (br dir wrong / class[0]) - // Unforuantely we can't use PCD to infer the correctness of the BTB or RAS because the class prediction - // could be wrong or the fall through address selected for branch predict not taken. - // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of - // both without the above inaccuracies. - assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE; - assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE; + // Unforuantely we can't use PCD to infer the correctness of the BTB or RAS because the class prediction + // could be wrong or the fall through address selected for branch predict not taken. 
+ // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of + // both without the above inaccuracies. + assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE; + assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE; - assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE; - - flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); + assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE; + + flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); - flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); + flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); - flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); - flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); - flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, - {BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE}, - {BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM}); - + flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); + flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); + flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, + {BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE}, + {BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM}); + end else begin - assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0; + assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0; end // **** Fix me assign InstrClassM = {CallM, ReturnM, JumpM, BranchM}; - flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); - flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); - endmodule diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 5ad92517d..b0d988943 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -34,7 +34,7 @@ module btb 
#(parameter Depth = 10 ) ( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW, - input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW,// PC at various stages + input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages output logic [`XLEN-1:0] BTAF, // BTB's guess at PC output logic [`XLEN-1:0] BTAD, output logic [3:0] BTBIClassF, // BTB's guess at instruction class @@ -42,20 +42,21 @@ module btb #(parameter Depth = 10 ) ( input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb - input logic [`XLEN-1:0] IEUAdrW, input logic [3:0] InstrClassD, // Instruction class to insert into btb input logic [3:0] InstrClassE, // Instruction class to insert into btb input logic [3:0] InstrClassM, // Instruction class to insert into btb input logic [3:0] InstrClassW ); - logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex; - logic [`XLEN-1:0] ResetPC; - logic MatchD, MatchE, MatchM, MatchW, MatchX; - logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; - logic [`XLEN+3:0] TableBTBPredF; - logic UpdateEn; - + logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex; + logic [`XLEN-1:0] ResetPC; + logic MatchD, MatchE, MatchM, MatchW, MatchX; + logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; + logic [`XLEN+3:0] TableBTBPredF; + logic UpdateEn; + logic [`XLEN-1:0] IEUAdrW; + logic [`XLEN-1:0] PCW; + // hashing function for indexing the PC // We have Depth bits to index, but XLEN bits as the input. 
// bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if @@ -94,5 +95,8 @@ module btb #(parameter Depth = 10 ) ( .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1)); flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); + flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); + flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); + endmodule diff --git a/src/ifu/bpred/icpred.sv b/src/ifu/bpred/icpred.sv index cd6772ba2..78689806f 100644 --- a/src/ifu/bpred/icpred.sv +++ b/src/ifu/bpred/icpred.sv @@ -34,7 +34,7 @@ module icpred ( input logic clk, reset, input logic StallF, StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, - input logic [31:0] PostSpillInstrRawF, InstrD, // Instruction + input logic [31:0] PostSpillInstrRawF, InstrD, // Instruction input logic BranchD, BranchE, input logic JumpD, JumpE, output logic BranchM, BranchW, From 0d3d499940bcf772cfc2abcfd0f19f60bd44b8f5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 26 Feb 2023 19:38:34 -0800 Subject: [PATCH 30/55] hptw typo fix --- src/mmu/hptw.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index 248bceeb0..f2df8ea92 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -100,7 +100,7 @@ module hptw ( statetype WalkerState, NextWalkerState, InitialWalkerState; // map hptw access faults onto either the original LSU load/store fault or instruction access fault - assign LSUAccessFault = LSULoadAccessFaultM | LSUStoreAmoAccessFaultM; + assign LSUAccessFaultM = LSULoadAccessFaultM | LSUStoreAmoAccessFaultM; assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[1] & ~MemRWM[0]; assign StoreAmoAccessFaultM = WalkerState == IDLE ? LSUStoreAmoAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[0]; assign HPTWInstrAccessFaultM = WalkerState == IDLE ? 
1'b0: LSUAccessFaultM & ~DTLBWalk; From a81cc883e919e1707c51c8054747844d00e141e9 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 27 Feb 2023 00:39:19 -0600 Subject: [PATCH 31/55] Signal name changes. --- src/hazard/hazard.sv | 6 +++--- src/ifu/bpred/bpred.sv | 13 ++++++------- src/ifu/ifu.sv | 6 +++--- src/wally/wallypipelinedcore.sv | 6 +++--- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index 650e8367d..85d23d373 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -30,7 +30,7 @@ module hazard ( // Detect hazards - input logic BPPredWrongE, CSRWriteFenceM, RetM, TrapM, + input logic BPWrongE, CSRWriteFenceM, RetM, TrapM, input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD, input logic LSUStallM, IFUStallF, input logic FCvtIntStallD, FPUStallD, @@ -65,8 +65,8 @@ module hazard ( // Similarly, CSR writes and fences flush all subsequent instructions and refetch them in light of the new operating modes and cache/TLB contents // Branch misprediction is found in the Execute stage and must flush the next two instructions. // However, an active division operation resides in the Execute stage, and when the BP incorrectly mispredicts the divide as a taken branch, the divde must still complete - assign FlushDCause = TrapM | RetM | CSRWriteFenceM | BPPredWrongE; - assign FlushECause = TrapM | RetM | CSRWriteFenceM |(BPPredWrongE & ~(DivBusyE | FDivBusyE)); + assign FlushDCause = TrapM | RetM | CSRWriteFenceM | BPWrongE; + assign FlushECause = TrapM | RetM | CSRWriteFenceM |(BPWrongE & ~(DivBusyE | FDivBusyE)); assign FlushMCause = TrapM | RetM | CSRWriteFenceM; assign FlushWCause = TrapM; diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 35fb3f5b6..7512e2d3b 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -62,7 +62,7 @@ module bpred ( output logic JumpOrTakenBranchM, // The valid instruction class. 
1-hot encoded as call, return, jr (not return), j, br // Report branch prediction status - output logic BPPredWrongE, // Prediction is wrong + output logic BPWrongE, // Prediction is wrong output logic BPPredWrongM, // Prediction is wrong output logic BPDirPredWrongM, // Prediction direction is wrong output logic BTBPredPCWrongM, // Prediction target wrong @@ -73,7 +73,7 @@ module bpred ( logic [1:0] BPDirPredF; logic [`XLEN-1:0] BTAF, RASPCF; - logic PredictionPCWrongE; + logic BPPCWrongE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; logic BPDirPredWrongE; @@ -157,7 +157,6 @@ module bpred ( .InstrClassM({CallM, ReturnM, JumpM, BranchM}), .InstrClassW({CallW, ReturnW, JumpW, BranchW})); - icpred icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW, .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF, @@ -174,10 +173,10 @@ module bpred ( // this will result in PCD not being equal to the fall through address PCLinkE (PCE+4). // The next instruction is always valid as no other flush would occur at the same time as the branch and not // also flush the branch. This will change in a superscaler cpu. - assign PredictionPCWrongE = PCCorrectE != PCD; + assign BPPCWrongE = PCCorrectE != PCD; // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. - assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; - flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); + assign BPWrongE = BPPCWrongE & InstrValidE & InstrValidD; + flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPWrongE, BPPredWrongM); // Output the predicted PC or corrected PC on miss-predict. 
assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF; @@ -185,7 +184,7 @@ module bpred ( // Selects the BP or PC+2/4. mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PCNext0F); // If the prediction is wrong select the correct address. - mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F); + mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPWrongE, PCNext1F); // Correct branch/jump target. mux2 #(`XLEN) pccorrectemux(PCLinkE, IEUAdrE, PCSrcE, PCCorrectE); diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index e13a08bf9..36c8d6256 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -54,7 +54,7 @@ module ifu ( input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address output logic [`XLEN-1:0] PCE, // Execution stage instruction address - output logic BPPredWrongE, // Prediction is wrong + output logic BPWrongE, // Prediction is wrong output logic BPPredWrongM, // Prediction is wrong // Mem output logic CommittedF, // I$ or bus memory operation started, delay interrupts @@ -331,12 +331,12 @@ module ifu ( .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, .BranchD, .BranchE, .JumpD, .JumpE, .InstrD, .PCNextF, .PCPlus2or4F, .PCNext1F, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, - .PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPPredWrongM, + .PCD, .PCLinkE, .InstrClassM, .BPWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPPredWrongM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM); end else begin : bpred mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PCNext1F)); - assign BPPredWrongE = PCSrcE; + assign BPWrongE = PCSrcE; assign {InstrClassM, BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM} = '0; assign NextValidPCE = PCE; end diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 
c4b83386d..a0acb2001 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -140,7 +140,7 @@ module wallypipelinedcore ( logic LSUHWRITE; logic LSUHREADY; - logic BPPredWrongE, BPPredWrongM; + logic BPWrongE, BPPredWrongM; logic BPDirPredWrongM; logic BTBPredPCWrongM; logic RASPredPCWrongM; @@ -173,7 +173,7 @@ module wallypipelinedcore ( .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE, .ICacheAccess, .ICacheMiss, // Execute - .PCLinkE, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCE, .BPPredWrongE, .BPPredWrongM, + .PCLinkE, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCE, .BPWrongE, .BPPredWrongM, // Mem .CommittedF, .UnalignedPCNextF, .InvalidateICacheM, .CSRWriteFenceM, .InstrD, .InstrM, .PCM, .InstrClassM, .BPDirPredWrongM, .JumpOrTakenBranchM, @@ -268,7 +268,7 @@ module wallypipelinedcore ( // global stall and flush control hazard hzu( - .BPPredWrongE, .CSRWriteFenceM, .RetM, .TrapM, + .BPWrongE, .CSRWriteFenceM, .RetM, .TrapM, .LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD, .LSUStallM, .IFUStallF, .FCvtIntStallD, .FPUStallD, From 5c8fee127bf8f608e5d672604772a3f66c8b3dfc Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 27 Feb 2023 07:29:53 -0800 Subject: [PATCH 32/55] Added support for ZMMUL --- config/buildroot/wally-config.vh | 1 + config/fpga/wally-config.vh | 1 + config/rv32e/wally-config.vh | 1 + config/rv32gc/wally-config.vh | 1 + config/rv32i/wally-config.vh | 1 + config/rv32imc/wally-config.vh | 1 + config/rv64fpquad/wally-config.vh | 1 + config/rv64gc/wally-config.vh | 1 + config/rv64i/wally-config.vh | 1 + src/ieu/controller.sv | 4 +- src/mdu/mdu.sv | 6 ++- src/wally/wallypipelinedcore.sv | 2 +- testbench/common/riscvassertions.sv | 60 +++++++++++++++-------------- 13 files changed, 47 insertions(+), 34 deletions(-) diff --git a/config/buildroot/wally-config.vh b/config/buildroot/wally-config.vh index fbb5799d5..3a68571dd 100644 --- a/config/buildroot/wally-config.vh +++ b/config/buildroot/wally-config.vh @@ -136,6 
+136,7 @@ `define SVADU_SUPPORTED 1 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/fpga/wally-config.vh b/config/fpga/wally-config.vh index 03bc3f755..1f7447f4d 100644 --- a/config/fpga/wally-config.vh +++ b/config/fpga/wally-config.vh @@ -145,6 +145,7 @@ `define SVADU_SUPPORTED 1 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32e/wally-config.vh b/config/rv32e/wally-config.vh index b000b7911..aee0e5410 100644 --- a/config/rv32e/wally-config.vh +++ b/config/rv32e/wally-config.vh @@ -139,6 +139,7 @@ `define BTB_SIZE 10 `define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32gc/wally-config.vh b/config/rv32gc/wally-config.vh index d1571067b..ac68e3ee4 100644 --- a/config/rv32gc/wally-config.vh +++ b/config/rv32gc/wally-config.vh @@ -138,6 +138,7 @@ `define BTB_SIZE 10 `define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32i/wally-config.vh b/config/rv32i/wally-config.vh index 0f2e91c95..d75d0c462 100644 --- a/config/rv32i/wally-config.vh +++ b/config/rv32i/wally-config.vh @@ -139,6 +139,7 @@ `define BTB_SIZE 10 `define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32imc/wally-config.vh b/config/rv32imc/wally-config.vh index f6b29895d..42442d46e 100644 --- a/config/rv32imc/wally-config.vh +++ b/config/rv32imc/wally-config.vh @@ -138,6 +138,7 @@ `define BTB_SIZE 10 `define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv64fpquad/wally-config.vh b/config/rv64fpquad/wally-config.vh index 3e4b91600..34d7628e0 100644 --- a/config/rv64fpquad/wally-config.vh +++ b/config/rv64fpquad/wally-config.vh @@ -141,6 +141,7 @@ `define BTB_SIZE 10 `define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU 
division architecture `define RADIX 32'h4 diff --git a/config/rv64gc/wally-config.vh b/config/rv64gc/wally-config.vh index f0dad93b4..4e2ab3dfd 100644 --- a/config/rv64gc/wally-config.vh +++ b/config/rv64gc/wally-config.vh @@ -141,6 +141,7 @@ `define BTB_SIZE 10 `define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv64i/wally-config.vh b/config/rv64i/wally-config.vh index f485c667d..34c37f73a 100644 --- a/config/rv64i/wally-config.vh +++ b/config/rv64i/wally-config.vh @@ -141,6 +141,7 @@ `define BTB_SIZE 10 `define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 0b1852cb0..512847046 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -150,14 +150,14 @@ module controller( ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_1; // Non-implemented instruction 7'b0110011: if (Funct7D == 7'b0000000 | Funct7D == 7'b0100000) ControlsD = `CTRLW'b1_000_00_00_000_0_1_0_0_0_0_0_0_0_00_0; // R-type - else if (Funct7D == 7'b0000001 & `M_SUPPORTED) + else if (Funct7D == 7'b0000001 & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2]))) ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_0_0_0_0_1_00_0; // Multiply/divide else ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_1; // Non-implemented instruction 7'b0110111: ControlsD = `CTRLW'b1_100_01_00_000_0_0_0_1_0_0_0_0_0_00_0; // lui 7'b0111011: if ((Funct7D == 7'b0000000 | Funct7D == 7'b0100000) & `XLEN == 64) ControlsD = `CTRLW'b1_000_00_00_000_0_1_0_0_1_0_0_0_0_00_0; // R-type W instructions for RV64i - else if (Funct7D == 7'b0000001 & `M_SUPPORTED & `XLEN == 64) + else if (Funct7D == 7'b0000001 & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])) & `XLEN == 64) ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_1_0_0_0_1_00_0; // W-type Multiply/Divide else ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_1; // Non-implemented 
instruction diff --git a/src/mdu/mdu.sv b/src/mdu/mdu.sv index 4a85bf478..b62add60d 100644 --- a/src/mdu/mdu.sv +++ b/src/mdu/mdu.sv @@ -51,16 +51,18 @@ module mdu( // Divider // Start a divide when a new division instruction is received and the divider isn't already busy or finishing // When IDIV_ON_FPU is set, use the FPU divider instead - if (`IDIV_ON_FPU) begin + // In ZMMUL, with M_SUPPORTED = 0, omit the divider + if ((`IDIV_ON_FPU) || (!`M_SUPPORTED)) begin:nodiv assign QuotM = 0; assign RemM = 0; assign DivBusyE = 0; - end else begin + end else begin:div intdivrestoring div(.clk, .reset, .StallM, .FlushE, .DivSignedE(~Funct3E[0]), .W64E, .IntDivE, .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM); end // Result multiplexer + // For ZMMUL, QuotM and RemM are tied to 0, so the mux automatically simplifies always_comb case (Funct3M) 3'b000: PrelimResultM = ProdM[`XLEN-1:0]; // mul diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index f94986349..d5458ed74 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -313,7 +313,7 @@ module wallypipelinedcore ( end // multiply/divide unit - if (`M_SUPPORTED) begin:mdu + if (`M_SUPPORTED | `ZMMUL_SUPPORTED) begin:mdu mdu mdu(.clk, .reset, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, diff --git a/testbench/common/riscvassertions.sv b/testbench/common/riscvassertions.sv index f6cb4c6f6..f733aac58 100644 --- a/testbench/common/riscvassertions.sv +++ b/testbench/common/riscvassertions.sv @@ -23,40 +23,42 @@ module riscvassertions; initial begin - assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); - assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support"); - assert (`IDIV_BITSPERCYCLE == 1 | `IDIV_BITSPERCYCLE==2 | `IDIV_BITSPERCYCLE==4) else 
$error("Illegal number of divider bits/cycle: IDIV_BITSPERCYCLE must be 1, 2, or 4"); - assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)"); - assert (`D_SUPPORTED | ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)"); - assert (`F_SUPPORTED | ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)"); - assert (`DCACHE_SUPPORTED | ~`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN"); + $display("IDIV_ON_FPU = %b M_SUPPORTED %b comb %b\n", `IDIV_ON_FPU, `M_SUPPORTED, ((`IDIV_ON_FPU) || (!`M_SUPPORTED))); + assert (`PMP_ENTRIES == 0 || `PMP_ENTRIES==16 || `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); + assert (`S_SUPPORTED || `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support"); + assert (`IDIV_BITSPERCYCLE == 1 || `IDIV_BITSPERCYCLE==2 || `IDIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: IDIV_BITSPERCYCLE must be 1, 2, or 4"); + assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)"); + assert (`D_SUPPORTED || ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)"); + assert (`F_SUPPORTED || ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)"); + assert (`DCACHE_SUPPORTED || ~`F_SUPPORTED || `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN"); assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported"); - assert (`FLEN<=`XLEN | `DCACHE_SUPPORTED | `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unleses data cache or DTIM is supported"); - assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (!`DCACHE_SUPPORTED) | `VIRTMEM_SUPPORTED == 0) else 
$error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`DCACHE_LINELENINBITS >= 128 | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); + assert (`FLEN<=`XLEN || `DCACHE_SUPPORTED || `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unleses data cache or DTIM is supported"); + assert (`DCACHE_WAYSIZEINBYTES <= 4096 || (!`DCACHE_SUPPORTED) || `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`DCACHE_LINELENINBITS >= 128 || (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); - assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (!`ICACHE_SUPPORTED) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`ICACHE_LINELENINBITS >= 32 | (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); + assert (`ICACHE_WAYSIZEINBYTES <= 4096 || (!`ICACHE_SUPPORTED) || `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`ICACHE_LINELENINBITS >= 32 || (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); - assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (!`DCACHE_SUPPORTED)) else 
$error("DCACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (!`ICACHE_SUPPORTED)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); - assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); + assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS || (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES || (!`DCACHE_SUPPORTED)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS || (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES || (!`ICACHE_SUPPORTED)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES || `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); + assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES || `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); assert (`UNCORE_RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if UNCORE_RAM_RANGE is less than 56'h07FFFFFF"); - assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); - assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZICSR not supported"); - assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only 
supported if U also is supported"); - assert (`VIRTMEM_SUPPORTED == 0 | (`DTIM_SUPPORTED == 0 & `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses"); - assert (`DCACHE_SUPPORTED | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); - assert (`ICACHE_SUPPORTED | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); - assert ((`DCACHE_SUPPORTED == 0 & `ICACHE_SUPPORTED == 0) | `BUS_SUPPORTED) else $error("Dcache and Icache requires DBUS_SUPPORTED."); - assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); + assert (`ZICSR_SUPPORTED == 1 || (`PMP_ENTRIES == 0 && `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); + assert (`ZICSR_SUPPORTED == 1 || (`S_SUPPORTED == 0 && `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZICSR not supported"); + assert (`U_SUPPORTED || (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); + assert (`VIRTMEM_SUPPORTED == 0 || (`DTIM_SUPPORTED == 0 && `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses"); + assert (`DCACHE_SUPPORTED || `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); + assert (`ICACHE_SUPPORTED || `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); + assert ((`DCACHE_SUPPORTED == 0 && `ICACHE_SUPPORTED == 0) || `BUS_SUPPORTED) else $error("Dcache and Icache requires DBUS_SUPPORTED."); + assert (`DCACHE_LINELENINBITS <= `XLEN*16 || (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 
words"); - assert (`DCACHE_SUPPORTED | `A_SUPPORTED == 0) else $error("Atomic extension (A) requires cache on Wally."); - assert (`IDIV_ON_FPU == 0 | `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED"); - assert (`SSTC_SUPPORTED == 0 | (`S_SUPPORTED)) else $error("SSTC requires S_SUPPORTED"); + assert (`DCACHE_SUPPORTED || (`A_SUPPORTED == 0)) else $error("Atomic extension (A) requires cache on Wally."); + assert (`IDIV_ON_FPU == 0 || `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED"); + assert (`SSTC_SUPPORTED == 0 || (`S_SUPPORTED)) else $error("SSTC requires S_SUPPORTED"); + assert ((`ZMMUL_SUPPORTED == 0) || (`M_SUPPORTED ==0)) else $error("At most one of ZMMUL_SUPPORTED and M_SUPPORTED can be enabled"); end endmodule From 69e83586396c89ba5ce8d4dad915a2332e2dd1a5 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 27 Feb 2023 17:37:29 -0600 Subject: [PATCH 33/55] Modified the BTB to save power by not updating when the prediction is unchanged. --- src/ifu/bpred/bpred.sv | 11 +++++------ src/ifu/bpred/btb.sv | 20 ++++++++++++++++---- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 7512e2d3b..3ad0506c4 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -96,7 +96,8 @@ module bpred ( logic BranchM, JumpM, ReturnM, CallM; logic BranchW, JumpW, ReturnW, CallW; logic WrongBPReturnD; - + logic [`XLEN-1:0] BTAE; + // Part 1 branch direction prediction // look into the 2 port Sram model. something is wrong. 
if (`BPRED_TYPE == "BP_TWOBIT") begin:Predictor @@ -148,9 +149,9 @@ module bpred ( btb #(`BTB_SIZE) TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, - .BTAF, .BTAD, + .BTAF, .BTAD, .BTAE, .BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}), - .PredictionInstrClassWrongM, + .PredictionInstrClassWrongM, .AnyWrongPredInstrClassE, .IEUAdrE, .IEUAdrM, .InstrClassD({CallD, ReturnD, JumpD, BranchD}), .InstrClassE({CallE, ReturnE, JumpE, BranchE}), @@ -195,7 +196,7 @@ module bpred ( if(`ZICOUNTERS_SUPPORTED) begin logic JumpOrTakenBranchE; - logic [`XLEN-1:0] BTAE, RASPCD, RASPCE; + logic [`XLEN-1:0] RASPCD, RASPCE; logic BTBPredPCWrongE, RASPredPCWrongE; // performance counters // 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now @@ -214,8 +215,6 @@ module bpred ( flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); - flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); - flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index b0d988943..1d6c0ff8d 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -36,10 +36,12 @@ module btb #(parameter Depth = 10 ) ( input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW, input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages output logic [`XLEN-1:0] BTAF, // BTB's guess at PC - output logic [`XLEN-1:0] BTAD, + output logic [`XLEN-1:0] BTAD, + output logic [`XLEN-1:0] BTAE, output logic [3:0] BTBIClassF, // BTB's guess at instruction class // update input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong + input logic 
AnyWrongPredInstrClassE, input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb input logic [3:0] InstrClassD, // Instruction class to insert into btb @@ -53,9 +55,11 @@ module btb #(parameter Depth = 10 ) ( logic MatchD, MatchE, MatchM, MatchW, MatchX; logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; logic [`XLEN+3:0] TableBTBPredF; - logic UpdateEn; logic [`XLEN-1:0] IEUAdrW; logic [`XLEN-1:0] PCW; + logic BTAWrongE, BTBWrongE; + logic BTBWrongM; + // hashing function for indexing the PC // We have Depth bits to index, but XLEN bits as the input. @@ -87,14 +91,22 @@ module btb #(parameter Depth = 10 ) ( assign {BTBIClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF}; - assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; // An optimization may be using a PC relative address. ram2p1r1wbe #(2**Depth, `XLEN+4) memory( .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF), - .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1)); + .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1)); flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); + + // BTAE is not strickly necessary. However it is used by two parts of wally. + // 1. It gates updates to the BTB when the prediction does not change. This save power. + // 2. BTAWrongE is used by the performance counters to track when the BTB's BTA or instruction class is wrong. 
+ flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); + assign BTAWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]); + assign BTBWrongE = BTAWrongE | AnyWrongPredInstrClassE; + flopenrc #(1) BTBWrongMReg(clk, reset, FlushM, ~StallM, BTBWrongE, BTBWrongM); + flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); From 3261f31e88236619ef21fffed61f3b95bf31977a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 27 Feb 2023 20:00:50 -0600 Subject: [PATCH 34/55] This icpred and btb changes are causing a performance issue. --- src/ifu/bpred/bpred.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 3ad0506c4..5c8938bca 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -174,9 +174,9 @@ module bpred ( // this will result in PCD not being equal to the fall through address PCLinkE (PCE+4). // The next instruction is always valid as no other flush would occur at the same time as the branch and not // also flush the branch. This will change in a superscaler cpu. - assign BPPCWrongE = PCCorrectE != PCD; + assign BPPCWrongE = ; // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. - assign BPWrongE = BPPCWrongE & InstrValidE & InstrValidD; + assign BPWrongE = (PCCorrectE != PCD) & InstrValidE & InstrValidD; flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPWrongE, BPPredWrongM); // Output the predicted PC or corrected PC on miss-predict. From a823d8d0217aa53758f3e6e8bb95bcc2b1b7c582 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 28 Feb 2023 15:21:56 -0600 Subject: [PATCH 35/55] Undid the btb update as it reduces performance. 
--- src/ifu/bpred/bpred.sv | 5 ++--- src/ifu/bpred/btb.sv | 6 +++++- src/ifu/bpred/icpred.sv | 5 ++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 5c8938bca..61855055f 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -28,7 +28,7 @@ `include "wally-config.vh" -`define INSTR_CLASS_PRED 1 +`define INSTR_CLASS_PRED 0 module bpred ( input logic clk, reset, @@ -158,7 +158,7 @@ module bpred ( .InstrClassM({CallM, ReturnM, JumpM, BranchM}), .InstrClassW({CallW, ReturnW, JumpW, BranchW})); - icpred icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, + icpred #(`INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW, .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF, .BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .PredictionInstrClassWrongM, .WrongBPReturnD); @@ -174,7 +174,6 @@ module bpred ( // this will result in PCD not being equal to the fall through address PCLinkE (PCE+4). // The next instruction is always valid as no other flush would occur at the same time as the branch and not // also flush the branch. This will change in a superscaler cpu. - assign BPPCWrongE = ; // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. assign BPWrongE = (PCCorrectE != PCD) & InstrValidE & InstrValidD; flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPWrongE, BPPredWrongM); diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 1d6c0ff8d..31c3fed58 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -92,10 +92,14 @@ module btb #(parameter Depth = 10 ) ( assign {BTBIClassF, BTAF} = MatchX ? 
ForwardBTBPredictionF : {TableBTBPredF}; + logic UpdateEn; + // An optimization may be using a PC relative address. ram2p1r1wbe #(2**Depth, `XLEN+4) memory( .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF), - .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1)); + .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1)); + + assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); diff --git a/src/ifu/bpred/icpred.sv b/src/ifu/bpred/icpred.sv index 78689806f..f6b0f7d11 100644 --- a/src/ifu/bpred/icpred.sv +++ b/src/ifu/bpred/icpred.sv @@ -28,9 +28,8 @@ `include "wally-config.vh" -`define INSTR_CLASS_PRED 1 -module icpred ( +module icpred #(parameter INSTR_CLASS_PRED = 1)( input logic clk, reset, input logic StallF, StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, @@ -49,7 +48,7 @@ module icpred ( logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; logic BPBranchD, BPJumpD, BPReturnD, BPCallD; - if (!`INSTR_CLASS_PRED) begin : DirectClassDecode + if (!INSTR_CLASS_PRED) begin : DirectClassDecode // This section is mainly for testing, verification, and PPA comparison. // An alternative to using the BTB to store the instruction class is to partially decode // the instructions in the Fetch stage into, Call, Return, Jump, and Branch instructions. From 8af61c0cc00c39027418b88855bca704fbed7c6c Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 28 Feb 2023 15:37:25 -0600 Subject: [PATCH 36/55] Name changes to reflect diagrams. 
--- src/ifu/bpred/bpred.sv | 14 +++++++------- src/ifu/ifu.sv | 16 ++++++++-------- src/privileged/csr.sv | 8 ++++---- src/privileged/csrc.sv | 4 ++-- src/privileged/privileged.sv | 8 ++++---- src/wally/wallypipelinedcore.sv | 14 +++++++------- 6 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 61855055f..49c9a02c0 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -39,7 +39,7 @@ module bpred ( input logic [31:0] InstrD, // Decompressed decode stage instruction. Used to decode instruction class input logic [`XLEN-1:0] PCNextF, // Next Fetch Address input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4 - output logic [`XLEN-1:0] PCNext1F, // Branch Predictor predicted or corrected fetch address on miss prediction + output logic [`XLEN-1:0] PC1NextF, // Branch Predictor predicted or corrected fetch address on miss prediction output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage // Update Predictor @@ -63,7 +63,7 @@ module bpred ( // Report branch prediction status output logic BPWrongE, // Prediction is wrong - output logic BPPredWrongM, // Prediction is wrong + output logic BPWrongM, // Prediction is wrong output logic BPDirPredWrongM, // Prediction direction is wrong output logic BTBPredPCWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong @@ -79,7 +79,7 @@ module bpred ( logic BPPCSrcF; logic [`XLEN-1:0] BPPCF; - logic [`XLEN-1:0] PCNext0F; + logic [`XLEN-1:0] PC0NextF; logic [`XLEN-1:0] PCCorrectE; logic [3:0] WrongPredInstrClassD; @@ -176,21 +176,21 @@ module bpred ( // also flush the branch. This will change in a superscaler cpu. // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. 
assign BPWrongE = (PCCorrectE != PCD) & InstrValidE & InstrValidD; - flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPWrongE, BPPredWrongM); + flopenrc #(1) BPWrongMReg(clk, reset, FlushM, ~StallM, BPWrongE, BPWrongM); // Output the predicted PC or corrected PC on miss-predict. assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF; mux2 #(`XLEN) pcmuxbp(BTAF, RASPCF, BPReturnF, BPPCF); // Selects the BP or PC+2/4. - mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PCNext0F); + mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PC0NextF); // If the prediction is wrong select the correct address. - mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPWrongE, PCNext1F); + mux2 #(`XLEN) pcmux1(PC0NextF, PCCorrectE, BPWrongE, PC1NextF); // Correct branch/jump target. mux2 #(`XLEN) pccorrectemux(PCLinkE, IEUAdrE, PCSrcE, PCCorrectE); // If the fence/csrw was predicted as a taken branch then we select PCF, rather PCE. // Effectively this is PCM+4 or the non-existant PCLinkM - if(`INSTR_CLASS_PRED) mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPPredWrongM, NextValidPCE); + if(`INSTR_CLASS_PRED) mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPWrongM, NextValidPCE); else assign NextValidPCE = PCE; if(`ZICOUNTERS_SUPPORTED) begin diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 12b1b8e8d..c85bb94d0 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -55,11 +55,11 @@ module ifu ( input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address output logic [`XLEN-1:0] PCE, // Execution stage instruction address output logic BPWrongE, // Prediction is wrong - output logic BPPredWrongM, // Prediction is wrong + output logic BPWrongM, // Prediction is wrong // Mem output logic CommittedF, // I$ or bus memory operation started, delay interrupts input logic [`XLEN-1:0] UnalignedPCNextF, // The next PCF, but not aligned to 2 bytes. 
- output logic [`XLEN-1:0] PCNext2F, // Selected PC between branch prediction and next valid PC if CSRWriteFence + output logic [`XLEN-1:0] PC2NextF, // Selected PC between branch prediction and next valid PC if CSRWriteFence output logic [31:0] InstrD, // The decoded instruction in Decode stage output logic [31:0] InstrM, // The decoded instruction in Memory stage output logic [`XLEN-1:0] PCM, // Memory stage instruction address @@ -132,7 +132,7 @@ module ifu ( logic IFUCacheBusStallD; // EIther I$ or bus busy with multicycle operation logic GatedStallD; // StallD gated by selected next spill // branch predictor signal - logic [`XLEN-1:0] PCNext1F; // Branch predictor next PCF + logic [`XLEN-1:0] PC1NextF; // Branch predictor next PCF logic BusCommittedF; // Bus memory operation in flight, delay interrupts logic CacheCommittedF; // I$ memory operation started, delay interrupts logic SelIROM; // PMA indicates instruction address is in the IROM @@ -297,8 +297,8 @@ module ifu ( //////////////////////////////////////////////////////////////////////////////////////////////// if(`ZICSR_SUPPORTED | `ZIFENCEI_SUPPORTED) - mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(NextValidPCE), .s(CSRWriteFenceM),.y(PCNext2F)); - else assign PCNext2F = PCNext1F; + mux2 #(`XLEN) pcmux2(.d0(PC1NextF), .d1(NextValidPCE), .s(CSRWriteFenceM),.y(PC2NextF)); + else assign PC2NextF = PC1NextF; assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 
21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); @@ -330,12 +330,12 @@ module ifu ( .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, .BranchD, .BranchE, .JumpD, .JumpE, - .InstrD, .PCNextF, .PCPlus2or4F, .PCNext1F, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, - .PCD, .PCLinkE, .InstrClassM, .BPWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPPredWrongM, + .InstrD, .PCNextF, .PCPlus2or4F, .PC1NextF, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, + .PCD, .PCLinkE, .InstrClassM, .BPWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPWrongM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM); end else begin : bpred - mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PCNext1F)); + mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PC1NextF)); assign BPWrongE = PCSrcE; assign {InstrClassM, BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM} = '0; assign NextValidPCE = PCE; diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index ee3d947fd..3575a4ffc 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -37,7 +37,7 @@ module csr #(parameter input logic FlushM, FlushW, input logic StallE, StallM, StallW, input logic [31:0] InstrM, // current instruction - input logic [`XLEN-1:0] PCM, PCNext2F, // program counter, next PC going to trap/return logic + input logic [`XLEN-1:0] PCM, PC2NextF, // program counter, next PC going to trap/return logic input logic [`XLEN-1:0] SrcAM, IEUAdrM, // SrcA and memory address from IEU input logic CSRReadM, CSRWriteM, // read or write CSR input logic TrapM, // trap is occurring @@ -61,7 +61,7 @@ module csr #(parameter input logic BTBPredPCWrongM, input logic RASPredPCWrongM, input logic PredictionInstrClassWrongM, - input logic BPPredWrongM, // branch predictor is wrong + input logic 
BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, @@ -155,7 +155,7 @@ module csr #(parameter // A return sets the PC to MEPC or SEPC assign RetM = mretM | sretM; mux2 #(`XLEN) epcmux(SEPC_REGW, MEPC_REGW, mretM, EPC); - mux3 #(`XLEN) pcmux3(PCNext2F, EPC, TrapVectorM, {TrapM, RetM}, UnalignedPCNextF); + mux3 #(`XLEN) pcmux3(PC2NextF, EPC, TrapVectorM, {TrapM, RetM}, UnalignedPCNextF); /////////////////////////////////////////// // CSRWriteValM @@ -259,7 +259,7 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, .InstrValidNotFlushedM, .LoadStallD, .CSRMWriteM, - .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM, .JumpOrTakenBranchM, .BPPredWrongM, + .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM, .JumpOrTakenBranchM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 5cc58ce34..eae514433 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -48,7 +48,7 @@ module csrc #(parameter input logic BTBPredPCWrongM, input logic RASPredPCWrongM, input logic PredictionInstrClassWrongM, - input logic BPPredWrongM, // branch predictor is wrong + input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, @@ -97,7 +97,7 @@ module csrc #(parameter assign CounterEvent[12] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss assign CounterEvent[13] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access assign CounterEvent[14] = ICacheMiss; // instruction cache miss. 
Miss asserted 1 cycle at start of cache miss - assign CounterEvent[15] = BPPredWrongM & InstrValidNotFlushedM; // branch predictor wrong + assign CounterEvent[15] = BPWrongM & InstrValidNotFlushedM; // branch predictor wrong assign CounterEvent[`COUNTERS-1:16] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 93b7f9729..37904557d 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -38,7 +38,7 @@ module privileged ( input logic [`XLEN-1:0] SrcAM, // GPR register to write input logic [31:0] InstrM, // Instruction input logic [`XLEN-1:0] IEUAdrM, // address from IEU - input logic [`XLEN-1:0] PCM, PCNext2F, // program counter, next PC going to trap/return PC logic + input logic [`XLEN-1:0] PCM, PC2NextF, // program counter, next PC going to trap/return PC logic // control signals input logic InstrValidM, // Current instruction is valid (not flushed) input logic CommittedM, CommittedF, // current instruction is using bus; don't interrupt @@ -50,7 +50,7 @@ module privileged ( input logic BTBPredPCWrongM, // branch predictor guessed wrong target input logic RASPredPCWrongM, // return adddress stack guessed wrong target input logic PredictionInstrClassWrongM, // branch predictor guessed wrong instruction class - input logic BPPredWrongM, // branch predictor is wrong + input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, // actual instruction class input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, // data cache miss @@ -121,11 +121,11 @@ module privileged ( // Control and Status Registers csr csr(.clk, .reset, .FlushM, .FlushW, .StallE, .StallM, .StallW, - .InstrM, .PCM, .SrcAM, .IEUAdrM, .PCNext2F, + .InstrM, .PCM, .SrcAM, .IEUAdrM, .PC2NextF, .CSRReadM, .CSRWriteM, .TrapM, .mretM, .sretM, .wfiM, .IntPendingM, .InterruptM, .MTimerInt, .MExtInt, 
.SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, - .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPPredWrongM, + .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, .PredictionInstrClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .JumpOrTakenBranchM, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 5882aa653..387212b97 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -66,7 +66,7 @@ module wallypipelinedcore ( logic [`XLEN-1:0] PCFSpill, PCE, PCLinkE; logic [`XLEN-1:0] PCM; logic [`XLEN-1:0] CSRReadValW, MDUResultW; - logic [`XLEN-1:0] UnalignedPCNextF, PCNext2F; + logic [`XLEN-1:0] UnalignedPCNextF, PC2NextF; logic [1:0] MemRWM; logic InstrValidD, InstrValidE, InstrValidM; logic InstrMisalignedFaultM; @@ -140,7 +140,7 @@ module wallypipelinedcore ( logic LSUHWRITE; logic LSUHREADY; - logic BPWrongE, BPPredWrongM; + logic BPWrongE, BPWrongM; logic BPDirPredWrongM; logic BTBPredPCWrongM; logic RASPredPCWrongM; @@ -169,11 +169,11 @@ module wallypipelinedcore ( .InstrValidM, .InstrValidE, .InstrValidD, .BranchD, .BranchE, .JumpD, .JumpE, // Fetch - .HRDATA, .PCFSpill, .IFUHADDR, .PCNext2F, + .HRDATA, .PCFSpill, .IFUHADDR, .PC2NextF, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE, .ICacheAccess, .ICacheMiss, // Execute - .PCLinkE, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCE, .BPWrongE, .BPPredWrongM, + .PCLinkE, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCE, .BPWrongE, .BPWrongM, // Mem .CommittedF, .UnalignedPCNextF, .InvalidateICacheM, .CSRWriteFenceM, .InstrD, .InstrM, .PCM, .InstrClassM, .BPDirPredWrongM, .JumpOrTakenBranchM, @@ -284,12 +284,12 @@ module wallypipelinedcore ( privileged priv( .clk, .reset, .FlushD, .FlushE, .FlushM, .FlushW, .StallD, .StallE, .StallM, .StallW, - .CSRReadM, .CSRWriteM, .SrcAM, 
.PCM, .PCNext2F, + .CSRReadM, .CSRWriteM, .SrcAM, .PCM, .PC2NextF, .InstrM, .CSRReadValW, .UnalignedPCNextF, .RetM, .TrapM, .sfencevmaM, .InstrValidM, .CommittedM, .CommittedF, .FRegWriteM, .LoadStallD, - .BPDirPredWrongM, .BTBPredPCWrongM, .BPPredWrongM, + .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM, .InstrClassM, .JumpOrTakenBranchM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, .InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM, @@ -304,7 +304,7 @@ module wallypipelinedcore ( .FRM_REGW,.BreakpointFaultM, .EcallFaultM, .WFIStallM, .BigEndianM); end else begin assign CSRReadValW = 0; - assign UnalignedPCNextF = PCNext2F; + assign UnalignedPCNextF = PC2NextF; assign RetM = 0; assign TrapM = 0; assign WFIStallM = 0; From 87013ccaf0d998005105fae292db274dd9136f16 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 28 Feb 2023 15:57:34 -0600 Subject: [PATCH 37/55] Found the performance bug with the branch predictor btb power saving update. --- src/ifu/bpred/bpred.sv | 4 +++- src/ifu/bpred/btb.sv | 2 +- src/ifu/bpred/icpred.sv | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 49c9a02c0..72d2f2164 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -97,6 +97,8 @@ module bpred ( logic BranchW, JumpW, ReturnW, CallW; logic WrongBPReturnD; logic [`XLEN-1:0] BTAE; + + // Part 1 branch direction prediction // look into the 2 port Sram model. something is wrong. 
@@ -161,7 +163,7 @@ module bpred ( icpred #(`INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW, .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF, - .BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .PredictionInstrClassWrongM, .WrongBPReturnD); + .BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .PredictionInstrClassWrongM, .AnyWrongPredInstrClassE, .WrongBPReturnD); // Part 3 RAS RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 31c3fed58..908288e2e 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -97,7 +97,7 @@ module btb #(parameter Depth = 10 ) ( // An optimization may be using a PC relative address. ram2p1r1wbe #(2**Depth, `XLEN+4) memory( .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF), - .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1)); + .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1)); assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; diff --git a/src/ifu/bpred/icpred.sv b/src/ifu/bpred/icpred.sv index f6b0f7d11..c37c3938c 100644 --- a/src/ifu/bpred/icpred.sv +++ b/src/ifu/bpred/icpred.sv @@ -42,10 +42,10 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)( output logic ReturnD, ReturnE, ReturnM, ReturnW, input logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF, output logic BPCallF, BPReturnF, BPJumpF, BPBranchF, - output logic PredictionInstrClassWrongM, WrongBPReturnD + output logic PredictionInstrClassWrongM, WrongBPReturnD, AnyWrongPredInstrClassE ); - logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; + logic AnyWrongPredInstrClassD; logic BPBranchD, 
BPJumpD, BPReturnD, BPCallD; if (!INSTR_CLASS_PRED) begin : DirectClassDecode From bd6a1dcf4096144c1607499dc51354cf332ead22 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 28 Feb 2023 15:03:59 -0800 Subject: [PATCH 38/55] Pulled to latest commit of riscv-arch-test --- addins/riscv-arch-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index ee028eb32..a3b7f0c2c 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit ee028eb325525148a34420a4ca7959b24220a91e +Subproject commit a3b7f0c2cf89652b8a0cba3146890c512ff8ba44 From 2773048bd40518c6bb5bb138529b44df29553cf4 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 28 Feb 2023 17:48:58 -0600 Subject: [PATCH 39/55] Name cleanup. --- src/ifu/bpred/bpred.sv | 10 +++++----- src/ifu/bpred/btb.sv | 8 ++++---- src/ifu/bpred/icpred.sv | 10 +++++----- src/ifu/ifu.sv | 6 +++--- src/privileged/csr.sv | 4 ++-- src/privileged/csrc.sv | 4 ++-- src/privileged/privileged.sv | 4 ++-- src/wally/wallypipelinedcore.sv | 6 +++--- 8 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 72d2f2164..1074aea66 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -67,14 +67,14 @@ module bpred ( output logic BPDirPredWrongM, // Prediction direction is wrong output logic BTBPredPCWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong - output logic PredictionInstrClassWrongM // Class prediction is wrong + output logic IClassWrongM // Class prediction is wrong ); logic [1:0] BPDirPredF; logic [`XLEN-1:0] BTAF, RASPCF; logic BPPCWrongE; - logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; + logic IClassWrongE; logic BPDirPredWrongE; logic BPPCSrcF; @@ -153,7 +153,7 @@ module bpred ( .PCNextF, .PCF, .PCD, .PCE, .PCM, .BTAF, .BTAD, .BTAE, .BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}), - 
.PredictionInstrClassWrongM, .AnyWrongPredInstrClassE, + .IClassWrongM, .IClassWrongE, .IEUAdrE, .IEUAdrM, .InstrClassD({CallD, ReturnD, JumpD, BranchD}), .InstrClassE({CallE, ReturnE, JumpE, BranchE}), @@ -163,7 +163,7 @@ module bpred ( icpred #(`INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW, .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF, - .BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .PredictionInstrClassWrongM, .AnyWrongPredInstrClassE, .WrongBPReturnD); + .BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .IClassWrongM, .IClassWrongE, .WrongBPReturnD); // Part 3 RAS RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, @@ -200,7 +200,7 @@ module bpred ( logic [`XLEN-1:0] RASPCD, RASPCE; logic BTBPredPCWrongE, RASPredPCWrongE; // performance counters - // 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now + // 1. class (class wrong / minstret) (IClassWrongM / csr) // Correct now // 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal) // 3. target ras (ras target wrong / class[2]) // 4. 
direction (br dir wrong / class[0]) diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 908288e2e..19c8c221a 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -40,8 +40,8 @@ module btb #(parameter Depth = 10 ) ( output logic [`XLEN-1:0] BTAE, output logic [3:0] BTBIClassF, // BTB's guess at instruction class // update - input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong - input logic AnyWrongPredInstrClassE, + input logic IClassWrongM, // BTB's instruction class guess was wrong + input logic IClassWrongE, input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb input logic [3:0] InstrClassD, // Instruction class to insert into btb @@ -99,7 +99,7 @@ module btb #(parameter Depth = 10 ) ( .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF), .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1)); - assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; + assign UpdateEn = |InstrClassM | IClassWrongM; flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); @@ -108,7 +108,7 @@ module btb #(parameter Depth = 10 ) ( // 2. BTAWrongE is used by the performance counters to track when the BTB's BTA or instruction class is wrong. 
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); assign BTAWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]); - assign BTBWrongE = BTAWrongE | AnyWrongPredInstrClassE; + assign BTBWrongE = BTAWrongE | IClassWrongE; flopenrc #(1) BTBWrongMReg(clk, reset, FlushM, ~StallM, BTBWrongE, BTBWrongM); flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); diff --git a/src/ifu/bpred/icpred.sv b/src/ifu/bpred/icpred.sv index c37c3938c..53b612cb0 100644 --- a/src/ifu/bpred/icpred.sv +++ b/src/ifu/bpred/icpred.sv @@ -42,10 +42,10 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)( output logic ReturnD, ReturnE, ReturnM, ReturnW, input logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF, output logic BPCallF, BPReturnF, BPJumpF, BPBranchF, - output logic PredictionInstrClassWrongM, WrongBPReturnD, AnyWrongPredInstrClassE + output logic IClassWrongM, WrongBPReturnD, IClassWrongE ); - logic AnyWrongPredInstrClassD; + logic IClassWrongD; logic BPBranchD, BPJumpD, BPReturnD, BPCallD; if (!INSTR_CLASS_PRED) begin : DirectClassDecode @@ -93,14 +93,14 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)( flopenrc #(4) InstrClassRegW(clk, reset, FlushM, ~StallW, {CallM, ReturnM, JumpM, BranchM}, {CallW, ReturnW, JumpW, BranchW}); // branch predictor - flopenrc #(1) BPClassWrongRegM(clk, reset, FlushM, ~StallM, AnyWrongPredInstrClassE, PredictionInstrClassWrongM); - flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); + flopenrc #(1) BPClassWrongRegM(clk, reset, FlushM, ~StallM, IClassWrongE, IClassWrongM); + flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, IClassWrongD, IClassWrongE); // pipeline the predicted class flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, {BPCallF, BPReturnF, BPJumpF, BPBranchF}, {BPCallD, BPReturnD, BPJumpD, BPBranchD}); // branch class prediction wrong. 
- assign AnyWrongPredInstrClassD = |({BPCallD, BPReturnD, BPJumpD, BPBranchD} ^ {CallD, ReturnD, JumpD, BranchD}); + assign IClassWrongD = |({BPCallD, BPReturnD, BPJumpD, BPBranchD} ^ {CallD, ReturnD, JumpD, BranchD}); assign WrongBPReturnD = BPReturnD ^ ReturnD; endmodule diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index c85bb94d0..b049a956f 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -69,7 +69,7 @@ module ifu ( output logic BPDirPredWrongM, // Prediction direction is wrong output logic BTBPredPCWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong - output logic PredictionInstrClassWrongM, // Class prediction is wrong + output logic IClassWrongM, // Class prediction is wrong // Faults input logic IllegalBaseInstrD, // Illegal non-compressed instruction input logic IllegalFPUInstrD, // Illegal FP instruction @@ -332,12 +332,12 @@ module ifu ( .BranchD, .BranchE, .JumpD, .JumpE, .InstrD, .PCNextF, .PCPlus2or4F, .PC1NextF, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, .PCD, .PCLinkE, .InstrClassM, .BPWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPWrongM, - .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM); + .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM); end else begin : bpred mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PC1NextF)); assign BPWrongE = PCSrcE; - assign {InstrClassM, BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM} = '0; + assign {InstrClassM, BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM, IClassWrongM} = '0; assign NextValidPCE = PCE; end diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 3575a4ffc..a54c05b2a 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -60,7 +60,7 @@ module csr #(parameter input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, - input logic PredictionInstrClassWrongM, + input logic 
IClassWrongM, input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, input logic JumpOrTakenBranchM, // actual instruction class @@ -259,7 +259,7 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, .InstrValidNotFlushedM, .LoadStallD, .CSRMWriteM, - .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM, .JumpOrTakenBranchM, .BPWrongM, + .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .JumpOrTakenBranchM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index eae514433..f2b4b0e71 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -47,7 +47,7 @@ module csrc #(parameter input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, - input logic PredictionInstrClassWrongM, + input logic IClassWrongM, input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, input logic JumpOrTakenBranchM, // actual instruction class @@ -92,7 +92,7 @@ module csrc #(parameter assign CounterEvent[7] = JumpOrTakenBranchM & InstrValidNotFlushedM; // jump or taken branch instructions assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address assign CounterEvent[9] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions - assign CounterEvent[10] = PredictionInstrClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong + assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong assign CounterEvent[11] = DCacheAccess & InstrValidNotFlushedM; // data cache access assign CounterEvent[12] = DCacheMiss; // data cache miss. 
Miss asserted 1 cycle at start of cache miss assign CounterEvent[13] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 37904557d..f7a3caad6 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -49,7 +49,7 @@ module privileged ( input logic BPDirPredWrongM, // branch predictor guessed wrong directoin input logic BTBPredPCWrongM, // branch predictor guessed wrong target input logic RASPredPCWrongM, // return adddress stack guessed wrong target - input logic PredictionInstrClassWrongM, // branch predictor guessed wrong instruction class + input logic IClassWrongM, // branch predictor guessed wrong instruction class input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, // actual instruction class input logic JumpOrTakenBranchM, // actual instruction class @@ -126,7 +126,7 @@ module privileged ( .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, - .PredictionInstrClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .JumpOrTakenBranchM, + .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .JumpOrTakenBranchM, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, .STATUS_MIE, .STATUS_SIE, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TW, .STATUS_FS, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 387212b97..8f8885934 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -144,7 +144,7 @@ module wallypipelinedcore ( logic BPDirPredWrongM; logic BTBPredPCWrongM; logic RASPredPCWrongM; - logic PredictionInstrClassWrongM; + logic IClassWrongM; logic [3:0] InstrClassM; logic InstrAccessFaultF, HPTWInstrAccessFaultM; logic [2:0] 
LSUHSIZE; @@ -177,7 +177,7 @@ module wallypipelinedcore ( // Mem .CommittedF, .UnalignedPCNextF, .InvalidateICacheM, .CSRWriteFenceM, .InstrD, .InstrM, .PCM, .InstrClassM, .BPDirPredWrongM, .JumpOrTakenBranchM, - .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM, + .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, // Faults out .IllegalBaseInstrD, .IllegalFPUInstrD, .InstrPageFaultF, .IllegalIEUFPUInstrD, .InstrMisalignedFaultM, // mmu management @@ -290,7 +290,7 @@ module wallypipelinedcore ( .InstrValidM, .CommittedM, .CommittedF, .FRegWriteM, .LoadStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, - .RASPredPCWrongM, .PredictionInstrClassWrongM, + .RASPredPCWrongM, .IClassWrongM, .InstrClassM, .JumpOrTakenBranchM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, .InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM, .InstrMisalignedFaultM, .IllegalIEUFPUInstrD, From 72b92e8c0d0b1cc425b950a174945ce0fa20479b Mon Sep 17 00:00:00 2001 From: eroom1966 Date: Wed, 1 Mar 2023 15:37:11 +0000 Subject: [PATCH 40/55] update testbench for memory privileges also update configuration to define value of mimpid --- sim/imperas.ic | 7 +++++ testbench/testbench_imperas.sv | 57 +++++++++++++++++++++++++++++----- 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/sim/imperas.ic b/sim/imperas.ic index d28234bbc..69d57f702 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -1,3 +1,7 @@ +#--showoverrides +#--help --helpall +--traceregs + --override cpu/show_c_prefix=T --override cpu/unaligned=F --override cpu/ignore_non_leaf_DAU=1 @@ -33,3 +37,6 @@ # ignore settings of bits DAU for non leaf page table walks --override cpu/ignore_non_leaf_DAU=1 + +# mimpid = 0x100 +--override cpu/mimpid=0x100 diff --git a/testbench/testbench_imperas.sv b/testbench/testbench_imperas.sv index f26339007..cbca49275 100644 --- a/testbench/testbench_imperas.sv +++ b/testbench/testbench_imperas.sv @@ -137,7 +137,12 @@ module testbench; .CMP_CSR (1) ) 
idv_trace2api(rvvi); + int PRIV_RWX = RVVI_MEMORY_PRIVILEGE_READ | RVVI_MEMORY_PRIVILEGE_WRITE | RVVI_MEMORY_PRIVILEGE_EXEC; + int PRIV_RW = RVVI_MEMORY_PRIVILEGE_READ | RVVI_MEMORY_PRIVILEGE_WRITE; + int PRIV_X = RVVI_MEMORY_PRIVILEGE_EXEC; + initial begin + MAX_ERRS = 3; // Initialize REF (do this before initializing the DUT) @@ -158,6 +163,41 @@ module testbench; void'(rvviRefCsrSetVolatile(0, 32'hC02)); // INSTRET void'(rvviRefCsrSetVolatile(0, 32'hB02)); // MINSTRET void'(rvviRefCsrSetVolatile(0, 32'hC01)); // TIME + + // cannot predict this register due to latency between + // pending and taken + void'(rvviRefCsrSetVolatile(0, 32'h344)); + rvviRefCsrCompareEnable(0, 32'h344, RVVI_FALSE); + + // Memory lo, hi, priv (RVVI_MEMORY_PRIVILEGE_{READ,WRITE,EXEC}) + void'(rvviRefMemorySetPrivilege(56'h0, 56'h7fffffffff, 0)); + if (`BOOTROM_SUPPORTED) + void'(rvviRefMemorySetPrivilege(`BOOTROM_BASE, (`BOOTROM_BASE + `BOOTROM_RANGE), PRIV_X)); + if (`UNCORE_RAM_SUPPORTED) + void'(rvviRefMemorySetPrivilege(`UNCORE_RAM_BASE, (`UNCORE_RAM_BASE + `UNCORE_RAM_RANGE), PRIV_RWX)); + if (`EXT_MEM_SUPPORTED) + void'(rvviRefMemorySetPrivilege(`EXT_MEM_BASE, (`EXT_MEM_BASE + `EXT_MEM_RANGE), PRIV_RWX)); + + if (`CLINT_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE))); + end + if (`GPIO_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`GPIO_BASE, (`GPIO_BASE + `GPIO_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`GPIO_BASE, (`GPIO_BASE + `GPIO_RANGE))); + end + if (`UART_SUPPORTED) begin + void'(rvviRefMemorySetVolatile(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE))); + void'(rvviRefMemorySetPrivilege(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE), PRIV_RW)); + end + if (`PLIC_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`PLIC_BASE, (`PLIC_BASE + `PLIC_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`PLIC_BASE, (`PLIC_BASE + `PLIC_RANGE))); + 
end + if (`SDC_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`SDC_BASE, (`SDC_BASE + `SDC_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`SDC_BASE, (`SDC_BASE + `SDC_RANGE))); + end if(`XLEN==32) begin void'(rvviRefCsrSetVolatile(0, 32'hC80)); // CYCLEH @@ -166,14 +206,15 @@ module testbench; void'(rvviRefCsrSetVolatile(0, 32'hB82)); // MINSTRETH end - // Enable the trace2log module - if ($value$plusargs("TRACE2LOG_ENABLE=%d", TRACE2LOG_ENABLE)) begin - msgnote($sformatf("%m @ t=%0t: TRACE2LOG_ENABLE is %0d", $time, TRACE2LOG_ENABLE)); - end - - if ($value$plusargs("TRACE2COV_ENABLE=%d", TRACE2COV_ENABLE)) begin - msgnote($sformatf("%m @ t=%0t: TRACE2COV_ENABLE is %0d", $time, TRACE2COV_ENABLE)); - end + // These should be done in the attached client +// // Enable the trace2log module +// if ($value$plusargs("TRACE2LOG_ENABLE=%d", TRACE2LOG_ENABLE)) begin +// msgnote($sformatf("%m @ t=%0t: TRACE2LOG_ENABLE is %0d", $time, TRACE2LOG_ENABLE)); +// end +// +// if ($value$plusargs("TRACE2COV_ENABLE=%d", TRACE2COV_ENABLE)) begin +// msgnote($sformatf("%m @ t=%0t: TRACE2COV_ENABLE is %0d", $time, TRACE2COV_ENABLE)); +// end end final begin From dd2433f7ff49728d4b243fe868f3080bd2b30a19 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 1 Mar 2023 10:45:40 -0600 Subject: [PATCH 41/55] Minor fix to btb. --- src/ifu/bpred/btb.sv | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 19c8c221a..d0723cd9b 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -57,8 +57,8 @@ module btb #(parameter Depth = 10 ) ( logic [`XLEN+3:0] TableBTBPredF; logic [`XLEN-1:0] IEUAdrW; logic [`XLEN-1:0] PCW; - logic BTAWrongE, BTBWrongE; - logic BTBWrongM; + logic BTBWrongE, BTAWrongE; + logic BTBWrongM, BTAWrongM; // hashing function for indexing the PC @@ -108,9 +108,11 @@ module btb #(parameter Depth = 10 ) ( // 2. 
BTAWrongE is used by the performance counters to track when the BTB's BTA or instruction class is wrong. flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); assign BTAWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]); - assign BTBWrongE = BTAWrongE | IClassWrongE; - flopenrc #(1) BTBWrongMReg(clk, reset, FlushM, ~StallM, BTBWrongE, BTBWrongM); - + //assign BTBWrongE = BTAWrongE | IClassWrongE; + //flopenrc #(1) BTBWrongMReg(clk, reset, FlushM, ~StallM, BTBWrongE, BTBWrongM); + flopenrc #(1) BTAWrongMReg(clk, reset, FlushM, ~StallM, BTAWrongE, BTAWrongM); + assign BTBWrongM = BTAWrongM | IClassWrongM; + flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); From 08a1153ae91a12fc561b634a9a99cb29324871c0 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 1 Mar 2023 10:47:00 -0600 Subject: [PATCH 42/55] More btb cleanup. --- src/ifu/bpred/btb.sv | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index d0723cd9b..b14399704 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -92,15 +92,11 @@ module btb #(parameter Depth = 10 ) ( assign {BTBIClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF}; - logic UpdateEn; - // An optimization may be using a PC relative address. ram2p1r1wbe #(2**Depth, `XLEN+4) memory( .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF), .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1)); - assign UpdateEn = |InstrClassM | IClassWrongM; - flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); // BTAE is not strickly necessary. However it is used by two parts of wally. @@ -108,8 +104,7 @@ module btb #(parameter Depth = 10 ) ( // 2. BTAWrongE is used by the performance counters to track when the BTB's BTA or instruction class is wrong. 
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); assign BTAWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]); - //assign BTBWrongE = BTAWrongE | IClassWrongE; - //flopenrc #(1) BTBWrongMReg(clk, reset, FlushM, ~StallM, BTBWrongE, BTBWrongM); + flopenrc #(1) BTAWrongMReg(clk, reset, FlushM, ~StallM, BTAWrongE, BTAWrongM); assign BTBWrongM = BTAWrongM | IClassWrongM; From e8744684cd0500aac962c79c5f2ddb3acf32de64 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 1 Mar 2023 11:24:24 -0600 Subject: [PATCH 43/55] Branch predictor cleanup. I think Ch 10 is now done except for BTB performance analysis and the section on running benchmarks and collecting data. --- src/ifu/bpred/RASPredictor.sv | 4 ++-- src/ifu/bpred/bpred.sv | 6 +++--- src/ifu/bpred/icpred.sv | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 72c594556..d5fd0c019 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -33,7 +33,7 @@ module RASPredictor #(parameter int StackSize = 16 )( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, - input logic WrongBPReturnD, // Prediction class is wrong + input logic BPReturnWrongD, // Prediction class is wrong input logic ReturnD, input logic ReturnE, CallE, // Instr class input logic BPReturnF, @@ -61,7 +61,7 @@ module RASPredictor #(parameter int StackSize = 16 )( assign PopF = BPReturnF & ~StallD & ~FlushD; assign PushE = CallE & ~StallM & ~FlushM; - assign WrongPredReturnD = (WrongBPReturnD) & ~StallE & ~FlushE; + assign WrongPredReturnD = (BPReturnWrongD) & ~StallE & ~FlushE; assign FlushedReturnDE = (~StallE & FlushE & ReturnD) | (~StallM & FlushM & ReturnE); // flushed return assign RepairD = WrongPredReturnD | FlushedReturnDE ; diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 1074aea66..84a50c621 100644 --- 
a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -95,7 +95,7 @@ module bpred ( logic ReturnE, CallE; logic BranchM, JumpM, ReturnM, CallM; logic BranchW, JumpW, ReturnW, CallW; - logic WrongBPReturnD; + logic BPReturnWrongD; logic [`XLEN-1:0] BTAE; @@ -163,12 +163,12 @@ module bpred ( icpred #(`INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW, .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF, - .BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .IClassWrongM, .IClassWrongE, .WrongBPReturnD); + .BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .IClassWrongM, .IClassWrongE, .BPReturnWrongD); // Part 3 RAS RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, .BPReturnF, .ReturnD, .ReturnE, .CallE, - .WrongBPReturnD, .RASPCF, .PCLinkE); + .BPReturnWrongD, .RASPCF, .PCLinkE); // Check the prediction // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. diff --git a/src/ifu/bpred/icpred.sv b/src/ifu/bpred/icpred.sv index 53b612cb0..14e7c8d89 100644 --- a/src/ifu/bpred/icpred.sv +++ b/src/ifu/bpred/icpred.sv @@ -42,7 +42,7 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)( output logic ReturnD, ReturnE, ReturnM, ReturnW, input logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF, output logic BPCallF, BPReturnF, BPJumpF, BPBranchF, - output logic IClassWrongM, WrongBPReturnD, IClassWrongE + output logic IClassWrongM, BPReturnWrongD, IClassWrongE ); logic IClassWrongD; @@ -101,6 +101,6 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)( // branch class prediction wrong. 
assign IClassWrongD = |({BPCallD, BPReturnD, BPJumpD, BPBranchD} ^ {CallD, ReturnD, JumpD, BranchD}); - assign WrongBPReturnD = BPReturnD ^ ReturnD; + assign BPReturnWrongD = BPReturnD ^ ReturnD; endmodule From a61f8bc4cf54f2d485f924b997c950315319fa97 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 1 Mar 2023 11:52:42 -0600 Subject: [PATCH 44/55] Set bp to use instruction class prediction by default. --- src/ifu/bpred/bpred.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 84a50c621..92da8622e 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -28,7 +28,7 @@ `include "wally-config.vh" -`define INSTR_CLASS_PRED 0 +`define INSTR_CLASS_PRED 1 module bpred ( input logic clk, reset, From 3d1ffac7d7c31353ac6fd3c5048b11a802501b8f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 1 Mar 2023 16:40:42 -0600 Subject: [PATCH 45/55] Cleaned up branch predictor performance counters. --- config/rv32gc/wally-config.vh | 2 +- sim/sim-imperas | 2 +- src/ifu/bpred/bpred.sv | 9 ++------- src/ifu/ifu.sv | 5 ++--- src/privileged/csr.sv | 3 +-- src/privileged/csrc.sv | 5 ++--- src/privileged/privileged.sv | 3 +-- src/wally/wallypipelinedcore.sv | 5 ++--- 8 files changed, 12 insertions(+), 22 deletions(-) diff --git a/config/rv32gc/wally-config.vh b/config/rv32gc/wally-config.vh index ac68e3ee4..d7475cdbe 100644 --- a/config/rv32gc/wally-config.vh +++ b/config/rv32gc/wally-config.vh @@ -134,7 +134,7 @@ `define BPRED_SUPPORTED 1 `define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT -`define BPRED_SIZE 10 +`define BPRED_SIZE 16 `define BTB_SIZE 10 `define SVADU_SUPPORTED 0 diff --git a/sim/sim-imperas b/sim/sim-imperas index b6a7f1c47..aa1dc3a01 100755 --- a/sim/sim-imperas +++ b/sim/sim-imperas @@ -29,4 +29,4 @@ IMPERAS_TOOLS=$(pwd)/imperas.ic \ OTHERFLAGS="+TRACE2LOG_ENABLE=1 VERBOSE=1" \ 
TESTDIR=${WALLY}/tests/riscof/work/wally-riscv-arch-test/rv64i_m/privilege/src/Lee.S/ \ -vsim -do "do wally-pipelined-imperas.do rv64gc" +vsim -do "do wally-imperas.do rv64gc" diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 92da8622e..626da8964 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -59,7 +59,6 @@ module bpred ( input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as call, return, jr (not return), j, br - output logic JumpOrTakenBranchM, // The valid instruction class. 1-hot encoded as call, return, jr (not return), j, br // Report branch prediction status output logic BPWrongE, // Prediction is wrong @@ -196,7 +195,6 @@ module bpred ( else assign NextValidPCE = PCE; if(`ZICOUNTERS_SUPPORTED) begin - logic JumpOrTakenBranchE; logic [`XLEN-1:0] RASPCD, RASPCE; logic BTBPredPCWrongE, RASPredPCWrongE; // performance counters @@ -209,13 +207,10 @@ module bpred ( // could be wrong or the fall through address selected for branch predict not taken. // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of // both without the above inaccuracies. + // **** use BTAWrongM from BTB. 
assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE; assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE; - assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE; - - flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); - flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, @@ -223,7 +218,7 @@ module bpred ( {BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM}); end else begin - assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0; + assign {BTBPredPCWrongM, RASPredPCWrongM} = '0; end // **** Fix me diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index b049a956f..78cd8c072 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -65,7 +65,6 @@ module ifu ( output logic [`XLEN-1:0] PCM, // Memory stage instruction address // branch predictor output logic [3:0] InstrClassM, // The valid instruction class. 
1-hot encoded as jalr, ret, jr (not ret), j, br - output logic JumpOrTakenBranchM, output logic BPDirPredWrongM, // Prediction direction is wrong output logic BTBPredPCWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong @@ -88,7 +87,7 @@ module ifu ( input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries output logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk - output logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits + output logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP address from privileged unit output logic InstrAccessFaultF, // Instruction access fault @@ -331,7 +330,7 @@ module ifu ( .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, .BranchD, .BranchE, .JumpD, .JumpE, .InstrD, .PCNextF, .PCPlus2or4F, .PC1NextF, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, - .PCD, .PCLinkE, .InstrClassM, .BPWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPWrongM, + .PCD, .PCLinkE, .InstrClassM, .BPWrongE, .PostSpillInstrRawF, .BPWrongM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM); end else begin : bpred diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index a54c05b2a..58d00d0a1 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -63,7 +63,6 @@ module csr #(parameter input logic IClassWrongM, input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, - input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, input logic DCacheAccess, input logic ICacheMiss, @@ -259,7 +258,7 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, 
.reset, .StallE, .StallM, .FlushM, .InstrValidNotFlushedM, .LoadStallD, .CSRMWriteM, - .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .JumpOrTakenBranchM, .BPWrongM, + .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index f2b4b0e71..6ace0de30 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -50,7 +50,6 @@ module csrc #(parameter input logic IClassWrongM, input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, - input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, input logic DCacheAccess, input logic ICacheMiss, @@ -86,10 +85,10 @@ module csrc #(parameter assign CounterEvent[`COUNTERS-1:3] = 0; end else begin: cevent // User-defined counters assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. 
- assign CounterEvent[4] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction + assign CounterEvent[4] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; // branch instruction assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; // branch predictor wrong target - assign CounterEvent[7] = JumpOrTakenBranchM & InstrValidNotFlushedM; // jump or taken branch instructions + assign CounterEvent[7] = InstrClassM[1] & ~InstrClassM[2] & InstrValidNotFlushedM; // jump and not return instructions assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address assign CounterEvent[9] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index f7a3caad6..3cecc6fcd 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -52,7 +52,6 @@ module privileged ( input logic IClassWrongM, // branch predictor guessed wrong instruction class input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, // actual instruction class - input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, // data cache miss input logic DCacheAccess, // data cache accessed (hit or miss) input logic ICacheMiss, // instruction cache miss @@ -126,7 +125,7 @@ module privileged ( .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, - .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .JumpOrTakenBranchM, + .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, 
.STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, .STATUS_MIE, .STATUS_SIE, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TW, .STATUS_FS, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 8f8885934..be85420f7 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -160,7 +160,6 @@ module wallypipelinedcore ( logic BigEndianM; logic FCvtIntE; logic CommittedF; - logic JumpOrTakenBranchM; logic BranchD, BranchE, JumpD, JumpE; // instruction fetch unit: PC, branch prediction, instruction cache @@ -176,7 +175,7 @@ module wallypipelinedcore ( .PCLinkE, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCE, .BPWrongE, .BPWrongM, // Mem .CommittedF, .UnalignedPCNextF, .InvalidateICacheM, .CSRWriteFenceM, - .InstrD, .InstrM, .PCM, .InstrClassM, .BPDirPredWrongM, .JumpOrTakenBranchM, + .InstrD, .InstrM, .PCM, .InstrClassM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, // Faults out .IllegalBaseInstrD, .IllegalFPUInstrD, .InstrPageFaultF, .IllegalIEUFPUInstrD, .InstrMisalignedFaultM, @@ -291,7 +290,7 @@ module wallypipelinedcore ( .FRegWriteM, .LoadStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, .RASPredPCWrongM, .IClassWrongM, - .InstrClassM, .JumpOrTakenBranchM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, + .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, .InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM, .InstrMisalignedFaultM, .IllegalIEUFPUInstrD, .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, From 1169567219e64e0c09d97fe75976691100901737 Mon Sep 17 00:00:00 2001 From: eroom1966 Date: Thu, 2 Mar 2023 15:25:27 +0000 Subject: [PATCH 46/55] fix the memory map privileges in the REF model view --- sim/imperas.ic | 1 + testbench/testbench_imperas.sv | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/sim/imperas.ic b/sim/imperas.ic index 69d57f702..5e3357eac 100644 --- a/sim/imperas.ic +++ 
b/sim/imperas.ic @@ -5,6 +5,7 @@ --override cpu/show_c_prefix=T --override cpu/unaligned=F --override cpu/ignore_non_leaf_DAU=1 +--override cpu/wfi_is_nop=T # Enable the Imperas instruction coverage #-extlib refRoot/cpu/cv=imperas.com/intercept/riscvInstructionCoverage/1.0 diff --git a/testbench/testbench_imperas.sv b/testbench/testbench_imperas.sv index cbca49275..442edec22 100644 --- a/testbench/testbench_imperas.sv +++ b/testbench/testbench_imperas.sv @@ -149,10 +149,12 @@ module testbench; if (!rvviVersionCheck(RVVI_API_VERSION)) begin msgfatal($sformatf("%m @ t=%0t: Expecting RVVI API version %0d.", $time, RVVI_API_VERSION)); end - void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VENDOR, "riscv.ovpworld.org")); - void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_NAME, "riscv")); - void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VARIANT, "RV64GC")); - void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH, 39)); + void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VENDOR, "riscv.ovpworld.org")); + void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_NAME, "riscv")); + void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VARIANT, "RV64GC")); + void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH, 39)); + void'(rvviRefConfigSetInt(IDV_CONFIG_MAX_NET_LATENCY_RETIREMENTS, 6)); + if (!rvviRefInit(elffilename)) begin msgfatal($sformatf("%m @ t=%0t: rvviRefInit failed", $time)); end @@ -166,8 +168,8 @@ module testbench; // cannot predict this register due to latency between // pending and taken - void'(rvviRefCsrSetVolatile(0, 32'h344)); - rvviRefCsrCompareEnable(0, 32'h344, RVVI_FALSE); + void'(rvviRefCsrSetVolatile(0, 32'h344)); // MIP + void'(rvviRefCsrSetVolatile(0, 32'h144)); // SIP // Memory lo, hi, priv (RVVI_MEMORY_PRIVILEGE_{READ,WRITE,EXEC}) void'(rvviRefMemorySetPrivilege(56'h0, 56'h7fffffffff, 0)); @@ -187,8 +189,8 @@ module testbench; void'(rvviRefMemorySetVolatile(`GPIO_BASE, (`GPIO_BASE + `GPIO_RANGE))); end if (`UART_SUPPORTED) begin - 
void'(rvviRefMemorySetVolatile(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE))); - void'(rvviRefMemorySetPrivilege(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE), PRIV_RW)); + void'(rvviRefMemorySetPrivilege(`UART_BASE, (`UART_BASE + `UART_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`UART_BASE, (`UART_BASE + `UART_RANGE))); end if (`PLIC_SUPPORTED) begin void'(rvviRefMemorySetPrivilege(`PLIC_BASE, (`PLIC_BASE + `PLIC_RANGE), PRIV_RW)); @@ -206,6 +208,8 @@ module testbench; void'(rvviRefCsrSetVolatile(0, 32'hB82)); // MINSTRETH end + void'(rvviRefCsrSetVolatile(0, 32'h104)); // SIE - Temporary!!!! + // These should be done in the attached client // // Enable the trace2log module // if ($value$plusargs("TRACE2LOG_ENABLE=%d", TRACE2LOG_ENABLE)) begin @@ -217,6 +221,11 @@ module testbench; // end end + always @(dut.core.MTimerInt) void'(rvvi.net_push("MTimerInterrupt", dut.core.MTimerInt)); + always @(dut.core.MExtInt) void'(rvvi.net_push("MExternalInterrupt", dut.core.MExtInt)); + always @(dut.core.SExtInt) void'(rvvi.net_push("SExternalInterrupt", dut.core.SExtInt)); + always @(dut.core.MSwInt) void'(rvvi.net_push("MSWInterrupt", dut.core.MSwInt)); + final begin void'(rvviRefShutdown()); end From 9bac643db2cf6c2a4a5e7b78aa403008082ecb9f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 22:16:30 -0600 Subject: [PATCH 47/55] Added support for branch target buffer stats. 
--- bin/parseHPMC.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 7b695d34d..2da172773 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -156,7 +156,7 @@ def GeometricAverage(benchmarks, field): return Product ** (1.0/index) def ComputeGeometricAverage(benchmarks): - fields = ['BDMR', 'BTMR', 'RASMPR', 'ClassMPR', 'ICacheMR', 'DCacheMR'] + fields = ['BDMR', 'BTMR', 'RASMPR', 'ClassMPR', 'ICacheMR', 'DCacheMR', 'CPI'] AllAve = {} for field in fields: Product = 1 @@ -205,9 +205,9 @@ if(sys.argv[1] == '-b'): for benchmark in benchmarkAll: (name, opt, config, dataDict) = benchmark if name+'_'+opt in benchmarkDict: - benchmarkDict[name+'_'+opt].append((config, dataDict['BDMR'])) + benchmarkDict[name+'_'+opt].append((config, dataDict['BTMR'])) else: - benchmarkDict[name+'_'+opt] = [(config, dataDict['BDMR'])] + benchmarkDict[name+'_'+opt] = [(config, dataDict['BTMR'])] size = len(benchmarkDict) index = 1 @@ -272,7 +272,9 @@ if(sys.argv[1] == '-b'): else: # steps 1 and 2 benchmarks = ProcessFile(sys.argv[1]) - ComputeAverage(benchmarks) + print(benchmarks[0]) + ComputeAll(benchmarks) + ComputeGeometricAverage(benchmarks) # 3 process into useful data # cache hit rates # cache fill time @@ -280,7 +282,6 @@ else: # hazard counts # CPI # instruction distribution - ComputeAll(benchmarks) for benchmark in benchmarks: printStats(benchmark) From 983e30dcb1cb3fe58c28f64f8acecfacf8782f8a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 22:32:13 -0600 Subject: [PATCH 48/55] Fixed bug in performance counter script. 
--- bin/parseHPMC.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 2da172773..5b131237a 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -205,9 +205,9 @@ if(sys.argv[1] == '-b'): for benchmark in benchmarkAll: (name, opt, config, dataDict) = benchmark if name+'_'+opt in benchmarkDict: - benchmarkDict[name+'_'+opt].append((config, dataDict['BTMR'])) + benchmarkDict[name+'_'+opt].append((config, dataDict['BDMR'])) else: - benchmarkDict[name+'_'+opt] = [(config, dataDict['BTMR'])] + benchmarkDict[name+'_'+opt] = [(config, dataDict['BDMR'])] size = len(benchmarkDict) index = 1 @@ -248,11 +248,11 @@ if(sys.argv[1] == '-b'): dct[PredType] = (currSize, currPercent) print(dct) fig, axes = plt.subplots() - marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x'} - colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue'} + marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x'} + colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue'} for cat in dct: (x, y) = dct[cat] - x=[int(2**int(v)/4) for v in x] + x=[int(2**int(v)) for v in x] print(x, y) axes.plot(x,y, color=colors[cat]) axes.scatter(x,y, label=cat, marker=marker[cat], color=colors[cat]) @@ -262,9 +262,9 @@ if(sys.argv[1] == '-b'): axes.legend(loc='upper left') axes.set_xscale("log") axes.set_ylabel('Prediction Accuracy') - axes.set_xlabel('Size (bytes)') - axes.set_xticks([16, 64, 256, 1024, 4096, 16384]) - axes.set_xticklabels([16, 64, 256, 1024, 4096, 16384]) + axes.set_xlabel('Entries') + axes.set_xticks([64, 256, 1024, 4096, 16384, 65536]) + axes.set_xticklabels([64, 256, 1024, 4096, 16384, 65536]) axes.grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5) 
plt.show() From e257ec96ac733b8aa580ab3f2141dad4d15cef53 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 23:04:31 -0600 Subject: [PATCH 49/55] Reordered performance counters and added space for new ones. --- src/privileged/csrc.sv | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 6ace0de30..2ea0a0540 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -84,20 +84,29 @@ module csrc #(parameter if(`QEMU) begin: cevent // No other performance counters in QEMU assign CounterEvent[`COUNTERS-1:3] = 0; end else begin: cevent // User-defined counters - assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. - assign CounterEvent[4] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction - assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; // branch instruction - assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; // branch predictor wrong target - assign CounterEvent[7] = InstrClassM[1] & ~InstrClassM[2] & InstrValidNotFlushedM; // jump and not return instructions - assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address - assign CounterEvent[9] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions + assign CounterEvent[3] = InstrClassM[0] & InstrValidNotFlushedM; // branch instruction + assign CounterEvent[4] = InstrClassM[1] & ~InstrClassM[2] & InstrValidNotFlushedM; // jump and not return instructions + assign CounterEvent[5] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions + assign CounterEvent[6] = BPWrongM & InstrValidNotFlushedM; // branch predictor wrong + assign CounterEvent[7] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction + assign CounterEvent[8] = BTBPredPCWrongM & 
InstrValidNotFlushedM; // branch predictor wrong target + assign CounterEvent[9] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong - assign CounterEvent[11] = DCacheAccess & InstrValidNotFlushedM; // data cache access - assign CounterEvent[12] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[13] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access - assign CounterEvent[14] = ICacheMiss; // instruction cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[15] = BPWrongM & InstrValidNotFlushedM; // branch predictor wrong - assign CounterEvent[`COUNTERS-1:16] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions + assign CounterEvent[11] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. + assign CounterEvent[12] = '0 & InstrValidNotFlushedM; // /// ********** store + assign CounterEvent[13] = DCacheAccess & InstrValidNotFlushedM; // data cache access + assign CounterEvent[14] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss + assign CounterEvent[15] = '0; // //// ******* d cache miss cycles + assign CounterEvent[16] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access + assign CounterEvent[17] = ICacheMiss; // instruction cache miss. 
Miss asserted 1 cycle at start of cache miss + assign CounterEvent[18] = '0; // //// ******** i cache miss cycles + assign CounterEvent[19] = '0; // ******** CSR writes + assign CounterEvent[20] = '0; // ******** fence.i + assign CounterEvent[21] = '0; // ******** sfence.vma + assign CounterEvent[22] = '0; // ******** # interrupts + assign CounterEvent[23] = '0; // ******** # exceptions + assign CounterEvent[24] = '0; // ******** # division cycles + assign CounterEvent[`COUNTERS-1:25] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end // Counter update and write logic From cf4d8e6bd0904c8a97539b449eb03a39141b2ef1 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 23:10:54 -0600 Subject: [PATCH 50/55] Added store stall to performance counters. --- src/privileged/csr.sv | 3 ++- src/privileged/csrc.sv | 13 +++++++------ src/privileged/privileged.sv | 13 +++++++------ src/wally/wallypipelinedcore.sv | 2 +- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 58d00d0a1..50951bd09 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -57,6 +57,7 @@ module csr #(parameter input logic SelHPTW, // hardware page table walker active, so base endianness on supervisor mode // inputs for performance counters input logic LoadStallD, + input logic StoreStallD, input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, @@ -257,7 +258,7 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, - .InstrValidNotFlushedM, .LoadStallD, .CSRMWriteM, + .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRMWriteM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, diff --git a/src/privileged/csrc.sv 
b/src/privileged/csrc.sv index 2ea0a0540..7a166effa 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -43,7 +43,7 @@ module csrc #(parameter input logic clk, reset, input logic StallE, StallM, input logic FlushM, - input logic InstrValidNotFlushedM, LoadStallD, CSRMWriteM, + input logic InstrValidNotFlushedM, LoadStallD, CSRMWriteM, StoreStallD, input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, @@ -55,7 +55,7 @@ module csrc #(parameter input logic ICacheMiss, input logic ICacheAccess, input logic [11:0] CSRAdrM, - input logic [1:0] PrivilegeModeW, + input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, input logic [63:0] MTIME_CLINT, @@ -67,6 +67,7 @@ module csrc #(parameter logic [`XLEN-1:0] HPMCOUNTER_REGW[`COUNTERS-1:0]; logic [`XLEN-1:0] HPMCOUNTERH_REGW[`COUNTERS-1:0]; logic LoadStallE, LoadStallM; + logic StoreStallE, StoreStallM; logic [`COUNTERS-1:0] WriteHPMCOUNTERM; logic [`COUNTERS-1:0] CounterEvent; logic [63:0] HPMCOUNTERPlusM[`COUNTERS-1:0]; @@ -74,8 +75,8 @@ module csrc #(parameter genvar i; // Interface signals - flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); // don't flush the load stall during a load stall. - flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM)); + flopenrc #(2) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d({StoreStallD, LoadStallD}), .q({StoreStallE, LoadStallE})); // don't flush the load stall during a load stall. 
+ flopenrc #(2) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d({StoreStallE, LoadStallE}), .q({StoreStallM, LoadStallM})); // Determine when to increment each counter assign CounterEvent[0] = 1'b1; // MCYCLE always increments @@ -92,8 +93,8 @@ module csrc #(parameter assign CounterEvent[8] = BTBPredPCWrongM & InstrValidNotFlushedM; // branch predictor wrong target assign CounterEvent[9] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong - assign CounterEvent[11] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. - assign CounterEvent[12] = '0 & InstrValidNotFlushedM; // /// ********** store + assign CounterEvent[11] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. + assign CounterEvent[12] = StoreStallM & InstrValidNotFlushedM; // Store Stall assign CounterEvent[13] = DCacheAccess & InstrValidNotFlushedM; // data cache access assign CounterEvent[14] = DCacheMiss; // data cache miss. 
Miss asserted 1 cycle at start of cache miss assign CounterEvent[15] = '0; // //// ******* d cache miss cycles diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 3cecc6fcd..5f6c17752 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -46,11 +46,12 @@ module privileged ( // processor events for performance counter logging input logic FRegWriteM, // instruction will write floating-point registers input logic LoadStallD, // load instruction is stalling - input logic BPDirPredWrongM, // branch predictor guessed wrong directoin - input logic BTBPredPCWrongM, // branch predictor guessed wrong target - input logic RASPredPCWrongM, // return adddress stack guessed wrong target - input logic IClassWrongM, // branch predictor guessed wrong instruction class - input logic BPWrongM, // branch predictor is wrong + input logic StoreStallD, // load instruction is stalling + input logic BPDirPredWrongM, // branch predictor guessed wrong direction + input logic BTBPredPCWrongM, // branch predictor guessed wrong target + input logic RASPredPCWrongM, // return adddress stack guessed wrong target + input logic IClassWrongM, // branch predictor guessed wrong instruction class + input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, // actual instruction class input logic DCacheMiss, // data cache miss input logic DCacheAccess, // data cache accessed (hit or miss) @@ -123,7 +124,7 @@ module privileged ( .InstrM, .PCM, .SrcAM, .IEUAdrM, .PC2NextF, .CSRReadM, .CSRWriteM, .TrapM, .mretM, .sretM, .wfiM, .IntPendingM, .InterruptM, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, - .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, + .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, diff --git 
a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index be85420f7..6acd96924 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -287,7 +287,7 @@ module wallypipelinedcore ( .InstrM, .CSRReadValW, .UnalignedPCNextF, .RetM, .TrapM, .sfencevmaM, .InstrValidM, .CommittedM, .CommittedF, - .FRegWriteM, .LoadStallD, + .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, .RASPredPCWrongM, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, From 3dbfa96aefcaddcad9245b02e3dc6af188119514 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 23:21:29 -0600 Subject: [PATCH 51/55] Added csr write counter, sfence vma counter, interrupt counter, and exception counter. --- src/privileged/csr.sv | 7 +++++-- src/privileged/csrc.sv | 16 ++++++++++------ src/privileged/privileged.sv | 9 +++++---- src/privileged/trap.sv | 2 +- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 50951bd09..2630e0f76 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -44,6 +44,7 @@ module csr #(parameter input logic mretM, sretM, wfiM, // return or WFI instruction input logic IntPendingM, // at least one interrupt is pending and could occur if enabled input logic InterruptM, // interrupt is occurring + input logic ExceptionM, // exception is occurring input logic MTimerInt, // timer interrupt input logic MExtInt, SExtInt, // external interrupt (from PLIC) input logic MSwInt, // software interrupt @@ -68,6 +69,7 @@ module csr #(parameter input logic DCacheAccess, input logic ICacheMiss, input logic ICacheAccess, + input logic sfencevmaM, // outputs from CSRs output logic [1:0] STATUS_MPP, output logic STATUS_SPP, STATUS_TSR, STATUS_TVM, @@ -258,7 +260,7 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, -
.InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRMWriteM, + .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, - .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, + .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM, + .InterruptM, .ExceptionM, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, .MTIME_CLINT, .CSRCReadValM, .IllegalCSRCAccessM); diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 7a166effa..131be4ec6 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -43,7 +43,8 @@ module csrc #(parameter input logic clk, reset, input logic StallE, StallM, input logic FlushM, - input logic InstrValidNotFlushedM, LoadStallD, CSRMWriteM, StoreStallD, + input logic InstrValidNotFlushedM, LoadStallD, StoreStallD, + input logic CSRMWriteM, CSRWriteM, input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, @@ -54,6 +55,9 @@ module csrc #(parameter input logic DCacheAccess, input logic ICacheMiss, input logic ICacheAccess, + input logic sfencevmaM, + input logic InterruptM, + input logic ExceptionM, input logic [11:0] CSRAdrM, input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, @@ -100,12 +104,12 @@ module csrc #(parameter assign CounterEvent[15] = '0; // //// ******* d cache miss cycles assign CounterEvent[16] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access assign CounterEvent[17] = ICacheMiss; // instruction cache miss. 
Miss asserted 1 cycle at start of cache miss - assign CounterEvent[18] = '0; // //// ******** i cache miss cycles - assign CounterEvent[19] = '0; // ******** CSR writes + assign CounterEvent[18] = '0; // //// ******** i cache miss cycles + assign CounterEvent[19] = CSRWriteM & InstrValidNotFlushedM; // CSR writes assign CounterEvent[20] = '0; // ******** fence.i - assign CounterEvent[21] = '0; // ******** sfence.vma - assign CounterEvent[22] = '0; // ******** # interrupts - assign CounterEvent[23] = '0; // ******** # exceptions + assign CounterEvent[21] = sfencevmaM & InstrValidNotFlushedM; // sfence.vma + assign CounterEvent[22] = InterruptM; // interrupt, InstrValidNotFlushedM will be low + assign CounterEvent[23] = ExceptionM; // exceptions, InstrValidNotFlushedM will be low assign CounterEvent[24] = '0; // ******** # division cycles assign CounterEvent[`COUNTERS-1:25] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 5f6c17752..679d13bd6 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -106,9 +106,9 @@ module privileged ( logic DelegateM; // trap should be delegated logic wfiM; // wait for interrupt instruction logic IntPendingM; // interrupt is pending, even if not enabled. 
ends wfi - logic InterruptM; // interrupt occuring - - + logic InterruptM; // interrupt occuring + logic ExceptionM; // Memory stage instruction caused a fault + // track the current privilege level privmode privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .DelegateM, .STATUS_MPP, .STATUS_SPP, .NextPrivilegeModeM, .PrivilegeModeW); @@ -126,6 +126,7 @@ module privileged ( .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, + .sfencevmaM, .ExceptionM, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, @@ -149,7 +150,7 @@ module privileged ( .mretM, .sretM, .PrivilegeModeW, .MIP_REGW, .MIE_REGW, .MIDELEG_REGW, .MEDELEG_REGW, .STATUS_MIE, .STATUS_SIE, .InstrValidM, .CommittedM, .CommittedF, - .TrapM, .RetM, .wfiM, .InterruptM, .IntPendingM, .DelegateM, .WFIStallM, .CauseM); + .TrapM, .RetM, .wfiM, .InterruptM, .ExceptionM, .IntPendingM, .DelegateM, .WFIStallM, .CauseM); endmodule diff --git a/src/privileged/trap.sv b/src/privileged/trap.sv index d8ad28f56..1d98763fc 100644 --- a/src/privileged/trap.sv +++ b/src/privileged/trap.sv @@ -45,6 +45,7 @@ module trap ( output logic TrapM, // Trap is occurring output logic RetM, // Return instruction being executed output logic InterruptM, // Interrupt is occurring + output logic ExceptionM, // exception is occurring output logic IntPendingM, // Interrupt is pending, might occur if enabled output logic DelegateM, // Delegate trap to supervisor handler output logic WFIStallM, // Stall due to WFI instruction @@ -52,7 +53,6 @@ module trap ( ); logic MIntGlobalEnM, SIntGlobalEnM; // Global interupt enables - logic ExceptionM; // exception is occurring logic Committed; // LSU or IFU has committed to a bus operation that can't be interrupted logic 
BothInstrAccessFaultM; // instruction or HPTW ITLB fill caused an Instruction Access Fault logic [11:0] PendingIntsM, ValidIntsM, EnabledIntsM; // interrupts are pending, valid, or enabled From b19d51b6a292df6d3d5c4391b48e287f1a4c5dff Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 23:29:20 -0600 Subject: [PATCH 52/55] Added fence counter. --- src/ieu/controller.sv | 13 +++++++------ src/ieu/ieu.sv | 5 +++-- src/privileged/csr.sv | 3 ++- src/privileged/csrc.sv | 3 ++- src/privileged/privileged.sv | 3 ++- src/wally/wallypipelinedcore.sv | 5 +++-- 6 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 512847046..8e89656a1 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -38,7 +38,9 @@ module controller( output logic [2:0] ImmSrcD, // Type of immediate extension input logic IllegalIEUFPUInstrD, // Illegal IEU and FPU instruction output logic IllegalBaseInstrD, // Illegal I-type instruction, or illegal RV32 access to upper 16 registers - // Execute stage control signals + output logic JumpD, // Jump instruction + output logic BranchD, // Branch instruction + // Execute stage control signals input logic StallE, FlushE, // Stall, flush Execute stage input logic [1:0] FlagsE, // Comparison flags ({eq, lt}) input logic FWriteIntE, // Write integer register, coming from FPU controller @@ -51,7 +53,8 @@ module controller( output logic IntDivE, // Integer divide output logic MDUE, // MDU (multiply/divide) operatio output logic W64E, // RV64 W-type operation - output logic JumpE, // jump instruction + output logic JumpE, // jump instruction + output logic BranchE, // Branch instruction output logic SCE, // Store Conditional instruction output logic BranchSignedE, // Branch comparison operands are signed (if it's a branch) // Memory stage control signals @@ -63,9 +66,7 @@ module controller( output logic RegWriteM, // Instruction writes a register (needed for Hazard unit) output 
logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$ output logic InstrValidD, InstrValidE, InstrValidM, // Instruction is valid - output logic BranchD, BranchE, - output logic JumpD, - + output logic FenceM, // Fence instruction output logic FWriteIntM, // FPU controller writes integer register file // Writeback stage control signals input logic StallW, FlushW, // Stall, flush Writeback stage @@ -109,7 +110,7 @@ module controller( logic IEURegWriteE; // Register write logic IllegalERegAdrD; // RV32E attempts to write upper 16 registers logic [1:0] AtomicE; // Atomic instruction - logic FenceD, FenceE, FenceM; // Fence instruction + logic FenceD, FenceE; // Fence instruction logic SFenceVmaD; // sfence.vma instruction logic IntDivM; // Integer divide instruction diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index 346594eba..854233741 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -71,7 +71,8 @@ module ieu ( output logic FCvtIntStallD, LoadStallD, // Stall causes from IEU to hazard unit output logic MDUStallD, CSRRdStallD, StoreStallD, output logic CSRReadM, CSRWriteM, PrivilegedM,// CSR read, CSR write, is privileged instruction - output logic CSRWriteFenceM // CSR write or fence instruction needs to flush subsequent instructions + output logic CSRWriteFenceM, // CSR write or fence instruction needs to flush subsequent instructions + output logic FenceM ); logic [2:0] ImmSrcD; // Select type of immediate extension @@ -99,7 +100,7 @@ module ieu ( .Funct3E, .IntDivE, .MDUE, .W64E, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM, - .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .StoreStallD); + .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .FenceM, .StoreStallD); datapath dp( .clk, .reset, .ImmSrcD, .InstrD, 
.StallE, .FlushE, .ForwardAE, .ForwardBE, diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 2630e0f76..ce9acb6e0 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -70,6 +70,7 @@ module csr #(parameter input logic ICacheMiss, input logic ICacheAccess, input logic sfencevmaM, + input logic FenceM, // outputs from CSRs output logic [1:0] STATUS_MPP, output logic STATUS_SPP, STATUS_TSR, STATUS_TVM, @@ -263,7 +264,7 @@ module csr #(parameter .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM, - .InterruptM, .ExceptionM, + .InterruptM, .ExceptionM, .FenceM, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, .MTIME_CLINT, .CSRCReadValM, .IllegalCSRCAccessM); diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 131be4ec6..85ce7f6fe 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -58,6 +58,7 @@ module csrc #(parameter input logic sfencevmaM, input logic InterruptM, input logic ExceptionM, + input logic FenceM, input logic [11:0] CSRAdrM, input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, @@ -106,7 +107,7 @@ module csrc #(parameter assign CounterEvent[17] = ICacheMiss; // instruction cache miss. 
Miss asserted 1 cycle at start of cache miss assign CounterEvent[18] = '0; // //// ******** i cache miss cycles assign CounterEvent[19] = CSRWriteM & InstrValidNotFlushedM; // CSR writes - assign CounterEvent[20] = '0; // ******** fence.i + assign CounterEvent[20] = FenceM & InstrValidNotFlushedM; // fence.i assign CounterEvent[21] = sfencevmaM & InstrValidNotFlushedM; // sfence.vma assign CounterEvent[22] = InterruptM; // interrupt, InstrValidNotFlushedM will be low assign CounterEvent[23] = ExceptionM; // exceptions, InstrValidNotFlushedM will be low diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 679d13bd6..14e7ce1d8 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -84,6 +84,7 @@ module privileged ( // control outputs output logic RetM, TrapM, // return instruction, or trap output logic sfencevmaM, // sfence.vma instruction + input logic FenceM, // fence instruction output logic BigEndianM, // Use big endian in current privilege mode // Fault outputs output logic BreakpointFaultM, EcallFaultM, // breakpoint and Ecall traps should retire @@ -126,7 +127,7 @@ module privileged ( .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, - .sfencevmaM, .ExceptionM, + .sfencevmaM, .ExceptionM, .FenceM, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 6acd96924..53ea756cb 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -161,6 +161,7 @@ module wallypipelinedcore ( logic FCvtIntE; logic CommittedF; logic BranchD, BranchE, JumpD, JumpE; + logic FenceM; // instruction fetch unit: PC, branch prediction, instruction cache ifu ifu(.clk, 
.reset, @@ -207,7 +208,7 @@ module wallypipelinedcore ( // hazards .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .FCvtIntStallD, .LoadStallD, .MDUStallD, .CSRRdStallD, .PCSrcE, - .CSRReadM, .CSRWriteM, .PrivilegedM, .CSRWriteFenceM, .StoreStallD); + .CSRReadM, .CSRWriteM, .PrivilegedM, .CSRWriteFenceM, .FenceM, .StoreStallD); lsu lsu( .clk, .reset, .StallM, .FlushM, .StallW, .FlushW, @@ -285,7 +286,7 @@ module wallypipelinedcore ( .FlushD, .FlushE, .FlushM, .FlushW, .StallD, .StallE, .StallM, .StallW, .CSRReadM, .CSRWriteM, .SrcAM, .PCM, .PC2NextF, .InstrM, .CSRReadValW, .UnalignedPCNextF, - .RetM, .TrapM, .sfencevmaM, + .RetM, .TrapM, .sfencevmaM, .FenceM, .InstrValidM, .CommittedM, .CommittedF, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, From 4b501f6e030704f8ec414d9439f5fb085a598b9a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 23:54:56 -0600 Subject: [PATCH 53/55] Added the i and d cache cycle counters. --- src/ifu/ifu.sv | 6 +++--- src/lsu/lsu.sv | 2 +- src/privileged/csr.sv | 4 +++- src/privileged/csrc.sv | 6 ++++-- src/privileged/privileged.sv | 6 ++++-- src/wally/wallypipelinedcore.sv | 6 ++++-- 6 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 78cd8c072..2f403ff53 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -65,10 +65,11 @@ module ifu ( output logic [`XLEN-1:0] PCM, // Memory stage instruction address // branch predictor output logic [3:0] InstrClassM, // The valid instruction class. 
1-hot encoded as jalr, ret, jr (not ret), j, br - output logic BPDirPredWrongM, // Prediction direction is wrong + output logic BPDirPredWrongM, // Prediction direction is wrong output logic BTBPredPCWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong - output logic IClassWrongM, // Class prediction is wrong + output logic IClassWrongM, // Class prediction is wrong + output logic ICacheStallF, // I$ busy with multicycle operation // Faults input logic IllegalBaseInstrD, // Illegal non-compressed instruction input logic IllegalFPUInstrD, // Illegal FP instruction @@ -127,7 +128,6 @@ module ifu ( logic CacheableF; // PMA indicates instruction address is cacheable logic SelNextSpillF; // In a spill, stall pipeline and gate local stallF logic BusStall; // Bus interface busy with multicycle operation - logic ICacheStallF; // I$ busy with multicycle operation logic IFUCacheBusStallD; // EIther I$ or bus busy with multicycle operation logic GatedStallD; // StallD gated by selected next spill // branch predictor signal diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 18383e0dd..9f11f7007 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -54,6 +54,7 @@ module lsu ( input logic [1:0] PrivilegeModeW, // Current privilege mode input logic BigEndianM, // Swap byte order to big endian input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries + output logic DCacheStallM, // D$ busy with multicycle operation // fpu input logic [`FLEN-1:0] FWriteDataM, // Write data from FPU input logic FpLoadStoreM, // Selects FPU as store for write data @@ -103,7 +104,6 @@ module lsu ( logic GatedStallW; // Hazard unit StallW gated when SelHPTW = 1 - logic DCacheStallM; // D$ busy with multicycle operation logic BusStall; // Bus interface busy with multicycle operation logic HPTWStall; // HPTW busy with multicycle operation diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index ce9acb6e0..306463a62 100644 --- 
a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -59,6 +59,8 @@ module csr #(parameter // inputs for performance counters input logic LoadStallD, input logic StoreStallD, + input logic ICacheStallF, + input logic DCacheStallM, input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, @@ -264,7 +266,7 @@ module csr #(parameter .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM, - .InterruptM, .ExceptionM, .FenceM, + .InterruptM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, .MTIME_CLINT, .CSRCReadValM, .IllegalCSRCAccessM); diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 85ce7f6fe..bc2c2b963 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -55,6 +55,8 @@ module csrc #(parameter input logic DCacheAccess, input logic ICacheMiss, input logic ICacheAccess, + input logic ICacheStallF, + input logic DCacheStallM, input logic sfencevmaM, input logic InterruptM, input logic ExceptionM, @@ -102,10 +104,10 @@ module csrc #(parameter assign CounterEvent[12] = StoreStallM & InstrValidNotFlushedM; // Store Stall assign CounterEvent[13] = DCacheAccess & InstrValidNotFlushedM; // data cache access assign CounterEvent[14] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[15] = '0; // //// ******* d cache miss cycles + assign CounterEvent[15] = DCacheStallM; // d cache miss cycles assign CounterEvent[16] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access assign CounterEvent[17] = ICacheMiss; // instruction cache miss. 
Miss asserted 1 cycle at start of cache miss - assign CounterEvent[18] = '0; // //// ******** i cache miss cycles + assign CounterEvent[18] = ICacheStallF; // i cache miss cycles assign CounterEvent[19] = CSRWriteM & InstrValidNotFlushedM; // CSR writes assign CounterEvent[20] = FenceM & InstrValidNotFlushedM; // fence.i assign CounterEvent[21] = sfencevmaM & InstrValidNotFlushedM; // sfence.vma diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 14e7ce1d8..0a85d52db 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -46,7 +46,9 @@ module privileged ( // processor events for performance counter logging input logic FRegWriteM, // instruction will write floating-point registers input logic LoadStallD, // load instruction is stalling - input logic StoreStallD, // load instruction is stalling + input logic StoreStallD, // store instruction is stalling + input logic ICacheStallF, // I cache stalled + input logic DCacheStallM, // D cache stalled input logic BPDirPredWrongM, // branch predictor guessed wrong direction input logic BTBPredPCWrongM, // branch predictor guessed wrong target input logic RASPredPCWrongM, // return adddress stack guessed wrong target @@ -127,7 +129,7 @@ module privileged ( .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, - .sfencevmaM, .ExceptionM, .FenceM, + .sfencevmaM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 53ea756cb..567d10e77 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -162,12 +162,13 @@ module wallypipelinedcore ( logic CommittedF; 
logic BranchD, BranchE, JumpD, JumpE; logic FenceM; + logic DCacheStallM, ICacheStallF; // instruction fetch unit: PC, branch prediction, instruction cache ifu ifu(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidM, .InstrValidE, .InstrValidD, - .BranchD, .BranchE, .JumpD, .JumpE, + .BranchD, .BranchE, .JumpD, .JumpE, .ICacheStallF, // Fetch .HRDATA, .PCFSpill, .IFUHADDR, .PC2NextF, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE, @@ -231,6 +232,7 @@ module wallypipelinedcore ( .STATUS_MPRV, // from csr .STATUS_MPP, // from csr .sfencevmaM, // connects to privilege + .DCacheStallM, // connects to privilege .LoadPageFaultM, // connects to privilege .StoreAmoPageFaultM, // connects to privilege .LoadMisalignedFaultM, // connects to privilege @@ -286,7 +288,7 @@ module wallypipelinedcore ( .FlushD, .FlushE, .FlushM, .FlushW, .StallD, .StallE, .StallM, .StallW, .CSRReadM, .CSRWriteM, .SrcAM, .PCM, .PC2NextF, .InstrM, .CSRReadValW, .UnalignedPCNextF, - .RetM, .TrapM, .sfencevmaM, .FenceM, + .RetM, .TrapM, .sfencevmaM, .FenceM, .DCacheStallM, .ICacheStallF, .InstrValidM, .CommittedM, .CommittedF, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, From bdab2c8506c0fe697424f94ff3d6f91e6e4cd066 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 23:59:52 -0600 Subject: [PATCH 54/55] Added divide cycle counter. 
--- src/privileged/csr.sv | 4 +++- src/privileged/csrc.sv | 4 +++- src/privileged/privileged.sv | 4 +++- src/wally/wallypipelinedcore.sv | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 306463a62..5fc5d18e9 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -73,6 +73,8 @@ module csr #(parameter input logic ICacheAccess, input logic sfencevmaM, input logic FenceM, + input logic DivBusyE, // integer divide busy + input logic FDivBusyE, // floating point divide busy // outputs from CSRs output logic [1:0] STATUS_MPP, output logic STATUS_SPP, STATUS_TSR, STATUS_TVM, @@ -266,7 +268,7 @@ module csr #(parameter .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM, - .InterruptM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, + .InterruptM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, .MTIME_CLINT, .CSRCReadValM, .IllegalCSRCAccessM); diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index bc2c2b963..feab9404f 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -61,6 +61,8 @@ module csrc #(parameter input logic InterruptM, input logic ExceptionM, input logic FenceM, + input logic DivBusyE, // integer divide busy + input logic FDivBusyE, // floating point divide busy input logic [11:0] CSRAdrM, input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, @@ -113,7 +115,7 @@ module csrc #(parameter assign CounterEvent[21] = sfencevmaM & InstrValidNotFlushedM; // sfence.vma assign CounterEvent[22] = InterruptM; // interrupt, InstrValidNotFlushedM will be low assign CounterEvent[23] = ExceptionM; // exceptions, InstrValidNotFlushedM will be 
low - assign CounterEvent[24] = '0; // ******** # division cycles + assign CounterEvent[24] = DivBusyE | FDivBusyE; // division cycles *** RT: might need to be delay until the next cycle assign CounterEvent[`COUNTERS-1:25] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 0a85d52db..fd4bb0f83 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -59,6 +59,8 @@ module privileged ( input logic DCacheAccess, // data cache accessed (hit or miss) input logic ICacheMiss, // instruction cache miss input logic ICacheAccess, // instruction cache access + input logic DivBusyE, // integer divide busy + input logic FDivBusyE, // floating point divide busy // fault sources input logic InstrAccessFaultF, // instruction access fault input logic LoadAccessFaultM, StoreAmoAccessFaultM, // load or store access fault @@ -129,7 +131,7 @@ module privileged ( .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, - .sfencevmaM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, + .sfencevmaM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 567d10e77..4e68ce481 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -292,7 +292,7 @@ module wallypipelinedcore ( .InstrValidM, .CommittedM, .CommittedF, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, - .RASPredPCWrongM, .IClassWrongM, + .RASPredPCWrongM, .IClassWrongM, .DivBusyE, .FDivBusyE, 
.InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, .InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM, .InstrMisalignedFaultM, .IllegalIEUFPUInstrD, From 7dd8fa16c1d2a8229c1dea210077e169f9054ace Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 3 Mar 2023 00:18:34 -0600 Subject: [PATCH 55/55] Renamed BTB misprediction to BTA. --- src/ifu/bpred/bpred.sv | 6 +++--- src/ifu/ifu.sv | 6 +++--- src/privileged/csr.sv | 4 ++-- src/privileged/csrc.sv | 4 ++-- src/privileged/privileged.sv | 4 ++-- src/wally/wallypipelinedcore.sv | 6 +++--- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 626da8964..f2f16b51e 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -64,7 +64,7 @@ module bpred ( output logic BPWrongE, // Prediction is wrong output logic BPWrongM, // Prediction is wrong output logic BPDirPredWrongM, // Prediction direction is wrong - output logic BTBPredPCWrongM, // Prediction target wrong + output logic BTAWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong output logic IClassWrongM // Class prediction is wrong ); @@ -215,10 +215,10 @@ module bpred ( flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, {BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE}, - {BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM}); + {BPDirPredWrongM, BTAWrongM, RASPredPCWrongM}); end else begin - assign {BTBPredPCWrongM, RASPredPCWrongM} = '0; + assign {BTAWrongM, RASPredPCWrongM} = '0; end // **** Fix me diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 2f403ff53..2c2ee7b4e 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -66,7 +66,7 @@ module ifu ( // branch predictor output logic [3:0] InstrClassM, // The valid instruction class. 
1-hot encoded as jalr, ret, jr (not ret), j, br output logic BPDirPredWrongM, // Prediction direction is wrong - output logic BTBPredPCWrongM, // Prediction target wrong + output logic BTAWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong output logic IClassWrongM, // Class prediction is wrong output logic ICacheStallF, // I$ busy with multicycle operation @@ -331,12 +331,12 @@ module ifu ( .BranchD, .BranchE, .JumpD, .JumpE, .InstrD, .PCNextF, .PCPlus2or4F, .PC1NextF, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, .PCD, .PCLinkE, .InstrClassM, .BPWrongE, .PostSpillInstrRawF, .BPWrongM, - .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM); + .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .IClassWrongM); end else begin : bpred mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PC1NextF)); assign BPWrongE = PCSrcE; - assign {InstrClassM, BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM, IClassWrongM} = '0; + assign {InstrClassM, BPDirPredWrongM, BTAWrongM, RASPredPCWrongM, IClassWrongM} = '0; assign NextValidPCE = PCE; end diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 5fc5d18e9..d97be53f6 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -62,7 +62,7 @@ module csr #(parameter input logic ICacheStallF, input logic DCacheStallM, input logic BPDirPredWrongM, - input logic BTBPredPCWrongM, + input logic BTAWrongM, input logic RASPredPCWrongM, input logic IClassWrongM, input logic BPWrongM, // branch predictor is wrong @@ -266,7 +266,7 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, - .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, + .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, 
.ICacheAccess, .sfencevmaM, .InterruptM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index feab9404f..b4f89f188 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -46,7 +46,7 @@ module csrc #(parameter input logic InstrValidNotFlushedM, LoadStallD, StoreStallD, input logic CSRMWriteM, CSRWriteM, input logic BPDirPredWrongM, - input logic BTBPredPCWrongM, + input logic BTAWrongM, input logic RASPredPCWrongM, input logic IClassWrongM, input logic BPWrongM, // branch predictor is wrong @@ -99,7 +99,7 @@ module csrc #(parameter assign CounterEvent[5] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions assign CounterEvent[6] = BPWrongM & InstrValidNotFlushedM; // branch predictor wrong assign CounterEvent[7] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction - assign CounterEvent[8] = BTBPredPCWrongM & InstrValidNotFlushedM; // branch predictor wrong target + assign CounterEvent[8] = BTAWrongM & InstrValidNotFlushedM; // branch predictor wrong target assign CounterEvent[9] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong assign CounterEvent[11] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. 
diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index fd4bb0f83..251dbb3d6 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -50,7 +50,7 @@ module privileged ( input logic ICacheStallF, // I cache stalled input logic DCacheStallM, // D cache stalled input logic BPDirPredWrongM, // branch predictor guessed wrong direction - input logic BTBPredPCWrongM, // branch predictor guessed wrong target + input logic BTAWrongM, // branch predictor guessed wrong target input logic RASPredPCWrongM, // return adddress stack guessed wrong target input logic IClassWrongM, // branch predictor guessed wrong instruction class input logic BPWrongM, // branch predictor is wrong @@ -130,7 +130,7 @@ module privileged ( .CSRReadM, .CSRWriteM, .TrapM, .mretM, .sretM, .wfiM, .IntPendingM, .InterruptM, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, - .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, + .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .BPWrongM, .sfencevmaM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 4e68ce481..6c2d5816b 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -142,7 +142,7 @@ module wallypipelinedcore ( logic BPWrongE, BPWrongM; logic BPDirPredWrongM; - logic BTBPredPCWrongM; + logic BTAWrongM; logic RASPredPCWrongM; logic IClassWrongM; logic [3:0] InstrClassM; @@ -178,7 +178,7 @@ module wallypipelinedcore ( // Mem .CommittedF, .UnalignedPCNextF, .InvalidateICacheM, .CSRWriteFenceM, .InstrD, .InstrM, .PCM, .InstrClassM, .BPDirPredWrongM, - .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, + .BTAWrongM, .RASPredPCWrongM, .IClassWrongM, // Faults out 
.IllegalBaseInstrD, .IllegalFPUInstrD, .InstrPageFaultF, .IllegalIEUFPUInstrD, .InstrMisalignedFaultM, // mmu management @@ -291,7 +291,7 @@ module wallypipelinedcore ( .RetM, .TrapM, .sfencevmaM, .FenceM, .DCacheStallM, .ICacheStallF, .InstrValidM, .CommittedM, .CommittedF, .FRegWriteM, .LoadStallD, .StoreStallD, - .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, + .BPDirPredWrongM, .BTAWrongM, .BPWrongM, .RASPredPCWrongM, .IClassWrongM, .DivBusyE, .FDivBusyE, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, .InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM,