diff --git a/sim/imperas.ic b/sim/imperas.ic index d28234bbc..69d57f702 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -1,3 +1,7 @@ +#--showoverrides +#--help --helpall +--traceregs + --override cpu/show_c_prefix=T --override cpu/unaligned=F --override cpu/ignore_non_leaf_DAU=1 @@ -33,3 +37,6 @@ # ignore settings of bits DAU for non leaf page table walks --override cpu/ignore_non_leaf_DAU=1 + +# mimpid = 0x100 +--override cpu/mimpid=0x100 diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index 650e8367d..85d23d373 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -30,7 +30,7 @@ module hazard ( // Detect hazards - input logic BPPredWrongE, CSRWriteFenceM, RetM, TrapM, + input logic BPWrongE, CSRWriteFenceM, RetM, TrapM, input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD, input logic LSUStallM, IFUStallF, input logic FCvtIntStallD, FPUStallD, @@ -65,8 +65,8 @@ module hazard ( // Similarly, CSR writes and fences flush all subsequent instructions and refetch them in light of the new operating modes and cache/TLB contents // Branch misprediction is found in the Execute stage and must flush the next two instructions. // However, an active division operation resides in the Execute stage, and when the BP incorrectly mispredicts the divide as a taken branch, the divde must still complete - assign FlushDCause = TrapM | RetM | CSRWriteFenceM | BPPredWrongE; - assign FlushECause = TrapM | RetM | CSRWriteFenceM |(BPPredWrongE & ~(DivBusyE | FDivBusyE)); + assign FlushDCause = TrapM | RetM | CSRWriteFenceM | BPWrongE; + assign FlushECause = TrapM | RetM | CSRWriteFenceM |(BPWrongE & ~(DivBusyE | FDivBusyE)); assign FlushMCause = TrapM | RetM | CSRWriteFenceM; assign FlushWCause = TrapM; diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 5f14a028e..d5fd0c019 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -33,11 +33,11 @@ module RASPredictor #(parameter int StackSize = 16 )( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, - input logic WrongBPRetD, // Prediction class is wrong - input logic RetD, - input logic RetE, JalE, // Instr class - input logic BPRetF, - input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a jal + input logic BPReturnWrongD, // Prediction class is wrong + input logic ReturnD, + input logic ReturnE, CallE, // Instr class + input logic BPReturnF, + input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a call output logic [`XLEN-1:0] RASPCF // Top of the stack ); @@ -54,21 +54,21 @@ module RASPredictor #(parameter int StackSize = 16 )( logic IncrRepairD, DecRepairD; logic DecrementPtr; - logic FlushedRetDE; - logic WrongPredRetD; + logic FlushedReturnDE; + logic WrongPredReturnD; - assign PopF = BPRetF & ~StallD & ~FlushD; - assign PushE = JalE & ~StallM & ~FlushM; + assign PopF = BPReturnF & ~StallD & ~FlushD; + assign PushE = CallE & ~StallM & ~FlushM; - assign WrongPredRetD = (WrongBPRetD) & ~StallE & ~FlushE; - assign FlushedRetDE = (~StallE & FlushE & RetD) | (~StallM & FlushM & RetE); // flushed ret + assign WrongPredReturnD = (BPReturnWrongD) & ~StallE & ~FlushE; + assign FlushedReturnDE = (~StallE & FlushE & ReturnD) | (~StallM & FlushM & ReturnE); // flushed return - assign RepairD = WrongPredRetD | FlushedRetDE ; + assign RepairD = WrongPredReturnD | FlushedReturnDE ; - assign IncrRepairD = FlushedRetDE | (WrongPredRetD & ~RetD); // Guessed it was a ret, but its not + assign IncrRepairD = FlushedReturnDE | (WrongPredReturnD & ~ReturnD); // Guessed it was a return, but its not - assign DecRepairD = WrongPredRetD & RetD; // Guessed non ret but is a ret. + assign DecRepairD = WrongPredReturnD & ReturnD; // Guessed non return but is a return. assign CounterEn = PopF | PushE | RepairD; diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index c802ffb21..92da8622e 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -39,7 +39,7 @@ module bpred ( input logic [31:0] InstrD, // Decompressed decode stage instruction. Used to decode instruction class input logic [`XLEN-1:0] PCNextF, // Next Fetch Address input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4 - output logic [`XLEN-1:0] PCNext1F, // Branch Predictor predicted or corrected fetch address on miss prediction + output logic [`XLEN-1:0] PC1NextF, // Branch Predictor predicted or corrected fetch address on miss prediction output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage // Update Predictor @@ -58,74 +58,76 @@ module bpred ( input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) - output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br - output logic JumpOrTakenBranchM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br + output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as call, return, jr (not return), j, br + output logic JumpOrTakenBranchM, // The valid instruction class. 1-hot encoded as call, return, jr (not return), j, br // Report branch prediction status - output logic BPPredWrongE, // Prediction is wrong - output logic BPPredWrongM, // Prediction is wrong - output logic DirPredictionWrongM, // Prediction direction is wrong + output logic BPWrongE, // Prediction is wrong + output logic BPWrongM, // Prediction is wrong + output logic BPDirPredWrongM, // Prediction direction is wrong output logic BTBPredPCWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong - output logic PredictionInstrClassWrongM // Class prediction is wrong + output logic IClassWrongM // Class prediction is wrong ); - logic [1:0] DirPredictionF; + logic [1:0] BPDirPredF; - logic [`XLEN-1:0] BTAF, RASPCF; - logic PredictionPCWrongE; - logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; - logic DirPredictionWrongE; + logic [`XLEN-1:0] BTAF, RASPCF; + logic BPPCWrongE; + logic IClassWrongE; + logic BPDirPredWrongE; - logic BPPCSrcF; - logic [`XLEN-1:0] BPPredPCF; - logic [`XLEN-1:0] PCNext0F; - logic [`XLEN-1:0] PCCorrectE; - logic [3:0] WrongPredInstrClassD; + logic BPPCSrcF; + logic [`XLEN-1:0] BPPCF; + logic [`XLEN-1:0] PC0NextF; + logic [`XLEN-1:0] PCCorrectE; + logic [3:0] WrongPredInstrClassD; - logic BTBTargetWrongE; - logic RASTargetWrongE; + logic BTBTargetWrongE; + logic RASTargetWrongE; - logic [`XLEN-1:0] BTAD; + logic [`XLEN-1:0] BTAD; - logic BTBJalF, BTBRetF, BTBJumpF, BTBBranchF; - logic BPBranchF, BPJumpF, BPRetF, BPJalF; - logic BPBranchD, BPJumpD, BPRetD, BPJalD; - logic RetD, JalD; - logic RetE, JalE; - logic BranchM, JumpM, RetM, JalM; - logic BranchW, JumpW, RetW, JalW; - logic WrongBPRetD; - logic [`XLEN-1:0] PCW, IEUAdrW; + logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF; + logic BPBranchF, BPJumpF, BPReturnF, BPCallF; + logic BPBranchD, BPJumpD, BPReturnD, BPCallD; + logic ReturnD, CallD; + logic ReturnE, CallE; + logic BranchM, JumpM, ReturnM, CallM; + logic BranchW, JumpW, ReturnW, CallW; + logic BPReturnWrongD; + logic [`XLEN-1:0] BTAE; + + // Part 1 branch direction prediction // look into the 2 port Sram model. something is wrong. if (`BPRED_TYPE == "BP_TWOBIT") begin:Predictor twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, + .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE, .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE") begin:Predictor gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .DirPredictionF, .DirPredictionWrongE, + .PCNextF, .PCF, .PCD, .PCE, .PCM, .BPDirPredF, .BPDirPredWrongE, .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL") begin:Predictor gshare #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .DirPredictionF, .DirPredictionWrongE, + .PCNextF, .PCF, .PCD, .PCE, .PCM, .BPDirPredF, .BPDirPredWrongE, .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE_BASIC") begin:Predictor gsharebasic #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, + .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE, .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL_BASIC") begin:Predictor gsharebasic #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, + .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE, .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor @@ -134,7 +136,7 @@ module bpred ( localHistoryPredictor DirPredictor(.clk, .reset, .StallF, .StallE, .LookUpPC(PCNextF), - .Prediction(DirPredictionF), + .Prediction(BPDirPredF), // update .UpdatePC(PCE), .UpdateEN(InstrClassE[0] & ~StallE), @@ -148,144 +150,83 @@ module bpred ( btb #(`BTB_SIZE) TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, - .BTAF, .BTAD, - .BTBPredInstrClassF({BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}), - .PredictionInstrClassWrongM, - .IEUAdrE, .IEUAdrM, .IEUAdrW, - .InstrClassD({JalD, RetD, JumpD, BranchD}), .InstrClassE({JalE, RetE, JumpE, BranchE}), .InstrClassM({JalM, RetM, JumpM, BranchM}), - .InstrClassW({JalW, RetW, JumpW, BranchW})); + .PCNextF, .PCF, .PCD, .PCE, .PCM, + .BTAF, .BTAD, .BTAE, + .BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}), + .IClassWrongM, .IClassWrongE, + .IEUAdrE, .IEUAdrM, + .InstrClassD({CallD, ReturnD, JumpD, BranchD}), + .InstrClassE({CallE, ReturnE, JumpE, BranchE}), + .InstrClassM({CallM, ReturnM, JumpM, BranchM}), + .InstrClassW({CallW, ReturnW, JumpW, BranchW})); - if (!`INSTR_CLASS_PRED) begin : DirectClassDecode - // This section is mainly for testing, verification, and PPA comparison. - // An alternative to using the BTB to store the instruction class is to partially decode - // the instructions in the Fetch stage into, Jal, Ret, Jump, and Branch instructions. - // This logic is not described in the text book as of 23 February 2023. - logic cjal, cj, cjr, cjalr, CJumpF, CBranchF; - logic NCJumpF, NCBranchF; + icpred #(`INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, + .PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW, + .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF, + .BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .IClassWrongM, .IClassWrongE, .BPReturnWrongD); - if(`C_SUPPORTED) begin - logic [4:0] CompressedOpcF; - assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; - assign cjal = CompressedOpcF == 5'h09 & `XLEN == 32; - assign cj = CompressedOpcF == 5'h0d; - assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; - assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; - assign CJumpF = cjal | cj | cjr | cjalr; - assign CBranchF = CompressedOpcF[4:1] == 4'h7; - end else begin - assign {cjal, cj, cjr, cjalr, CJumpF, CBranchF} = '0; - end - - assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; - assign NCBranchF = PostSpillInstrRawF[6:0] == 7'h63; - - assign BPBranchF = NCBranchF | (`C_SUPPORTED & CBranchF); - assign BPJumpF = NCJumpF | (`C_SUPPORTED & (CJumpF)); - assign BPRetF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or r5 - (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); - - assign BPJalF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 - (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); - - end else begin - // This section connects the BTB's instruction class prediction. - assign {BPJalF, BPRetF, BPJumpF, BPBranchF} = {BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}; - end - assign BPPCSrcF = (BPBranchF & DirPredictionF[1]) | BPJumpF; - // Part 3 RAS RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .BPRetF, .RetD, .RetE, .JalE, - .WrongBPRetD, .RASPCF, .PCLinkE); + .BPReturnF, .ReturnD, .ReturnE, .CallE, + .BPReturnWrongD, .RASPCF, .PCLinkE); - assign BPPredPCF = BPRetF ? RASPCF : BTAF; - - assign RetD = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 - assign JalD = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 - - flopenrc #(2) InstrClassRegE(clk, reset, FlushE, ~StallE, {JalD, RetD}, {JalE, RetE}); - flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, {JalE, RetE, JumpE, BranchE}, {JalM, RetM, JumpM, BranchM}); - flopenrc #(4) InstrClassRegW(clk, reset, FlushM, ~StallW, {JalM, RetM, JumpM, BranchM}, {JalW, RetW, JumpW, BranchW}); - flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); - - // branch predictor - flopenrc #(1) BPClassWrongRegM(clk, reset, FlushM, ~StallM, AnyWrongPredInstrClassE, PredictionInstrClassWrongM); - flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); - - // pipeline the predicted class - flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, {BPJalF, BPRetF, BPJumpF, BPBranchF}, {BPJalD, BPRetD, BPJumpD, BPBranchD}); - // Check the prediction // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. // if the class prediction is wrong a regular instruction may have been predicted as a taken branch // this will result in PCD not being equal to the fall through address PCLinkE (PCE+4). // The next instruction is always valid as no other flush would occur at the same time as the branch and not // also flush the branch. This will change in a superscaler cpu. - assign PredictionPCWrongE = PCCorrectE != PCD; - - // branch class prediction wrong. - assign AnyWrongPredInstrClassD = |({BPJalD, BPRetD, BPJumpD, BPBranchD} ^ {JalD, RetD, JumpD, BranchD}); - assign WrongBPRetD = BPRetD ^ RetD; - // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. - assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; - - logic BPPredWrongEAlt; - logic NotMatch; - assign BPPredWrongEAlt = PredictionPCWrongE & InstrValidE & InstrValidD; // this does not work for cubic benchmark - assign NotMatch = BPPredWrongE != BPPredWrongEAlt; - + assign BPWrongE = (PCCorrectE != PCD) & InstrValidE & InstrValidD; + flopenrc #(1) BPWrongMReg(clk, reset, FlushM, ~StallM, BPWrongE, BPWrongM); + // Output the predicted PC or corrected PC on miss-predict. + assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF; + mux2 #(`XLEN) pcmuxbp(BTAF, RASPCF, BPReturnF, BPPCF); // Selects the BP or PC+2/4. - mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, BPPCSrcF, PCNext0F); + mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PC0NextF); // If the prediction is wrong select the correct address. - mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F); + mux2 #(`XLEN) pcmux1(PC0NextF, PCCorrectE, BPWrongE, PC1NextF); // Correct branch/jump target. mux2 #(`XLEN) pccorrectemux(PCLinkE, IEUAdrE, PCSrcE, PCCorrectE); // If the fence/csrw was predicted as a taken branch then we select PCF, rather PCE. // Effectively this is PCM+4 or the non-existant PCLinkM - if(`INSTR_CLASS_PRED) mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPPredWrongM, NextValidPCE); + if(`INSTR_CLASS_PRED) mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPWrongM, NextValidPCE); else assign NextValidPCE = PCE; if(`ZICOUNTERS_SUPPORTED) begin - logic JumpOrTakenBranchE; - logic [`XLEN-1:0] BTAE, RASPCD, RASPCE; - logic BTBPredPCWrongE, RASPredPCWrongE; - // performance counters - // 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now - // 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal) - // 3. target ras (ras target wrong / class[2]) - // 4. direction (br dir wrong / class[0]) + logic JumpOrTakenBranchE; + logic [`XLEN-1:0] RASPCD, RASPCE; + logic BTBPredPCWrongE, RASPredPCWrongE; + // performance counters + // 1. class (class wrong / minstret) (IClassWrongM / csr) // Correct now + // 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal) + // 3. target ras (ras target wrong / class[2]) + // 4. direction (br dir wrong / class[0]) - // Unforuantely we can't use PCD to infer the correctness of the BTB or RAS because the class prediction - // could be wrong or the fall through address selected for branch predict not taken. - // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of - // both without the above inaccuracies. - assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~RetE) & PCSrcE; - assign RASPredPCWrongE = (RASPCE != IEUAdrE) & RetE & PCSrcE; + // Unforuantely we can't use PCD to infer the correctness of the BTB or RAS because the class prediction + // could be wrong or the fall through address selected for branch predict not taken. + // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of + // both without the above inaccuracies. + assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE; + assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE; - assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE; - - flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); + assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE; + + flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); - flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); - - flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); - flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); - flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, - {DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE}, - {DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM}); - + flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); + flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); + flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, + {BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE}, + {BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM}); + end else begin - assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0; + assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0; end // **** Fix me - assign InstrClassM = {JalM, RetM, JumpM, BranchM}; - flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); - flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); - + assign InstrClassM = {CallM, ReturnM, JumpM, BranchM}; endmodule diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index d2f0cb77b..b14399704 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -34,28 +34,33 @@ module btb #(parameter Depth = 10 ) ( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW, - input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW,// PC at various stages + input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages output logic [`XLEN-1:0] BTAF, // BTB's guess at PC - output logic [`XLEN-1:0] BTAD, - output logic [3:0] BTBPredInstrClassF, // BTB's guess at instruction class + output logic [`XLEN-1:0] BTAD, + output logic [`XLEN-1:0] BTAE, + output logic [3:0] BTBIClassF, // BTB's guess at instruction class // update - input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong + input logic IClassWrongM, // BTB's instruction class guess was wrong + input logic IClassWrongE, input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb - input logic [`XLEN-1:0] IEUAdrW, input logic [3:0] InstrClassD, // Instruction class to insert into btb input logic [3:0] InstrClassE, // Instruction class to insert into btb input logic [3:0] InstrClassM, // Instruction class to insert into btb input logic [3:0] InstrClassW ); - logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex; - logic [`XLEN-1:0] ResetPC; - logic MatchD, MatchE, MatchM, MatchW, MatchX; - logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; - logic [`XLEN+3:0] TableBTBPredictionF; - logic UpdateEn; - + logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex; + logic [`XLEN-1:0] ResetPC; + logic MatchD, MatchE, MatchM, MatchW, MatchX; + logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; + logic [`XLEN+3:0] TableBTBPredF; + logic [`XLEN-1:0] IEUAdrW; + logic [`XLEN-1:0] PCW; + logic BTBWrongE, BTAWrongE; + logic BTBWrongM, BTAWrongM; + + // hashing function for indexing the PC // We have Depth bits to index, but XLEN bits as the input. // bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if @@ -84,15 +89,27 @@ module btb #(parameter Depth = 10 ) ( MatchM ? {InstrClassM, IEUAdrM} : {InstrClassW, IEUAdrW} ; - assign {BTBPredInstrClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredictionF}; + assign {BTBIClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF}; - assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; // An optimization may be using a PC relative address. ram2p1r1wbe #(2**Depth, `XLEN+4) memory( - .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredictionF), - .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1)); + .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF), + .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1)); flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); + // BTAE is not strickly necessary. However it is used by two parts of wally. + // 1. It gates updates to the BTB when the prediction does not change. This save power. + // 2. BTAWrongE is used by the performance counters to track when the BTB's BTA or instruction class is wrong. + flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); + assign BTAWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]); + + flopenrc #(1) BTAWrongMReg(clk, reset, FlushM, ~StallM, BTAWrongE, BTAWrongM); + assign BTBWrongM = BTAWrongM | IClassWrongM; + + flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); + flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); + + endmodule diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index 70c03afb0..66b2b4842 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -35,18 +35,18 @@ module gshare #(parameter k = 10, input logic reset, input logic StallF, StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, - output logic [1:0] DirPredictionF, - output logic DirPredictionWrongE, + output logic [1:0] BPDirPredF, + output logic BPDirPredWrongE, // update - input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW, + input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, input logic BPBranchF, BranchD, BranchE, BranchM, BranchW, PCSrcE ); logic MatchF, MatchD, MatchE, MatchM, MatchW; logic MatchX; - logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE, ForwardNewDirPredictionF; - logic [1:0] NewDirPredictionE, NewDirPredictionM, NewDirPredictionW; + logic [1:0] TableBPDirPredF, BPDirPredD, BPDirPredE, FwdNewDirPredF; + logic [1:0] NewBPDirPredE, NewBPDirPredM, NewBPDirPredW; logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM, IndexW; @@ -76,33 +76,33 @@ module gshare #(parameter k = 10, assign MatchW = BranchW & ~FlushW & (IndexF == IndexW); assign MatchX = MatchD | MatchE | MatchM | MatchW; - assign ForwardNewDirPredictionF = MatchD ? {2{DirPredictionD[1]}} : - MatchE ? {NewDirPredictionE} : - MatchM ? {NewDirPredictionM} : - NewDirPredictionW ; + assign FwdNewDirPredF = MatchD ? {2{BPDirPredD[1]}} : + MatchE ? {NewBPDirPredE} : + MatchM ? {NewBPDirPredM} : + NewBPDirPredW ; - assign DirPredictionF = MatchX ? ForwardNewDirPredictionF : TableDirPredictionF; + assign BPDirPredF = MatchX ? FwdNewDirPredF : TableBPDirPredF; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), - .ce1(~StallF), .ce2(~StallM & ~FlushM), + .ce1(~StallF), .ce2(~StallW & ~FlushW), .ra1(IndexNextF), - .rd1(TableDirPredictionF), + .rd1(TableBPDirPredF), .wa2(IndexM), - .wd2(NewDirPredictionM), + .wd2(NewBPDirPredM), .we2(BranchM), .bwe2(1'b1)); - flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); - flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); + flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD); + flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE); - satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); - flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); - flopenrc #(2) NewPredictionRegW(clk, reset, FlushW, ~StallW, NewDirPredictionM, NewDirPredictionW); + satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE)); + flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM); + flopenrc #(2) NewPredictionRegW(clk, reset, FlushW, ~StallW, NewBPDirPredM, NewBPDirPredW); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; + assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE; - assign GHRNextF = BPBranchF ? {DirPredictionF[1], GHRF[k-1:1]} : GHRF; - assign GHRF = BranchD ? {DirPredictionD[1], GHRD[k-1:1]} : GHRD; + assign GHRNextF = BPBranchF ? {BPDirPredF[1], GHRF[k-1:1]} : GHRF; + assign GHRF = BranchD ? {BPDirPredD[1], GHRD[k-1:1]} : GHRD; assign GHRD = BranchE ? {PCSrcE, GHRE[k-1:1]} : GHRE; assign GHRE = BranchM ? {PCSrcM, GHRM[k-1:1]} : GHRM; diff --git a/src/ifu/bpred/gsharebasic.sv b/src/ifu/bpred/gsharebasic.sv index e793e7ac6..130f17328 100644 --- a/src/ifu/bpred/gsharebasic.sv +++ b/src/ifu/bpred/gsharebasic.sv @@ -35,16 +35,16 @@ module gsharebasic #(parameter k = 10, input logic reset, input logic StallF, StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, - output logic [1:0] DirPredictionF, - output logic DirPredictionWrongE, + output logic [1:0] BPDirPredF, + output logic BPDirPredWrongE, // update input logic [`XLEN-1:0] PCNextF, PCM, input logic BranchE, BranchM, PCSrcE ); logic [k-1:0] IndexNextF, IndexM; - logic [1:0] DirPredictionD, DirPredictionE; - logic [1:0] NewDirPredictionE, NewDirPredictionM; + logic [1:0] BPDirPredD, BPDirPredE; + logic [1:0] NewBPDirPredE, NewBPDirPredM; logic [k-1:0] GHRF, GHRD, GHRE, GHRM, GHR; logic [k-1:0] GHRNext; @@ -61,19 +61,19 @@ module gsharebasic #(parameter k = 10, ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallW & ~FlushW), .ra1(IndexNextF), - .rd1(DirPredictionF), + .rd1(BPDirPredF), .wa2(IndexM), - .wd2(NewDirPredictionM), + .wd2(NewBPDirPredM), .we2(BranchM), .bwe2(1'b1)); - flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); - flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); + flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD); + flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE); - satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); - flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); + satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE)); + flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; + assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE; assign GHRNext = BranchM ? {PCSrcM, GHR[k-1:1]} : GHR; flopenr #(k) GHRReg(clk, reset, ~StallM & ~FlushM & BranchM, GHRNext, GHR); diff --git a/src/ifu/bpred/icpred.sv b/src/ifu/bpred/icpred.sv new file mode 100644 index 000000000..14e7c8d89 --- /dev/null +++ b/src/ifu/bpred/icpred.sv @@ -0,0 +1,106 @@ +/////////////////////////////////////////// +// icpred.sv +// +// Written: Ross Thomposn ross1728@gmail.com +// Created: February 26, 2023 +// Modified: February 26, 2023 +// +// Purpose: Partial decode of instructions into control flow instructions (cfi)P +// Call, Return, Jump, and Branch +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + + +module icpred #(parameter INSTR_CLASS_PRED = 1)( + input logic clk, reset, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, + input logic [31:0] PostSpillInstrRawF, InstrD, // Instruction + input logic BranchD, BranchE, + input logic JumpD, JumpE, + output logic BranchM, BranchW, + output logic JumpM, JumpW, + output logic CallD, CallE, CallM, CallW, + output logic ReturnD, ReturnE, ReturnM, ReturnW, + input logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF, + output logic BPCallF, BPReturnF, BPJumpF, BPBranchF, + output logic IClassWrongM, BPReturnWrongD, IClassWrongE +); + + logic IClassWrongD; + logic BPBranchD, BPJumpD, BPReturnD, BPCallD; + + if (!INSTR_CLASS_PRED) begin : DirectClassDecode + // This section is mainly for testing, verification, and PPA comparison. + // An alternative to using the BTB to store the instruction class is to partially decode + // the instructions in the Fetch stage into, Call, Return, Jump, and Branch instructions. + // This logic is not described in the text book as of 23 February 2023. + logic ccall, cj, cjr, ccallr, CJumpF, CBranchF; + logic NCJumpF, NCBranchF; + + if(`C_SUPPORTED) begin + logic [4:0] CompressedOpcF; + assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; + assign ccall = CompressedOpcF == 5'h09 & `XLEN == 32; + assign cj = CompressedOpcF == 5'h0d; + assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + assign ccallr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + assign CJumpF = ccall | cj | cjr | ccallr; + assign CBranchF = CompressedOpcF[4:1] == 4'h7; + end else begin + assign {ccall, cj, cjr, ccallr, CJumpF, CBranchF} = '0; + end + + assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; + assign NCBranchF = PostSpillInstrRawF[6:0] == 7'h63; + + assign BPBranchF = NCBranchF | (`C_SUPPORTED & CBranchF); + assign BPJumpF = NCJumpF | (`C_SUPPORTED & (CJumpF)); + assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // returnurn must returnurn to ra or r5 + (`C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); + + assign BPCallF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // call(r) must link to ra or x5 + (`C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); + + end else begin + // This section connects the BTB's instruction class prediction. + assign {BPCallF, BPReturnF, BPJumpF, BPBranchF} = {BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}; + end + + assign ReturnD = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // returnurn must returnurn to ra or x5 + assign CallD = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // call(r) must link to ra or x5 + + flopenrc #(2) InstrClassRegE(clk, reset, FlushE, ~StallE, {CallD, ReturnD}, {CallE, ReturnE}); + flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, {CallE, ReturnE, JumpE, BranchE}, {CallM, ReturnM, JumpM, BranchM}); + flopenrc #(4) InstrClassRegW(clk, reset, FlushM, ~StallW, {CallM, ReturnM, JumpM, BranchM}, {CallW, ReturnW, JumpW, BranchW}); + + // branch predictor + flopenrc #(1) BPClassWrongRegM(clk, reset, FlushM, ~StallM, IClassWrongE, IClassWrongM); + flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, IClassWrongD, IClassWrongE); + + // pipeline the predicted class + flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, {BPCallF, BPReturnF, BPJumpF, BPBranchF}, {BPCallD, BPReturnD, BPJumpD, BPBranchD}); + + // branch class prediction wrong. + assign IClassWrongD = |({BPCallD, BPReturnD, BPJumpD, BPBranchD} ^ {CallD, ReturnD, JumpD, BranchD}); + assign BPReturnWrongD = BPReturnD ^ ReturnD; + +endmodule diff --git a/src/ifu/bpred/twoBitPredictor.sv b/src/ifu/bpred/twoBitPredictor.sv index 58bf1c6bd..7011a0580 100644 --- a/src/ifu/bpred/twoBitPredictor.sv +++ b/src/ifu/bpred/twoBitPredictor.sv @@ -34,8 +34,8 @@ module twoBitPredictor #(parameter k = 10) ( input logic StallF, StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, input logic [`XLEN-1:0] PCNextF, PCM, - output logic [1:0] DirPredictionF, - output logic DirPredictionWrongE, + output logic [1:0] BPDirPredF, + output logic BPDirPredWrongE, input logic BranchE, BranchM, input logic PCSrcE ); @@ -43,8 +43,8 @@ module twoBitPredictor #(parameter k = 10) ( logic [k-1:0] IndexNextF, IndexM; logic [1:0] PredictionMemory; logic DoForwarding, DoForwardingF; - logic [1:0] DirPredictionD, DirPredictionE; - logic [1:0] NewDirPredictionE, NewDirPredictionM; + logic [1:0] BPDirPredD, BPDirPredE; + logic [1:0] NewBPDirPredE, NewBPDirPredM; // hashing function for indexing the PC // We have k bits to index, but XLEN bits as the input. @@ -57,19 +57,19 @@ module twoBitPredictor #(parameter k = 10) ( ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallW & ~FlushW), .ra1(IndexNextF), - .rd1(DirPredictionF), + .rd1(BPDirPredF), .wa2(IndexM), - .wd2(NewDirPredictionM), + .wd2(NewBPDirPredM), .we2(BranchM), .bwe2(1'b1)); - flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); - flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); + flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD); + flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchE; + assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE; - satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); - flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); + satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE)); + flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM); endmodule diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 887d1f45c..b049a956f 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -54,22 +54,22 @@ module ifu ( input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address output logic [`XLEN-1:0] PCE, // Execution stage instruction address - output logic BPPredWrongE, // Prediction is wrong - output logic BPPredWrongM, // Prediction is wrong + output logic BPWrongE, // Prediction is wrong + output logic BPWrongM, // Prediction is wrong // Mem output logic CommittedF, // I$ or bus memory operation started, delay interrupts input logic [`XLEN-1:0] UnalignedPCNextF, // The next PCF, but not aligned to 2 bytes. - output logic [`XLEN-1:0] PCNext2F, // Selected PC between branch prediction and next valid PC if CSRWriteFence + output logic [`XLEN-1:0] PC2NextF, // Selected PC between branch prediction and next valid PC if CSRWriteFence output logic [31:0] InstrD, // The decoded instruction in Decode stage output logic [31:0] InstrM, // The decoded instruction in Memory stage output logic [`XLEN-1:0] PCM, // Memory stage instruction address // branch predictor output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br output logic JumpOrTakenBranchM, - output logic DirPredictionWrongM, // Prediction direction is wrong + output logic BPDirPredWrongM, // Prediction direction is wrong output logic BTBPredPCWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong - output logic PredictionInstrClassWrongM, // Class prediction is wrong + output logic IClassWrongM, // Class prediction is wrong // Faults input logic IllegalBaseInstrD, // Illegal non-compressed instruction input logic IllegalFPUInstrD, // Illegal FP instruction @@ -132,7 +132,7 @@ module ifu ( logic IFUCacheBusStallD; // EIther I$ or bus busy with multicycle operation logic GatedStallD; // StallD gated by selected next spill // branch predictor signal - logic [`XLEN-1:0] PCNext1F; // Branch predictor next PCF + logic [`XLEN-1:0] PC1NextF; // Branch predictor next PCF logic BusCommittedF; // Bus memory operation in flight, delay interrupts logic CacheCommittedF; // I$ memory operation started, delay interrupts logic SelIROM; // PMA indicates instruction address is in the IROM @@ -297,8 +297,8 @@ module ifu ( //////////////////////////////////////////////////////////////////////////////////////////////// if(`ZICSR_SUPPORTED | `ZIFENCEI_SUPPORTED) - mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(NextValidPCE), .s(CSRWriteFenceM),.y(PCNext2F)); - else assign PCNext2F = PCNext1F; + mux2 #(`XLEN) pcmux2(.d0(PC1NextF), .d1(NextValidPCE), .s(CSRWriteFenceM),.y(PC2NextF)); + else assign PC2NextF = PC1NextF; assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); @@ -330,14 +330,14 @@ module ifu ( .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, .BranchD, .BranchE, .JumpD, .JumpE, - .InstrD, .PCNextF, .PCPlus2or4F, .PCNext1F, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, - .PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPPredWrongM, - .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM); + .InstrD, .PCNextF, .PCPlus2or4F, .PC1NextF, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, + .PCD, .PCLinkE, .InstrClassM, .BPWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPWrongM, + .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM); end else begin : bpred - mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PCNext1F)); - assign BPPredWrongE = PCSrcE; - assign {InstrClassM, DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM} = '0; + mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PC1NextF)); + assign BPWrongE = PCSrcE; + assign {InstrClassM, BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM, IClassWrongM} = '0; assign NextValidPCE = PCE; end diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 7b765bae0..a54c05b2a 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -37,7 +37,7 @@ module csr #(parameter input logic FlushM, FlushW, input logic StallE, StallM, StallW, input logic [31:0] InstrM, // current instruction - input logic [`XLEN-1:0] PCM, PCNext2F, // program counter, next PC going to trap/return logic + input logic [`XLEN-1:0] PCM, PC2NextF, // program counter, next PC going to trap/return logic input logic [`XLEN-1:0] SrcAM, IEUAdrM, // SrcA and memory address from IEU input logic CSRReadM, CSRWriteM, // read or write CSR input logic TrapM, // trap is occurring @@ -57,11 +57,11 @@ module csr #(parameter input logic SelHPTW, // hardware page table walker active, so base endianness on supervisor mode // inputs for performance counters input logic LoadStallD, - input logic DirPredictionWrongM, + input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, - input logic PredictionInstrClassWrongM, - input logic BPPredWrongM, // branch predictor is wrong + input logic IClassWrongM, + input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, @@ -155,7 +155,7 @@ module csr #(parameter // A return sets the PC to MEPC or SEPC assign RetM = mretM | sretM; mux2 #(`XLEN) epcmux(SEPC_REGW, MEPC_REGW, mretM, EPC); - mux3 #(`XLEN) pcmux3(PCNext2F, EPC, TrapVectorM, {TrapM, RetM}, UnalignedPCNextF); + mux3 #(`XLEN) pcmux3(PC2NextF, EPC, TrapVectorM, {TrapM, RetM}, UnalignedPCNextF); /////////////////////////////////////////// // CSRWriteValM @@ -259,7 +259,7 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, .InstrValidNotFlushedM, .LoadStallD, .CSRMWriteM, - .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM, .JumpOrTakenBranchM, .BPPredWrongM, + .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .JumpOrTakenBranchM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index d61835826..f2b4b0e71 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -44,11 +44,11 @@ module csrc #(parameter input logic StallE, StallM, input logic FlushM, input logic InstrValidNotFlushedM, LoadStallD, CSRMWriteM, - input logic DirPredictionWrongM, + input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, - input logic PredictionInstrClassWrongM, - input logic BPPredWrongM, // branch predictor is wrong + input logic IClassWrongM, + input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, @@ -86,18 +86,18 @@ module csrc #(parameter assign CounterEvent[`COUNTERS-1:3] = 0; end else begin: cevent // User-defined counters assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. - assign CounterEvent[4] = DirPredictionWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction + assign CounterEvent[4] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; // branch instruction assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; // branch predictor wrong target assign CounterEvent[7] = JumpOrTakenBranchM & InstrValidNotFlushedM; // jump or taken branch instructions assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address assign CounterEvent[9] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions - assign CounterEvent[10] = PredictionInstrClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong + assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong assign CounterEvent[11] = DCacheAccess & InstrValidNotFlushedM; // data cache access assign CounterEvent[12] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss assign CounterEvent[13] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access assign CounterEvent[14] = ICacheMiss; // instruction cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[15] = BPPredWrongM & InstrValidNotFlushedM; // branch predictor wrong + assign CounterEvent[15] = BPWrongM & InstrValidNotFlushedM; // branch predictor wrong assign CounterEvent[`COUNTERS-1:16] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 300da8a65..f7a3caad6 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -38,7 +38,7 @@ module privileged ( input logic [`XLEN-1:0] SrcAM, // GPR register to write input logic [31:0] InstrM, // Instruction input logic [`XLEN-1:0] IEUAdrM, // address from IEU - input logic [`XLEN-1:0] PCM, PCNext2F, // program counter, next PC going to trap/return PC logic + input logic [`XLEN-1:0] PCM, PC2NextF, // program counter, next PC going to trap/return PC logic // control signals input logic InstrValidM, // Current instruction is valid (not flushed) input logic CommittedM, CommittedF, // current instruction is using bus; don't interrupt @@ -46,11 +46,11 @@ module privileged ( // processor events for performance counter logging input logic FRegWriteM, // instruction will write floating-point registers input logic LoadStallD, // load instruction is stalling - input logic DirPredictionWrongM, // branch predictor guessed wrong directoin + input logic BPDirPredWrongM, // branch predictor guessed wrong directoin input logic BTBPredPCWrongM, // branch predictor guessed wrong target input logic RASPredPCWrongM, // return adddress stack guessed wrong target - input logic PredictionInstrClassWrongM, // branch predictor guessed wrong instruction class - input logic BPPredWrongM, // branch predictor is wrong + input logic IClassWrongM, // branch predictor guessed wrong instruction class + input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, // actual instruction class input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, // data cache miss @@ -121,12 +121,12 @@ module privileged ( // Control and Status Registers csr csr(.clk, .reset, .FlushM, .FlushW, .StallE, .StallM, .StallW, - .InstrM, .PCM, .SrcAM, .IEUAdrM, .PCNext2F, + .InstrM, .PCM, .SrcAM, .IEUAdrM, .PC2NextF, .CSRReadM, .CSRWriteM, .TrapM, .mretM, .sretM, .wfiM, .IntPendingM, .InterruptM, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, - .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPPredWrongM, - .PredictionInstrClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .JumpOrTakenBranchM, + .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, + .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .JumpOrTakenBranchM, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, .STATUS_MIE, .STATUS_SIE, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TW, .STATUS_FS, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index d5458ed74..8f8885934 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -66,7 +66,7 @@ module wallypipelinedcore ( logic [`XLEN-1:0] PCFSpill, PCE, PCLinkE; logic [`XLEN-1:0] PCM; logic [`XLEN-1:0] CSRReadValW, MDUResultW; - logic [`XLEN-1:0] UnalignedPCNextF, PCNext2F; + logic [`XLEN-1:0] UnalignedPCNextF, PC2NextF; logic [1:0] MemRWM; logic InstrValidD, InstrValidE, InstrValidM; logic InstrMisalignedFaultM; @@ -140,11 +140,11 @@ module wallypipelinedcore ( logic LSUHWRITE; logic LSUHREADY; - logic BPPredWrongE, BPPredWrongM; - logic DirPredictionWrongM; + logic BPWrongE, BPWrongM; + logic BPDirPredWrongM; logic BTBPredPCWrongM; logic RASPredPCWrongM; - logic PredictionInstrClassWrongM; + logic IClassWrongM; logic [3:0] InstrClassM; logic InstrAccessFaultF, HPTWInstrAccessFaultM; logic [2:0] LSUHSIZE; @@ -169,15 +169,15 @@ module wallypipelinedcore ( .InstrValidM, .InstrValidE, .InstrValidD, .BranchD, .BranchE, .JumpD, .JumpE, // Fetch - .HRDATA, .PCFSpill, .IFUHADDR, .PCNext2F, + .HRDATA, .PCFSpill, .IFUHADDR, .PC2NextF, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE, .ICacheAccess, .ICacheMiss, // Execute - .PCLinkE, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCE, .BPPredWrongE, .BPPredWrongM, + .PCLinkE, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCE, .BPWrongE, .BPWrongM, // Mem .CommittedF, .UnalignedPCNextF, .InvalidateICacheM, .CSRWriteFenceM, - .InstrD, .InstrM, .PCM, .InstrClassM, .DirPredictionWrongM, .JumpOrTakenBranchM, - .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM, + .InstrD, .InstrM, .PCM, .InstrClassM, .BPDirPredWrongM, .JumpOrTakenBranchM, + .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, // Faults out .IllegalBaseInstrD, .IllegalFPUInstrD, .InstrPageFaultF, .IllegalIEUFPUInstrD, .InstrMisalignedFaultM, // mmu management @@ -268,7 +268,7 @@ module wallypipelinedcore ( // global stall and flush control hazard hzu( - .BPPredWrongE, .CSRWriteFenceM, .RetM, .TrapM, + .BPWrongE, .CSRWriteFenceM, .RetM, .TrapM, .LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD, .LSUStallM, .IFUStallF, .FCvtIntStallD, .FPUStallD, @@ -284,13 +284,13 @@ module wallypipelinedcore ( privileged priv( .clk, .reset, .FlushD, .FlushE, .FlushM, .FlushW, .StallD, .StallE, .StallM, .StallW, - .CSRReadM, .CSRWriteM, .SrcAM, .PCM, .PCNext2F, + .CSRReadM, .CSRWriteM, .SrcAM, .PCM, .PC2NextF, .InstrM, .CSRReadValW, .UnalignedPCNextF, .RetM, .TrapM, .sfencevmaM, .InstrValidM, .CommittedM, .CommittedF, .FRegWriteM, .LoadStallD, - .DirPredictionWrongM, .BTBPredPCWrongM, .BPPredWrongM, - .RASPredPCWrongM, .PredictionInstrClassWrongM, + .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, + .RASPredPCWrongM, .IClassWrongM, .InstrClassM, .JumpOrTakenBranchM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, .InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM, .InstrMisalignedFaultM, .IllegalIEUFPUInstrD, @@ -304,7 +304,7 @@ module wallypipelinedcore ( .FRM_REGW,.BreakpointFaultM, .EcallFaultM, .WFIStallM, .BigEndianM); end else begin assign CSRReadValW = 0; - assign UnalignedPCNextF = PCNext2F; + assign UnalignedPCNextF = PC2NextF; assign RetM = 0; assign TrapM = 0; assign WFIStallM = 0; diff --git a/testbench/testbench_imperas.sv b/testbench/testbench_imperas.sv index f26339007..cbca49275 100644 --- a/testbench/testbench_imperas.sv +++ b/testbench/testbench_imperas.sv @@ -137,7 +137,12 @@ module testbench; .CMP_CSR (1) ) idv_trace2api(rvvi); + int PRIV_RWX = RVVI_MEMORY_PRIVILEGE_READ | RVVI_MEMORY_PRIVILEGE_WRITE | RVVI_MEMORY_PRIVILEGE_EXEC; + int PRIV_RW = RVVI_MEMORY_PRIVILEGE_READ | RVVI_MEMORY_PRIVILEGE_WRITE; + int PRIV_X = RVVI_MEMORY_PRIVILEGE_EXEC; + initial begin + MAX_ERRS = 3; // Initialize REF (do this before initializing the DUT) @@ -158,6 +163,41 @@ module testbench; void'(rvviRefCsrSetVolatile(0, 32'hC02)); // INSTRET void'(rvviRefCsrSetVolatile(0, 32'hB02)); // MINSTRET void'(rvviRefCsrSetVolatile(0, 32'hC01)); // TIME + + // cannot predict this register due to latency between + // pending and taken + void'(rvviRefCsrSetVolatile(0, 32'h344)); + rvviRefCsrCompareEnable(0, 32'h344, RVVI_FALSE); + + // Memory lo, hi, priv (RVVI_MEMORY_PRIVILEGE_{READ,WRITE,EXEC}) + void'(rvviRefMemorySetPrivilege(56'h0, 56'h7fffffffff, 0)); + if (`BOOTROM_SUPPORTED) + void'(rvviRefMemorySetPrivilege(`BOOTROM_BASE, (`BOOTROM_BASE + `BOOTROM_RANGE), PRIV_X)); + if (`UNCORE_RAM_SUPPORTED) + void'(rvviRefMemorySetPrivilege(`UNCORE_RAM_BASE, (`UNCORE_RAM_BASE + `UNCORE_RAM_RANGE), PRIV_RWX)); + if (`EXT_MEM_SUPPORTED) + void'(rvviRefMemorySetPrivilege(`EXT_MEM_BASE, (`EXT_MEM_BASE + `EXT_MEM_RANGE), PRIV_RWX)); + + if (`CLINT_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE))); + end + if (`GPIO_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`GPIO_BASE, (`GPIO_BASE + `GPIO_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`GPIO_BASE, (`GPIO_BASE + `GPIO_RANGE))); + end + if (`UART_SUPPORTED) begin + void'(rvviRefMemorySetVolatile(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE))); + void'(rvviRefMemorySetPrivilege(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE), PRIV_RW)); + end + if (`PLIC_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`PLIC_BASE, (`PLIC_BASE + `PLIC_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`PLIC_BASE, (`PLIC_BASE + `PLIC_RANGE))); + end + if (`SDC_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`SDC_BASE, (`SDC_BASE + `SDC_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`SDC_BASE, (`SDC_BASE + `SDC_RANGE))); + end if(`XLEN==32) begin void'(rvviRefCsrSetVolatile(0, 32'hC80)); // CYCLEH @@ -166,14 +206,15 @@ module testbench; void'(rvviRefCsrSetVolatile(0, 32'hB82)); // MINSTRETH end - // Enable the trace2log module - if ($value$plusargs("TRACE2LOG_ENABLE=%d", TRACE2LOG_ENABLE)) begin - msgnote($sformatf("%m @ t=%0t: TRACE2LOG_ENABLE is %0d", $time, TRACE2LOG_ENABLE)); - end - - if ($value$plusargs("TRACE2COV_ENABLE=%d", TRACE2COV_ENABLE)) begin - msgnote($sformatf("%m @ t=%0t: TRACE2COV_ENABLE is %0d", $time, TRACE2COV_ENABLE)); - end + // These should be done in the attached client +// // Enable the trace2log module +// if ($value$plusargs("TRACE2LOG_ENABLE=%d", TRACE2LOG_ENABLE)) begin +// msgnote($sformatf("%m @ t=%0t: TRACE2LOG_ENABLE is %0d", $time, TRACE2LOG_ENABLE)); +// end +// +// if ($value$plusargs("TRACE2COV_ENABLE=%d", TRACE2COV_ENABLE)) begin +// msgnote($sformatf("%m @ t=%0t: TRACE2COV_ENABLE is %0d", $time, TRACE2COV_ENABLE)); +// end end final begin