More cleanup of IFU.

This commit is contained in:
Ross Thompson 2022-02-01 14:32:27 -06:00
parent 554df2377e
commit 910d16b642
2 changed files with 66 additions and 115 deletions

View File

@ -35,10 +35,11 @@
module bpred module bpred
(input logic clk, reset, (input logic clk, reset,
input logic StallF, StallD, StallE, input logic StallF, StallD, StallE, StallM,
input logic FlushF, FlushD, FlushE, input logic FlushF, FlushD, FlushE, FlushM,
// Fetch stage // Fetch stage
// the prediction // the prediction
input logic [31:0] InstrD,
input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list
output logic [`XLEN-1:0] BPPredPCF, output logic [`XLEN-1:0] BPPredPCF,
output logic SelBPPredF, output logic SelBPPredF,
@ -53,13 +54,14 @@ module bpred
input logic [`XLEN-1:0] IEUAdrE, // The branch destination if the branch is taken. input logic [`XLEN-1:0] IEUAdrE, // The branch destination if the branch is taken.
input logic [`XLEN-1:0] PCD, // The address the branch predictor took. input logic [`XLEN-1:0] PCD, // The address the branch predictor took.
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
input logic [4:0] InstrClassE, output logic [4:0] InstrClassM,
// Report branch prediction status // Report branch prediction status
output logic BPPredWrongE, output logic BPPredWrongE,
output logic BPPredDirWrongE, output logic BPPredWrongM,
output logic BTBPredPCWrongE, output logic BPPredDirWrongM,
output logic RASPredPCWrongE, output logic BTBPredPCWrongM,
output logic BPPredClassNonCFIWrongE output logic RASPredPCWrongM,
output logic BPPredClassNonCFIWrongM
); );
logic BTBValidF; logic BTBValidF;
@ -71,14 +73,14 @@ module bpred
logic FallThroughWrongE; logic FallThroughWrongE;
logic PredictionPCWrongE; logic PredictionPCWrongE;
logic PredictionInstrClassWrongE; logic PredictionInstrClassWrongE;
logic [4:0] InstrClassD, InstrClassE;
logic BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE;
// Part 1 branch direction prediction // Part 1 branch direction prediction
if (`BPTYPE == "BPTWOBIT") begin:Predictor if (`BPTYPE == "BPTWOBIT") begin:Predictor
twoBitPredictor DirPredictor(.clk(clk), twoBitPredictor DirPredictor(.clk, .reset, .StallF,
.reset(reset),
.StallF(StallF),
.LookUpPC(PCNextF), .LookUpPC(PCNextF),
.Prediction(BPPredF), .Prediction(BPPredF),
// update // update
@ -87,58 +89,27 @@ module bpred
.UpdatePrediction(UpdateBPPredE)); .UpdatePrediction(UpdateBPPredE));
end else if (`BPTYPE == "BPGLOBAL") begin:Predictor end else if (`BPTYPE == "BPGLOBAL") begin:Predictor
globalHistoryPredictor DirPredictor(.clk, .reset, .StallF, .StallE,
.PCNextF, .BPPredF,
.InstrClassE, .BPInstrClassF, .BPInstrClassD, .BPInstrClassE, .BPPredDirWrongE,
.PCE, .PCSrcE, .UpdateBPPredE);
globalHistoryPredictor DirPredictor(.clk(clk),
.reset(reset),
.*, // Stalls and flushes
.PCNextF(PCNextF),
.BPPredF(BPPredF),
// update
.InstrClassE(InstrClassE),
.BPInstrClassE(BPInstrClassE),
.BPPredDirWrongE(BPPredDirWrongE),
.PCE(PCE),
.PCSrcE(PCSrcE),
.UpdateBPPredE(UpdateBPPredE));
end else if (`BPTYPE == "BPGSHARE") begin:Predictor end else if (`BPTYPE == "BPGSHARE") begin:Predictor
gsharePredictor DirPredictor(.clk, .reset, .StallF, .StallE,
gsharePredictor DirPredictor(.clk(clk), .PCNextF, .BPPredF,
.reset(reset), .InstrClassE, .BPInstrClassF, .BPInstrClassD, .BPInstrClassE, .BPPredDirWrongE,
.*, // Stalls and flushes .PCE, .PCSrcE, .UpdateBPPredE);
.PCNextF(PCNextF),
.BPPredF(BPPredF),
// update
.InstrClassE(InstrClassE),
.BPInstrClassE(BPInstrClassE),
.BPPredDirWrongE(BPPredDirWrongE),
.PCE(PCE),
.PCSrcE(PCSrcE),
.UpdateBPPredE(UpdateBPPredE));
end end
else if (`BPTYPE == "BPLOCALPAg") begin:Predictor else if (`BPTYPE == "BPLOCALPAg") begin:Predictor
localHistoryPredictor DirPredictor(.clk(clk), localHistoryPredictor DirPredictor(.clk,
.reset(reset), .reset, .StallF, .StallE, .FlushF,
.*, // Stalls and flushes
.LookUpPC(PCNextF), .LookUpPC(PCNextF),
.Prediction(BPPredF), .Prediction(BPPredF),
// update // update
.UpdatePC(PCE), .UpdatePC(PCE),
.UpdateEN(InstrClassE[0] & ~StallE), .UpdateEN(InstrClassE[0] & ~StallE),
.PCSrcE(PCSrcE), .PCSrcE,
.UpdatePrediction(UpdateBPPredE));
end
else if (`BPTYPE == "BPLOCALPAg") begin:Predictor
localHistoryPredictor DirPredictor(.clk(clk),
.reset(reset),
.*, // Stalls and flushes
.LookUpPC(PCNextF),
.Prediction(BPPredF),
// update
.UpdatePC(PCE),
.UpdateEN(InstrClassE[0] & ~StallE),
.PCSrcE(PCSrcE),
.UpdatePrediction(UpdateBPPredE)); .UpdatePrediction(UpdateBPPredE));
end end
@ -201,15 +172,35 @@ module bpred
.d(BPPredD), .d(BPPredD),
.q(BPPredE)); .q(BPPredE));
// the branch predictor needs a compact decoding of the instruction class.
// *** consider adding in the alternate return address x5 for returns.
assign InstrClassD[4] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or r5
assign InstrClassD[3] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5
assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01; // jump register, but not return
assign InstrClassD[1] = InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01; // jump, RD != x1 or x5
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
flopenrc #(5) InstrClassRegE(.clk, .reset, .en(~StallE), .clear(FlushE), .d(InstrClassD), .q(InstrClassE));
flopenrc #(5) InstrClassRegM(.clk, .reset, .en(~StallM), .clear(FlushM), .d(InstrClassE), .q(InstrClassM));
flopenrc #(1) BPPredWrongMReg(.clk, .reset, .en(~StallM), .clear(FlushM), .d(BPPredWrongE), .q(BPPredWrongM));
// branch predictor
flopenrc #(4) BPPredWrongRegM(.clk, .reset, .en(~StallM), .clear(FlushM),
.d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}),
.q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM}));
// pipeline the class // pipeline the class
flopenrc #(5) InstrClassRegD(.clk(clk), flopenrc #(5) BPInstrClassRegD(.clk(clk),
.reset(reset), .reset(reset),
.en(~StallD), .en(~StallD),
.clear(FlushD), .clear(FlushD),
.d(BPInstrClassF), .d(BPInstrClassF),
.q(BPInstrClassD)); .q(BPInstrClassD));
flopenrc #(5) InstrClassRegE(.clk(clk), flopenrc #(5) BPInstrClassRegE(.clk(clk),
.reset(reset), .reset(reset),
.en(~StallE), .en(~StallE),
.clear(FlushE), .clear(FlushE),

View File

@ -100,8 +100,6 @@ module ifu (
localparam [31:0] nop = 32'h00000013; // instruction for NOP localparam [31:0] nop = 32'h00000013; // instruction for NOP
logic [`XLEN-1:0] PCBPWrongInvalidate; logic [`XLEN-1:0] PCBPWrongInvalidate;
logic BPPredWrongM;
(* mark_debug = "true" *) logic [`PA_BITS-1:0] PCPF; // used to either truncate or expand PCPF and PCNextF into `PA_BITS width. (* mark_debug = "true" *) logic [`PA_BITS-1:0] PCPF; // used to either truncate or expand PCPF and PCNextF into `PA_BITS width.
logic [`XLEN+1:0] PCFExt; logic [`XLEN+1:0] PCFExt;
@ -159,8 +157,7 @@ module ifu (
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW); .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW);
end else begin end else begin
assign {ITLBMissF, InstrAccessFaultF} = '0; assign {ITLBMissF, InstrAccessFaultF, InstrPageFaultF} = '0;
assign InstrPageFaultF = '0;
assign PCPF = PCF; assign PCPF = PCF;
assign CacheableF = '1; assign CacheableF = '1;
end end
@ -182,22 +179,11 @@ module ifu (
end else begin : bus end else begin : bus
localparam integer WORDSPERLINE = `MEM_ICACHE ? `ICACHE_LINELENINBITS/`XLEN : 1; localparam integer WORDSPERLINE = `MEM_ICACHE ? `ICACHE_LINELENINBITS/`XLEN : 1;
localparam integer LOGWPL = `MEM_ICACHE ? $clog2(WORDSPERLINE) : 1;
localparam integer LINELEN = `MEM_ICACHE ? `ICACHE_LINELENINBITS : `XLEN; localparam integer LINELEN = `MEM_ICACHE ? `ICACHE_LINELENINBITS : `XLEN;
localparam integer WordCountThreshold = `MEM_ICACHE ? WORDSPERLINE - 1 : 0;
localparam integer LINEBYTELEN = LINELEN/8;
localparam integer OFFSETLEN = $clog2(LINEBYTELEN);
logic [LOGWPL-1:0] WordCount;
logic SelUncachedAdr;
logic [LINELEN-1:0] ICacheMemWriteData; logic [LINELEN-1:0] ICacheMemWriteData;
logic [`PA_BITS-1:0] LocalIFUBusAdr;
logic [`PA_BITS-1:0] ICacheBusAdr; logic [`PA_BITS-1:0] ICacheBusAdr;
logic ICacheBusAck; logic ICacheBusAck;
genvar index;
busdp #(WORDSPERLINE, LINELEN) busdp #(WORDSPERLINE, LINELEN)
busdp(.clk, .reset, busdp(.clk, .reset,
@ -226,7 +212,7 @@ module ifu (
.CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),
.FinalWriteData('0), .FinalWriteData('0),
.RW(IFURWF), .RW(IFURWF),
.Atomic(2'b00), .FlushCache(1'b0), .Atomic('0), .FlushCache('0),
.NextAdr(PCNextFSpill[11:0]), .NextAdr(PCNextFSpill[11:0]),
.PAdr(PCPF), .PAdr(PCPF),
.CacheCommitted(), .InvalidateCacheM(InvalidateICacheM)); .CacheCommitted(), .InvalidateCacheM(InvalidateICacheM));
@ -237,13 +223,8 @@ module ifu (
end end
end end
// branch predictor signal // branch predictor signal
logic SelBPPredF; logic [`XLEN-1:0] PCNext1F, PCNext2F;
logic [`XLEN-1:0] BPPredPCF, PCNext0F, PCNext1F, PCNext2F;
logic [4:0] InstrClassD, InstrClassE;
assign IFUCacheBusStallF = ICacheStallF | BusStall; assign IFUCacheBusStallF = ICacheStallF | BusStall;
assign IFUStallF = IFUCacheBusStallF | SelNextSpillF; assign IFUStallF = IFUCacheBusStallF | SelNextSpillF;
@ -253,14 +234,9 @@ module ifu (
assign PrivilegedChangePCM = RetM | TrapM; assign PrivilegedChangePCM = RetM | TrapM;
// *** move unnecessary muxes into BPRED_ENABLED
mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F), .d1(BPPredPCF), .s(SelBPPredF), .y(PCNext0F));
mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F));
// The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE. // The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE.
mux2 #(`XLEN) pccorrectemux(.d0(PCLinkE), .d1(IEUAdrE), .s(PCSrcE), .y(PCCorrectE)); mux2 #(`XLEN) pccorrectemux(.d0(PCLinkE), .d1(IEUAdrE), .s(PCSrcE), .y(PCCorrectE));
mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PCBPWrongInvalidate), .s(InvalidateICacheM), .y(PCNext2F)); mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PCBPWrongInvalidate), .s(InvalidateICacheM), .y(PCNext2F));
// Mux only required on instruction class miss prediction.
mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF), .s(BPPredWrongM), .y(PCBPWrongInvalidate));
mux2 #(`XLEN) pcmux3(.d0(PCNext2F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM), .y(UnalignedPCNextF)); mux2 #(`XLEN) pcmux3(.d0(PCNext2F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM), .y(UnalignedPCNextF));
assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment
@ -268,38 +244,28 @@ module ifu (
// branch and jump predictor // branch and jump predictor
if (`BPRED_ENABLED) begin : bpred if (`BPRED_ENABLED) begin : bpred
// *** move the rest of this hardware into branch predictor including instruction class registers logic SelBPPredF;
logic BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE; logic [`XLEN-1:0] BPPredPCF, PCNext0F;
logic BPPredWrongM;
flopenrc #(1) BPPredWrongMReg(.clk, .reset, .en(~StallM), .clear(FlushM), .d(BPPredWrongE), .q(BPPredWrongM));
bpred bpred(.clk, .reset, bpred bpred(.clk, .reset,
.StallF, .StallD, .StallE, .StallF, .StallD, .StallE, .StallM,
.FlushF, .FlushD, .FlushE, .FlushF, .FlushD, .FlushE, .FlushM,
.PCNextF, .BPPredPCF, .SelBPPredF, .PCE, .PCSrcE, .IEUAdrE, .InstrD, .PCNextF, .BPPredPCF, .SelBPPredF, .PCE, .PCSrcE, .IEUAdrE,
.PCD, .PCLinkE, .InstrClassE, .BPPredWrongE, .BPPredDirWrongE, .PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .BPPredWrongM,
.BTBPredPCWrongE, .RASPredPCWrongE, .BPPredClassNonCFIWrongE); .BPPredDirWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPPredClassNonCFIWrongM);
// the branch predictor needs a compact decoding of the instruction class. mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F), .d1(BPPredPCF), .s(SelBPPredF), .y(PCNext0F));
// *** consider adding in the alternate return address x5 for returns. mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F));
assign InstrClassD[4] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or r5 // Mux only required on instruction class miss prediction.
assign InstrClassD[3] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF),
assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01; // jump register, but not return .s(BPPredWrongM), .y(PCBPWrongInvalidate));
assign InstrClassD[1] = InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01; // jump, RD != x1 or x5
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
// branch predictor
flopenrc #(5) InstrClassRegE(.clk, .reset, .en(~StallE), .clear(FlushE), .d(InstrClassD), .q(InstrClassE));
flopenrc #(5) InstrClassRegM(.clk, .reset, .en(~StallM), .clear(FlushM), .d(InstrClassE), .q(InstrClassM));
flopenrc #(4) BPPredWrongRegM(.clk, .reset, .en(~StallM), .clear(FlushM),
.d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}),
.q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM}));
end else begin : bpred end else begin : bpred
assign BPPredPCF = '0;
assign BPPredWrongE = PCSrcE; assign BPPredWrongE = PCSrcE;
assign BPPredWrongM = '0; assign {BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM} = '0;
assign {SelBPPredF, BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM} = '0; assign PCNext1F = PCPlus2or4F;
assign PCBPWrongInvalidate = PCE;
end end
// pcadder // pcadder
@ -313,16 +279,12 @@ module ifu (
else PCPlus2or4F = {PCF[`XLEN-1:2], 2'b10}; else PCPlus2or4F = {PCF[`XLEN-1:2], 2'b10};
else PCPlus2or4F = {PCPlusUpperF, PCF[1:0]}; // add 4 else PCPlus2or4F = {PCPlusUpperF, PCF[1:0]}; // add 4
// Decode stage pipeline register and logic // Decode stage pipeline register and logic
flopenrc #(`XLEN) PCDReg(clk, reset, FlushD, ~StallD, PCF, PCD); flopenrc #(`XLEN) PCDReg(clk, reset, FlushD, ~StallD, PCF, PCD);
// expand 16-bit compressed instructions to 32 bits // expand 16-bit compressed instructions to 32 bits
decompress decomp(.InstrRawD, .InstrD, .IllegalCompInstrD); decompress decomp(.InstrRawD, .InstrD, .IllegalCompInstrD);
assign IllegalIEUInstrFaultD = IllegalBaseInstrFaultD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr assign IllegalIEUInstrFaultD = IllegalBaseInstrFaultD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr
// *** combine these with others in better way, including M, F
// Misaligned PC logic // Misaligned PC logic
// Instruction address misalignement only from br/jal(r) instructions. // Instruction address misalignement only from br/jal(r) instructions.
@ -336,7 +298,6 @@ module ifu (
// Traps: Cant happen. The bottom two bits of MTVEC are ignored so the trap always is to a multiple of 4. See 3.1.7 of the privileged spec. // Traps: Cant happen. The bottom two bits of MTVEC are ignored so the trap always is to a multiple of 4. See 3.1.7 of the privileged spec.
assign BranchMisalignedFaultE = (IEUAdrE[1] & ~`C_SUPPORTED) & PCSrcE; assign BranchMisalignedFaultE = (IEUAdrE[1] & ~`C_SUPPORTED) & PCSrcE;
flopenr #(1) InstrMisalginedReg(clk, reset, ~StallM, BranchMisalignedFaultE, InstrMisalignedFaultM); flopenr #(1) InstrMisalginedReg(clk, reset, ~StallM, BranchMisalignedFaultE, InstrMisalignedFaultM);
// *** Ross Thompson. Check InstrMisalignedAdrM as I believe it is the same as PCF. Should be able to remove.
flopenr #(`XLEN) InstrMisalignedAdrReg(clk, reset, ~StallM, PCNextF, InstrMisalignedAdrM); flopenr #(`XLEN) InstrMisalignedAdrReg(clk, reset, ~StallM, PCNextF, InstrMisalignedAdrM);
// Instruction and PC/PCLink pipeline registers // Instruction and PC/PCLink pipeline registers
@ -347,4 +308,3 @@ module ifu (
flopenr #(`XLEN) PCPDReg(clk, reset, ~StallD, PCPlus2or4F, PCLinkD); flopenr #(`XLEN) PCPDReg(clk, reset, ~StallD, PCPlus2or4F, PCLinkD);
flopenr #(`XLEN) PCPEReg(clk, reset, ~StallE, PCLinkD, PCLinkE); flopenr #(`XLEN) PCPEReg(clk, reset, ~StallE, PCLinkD, PCLinkE);
endmodule endmodule