IFU simplifications.

This commit is contained in:
Ross Thompson 2022-01-26 13:54:59 -06:00
parent 0023c4cb57
commit 2c982dca03

View File

@ -98,7 +98,6 @@ module ifu (
localparam [31:0] nop = 32'h00000013; // instruction for NOP localparam [31:0] nop = 32'h00000013; // instruction for NOP
logic reset_q; // see comment below about PCNextF and icache. logic reset_q; // see comment below about PCNextF and icache.
logic BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE;
logic [`XLEN-1:0] PCBPWrongInvalidate; logic [`XLEN-1:0] PCBPWrongInvalidate;
logic BPPredWrongM; logic BPPredWrongM;
@ -355,16 +354,8 @@ module ifu (
//.y(UnalignedPCNextF)); //.y(UnalignedPCNextF));
.y(PCNext3F)); .y(PCNext3F));
// This mux is not strictly speaking required. Because the icache takes in // This mux is required as PCNextF needs to be the valid reset vector during reset.
// PCNextF rather than PCPF, PCNextF should stay in reset while the cache // Reseting PCF does not accomplish this as PCNextF will be +2/4 more than PCF.
// looks up the addresses. Without this mux PCNextF will increment + 2/4.
// When the icache fsm is out of reset then it will report on the status
// of PCF + 2/4. It will be a miss since this is the very first access.
// On the next cycle the cache will start using PCPF to finish the read.
// Because the granularity of a cache line +2/4 will always fit in the same
// cache line so the mux is not required. I am leaving this comment and mux
// a a reminder as to what is happening in case keep PCNextF at RESET_VECTOR
// during reset becomes a requirement.
mux2 #(`XLEN) pcmux4(.d0(PCNext3F), mux2 #(`XLEN) pcmux4(.d0(PCNext3F),
.d1(`RESET_VECTOR), .d1(`RESET_VECTOR),
.s(`MEM_IROM ? reset : reset_q), .s(`MEM_IROM ? reset : reset_q),
@ -376,8 +367,10 @@ module ifu (
.d(BPPredWrongE), .q(BPPredWrongM)); .d(BPPredWrongE), .q(BPPredWrongM));
mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF), mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF),
.s(BPPredWrongM & InvalidateICacheM), .s(BPPredWrongM), // & InvalidateICacheM *** check with linux.
.y(PCBPWrongInvalidate)); .y(PCBPWrongInvalidate));
// The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE.
assign PCCorrectE = PCSrcE ? IEUAdrE : PCLinkE;
assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment
@ -385,6 +378,8 @@ module ifu (
// branch and jump predictor // branch and jump predictor
if (`BPRED_ENABLED) begin : bpred if (`BPRED_ENABLED) begin : bpred
logic BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE;
bpred bpred(.clk, .reset, bpred bpred(.clk, .reset,
.StallF, .StallD, .StallE, .StallF, .StallD, .StallE,
.FlushF, .FlushD, .FlushE, .FlushF, .FlushD, .FlushE,
@ -392,17 +387,26 @@ module ifu (
.PCD, .PCLinkE, .InstrClassE, .BPPredWrongE, .BPPredDirWrongE, .PCD, .PCLinkE, .InstrClassE, .BPPredWrongE, .BPPredDirWrongE,
.BTBPredPCWrongE, .RASPredPCWrongE, .BPPredClassNonCFIWrongE); .BTBPredPCWrongE, .RASPredPCWrongE, .BPPredClassNonCFIWrongE);
// the branch predictor needs a compact decoding of the instruction class.
// *** consider adding in the alternate return address x5 for returns.
assign InstrClassD[4] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or r5
assign InstrClassD[3] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5
assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01; // jump register, but not return
assign InstrClassD[1] = InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01; // jump, RD != x1 or x5
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
// branch predictor
flopenrc #(5) InstrClassRegE(.clk, .reset, .en(~StallE), .clear(FlushE), .d(InstrClassD), .q(InstrClassE));
flopenrc #(5) InstrClassRegM(.clk, .reset, .en(~StallM), .clear(FlushM), .d(InstrClassE), .q(InstrClassM));
flopenrc #(4) BPPredWrongRegM(.clk, .reset, .en(~StallM), .clear(FlushM),
.d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}),
.q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM}));
end else begin : bpred end else begin : bpred
assign BPPredPCF = {`XLEN{1'b0}}; assign BPPredPCF = '0;
assign SelBPPredF = 1'b0; assign BPPredWrongM = PCSrcE;
assign BPPredWrongE = PCSrcE; assign {SelBPPredF, BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM} = '0;
assign BPPredDirWrongE = 1'b0;
assign BTBPredPCWrongE = 1'b0;
assign RASPredPCWrongE = 1'b0;
assign BPPredClassNonCFIWrongE = 1'b0;
end end
// The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE.
assign PCCorrectE = PCSrcE ? IEUAdrE : PCLinkE;
// pcadder // pcadder
// add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32 // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32
@ -424,13 +428,6 @@ module ifu (
// *** combine these with others in better way, including M, F // *** combine these with others in better way, including M, F
// the branch predictor needs a compact decoding of the instruction class.
// *** consider adding in the alternate return address x5 for returns.
assign InstrClassD[4] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or r5
assign InstrClassD[3] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5
assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01; // jump register, but not return
assign InstrClassD[1] = InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01; // jump, RD != x1 or x5
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
// Misaligned PC logic // Misaligned PC logic
// instruction address misalignment is generated by the target of control flow instructions, not // instruction address misalignment is generated by the target of control flow instructions, not
@ -452,26 +449,6 @@ module ifu (
flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, PCD, PCE); flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, PCD, PCE);
flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, PCE, PCM); flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, PCE, PCM);
flopenrc #(5) InstrClassRegE(.clk(clk),
.reset(reset),
.en(~StallE),
.clear(FlushE),
.d(InstrClassD),
.q(InstrClassE));
flopenrc #(5) InstrClassRegM(.clk(clk),
.reset(reset),
.en(~StallM),
.clear(FlushM),
.d(InstrClassE),
.q(InstrClassM));
flopenrc #(4) BPPredWrongRegM(.clk(clk),
.reset(reset),
.en(~StallM),
.clear(FlushM),
.d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}),
.q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM}));
// seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL. // seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or // either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or