mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-03 10:15:19 +00:00
IFU simplifications.
This commit is contained in:
parent
0023c4cb57
commit
2c982dca03
@ -98,7 +98,6 @@ module ifu (
|
|||||||
localparam [31:0] nop = 32'h00000013; // instruction for NOP
|
localparam [31:0] nop = 32'h00000013; // instruction for NOP
|
||||||
logic reset_q; // see comment below about PCNextF and icache.
|
logic reset_q; // see comment below about PCNextF and icache.
|
||||||
|
|
||||||
logic BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE;
|
|
||||||
logic [`XLEN-1:0] PCBPWrongInvalidate;
|
logic [`XLEN-1:0] PCBPWrongInvalidate;
|
||||||
logic BPPredWrongM;
|
logic BPPredWrongM;
|
||||||
|
|
||||||
@ -355,16 +354,8 @@ module ifu (
|
|||||||
//.y(UnalignedPCNextF));
|
//.y(UnalignedPCNextF));
|
||||||
.y(PCNext3F));
|
.y(PCNext3F));
|
||||||
|
|
||||||
// This mux is not strictly speaking required. Because the icache takes in
|
// This mux is required as PCNextF needs to be the valid reset vector during reset.
|
||||||
// PCNextF rather than PCPF, PCNextF should stay in reset while the cache
|
// Reseting PCF does not accomplish this as PCNextF will be +2/4 more than PCF.
|
||||||
// looks up the addresses. Without this mux PCNextF will increment + 2/4.
|
|
||||||
// When the icache fsm is out of reset then it will report on the status
|
|
||||||
// of PCF + 2/4. It will be a miss since this is the very first access.
|
|
||||||
// On the next cycle the cache will start using PCPF to finish the read.
|
|
||||||
// Because the granularity of a cache line +2/4 will always fit in the same
|
|
||||||
// cache line so the mux is not required. I am leaving this comment and mux
|
|
||||||
// a a reminder as to what is happening in case keep PCNextF at RESET_VECTOR
|
|
||||||
// during reset becomes a requirement.
|
|
||||||
mux2 #(`XLEN) pcmux4(.d0(PCNext3F),
|
mux2 #(`XLEN) pcmux4(.d0(PCNext3F),
|
||||||
.d1(`RESET_VECTOR),
|
.d1(`RESET_VECTOR),
|
||||||
.s(`MEM_IROM ? reset : reset_q),
|
.s(`MEM_IROM ? reset : reset_q),
|
||||||
@ -376,8 +367,10 @@ module ifu (
|
|||||||
.d(BPPredWrongE), .q(BPPredWrongM));
|
.d(BPPredWrongE), .q(BPPredWrongM));
|
||||||
|
|
||||||
mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF),
|
mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF),
|
||||||
.s(BPPredWrongM & InvalidateICacheM),
|
.s(BPPredWrongM), // & InvalidateICacheM *** check with linux.
|
||||||
.y(PCBPWrongInvalidate));
|
.y(PCBPWrongInvalidate));
|
||||||
|
// The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE.
|
||||||
|
assign PCCorrectE = PCSrcE ? IEUAdrE : PCLinkE;
|
||||||
|
|
||||||
|
|
||||||
assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment
|
assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment
|
||||||
@ -385,24 +378,35 @@ module ifu (
|
|||||||
|
|
||||||
// branch and jump predictor
|
// branch and jump predictor
|
||||||
if (`BPRED_ENABLED) begin : bpred
|
if (`BPRED_ENABLED) begin : bpred
|
||||||
|
logic BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE;
|
||||||
|
|
||||||
bpred bpred(.clk, .reset,
|
bpred bpred(.clk, .reset,
|
||||||
.StallF, .StallD, .StallE,
|
.StallF, .StallD, .StallE,
|
||||||
.FlushF, .FlushD, .FlushE,
|
.FlushF, .FlushD, .FlushE,
|
||||||
.PCNextF, .BPPredPCF, .SelBPPredF, .PCE, .PCSrcE, .IEUAdrE,
|
.PCNextF, .BPPredPCF, .SelBPPredF, .PCE, .PCSrcE, .IEUAdrE,
|
||||||
.PCD, .PCLinkE, .InstrClassE, .BPPredWrongE, .BPPredDirWrongE,
|
.PCD, .PCLinkE, .InstrClassE, .BPPredWrongE, .BPPredDirWrongE,
|
||||||
.BTBPredPCWrongE, .RASPredPCWrongE, .BPPredClassNonCFIWrongE);
|
.BTBPredPCWrongE, .RASPredPCWrongE, .BPPredClassNonCFIWrongE);
|
||||||
|
|
||||||
|
// the branch predictor needs a compact decoding of the instruction class.
|
||||||
|
// *** consider adding in the alternate return address x5 for returns.
|
||||||
|
assign InstrClassD[4] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or r5
|
||||||
|
assign InstrClassD[3] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5
|
||||||
|
assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01; // jump register, but not return
|
||||||
|
assign InstrClassD[1] = InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01; // jump, RD != x1 or x5
|
||||||
|
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
|
||||||
|
|
||||||
|
// branch predictor
|
||||||
|
flopenrc #(5) InstrClassRegE(.clk, .reset, .en(~StallE), .clear(FlushE), .d(InstrClassD), .q(InstrClassE));
|
||||||
|
flopenrc #(5) InstrClassRegM(.clk, .reset, .en(~StallM), .clear(FlushM), .d(InstrClassE), .q(InstrClassM));
|
||||||
|
flopenrc #(4) BPPredWrongRegM(.clk, .reset, .en(~StallM), .clear(FlushM),
|
||||||
|
.d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}),
|
||||||
|
.q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM}));
|
||||||
|
|
||||||
end else begin : bpred
|
end else begin : bpred
|
||||||
assign BPPredPCF = {`XLEN{1'b0}};
|
assign BPPredPCF = '0;
|
||||||
assign SelBPPredF = 1'b0;
|
assign BPPredWrongM = PCSrcE;
|
||||||
assign BPPredWrongE = PCSrcE;
|
assign {SelBPPredF, BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM} = '0;
|
||||||
assign BPPredDirWrongE = 1'b0;
|
|
||||||
assign BTBPredPCWrongE = 1'b0;
|
|
||||||
assign RASPredPCWrongE = 1'b0;
|
|
||||||
assign BPPredClassNonCFIWrongE = 1'b0;
|
|
||||||
end
|
end
|
||||||
// The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE.
|
|
||||||
assign PCCorrectE = PCSrcE ? IEUAdrE : PCLinkE;
|
|
||||||
|
|
||||||
// pcadder
|
// pcadder
|
||||||
// add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32
|
// add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32
|
||||||
@ -424,13 +428,6 @@ module ifu (
|
|||||||
// *** combine these with others in better way, including M, F
|
// *** combine these with others in better way, including M, F
|
||||||
|
|
||||||
|
|
||||||
// the branch predictor needs a compact decoding of the instruction class.
|
|
||||||
// *** consider adding in the alternate return address x5 for returns.
|
|
||||||
assign InstrClassD[4] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or r5
|
|
||||||
assign InstrClassD[3] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5
|
|
||||||
assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01; // jump register, but not return
|
|
||||||
assign InstrClassD[1] = InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01; // jump, RD != x1 or x5
|
|
||||||
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
|
|
||||||
|
|
||||||
// Misaligned PC logic
|
// Misaligned PC logic
|
||||||
// instruction address misalignment is generated by the target of control flow instructions, not
|
// instruction address misalignment is generated by the target of control flow instructions, not
|
||||||
@ -452,26 +449,6 @@ module ifu (
|
|||||||
flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, PCD, PCE);
|
flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, PCD, PCE);
|
||||||
flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, PCE, PCM);
|
flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, PCE, PCM);
|
||||||
|
|
||||||
flopenrc #(5) InstrClassRegE(.clk(clk),
|
|
||||||
.reset(reset),
|
|
||||||
.en(~StallE),
|
|
||||||
.clear(FlushE),
|
|
||||||
.d(InstrClassD),
|
|
||||||
.q(InstrClassE));
|
|
||||||
|
|
||||||
flopenrc #(5) InstrClassRegM(.clk(clk),
|
|
||||||
.reset(reset),
|
|
||||||
.en(~StallM),
|
|
||||||
.clear(FlushM),
|
|
||||||
.d(InstrClassE),
|
|
||||||
.q(InstrClassM));
|
|
||||||
|
|
||||||
flopenrc #(4) BPPredWrongRegM(.clk(clk),
|
|
||||||
.reset(reset),
|
|
||||||
.en(~StallM),
|
|
||||||
.clear(FlushM),
|
|
||||||
.d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}),
|
|
||||||
.q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM}));
|
|
||||||
|
|
||||||
// seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.
|
// seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.
|
||||||
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
|
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
|
||||||
|
Loading…
Reference in New Issue
Block a user