mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
406 lines
16 KiB
Systemverilog
406 lines
16 KiB
Systemverilog
///////////////////////////////////////////
// ifu.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: Instruction Fetch Unit
//          PC, branch prediction, instruction cache
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
|
|
|
|
`include "wally-config.vh"
|
|
|
|
// ifu: instruction fetch unit.
//
// Contents, in the order they appear below:
//   * instruction MMU instance (immu) translating/checking PCF
//   * optional instruction cache (`MEM_ICACHE) plus a bus FSM and a fetch buffer
//     that collects IfuBusHRDATA words
//   * the next-PC mux chain (sequential / predicted / corrected / icache invalidate /
//     privileged redirect / reset vector) feeding the PCF register
//   * optional branch predictor (`BPRED_ENABLED)
//   * compressed-instruction expansion and instruction-class decode in D
//   * misaligned-target detection and the PC / instruction / prediction-status
//     pipeline registers for D, E and M
//
// Signal suffixes (F, D, E, M, W) name the pipeline stage a signal belongs to.
module ifu (
  input  logic                 clk, reset,
  input  logic                 StallF, StallD, StallE, StallM, StallW,
  input  logic                 FlushF, FlushD, FlushE, FlushM, FlushW,
  // Fetch
  input  logic [`XLEN-1:0]     IfuBusHRDATA,
  input  logic                 IfuBusAck,
  (* mark_debug = "true" *) output logic [`XLEN-1:0] PCF,
  output logic [`PA_BITS-1:0]  IfuBusAdr,
  output logic                 IfuBusRead,
  output logic                 IfuStallF,
  // Execute
  output logic [`XLEN-1:0]     PCLinkE,
  input  logic                 PCSrcE,
  input  logic [`XLEN-1:0]     IEUAdrE,
  output logic [`XLEN-1:0]     PCE,
  output logic                 BPPredWrongE,
  // Mem
  input  logic                 RetM, TrapM,
  input  logic [`XLEN-1:0]     PrivilegedNextPCM,
  input  logic                 InvalidateICacheM,
  output logic [31:0]          InstrD, InstrM,
  output logic [`XLEN-1:0]     PCM,
  output logic [4:0]           InstrClassM,
  output logic                 BPPredDirWrongM,
  output logic                 BTBPredPCWrongM,
  output logic                 RASPredPCWrongM,
  output logic                 BPPredClassNonCFIWrongM,
  // Writeback
  // output logic [`XLEN-1:0] PCLinkW,
  // Faults
  input  logic                 IllegalBaseInstrFaultD,
  output logic                 ITLBInstrPageFaultF,
  output logic                 IllegalIEUInstrFaultD,
  output logic                 InstrMisalignedFaultM,
  output logic [`XLEN-1:0]     InstrMisalignedAdrM,
  input  logic                 ExceptionM, PendingInterruptM,

  // mmu management
  input  logic [1:0]           PrivilegeModeW,
  input  logic [`XLEN-1:0]     PTE,
  input  logic [1:0]           PageType,
  input  logic [`XLEN-1:0]     SATP_REGW,
  input  logic                 STATUS_MXR, STATUS_SUM, STATUS_MPRV,
  input  logic [1:0]           STATUS_MPP,
  input  logic                 ITLBWriteF, ITLBFlushF,

  output logic                 ITLBMissF,

  // pmp/pma (inside mmu) signals. *** temporarily from AHB bus but eventually replace with internal versions pre H
  input  var logic [7:0]       PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0],
  input  var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0],

  output logic                 InstrAccessFaultF
);

  logic [`XLEN-1:0] PCCorrectE, UnalignedPCNextF, PCNextF;
  logic             misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM;
  logic             PrivilegedChangePCM;
  logic             IllegalCompInstrD;
  logic [`XLEN-1:0] PCPlus2or4F, PCLinkD;
  logic [`XLEN-3:0] PCPlusUpperF;
  logic             CompressedF;
  logic [31:0]      InstrRawD, FinalInstrRawF, InstrRawF;
  logic [31:0]      InstrE;
  logic [`XLEN-1:0] PCD;

  localparam [31:0] nop = 32'h00000013; // instruction for NOP
  logic             reset_q; // *** look at this later.

  logic             BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE;

  (* mark_debug = "true" *) logic [`PA_BITS-1:0] PCPFmmu, PCNextFPhys; // used to either truncate or expand PCPF and PCNextF into `PA_BITS width.
  logic [`XLEN+1:0] PCFExt;
  logic [`XLEN-1:0] PCBPWrongInvalidate;
  logic             BPPredWrongM;
  logic             CacheableF;

  // Resize PCNextF to `PA_BITS: zero-extend when XLEN is 32, truncate otherwise.
  generate
    if (`XLEN==32) begin:pcnextfphys
      //assign PCPF = PCPFmmu[31:0];
      assign PCNextFPhys = {{(`PA_BITS-`XLEN){1'b0}}, PCNextF};
    end else begin:pcnextfphys
      //assign PCPF = {8'b0, PCPFmmu};
      assign PCNextFPhys = PCNextF[`PA_BITS-1:0];
    end
  endgenerate

  // Two zero bits of headroom so the [`PA_BITS-1:0] slice below is in range for either XLEN.
  assign PCFExt = {2'b00, PCF};

  // Instruction-side MMU. Only execute access is requested; the data-side access
  // inputs are tied low and the data-side fault outputs are left unconnected.
  mmu #(.TLB_ENTRIES(`ITLB_ENTRIES), .IMMU(1))
  immu(.PAdr(PCFExt[`PA_BITS-1:0]),
       .VAdr(PCF),
       .Size(2'b10),
       .PTE(PTE),
       .PageTypeWriteVal(PageType),
       .TLBWrite(ITLBWriteF),
       .TLBFlush(ITLBFlushF),
       .PhysicalAddress(PCPFmmu),
       .TLBMiss(ITLBMissF),
       .TLBPageFault(ITLBInstrPageFaultF),
       .ExecuteAccessF(1'b1), // ***dh -- this should eventually change to only true if an instruction fetch is occurring
       .AtomicAccessM(1'b0),
       .ReadAccessM(1'b0),
       .WriteAccessM(1'b0),
       .LoadAccessFaultM(),
       .StoreAccessFaultM(),
       .DisableTranslation(1'b0),
       .Cacheable(CacheableF), .Idempotent(), .AtomicAllowed(),

       .clk, .reset,
       .SATP_REGW,
       .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV,
       .STATUS_MPP,
       .PrivilegeModeW,
       .InstrAccessFaultF,
       .PMPCFG_ARRAY_REGW,
       .PMPADDR_ARRAY_REGW
       );

  // branch predictor signal
  logic             SelBPPredF;
  logic [`XLEN-1:0] BPPredPCF, PCNext0F, PCNext1F, PCNext2F, PCNext3F;
  logic [4:0]       InstrClassD, InstrClassE;

  logic             ICacheFetchLine;
  logic             BusStall;
  logic             ICacheStallF;
  logic             IgnoreRequest;

  // *** put memory interface on here, InstrF becomes output
  //assign ICacheBusAdr = PCF; // *** no MMU
  //assign IfuBusFetch = ~StallD; // *** & ICacheMissF; add later
  // assign IfuBusFetch = 1; // *** & ICacheMissF; add later

  // conditional
  // 1. ram // controlled by `MEM_IROM
  // 2. cache // `MEM_ICACHE
  // 3. wire pass-through

  // Fetch-buffer geometry: one cache line of XLEN-bit words with an icache,
  // a single XLEN-bit word without one.
  localparam integer WORDSPERLINE = `MEM_ICACHE ? `ICACHE_BLOCKLENINBITS/`XLEN : 1;
  localparam integer LOGWPL = `MEM_ICACHE ? $clog2(WORDSPERLINE) : 1;
  localparam integer BLOCKLEN = `MEM_ICACHE ? `ICACHE_BLOCKLENINBITS : `XLEN;
  localparam integer WordCountThreshold = `MEM_ICACHE ? WORDSPERLINE - 1 : 0;

  localparam integer BLOCKBYTELEN = BLOCKLEN/8;
  localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN);

  logic [LOGWPL-1:0]   WordCount;
  logic [BLOCKLEN-1:0] ICacheMemWriteData;
  logic                ICacheBusAck;
  logic [`PA_BITS-1:0] LocalIfuBusAdr;
  logic [`PA_BITS-1:0] ICacheBusAdr;
  logic                SelUncachedAdr;

  // *** bug: on spill the second memory request does not go through the mmu(skips tlb, pmp, and pma checkers)
  // also it is possible to have any above fault on the spilled accesses.
  // I think the solution is to move the spill logic into the ifu using the busfsm and ensuring
  // the mmu sees the spilled address.
  generate
    if(`MEM_ICACHE) begin : icache
      icache icache(.clk, .reset, .CPUBusy(StallF), .IgnoreRequest, .ICacheMemWriteData , .ICacheBusAck,
                    .ICacheBusAdr, .CompressedF, .ICacheStallF, .ITLBMissF, .ITLBWriteF, .FinalInstrRawF,
                    .ICacheFetchLine,
                    .CacheableF,
                    .PCNextF(PCNextFPhys),
                    .PCPF(PCPFmmu),
                    .PCF,
                    .InvalidateICacheM);

    end else begin : passthrough
      // No icache: tie off everything the cache would have driven.
      assign ICacheFetchLine = 0;
      assign ICacheBusAdr = 0;
      assign CompressedF = 0; //?
      assign ICacheStallF = 0;
      assign FinalInstrRawF = 0;
    end
  endgenerate

  // select between icache and direct from the BUS. Always selected if no icache.
  mux2 #(32) UnCachedInstrMux(.d0(FinalInstrRawF),
                              .d1(ICacheMemWriteData[31:0]),
                              .s(SelUncachedAdr),
                              .y(InstrRawF));

  // Fetch buffer: one XLEN-wide enabled register per word of the line. Word `index`
  // captures IfuBusHRDATA when the bus acknowledges a read and WordCount equals index.
  genvar index;
  generate
    for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer
      flopen #(`XLEN) fb(.clk(clk),
                         .en(IfuBusAck & IfuBusRead & (index == WordCount)),
                         .d(IfuBusHRDATA),
                         .q(ICacheMemWriteData[(index+1)*`XLEN-1:index*`XLEN]));
    end
  endgenerate

  // Bus address = base address + WordCount scaled to bytes (XLEN/8 bytes per word).
  // The replication count is parenthesized so it stays a single constant expression
  // regardless of how the `PA_BITS macro expands.
  assign LocalIfuBusAdr = SelUncachedAdr ? PCPFmmu : ICacheBusAdr;
  assign IfuBusAdr = ({{(`PA_BITS-LOGWPL){1'b0}}, WordCount} << $clog2(`XLEN/8)) + LocalIfuBusAdr;

  // busfsm port names carry Lsu/DCache prefixes; here they are driven with the
  // IFU/icache equivalents. The write-side ports are tied off or left unconnected.
  busfsm #(WordCountThreshold, LOGWPL, `MEM_ICACHE)
  busfsm(.clk, .reset, .IgnoreRequest,
         .LsuRWM(2'b10), .DCacheFetchLine(ICacheFetchLine), .DCacheWriteLine(1'b0),
         .LsuBusAck(IfuBusAck),
         .CPUBusy(StallF), .CacheableM(CacheableF),
         .BusStall, .LsuBusWrite(), .LsuBusRead(IfuBusRead), .DCacheBusAck(ICacheBusAck),
         .BusCommittedM(), .SelUncachedAdr(SelUncachedAdr), .WordCount);

  assign IfuStallF = ICacheStallF | BusStall;

  assign IgnoreRequest = ITLBMissF | ExceptionM | PendingInterruptM;

  // F->D instruction register; a flushed slot receives a nop.
  // NOTE(review): flopenl's second port is presumably a synchronous "load `nop`" control
  // (driven here during reset and the cycle after) — confirm against the flopenl definition.
  flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FlushD ? nop : InstrRawF, nop, InstrRawD);

  assign PrivilegedChangePCM = RetM | TrapM;

  // Next-PC mux chain. Later muxes override earlier ones:
  // PC+2/4 -> predicted target -> E-stage correction -> icache invalidate refetch
  //        -> privileged redirect (ret/trap) -> reset vector.
  mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F),
                       .d1(BPPredPCF),
                       .s(SelBPPredF),
                       .y(PCNext0F));

  mux2 #(`XLEN) pcmux1(.d0(PCNext0F),
                       .d1(PCCorrectE),
                       .s(BPPredWrongE),
                       .y(PCNext1F));

  // December 20, 2021 Ross Thompson, If instructions in ID and IF are already invalid we don't pick PCE on icache invalidate.
  // this only happens because of branch class miss prediction. The Fence instruction was incorrectly predicted as a branch
  // this means on the previous cycle the BPPredWrongE updated PCNextF to the correct fall through address.
  // to fix we need to select the correct address PCF as the next PCNextF. Unfortunately we must still flush the instruction in IF
  // as we are deliberately invalidating the icache. This address has to be refetched by the icache.
  mux2 #(`XLEN) pcmux2(.d0(PCNext1F),
                       .d1(PCBPWrongInvalidate),
                       .s(InvalidateICacheM),
                       .y(PCNext2F));

  mux2 #(`XLEN) pcmux3(.d0(PCNext2F),
                       .d1(PrivilegedNextPCM),
                       .s(PrivilegedChangePCM),
                       .y(PCNext3F));

  mux2 #(`XLEN) pcmux4(.d0(PCNext3F),
                       .d1(`RESET_VECTOR),
                       .s(reset_q),
                       .y(UnalignedPCNextF));

  // reset delayed by one cycle; selects the reset vector in pcmux4.
  flop #(1) resetReg (.clk(clk),
                      .d(reset),
                      .q(reset_q));

  flopenrc #(1) BPPredWrongMReg(.clk, .reset, .en(~StallM), .clear(FlushM),
                                .d(BPPredWrongE), .q(BPPredWrongM));

  // On icache invalidate refetch from PCE, unless the previous cycle was a
  // misprediction (BPPredWrongM), in which case PCF is already the right address.
  mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF),
                                            .s(BPPredWrongM & InvalidateICacheM),
                                            .y(PCBPWrongInvalidate));

  assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment
  flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF);

  // branch and jump predictor
  generate
    if (`BPRED_ENABLED == 1) begin : bpred
      // I am making the port connection explicit for now as I want to see them and they will be changing.

      bpred bpred(.clk, .reset,
                  .StallF, .StallD, .StallE,
                  .FlushF, .FlushD, .FlushE,
                  .PCNextF, .BPPredPCF, .SelBPPredF, .PCE, .PCSrcE, .IEUAdrE,
                  .PCD, .PCLinkE, .InstrClassE, .BPPredWrongE, .BPPredDirWrongE,
                  .BTBPredPCWrongE, .RASPredPCWrongE, .BPPredClassNonCFIWrongE);

    end else begin : bpred
      // No predictor: never select a predicted PC, and treat every taken
      // control-flow instruction (PCSrcE) as a misprediction so pcmux1 redirects.
      assign BPPredPCF = {`XLEN{1'b0}};
      assign SelBPPredF = 1'b0;
      assign BPPredWrongE = PCSrcE;
      assign BPPredDirWrongE = 1'b0;
      assign BTBPredPCWrongE = 1'b0;
      assign RASPredPCWrongE = 1'b0;
      assign BPPredClassNonCFIWrongE = 1'b0;
    end
  endgenerate

  // The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE.
  assign PCCorrectE = PCSrcE ? IEUAdrE : PCLinkE;

  // pcadder
  // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32
  assign PCPlusUpperF = PCF[`XLEN-1:2] + 1; // add 4 to PC
  // choose PC+2 or PC+4
  always_comb
    if (CompressedF) begin // add 2
      if (PCF[1]) PCPlus2or4F = {PCPlusUpperF, 2'b00};       // xx10 + 2 carries into the upper bits
      else        PCPlus2or4F = {PCF[`XLEN-1:2], 2'b10};     // xx00 + 2 only sets bit 1
    end else      PCPlus2or4F = {PCPlusUpperF, PCF[1:0]};    // add 4

  // Decode stage pipeline register and logic
  // positional ports: FlushD is the clear input, ~StallD the enable
  // (same roles as the named .clear/.en connections on the other flopenrc instances).
  flopenrc #(`XLEN) PCDReg(clk, reset, FlushD, ~StallD, PCF, PCD);

  // expand 16-bit compressed instructions to 32 bits

  decompress decomp(.InstrRawD, .InstrD, .IllegalCompInstrD);
  assign IllegalIEUInstrFaultD = IllegalBaseInstrFaultD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr
  // *** combine these with others in better way, including M, F

  // the branch predictor needs a compact decoding of the instruction class.
  // *** consider adding in the alternate return address x5 for returns.
  // Mask 7'h77 ignores opcode bit 3, so it matches both 7'h67 and 7'h6F.
  // Mask 5'h1B ignores register bit 2, so it matches both x1 and x5.
  assign InstrClassD[4] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5
  assign InstrClassD[3] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5
  assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01; // jump register, but not return
  assign InstrClassD[1] = InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01; // jump, RD != x1 or x5
  assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch

  // Misaligned PC logic
  // instruction address misalignment is generated by the target of control flow instructions, not
  // the fetch itself.
  // NOTE(review): PCNextF[0] is forced to 0 by the PCNextF assignment above, so only the
  // PCNextF[1] term (active when `C_SUPPORTED is 0) can ever raise `misaligned`.
  assign misaligned = PCNextF[0] | (PCNextF[1] & ~`C_SUPPORTED);
  // do we really need to have check if the instruction is control flow? Yes
  // Branches are updated in the execution stage but traps are updated in the memory stage.

  // pipeline misaligned faults to M stage
  assign BranchMisalignedFaultE = misaligned & PCSrcE; // E-stage (Branch/Jump) misaligned
  flopenr #(1) InstrMisalginedReg(clk, reset, ~StallM, BranchMisalignedFaultE, BranchMisalignedFaultM);
  // *** Ross Thompson. Check InstrMisalignedAdrM as I believe it is the same as PCF. Should be able to remove.
  flopenr #(`XLEN) InstrMisalignedAdrReg(clk, reset, ~StallM, PCNextF, InstrMisalignedAdrM);
  // TrapMisalignedFaultM is computed but currently unused (see the commented-out term below).
  assign TrapMisalignedFaultM = misaligned & PrivilegedChangePCM;
  assign InstrMisalignedFaultM = BranchMisalignedFaultM; // | TrapMisalignedFaultM; *** put this back in without causing a cyclic path

  // Instruction and PC pipeline registers; flushed instruction slots receive a nop.
  flopenr #(32)    InstrEReg(clk, reset, ~StallE, FlushE ? nop : InstrD, InstrE);
  flopenr #(32)    InstrMReg(clk, reset, ~StallM, FlushM ? nop : InstrE, InstrM);
  flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, PCD, PCE);
  flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, PCE, PCM);

  // Instruction class pipelined D -> E -> M.
  flopenrc #(5) InstrClassRegE(.clk(clk),
                               .reset(reset),
                               .en(~StallE),
                               .clear(FlushE),
                               .d(InstrClassD),
                               .q(InstrClassE));

  flopenrc #(5) InstrClassRegM(.clk(clk),
                               .reset(reset),
                               .en(~StallM),
                               .clear(FlushM),
                               .d(InstrClassE),
                               .q(InstrClassM));

  // The four per-cause misprediction flags pipelined E -> M.
  flopenrc #(4) BPPredWrongRegM(.clk(clk),
                                .reset(reset),
                                .en(~StallM),
                                .clear(FlushM),
                                .d({BPPredDirWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE}),
                                .q({BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM}));

  // seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.
  // either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
  // have dedicated adder in Mem stage based on PCM + 2 or 4
  // *** redo this
  flopenr #(`XLEN) PCPDReg(clk, reset, ~StallD, PCPlus2or4F, PCLinkD);
  flopenr #(`XLEN) PCPEReg(clk, reset, ~StallE, PCLinkD, PCLinkE);
endmodule
|
|
|