Reworked the hazards to eliminate StallFCause. Flush and CSRWrites now flush F,D,E stages and set the correct PCNextF in the M stage.

This commit is contained in:
Ross Thompson 2022-12-15 09:53:35 -06:00
parent 0358a8d255
commit c253b882be
5 changed files with 51 additions and 43 deletions

View File

@ -32,13 +32,13 @@
module hazard( module hazard(
// Detect hazards // Detect hazards
(* mark_debug = "true" *) input logic BPPredWrongE, CSRWriteFencePendingDEM, RetM, TrapM, (* mark_debug = "true" *) input logic BPPredWrongE, CSRWriteFenceM, RetM, TrapM,
(* mark_debug = "true" *) input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD, (* mark_debug = "true" *) input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD,
(* mark_debug = "true" *) input logic LSUStallM, IFUStallF, (* mark_debug = "true" *) input logic LSUStallM, IFUStallF,
(* mark_debug = "true" *) input logic FCvtIntStallD, FStallD, (* mark_debug = "true" *) input logic FCvtIntStallD, FStallD,
(* mark_debug = "true" *) input logic DivBusyE,FDivBusyE, (* mark_debug = "true" *) input logic DivBusyE,FDivBusyE,
(* mark_debug = "true" *) input logic EcallFaultM, BreakpointFaultM, (* mark_debug = "true" *) input logic EcallFaultM, BreakpointFaultM,
(* mark_debug = "true" *) input logic wfiM, IntPendingM, (* mark_debug = "true" *) input logic wfiM, IntPendingM,
// Stall & flush outputs // Stall & flush outputs
(* mark_debug = "true" *) output logic StallF, StallD, StallE, StallM, StallW, (* mark_debug = "true" *) output logic StallF, StallD, StallE, StallM, StallW,
(* mark_debug = "true" *) output logic FlushD, FlushE, FlushM, FlushW (* mark_debug = "true" *) output logic FlushD, FlushE, FlushM, FlushW
@ -46,7 +46,6 @@ module hazard(
logic StallFCause, StallDCause, StallECause, StallMCause, StallWCause; logic StallFCause, StallDCause, StallECause, StallMCause, StallWCause;
logic FirstUnstalledD, FirstUnstalledE, FirstUnstalledM, FirstUnstalledW; logic FirstUnstalledD, FirstUnstalledE, FirstUnstalledM, FirstUnstalledW;
logic FlushDCause, FlushECause, FlushMCause, FlushWCause;
// stalls and flushes // stalls and flushes
// loads: stall for one cycle if the subsequent instruction depends on the load // loads: stall for one cycle if the subsequent instruction depends on the load
@ -60,22 +59,24 @@ module hazard(
// A stage must stall if the next stage is stalled // A stage must stall if the next stage is stalled
// If any stages are stalled, the first stage that isn't stalled must flush. // If any stages are stalled, the first stage that isn't stalled must flush.
assign FlushDCause = TrapM | RetM | BPPredWrongE; // *** can stalls be pushed into earlier stages (e.g. no stall after Decode?)
assign FlushECause = TrapM | RetM | BPPredWrongE;
assign FlushMCause = TrapM | RetM;
// on Trap the memory stage should be flushed going into the W stage,
// except if the instruction causing the Trap is an ecall or ebreak.
assign FlushWCause = TrapM & ~(BreakpointFaultM | EcallFaultM);
// *** consider replacing CSRWriteFencePendingDEM with a flush rather than a stall. // *** consider replacing CSRWriteFencePendingDEM with a flush rather than a stall.
assign StallFCause = CSRWriteFencePendingDEM & ~FlushDCause; //assign StallFCause = CSRWriteFencePendingDEM & ~(TrapM | RetM | BPPredWrongE);
assign StallFCause = '0;
// stall in decode if instruction is a load/mul/csr dependent on previous // stall in decode if instruction is a load/mul/csr dependent on previous
assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | FStallD) & ~FlushECause; assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE | CSRWriteFenceM);
assign StallECause = (DivBusyE | FDivBusyE) & ~FlushMCause; assign StallECause = (DivBusyE | FDivBusyE) & ~(TrapM | CSRWriteFenceM); // *** can we move to decode stage (KP?)
// WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. It could also terminate with TW trap // WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. It could also terminate with TW trap
assign StallMCause = wfiM & ~FlushWCause & ~IntPendingM; assign StallMCause = ((wfiM) & (~TrapM & ~IntPendingM));
assign StallWCause = ((IFUStallF | LSUStallM) & ~TrapM); assign StallWCause = ((IFUStallF | LSUStallM) & ~TrapM); // | (FDivBusyE & ~TrapM & ~IntPendingM);
//assign StallWCause = (IFUStallF | LSUStallM) & ~FlushWCause; // if the fpga fails this is likely why.
// head version
// assign StallWCause = LSUStallM | IFUStallF | (FDivBusyE & ~TrapM & ~IntPendingM); // *** FDivBusyE should look like DivBusyE
// assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)); // | FDivBusyE;
// assign StallECause = (DivBusyE | FDivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?)
// *** ross: my changes to cache and lsu need to disable ifu/lsu stalls on a Trap.
assign #1 StallF = StallFCause | StallD; assign #1 StallF = StallFCause | StallD;
assign #1 StallD = StallDCause | StallE; assign #1 StallD = StallDCause | StallE;
@ -89,8 +90,10 @@ module hazard(
assign FirstUnstalledW = ~StallW & StallM; assign FirstUnstalledW = ~StallW & StallM;
// Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush // Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush
assign #1 FlushD = FirstUnstalledD | FlushDCause; assign #1 FlushD = FirstUnstalledD | TrapM | RetM | BPPredWrongE | CSRWriteFenceM;
assign #1 FlushE = FirstUnstalledE | FlushECause; assign #1 FlushE = FirstUnstalledE | TrapM | RetM | BPPredWrongE | CSRWriteFenceM ; // *** why is BPPredWrongE here, but not needed in simple processor
assign #1 FlushM = FirstUnstalledM | FlushMCause; assign #1 FlushM = FirstUnstalledM | TrapM | RetM | CSRWriteFenceM;
assign #1 FlushW = FirstUnstalledW | FlushWCause; // on Trap the memory stage should be flushed going into the W stage,
// except if the instruction causing the Trap is an ecall or ebreak.
assign #1 FlushW = FirstUnstalledW | (TrapM & ~(BreakpointFaultM | EcallFaultM));
endmodule endmodule

View File

@ -68,7 +68,8 @@ module controller(
output logic RegWriteW, DivW, // for datapath and Hazard Unit output logic RegWriteW, DivW, // for datapath and Hazard Unit
output logic [2:0] ResultSrcW, output logic [2:0] ResultSrcW,
// Stall during CSRs // Stall during CSRs
output logic CSRWriteFencePendingDEM, //output logic CSRWriteFencePendingDEM,
output logic CSRWriteFenceM,
output logic StoreStallD output logic StoreStallD
); );
@ -92,7 +93,7 @@ module controller(
logic CSRZeroSrcD; logic CSRZeroSrcD;
logic CSRReadD; logic CSRReadD;
logic [1:0] AtomicD; logic [1:0] AtomicD;
logic FenceD; logic FenceXD;
logic InvalidateICacheD, FlushDCacheD; logic InvalidateICacheD, FlushDCacheD;
logic CSRWriteD, CSRWriteE; logic CSRWriteD, CSRWriteE;
logic InstrValidD, InstrValidE; logic InstrValidD, InstrValidE;
@ -108,7 +109,8 @@ module controller(
logic IEURegWriteE; logic IEURegWriteE;
logic IllegalERegAdrD; logic IllegalERegAdrD;
logic [1:0] AtomicE; logic [1:0] AtomicE;
logic FencePendingD, FencePendingE, FencePendingM; logic FenceD, FenceE, FenceM;
logic SFenceVmaD;
logic DivE, DivM; logic DivE, DivM;
@ -177,11 +179,12 @@ module controller(
assign IllegalBaseInstrFaultD = ControlsD[0] | IllegalERegAdrD; assign IllegalBaseInstrFaultD = ControlsD[0] | IllegalERegAdrD;
assign {RegWriteD, ImmSrcD, ALUSrcAD, ALUSrcBD, MemRWD, assign {RegWriteD, ImmSrcD, ALUSrcAD, ALUSrcBD, MemRWD,
ResultSrcD, BranchD, ALUOpD, JumpD, ALUResultSrcD, W64D, CSRReadD, ResultSrcD, BranchD, ALUOpD, JumpD, ALUResultSrcD, W64D, CSRReadD,
PrivilegedD, FenceD, MDUD, AtomicD, unused} = IllegalIEUInstrFaultD ? `CTRLW'b0 : ControlsD; PrivilegedD, FenceXD, MDUD, AtomicD, unused} = IllegalIEUInstrFaultD ? `CTRLW'b0 : ControlsD;
assign CSRZeroSrcD = InstrD[14] ? (InstrD[19:15] == 0) : (Rs1D == 0); // Is a CSR instruction using zero as the source? assign CSRZeroSrcD = InstrD[14] ? (InstrD[19:15] == 0) : (Rs1D == 0); // Is a CSR instruction using zero as the source?
assign CSRWriteD = CSRReadD & !(CSRZeroSrcD & InstrD[13]); // Don't write if setting or clearing zeros assign CSRWriteD = CSRReadD & !(CSRZeroSrcD & InstrD[13]); // Don't write if setting or clearing zeros
assign FencePendingD = PrivilegedD & (InstrD[31:25] == 7'b0001001) | FenceD; // possible sfence.vma or fence.i assign SFenceVmaD = PrivilegedD & (InstrD[31:25] == 7'b0001001);
assign FenceD = SFenceVmaD | FenceXD; // possible sfence.vma or fence.i
// ALU Decoding is lazy, only using func7[5] to distinguish add/sub and srl/sra // ALU Decoding is lazy, only using func7[5] to distinguish add/sub and srl/sra
assign sltD = (Funct3D == 3'b010); assign sltD = (Funct3D == 3'b010);
@ -196,7 +199,7 @@ module controller(
// FENCE.I flushes the D$ and invalidates the I$ if Zifencei is supported and I$ is implemented // FENCE.I flushes the D$ and invalidates the I$ if Zifencei is supported and I$ is implemented
if (`ZIFENCEI_SUPPORTED & `ICACHE) begin:fencei if (`ZIFENCEI_SUPPORTED & `ICACHE) begin:fencei
logic FenceID; logic FenceID;
assign FenceID = FenceD & (Funct3D == 3'b001); // is it a FENCE.I instruction? assign FenceID = FenceXD & (Funct3D == 3'b001); // is it a FENCE.I instruction?
assign InvalidateICacheD = FenceID; assign InvalidateICacheD = FenceID;
assign FlushDCacheD = FenceID; assign FlushDCacheD = FenceID;
end else begin:fencei end else begin:fencei
@ -209,8 +212,8 @@ module controller(
// Execute stage pipeline control register and logic // Execute stage pipeline control register and logic
flopenrc #(28) controlregE(clk, reset, FlushE, ~StallE, flopenrc #(28) controlregE(clk, reset, FlushE, ~StallE,
{RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUControlD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, MDUD, AtomicD, InvalidateICacheD, FlushDCacheD, FencePendingD, InstrValidD}, {RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUControlD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, MDUD, AtomicD, InvalidateICacheD, FlushDCacheD, FenceD, InstrValidD},
{IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE}); {IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, InstrValidE});
// Branch Logic // Branch Logic
assign BranchSignedE = ~(Funct3E[2:1] == 2'b11); assign BranchSignedE = ~(Funct3E[2:1] == 2'b11);
@ -227,16 +230,17 @@ module controller(
// Memory stage pipeline control register // Memory stage pipeline control register
flopenrc #(20) controlregM(clk, reset, FlushM, ~StallM, flopenrc #(20) controlregM(clk, reset, FlushM, ~StallM,
{RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE, DivE}, {RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, InstrValidE, DivE},
{RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM, DivM}); {RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FenceM, InstrValidM, DivM});
// Writeback stage pipeline control register // Writeback stage pipeline control register
flopenrc #(5) controlregW(clk, reset, FlushW, ~StallW, flopenrc #(5) controlregW(clk, reset, FlushW, ~StallW,
{RegWriteM, ResultSrcM, DivM}, {RegWriteM, ResultSrcM, DivM},
{RegWriteW, ResultSrcW, DivW}); {RegWriteW, ResultSrcW, DivW});
// Stall pipeline at Fetch if a CSR Write or Fence is pending in the subsequent stages // Flush F, D, and E stages on a CSR write or Fence.I or SFence.VMA
assign CSRWriteFencePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM | FencePendingD | FencePendingE | FencePendingM; assign CSRWriteFenceM = CSRWriteM | FenceM;
// assign CSRWriteFencePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM | FenceD | FenceE | FenceM;
// the synchronous DTIM cannot read immediately after write // the synchronous DTIM cannot read immediately after write
// a cache cannot read or write immediately after a write // a cache cannot read or write immediately after a write

View File

@ -71,7 +71,7 @@ module ieu (
output logic FCvtIntStallD, LoadStallD, MDUStallD, CSRRdStallD, output logic FCvtIntStallD, LoadStallD, MDUStallD, CSRRdStallD,
output logic PCSrcE, output logic PCSrcE,
output logic CSRReadM, CSRWriteM, PrivilegedM, output logic CSRReadM, CSRWriteM, PrivilegedM,
output logic CSRWriteFencePendingDEM, output logic CSRWriteFenceM,
output logic StoreStallD output logic StoreStallD
); );
@ -101,7 +101,7 @@ module ieu (
.Funct3E, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM, .Funct3E, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM,
.CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
.RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .FWriteIntM, .RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .FWriteIntM,
.StallW, .FlushW, .RegWriteW, .DivW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD); .StallW, .FlushW, .RegWriteW, .DivW, .ResultSrcW, .CSRWriteFenceM, .StoreStallD);
datapath dp( datapath dp(
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,

View File

@ -55,7 +55,8 @@ module ifu (
input logic RetM, TrapM, input logic RetM, TrapM,
output logic CommittedF, output logic CommittedF,
input logic [`XLEN-1:0] PrivilegedNextPCM, input logic [`XLEN-1:0] PrivilegedNextPCM,
input logic InvalidateICacheM, input logic CSRWriteFenceM,
input logic InvalidateICacheM,
output logic [31:0] InstrD, InstrM, output logic [31:0] InstrD, InstrM,
output logic [`XLEN-1:0] PCM, output logic [`XLEN-1:0] PCM,
// branch predictor // branch predictor
@ -288,10 +289,10 @@ module ifu (
assign PrivilegedChangePCM = RetM | TrapM; assign PrivilegedChangePCM = RetM | TrapM;
mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F)); mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F));
if(`ICACHE) // if(`ICACHE | `ZICSR_SUPPORTED)
mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PCBPWrongInvalidate), .s(InvalidateICacheM), mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PCBPWrongInvalidate), .s(CSRWriteFenceM),
.y(PCNext2F)); .y(PCNext2F));
else assign PCNext2F = PCNext1F; // else assign PCNext2F = PCNext1F;
if(`ZICSR_SUPPORTED) if(`ZICSR_SUPPORTED)
mux2 #(`XLEN) pcmux3(.d0(PCNext2F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM), mux2 #(`XLEN) pcmux3(.d0(PCNext2F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM),
.y(UnalignedPCNextF)); .y(UnalignedPCNextF));

View File

@ -79,7 +79,7 @@ module wallypipelinedcore (
logic StoreAmoMisalignedFaultM, StoreAmoAccessFaultM; logic StoreAmoMisalignedFaultM, StoreAmoAccessFaultM;
logic InvalidateICacheM, FlushDCacheM; logic InvalidateICacheM, FlushDCacheM;
logic PCSrcE; logic PCSrcE;
logic CSRWriteFencePendingDEM; logic CSRWriteFenceM;
logic DivBusyE; logic DivBusyE;
logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD; logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD;
logic SquashSCW; logic SquashSCW;
@ -183,7 +183,7 @@ module wallypipelinedcore (
.BPPredWrongE, .BPPredWrongE,
// Mem // Mem
.RetM, .TrapM, .CommittedF, .PrivilegedNextPCM, .InvalidateICacheM, .RetM, .TrapM, .CommittedF, .PrivilegedNextPCM, .InvalidateICacheM, .CSRWriteFenceM,
.InstrD, .InstrM, .PCM, .InstrClassM, .BPPredDirWrongM, .InstrD, .InstrM, .PCM, .InstrClassM, .BPPredDirWrongM,
.BTBPredPCWrongM, .RASPredPCWrongM, .BPPredClassNonCFIWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPPredClassNonCFIWrongM,
@ -241,7 +241,7 @@ module wallypipelinedcore (
.FCvtIntStallD, .LoadStallD, .MDUStallD, .CSRRdStallD, .FCvtIntStallD, .LoadStallD, .MDUStallD, .CSRRdStallD,
.PCSrcE, .PCSrcE,
.CSRReadM, .CSRWriteM, .PrivilegedM, .CSRReadM, .CSRWriteM, .PrivilegedM,
.CSRWriteFencePendingDEM, .StoreStallD .CSRWriteFenceM, .StoreStallD
); // integer execution unit: integer register file, datapath and controller ); // integer execution unit: integer register file, datapath and controller
@ -317,7 +317,7 @@ module wallypipelinedcore (
hazard hzu( hazard hzu(
.BPPredWrongE, .CSRWriteFencePendingDEM, .RetM, .TrapM, .BPPredWrongE, .CSRWriteFenceM, .RetM, .TrapM,
.LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD, .LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD,
.LSUStallM, .IFUStallF, .LSUStallM, .IFUStallF,
.FCvtIntStallD, .FStallD, .FCvtIntStallD, .FStallD,