Moved forwarding logic into controller

This commit is contained in:
David Harris 2023-12-26 21:17:01 -08:00
parent 8196ab20be
commit 2c2f692f3a
11 changed files with 113 additions and 153 deletions

View File

@ -28,9 +28,9 @@
module hazard import cvw::*; #(parameter cvw_t P) (
input logic BPWrongE, CSRWriteFenceM, RetM, TrapM,
input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD,
input logic StructuralStallD,
input logic LSUStallM, IFUStallF,
input logic FCvtIntStallD, FPUStallD,
input logic FPUStallD,
input logic DivBusyE, FDivBusyE,
input logic wfiM, IntPendingM,
// Stall & flush outputs
@ -82,7 +82,7 @@ module hazard import cvw::*; #(parameter cvw_t P) (
// The IFU stalls the entire pipeline rather than just Fetch to avoid complications with instructions later in the pipeline causing Exceptions
// A trap could be asserted at the start of a IFU/LSU stall, and should flush the memory operation
assign StallFCause = '0;
assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | FPUStallD) & ~FlushDCause;
assign StallDCause = (StructuralStallD | FPUStallD) & ~FlushDCause;
assign StallECause = (DivBusyE | FDivBusyE) & ~FlushECause;
assign StallMCause = WFIStallM & ~FlushMCause;
// Need to gate IFUStallF when the equivalent FlushFCause = FlushDCause = 1.

View File

@ -27,8 +27,8 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module alu import cvw::*; #(parameter cvw_t P, parameter WIDTH) (
input logic [WIDTH-1:0] A, B, // Operands
module alu import cvw::*; #(parameter cvw_t P) (
input logic [P.XLEN-1:0] A, B, // Operands
input logic W64, // W64-type instruction
input logic SubArith, // Subtraction or arithmetic shift
input logic [2:0] ALUSelect, // ALU mux select signal
@ -37,14 +37,14 @@ module alu import cvw::*; #(parameter cvw_t P, parameter WIDTH) (
input logic [2:0] Funct3, // For BMU decoding
input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage
input logic BMUActiveE, // Bit manipulation instruction being executed
output logic [WIDTH-1:0] ALUResult, // ALU result
output logic [WIDTH-1:0] Sum); // Sum of operands
output logic [P.XLEN-1:0] ALUResult, // ALU result
output logic [P.XLEN-1:0] Sum); // Sum of operands
// CondInvB = ~B when subtracting, B otherwise. Shift = shift result. SLT/U = result of a slt/u instruction.
// FullResult = ALU result before adjusting for a RV64 w-suffix instruction.
logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, PreALUResult; // Intermediate Signals
logic [WIDTH-1:0] CondMaskB; // Result of B mask select mux
logic [WIDTH-1:0] CondShiftA; // Result of A shifted select mux
logic [P.XLEN-1:0] CondMaskInvB, Shift, FullResult, PreALUResult; // Intermediate Signals
logic [P.XLEN-1:0] CondMaskB; // Result of B mask select mux
logic [P.XLEN-1:0] CondShiftA; // Result of A shifted select mux
logic Carry, Neg; // Flags: carry out, negative
logic LT, LTU; // Less than, Less than unsigned
logic Asign, Bsign; // Sign bits of A, B
@ -53,7 +53,7 @@ module alu import cvw::*; #(parameter cvw_t P, parameter WIDTH) (
// CondMaskB is B for add/sub, or a masked version of B for certain bit manipulation instructions
// CondShiftA is A for add/sub or a shifted version of A for shift-and-add BMU instructions
assign CondMaskInvB = SubArith ? ~CondMaskB : CondMaskB;
assign {Carry, Sum} = CondShiftA + CondMaskInvB + {{(WIDTH-1){1'b0}}, SubArith};
assign {Carry, Sum} = CondShiftA + CondMaskInvB + {{(P.XLEN-1){1'b0}}, SubArith};
// Shifts (configurable for rotation)
shifter #(P) sh(.A, .Amt(B[P.LOG_XLEN-1:0]), .Right(Funct3[2]), .W64, .SubArith, .Y(Shift), .Rotate(BALUControl[2]));
@ -62,9 +62,9 @@ module alu import cvw::*; #(parameter cvw_t P, parameter WIDTH) (
// Overflow occurs when the numbers being subtracted have the opposite sign
// and the result has the opposite sign of A.
// LT is simplified from Overflow = Asign & Bsign & Asign & Neg; LT = Neg ^ Overflow
assign Neg = Sum[WIDTH-1];
assign Asign = A[WIDTH-1];
assign Bsign = B[WIDTH-1];
assign Neg = Sum[P.XLEN-1];
assign Asign = A[P.XLEN-1];
assign Bsign = B[P.XLEN-1];
assign LT = Asign & ~Bsign | Asign & Neg | ~Bsign & Neg;
assign LTU = ~Carry;
@ -73,21 +73,22 @@ module alu import cvw::*; #(parameter cvw_t P, parameter WIDTH) (
case (ALUSelect)
3'b000: FullResult = Sum; // add or sub (including address generation)
3'b001: FullResult = Shift; // sll, sra, or srl
3'b010: FullResult = {{(WIDTH-1){1'b0}}, LT}; // slt
3'b011: FullResult = {{(WIDTH-1){1'b0}}, LTU}; // sltu
3'b010: FullResult = {{(P.XLEN-1){1'b0}}, LT}; // slt
3'b011: FullResult = {{(P.XLEN-1){1'b0}}, LTU}; // sltu
3'b100: FullResult = A ^ CondMaskInvB; // xor, xnor, binv
3'b101: FullResult = (P.ZBS_SUPPORTED | P.ZBB_SUPPORTED) ? {{(WIDTH-1){1'b0}},{|(A & CondMaskB)}} : Shift; // bext (or IEU shift when BMU not supported)
3'b101: FullResult = (P.ZBS_SUPPORTED | P.ZBB_SUPPORTED) ? {{(P.XLEN-1){1'b0}},{|(A & CondMaskB)}} : Shift; // bext (or IEU shift when BMU not supported)
3'b110: FullResult = A | CondMaskInvB; // or, orn, bset
3'b111: FullResult = A & CondMaskInvB; // and, bclr
endcase
// Support RV64I W-type addw/subw/addiw/shifts that discard upper 32 bits and sign-extend 32-bit result to 64 bits
if (WIDTH == 64) assign PreALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
if (P.XLEN == 64) assign PreALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
else assign PreALUResult = FullResult;
// Final Result B instruction select mux
if (P.ZBC_SUPPORTED | P.ZBS_SUPPORTED | P.ZBA_SUPPORTED | P.ZBB_SUPPORTED) begin : bitmanipalu
bitmanipalu #(P, WIDTH) balu(.A, .B, .W64, .BSelect, .ZBBSelect, .BMUActiveE,
bitmanipalu #(P) balu(
.A, .B, .W64, .BSelect, .ZBBSelect, .BMUActiveE,
.Funct3, .LT,.LTU, .BALUControl, .PreALUResult, .FullResult,
.CondMaskB, .CondShiftA, .ALUResult);
end else begin

View File

@ -27,9 +27,8 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module bitmanipalu import cvw::*; #(parameter cvw_t P,
parameter WIDTH=32) (
input logic [WIDTH-1:0] A, B, // Operands
module bitmanipalu import cvw::*; #(parameter cvw_t P) (
input logic [P.XLEN-1:0] A, B, // Operands
input logic W64, // W64-type instruction
input logic [1:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction
input logic [2:0] ZBBSelect, // ZBB mux select signal
@ -38,37 +37,37 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P,
input logic LTU, // less than unsigned flag
input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage
input logic BMUActiveE, // Bit manipulation instruction being executed
input logic [WIDTH-1:0] PreALUResult, FullResult,// PreALUResult, FullResult signals
output logic [WIDTH-1:0] CondMaskB, // B is conditionally masked for ZBS instructions
output logic [WIDTH-1:0] CondShiftA, // A is conditionally shifted for ShAdd instructions
output logic [WIDTH-1:0] ALUResult); // Result
input logic [P.XLEN-1:0] PreALUResult, FullResult,// PreALUResult, FullResult signals
output logic [P.XLEN-1:0] CondMaskB, // B is conditionally masked for ZBS instructions
output logic [P.XLEN-1:0] CondShiftA, // A is conditionally shifted for ShAdd instructions
output logic [P.XLEN-1:0] ALUResult); // Result
logic [WIDTH-1:0] ZBBResult, ZBCResult; // ZBB, ZBC Result
logic [WIDTH-1:0] MaskB; // BitMask of B
logic [WIDTH-1:0] RevA; // Bit-reversed A
logic [P.XLEN-1:0] ZBBResult, ZBCResult; // ZBB, ZBC Result
logic [P.XLEN-1:0] MaskB; // BitMask of B
logic [P.XLEN-1:0] RevA; // Bit-reversed A
logic Rotate; // Indicates if it is Rotate instruction
logic Mask; // Indicates if it is ZBS instruction
logic PreShift; // Inidicates if it is sh1add, sh2add, sh3add instruction
logic [1:0] PreShiftAmt; // Amount to Pre-Shift A
logic [WIDTH-1:0] CondZextA; // A Conditional Extend Intermediary Signal
logic [WIDTH-1:0] ABMU, BBMU; // Gated data inputs to reduce BMU activity
logic [P.XLEN-1:0] CondZextA; // A Conditional Extend Intermediary Signal
logic [P.XLEN-1:0] ABMU, BBMU; // Gated data inputs to reduce BMU activity
// gate data inputs to BMU to only operate when BMU is active
assign ABMU = A & {WIDTH{BMUActiveE}};
assign BBMU = B & {WIDTH{BMUActiveE}};
assign ABMU = A & {P.XLEN{BMUActiveE}};
assign BBMU = B & {P.XLEN{BMUActiveE}};
// Extract control signals from bitmanip ALUControl.
assign {Mask, PreShift} = BALUControl[1:0];
// Mask Generation Mux
if (P.ZBS_SUPPORTED) begin: zbsdec
decoder #($clog2(WIDTH)) maskgen(BBMU[$clog2(WIDTH)-1:0], MaskB);
mux2 #(WIDTH) maskmux(B, MaskB, Mask, CondMaskB);
decoder #($clog2(P.XLEN)) maskgen(BBMU[$clog2(P.XLEN)-1:0], MaskB);
mux2 #(P.XLEN) maskmux(B, MaskB, Mask, CondMaskB);
end else assign CondMaskB = B;
// 0-3 bit Pre-Shift Mux
if (P.ZBA_SUPPORTED) begin: zbapreshift
if (WIDTH == 64) begin
if (P.XLEN == 64) begin
mux2 #(64) zextmux(A, {{32{1'b0}}, A[31:0]}, W64, CondZextA);
end else assign CondZextA = A;
assign PreShiftAmt = Funct3[2:1] & {2{PreShift}};
@ -80,17 +79,17 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P,
// Bit reverse needed for some ZBB, ZBC instructions
if (P.ZBC_SUPPORTED | P.ZBB_SUPPORTED) begin: bitreverse
bitreverse #(WIDTH) brA(.A(ABMU), .RevA);
bitreverse #(P.XLEN) brA(.A(ABMU), .RevA);
end
// ZBC Unit
if (P.ZBC_SUPPORTED) begin: zbc
zbc #(WIDTH) ZBC(.A(ABMU), .RevA, .B(BBMU), .Funct3, .ZBCResult);
zbc #(P.XLEN) ZBC(.A(ABMU), .RevA, .B(BBMU), .Funct3, .ZBCResult);
end else assign ZBCResult = 0;
// ZBB Unit
if (P.ZBB_SUPPORTED) begin: zbb
zbb #(WIDTH) ZBB(.A(ABMU), .RevA, .B(BBMU), .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect, .ZBBResult);
zbb #(P.XLEN) ZBB(.A(ABMU), .RevA, .B(BBMU), .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect, .ZBBResult);
end else assign ZBBResult = 0;
// Result Select Mux

View File

@ -39,10 +39,14 @@ module controller import cvw::*; #(parameter cvw_t P) (
output logic IllegalBaseInstrD, // Illegal I-type instruction, or illegal RV32 access to upper 16 registers
output logic JumpD, // Jump instruction
output logic BranchD, // Branch instruction
// Execute stage control signals
output logic StructuralStallD, // Structural stalls detected by controller
output logic LoadStallD, // Structural stalls for load, sent to performance counters
output logic [4:0] Rs1D, Rs2D, // Register sources to read in Decode or Execute stage
// Execute stage control signals
input logic StallE, FlushE, // Stall, flush Execute stage
input logic [1:0] FlagsE, // Comparison flags ({eq, lt})
input logic FWriteIntE, // Write integer register, coming from FPU controller
input logic FCvtIntE, // FPU convert float to int
output logic PCSrcE, // Select signal to choose next PC (for datapath and Hazard unit)
output logic ALUSrcAE, ALUSrcBE, // ALU operands
output logic ALUResultSrcE, // Selects result to pass on to Memory stage
@ -65,7 +69,7 @@ module controller import cvw::*; #(parameter cvw_t P) (
output logic [3:0] CMOpM, // 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero
output logic IFUPrefetchE, // instruction prefetch
output logic LSUPrefetchM, // data prefetch
output logic [1:0] ForwardAE, ForwardBE, // Select signals for forwarding multiplexers
// Memory stage control signals
input logic StallM, FlushM, // Stall, flush Memory stage
output logic [1:0] MemRWE, // Mem read/write: MemRWM[1] = 1 for read, MemRWM[0] = 1 for write
@ -83,14 +87,16 @@ module controller import cvw::*; #(parameter cvw_t P) (
output logic [2:0] ResultSrcW, // Select source of result to write back to register file
// Stall during CSRs
output logic CSRWriteFenceM, // CSR write or fence instruction; needs to flush the following instructions
output logic StoreStallD // Store (memory write) causes stall
output logic [4:0] RdE, RdM, // Pipelined destination registers
// Forwarding controls
output logic [4:0] RdW // Register destinations in Execute, Memory, or Writeback stage
);
logic [4:0] Rs1E, Rs2E; // pipelined register sources
logic [6:0] OpD; // Opcode in Decode stage
logic [2:0] Funct3D; // Funct3 field in Decode stage
logic [6:0] Funct7D; // Funct7 field in Decode stage
logic [4:0] Rs1D, Rs2D, RdD; // Rs1/2 source register / dest reg in Decode stage
logic [4:0] RdD; // Rs1/2 source register / dest reg in Decode stage
`define CTRLW 24
@ -146,6 +152,9 @@ module controller import cvw::*; #(parameter cvw_t P) (
logic [3:0] CMOpD, CMOpE; // which CMO instruction 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero
logic IFUPrefetchD; // instruction prefetch
logic LSUPrefetchD, LSUPrefetchE; // data prefetch
logic AMOStallD, CMOStallD; // Structural hazards from atomic and cache management ops
logic MatchDE; // Match between a source register in Decode stage and destination register in Execute stage
logic FCvtIntStallD, MDUStallD, CSRRdStallD; // Stall due to conversion, load, multiply/divide, CSR read
// Extract fields
assign OpD = InstrD[6:0];
@ -394,6 +403,9 @@ module controller import cvw::*; #(parameter cvw_t P) (
flopenrc #(35) controlregE(clk, reset, FlushE, ~StallE,
{ALUSelectD, RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, SubArithD, MDUD, AtomicD, InvalidateICacheD, FlushDCacheD, FenceD, CMOpD, IFUPrefetchD, LSUPrefetchD, InstrValidD},
{ALUSelectE, IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, SubArithE, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, CMOpE, IFUPrefetchE, LSUPrefetchE, InstrValidE});
flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E);
flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E);
flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE);
// Branch Logic
// The comparator handles both signed and unsigned branches using BranchSignedE
@ -415,22 +427,45 @@ module controller import cvw::*; #(parameter cvw_t P) (
flopenrc #(25) controlregM(clk, reset, FlushM, ~StallM,
{RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, InstrValidE, IntDivE, CMOpE, LSUPrefetchE},
{RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FenceM, InstrValidM, IntDivM, CMOpM, LSUPrefetchM});
flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM);
// Writeback stage pipeline control register
flopenrc #(5) controlregW(clk, reset, FlushW, ~StallW,
{RegWriteM, ResultSrcM, IntDivM},
{RegWriteW, ResultSrcW, IntDivW});
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
// Flush F, D, and E stages on a CSR write or Fence.I or SFence.VMA
assign CSRWriteFenceM = CSRWriteM | FenceM;
// Forwarding logic
always_comb begin
ForwardAE = 2'b00;
ForwardBE = 2'b00;
if (Rs1E != 5'b0)
if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10;
else if ((Rs1E == RdW) & RegWriteW) ForwardAE = 2'b01;
if (Rs2E != 5'b0)
if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10;
else if ((Rs2E == RdW) & RegWriteW) ForwardBE = 2'b01;
end
// Stall on dependent operations that finish in Mem Stage and can't bypass in time
assign MatchDE = ((Rs1D == RdE) | (Rs2D == RdE)) & (RdE != 5'b0); // Decode-stage instruction source depends on result from execute stage instruction
assign FCvtIntStallD = FCvtIntE & MatchDE; // FPU to Integer transfers have single-cycle latency except fcvt
assign LoadStallD = (MemReadE|SCE) & MatchDE;
assign MDUStallD = MDUE & MatchDE; // Int mult/div is at least two cycle latency, even when coming from the FDIV
assign CSRRdStallD = CSRReadE & MatchDE;
// the synchronous DTIM cannot read immediately after write
// a cache cannot read or write immediately after a write
// atomic operations are also detected as MemRWD[1]
// atomic operations are also detected as MemRWD[1] ***check; seems like & MemRWE
// *** RT: Remove this after updating the cache.
// *** RT: Check that atomic after atomic works correctly.
//assign StoreStallD = ((|CMOpE)) & ((|CMOpD));
logic AMOHazard;
assign AMOHazard = &MemRWE & MemRWD[1];
assign StoreStallD = ((|CMOpE) & (|CMOpD)) | AMOHazard;
assign AMOStallD = &MemRWE & MemRWD[1]; // Read after atomic operation causes structural hazard
assign CMOStallD = (|CMOpE) & (|CMOpD); // CMO op after CMO op causes structural hazard ***explain, why doesn't interact with read/write
// Structural hazard causes stall if any of these events occur
assign StructuralStallD = LoadStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | AMOStallD | CMOStallD;
endmodule

View File

@ -32,6 +32,7 @@ module datapath import cvw::*; #(parameter cvw_t P) (
// Decode stage signals
input logic [2:0] ImmSrcD, // Selects type of immediate extension
input logic [31:0] InstrD, // Instruction in Decode stage
input logic [4:0] Rs1D, Rs2D, // Source registers
// Execute stage signals
input logic [P.XLEN-1:0] PCE, // PC in Execute stage
input logic [P.XLEN-1:0] PCLinkE, // PC + 4 (of instruction in Execute stage)
@ -68,9 +69,8 @@ module datapath import cvw::*; #(parameter cvw_t P) (
input logic [P.XLEN-1:0] CSRReadValW, // CSR read result
input logic [P.XLEN-1:0] MDUResultW, // MDU (Multiply/divide unit) result
input logic [P.XLEN-1:0] FIntDivResultW, // FPU's integer divide result
input logic [4:0] RdW // Destination register
// Hazard Unit signals
output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, // Register sources to read in Decode or Execute stage
output logic [4:0] RdE, RdM, RdW // Register destinations in Execute, Memory, or Writeback stage
);
// Fetch stage signals
@ -94,9 +94,6 @@ module datapath import cvw::*; #(parameter cvw_t P) (
logic [P.XLEN-1:0] MulDivResultW; // Multiply always comes from MDU. Divide could come from MDU or FPU (when using fdivsqrt for integer division)
// Decode stage
assign Rs1D = InstrD[19:15];
assign Rs2D = InstrD[24:20];
assign RdD = InstrD[11:7];
regfile #(P.XLEN, P.E_SUPPORTED) regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, R1D, R2D);
extend #(P) ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ImmExtD);
@ -104,28 +101,23 @@ module datapath import cvw::*; #(parameter cvw_t P) (
flopenrc #(P.XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E);
flopenrc #(P.XLEN) RD2EReg(clk, reset, FlushE, ~StallE, R2D, R2E);
flopenrc #(P.XLEN) ImmExtEReg(clk, reset, FlushE, ~StallE, ImmExtD, ImmExtE);
flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E);
flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E);
flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE);
mux3 #(P.XLEN) faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE);
mux3 #(P.XLEN) fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE);
comparator #(P.XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE);
mux2 #(P.XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE);
mux2 #(P.XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE);
alu #(P, P.XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, BMUActiveE, ALUResultE, IEUAdrE);
alu #(P) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, BMUActiveE, ALUResultE, IEUAdrE);
mux2 #(P.XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE);
mux2 #(P.XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE);
// Memory stage pipeline register
flopenrc #(P.XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM);
flopenrc #(P.XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM);
flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM);
flopenrc #(P.XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM);
// Writeback stage pipeline register and logic
flopenrc #(P.XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
// floating point inputs: FIntResM comes from fclass, fcmp, fmv; FCvtIntResW comes from fcvt
if (P.F_SUPPORTED) begin:fpmux

View File

@ -1,62 +0,0 @@
///////////////////////////////////////////
// forward.sv
//
// Written: David_Harris@hmc.edu, Sarah.Harris@unlv.edu
// Created: 9 January 2021
// Modified:
//
// Purpose: Determine datapath forwarding
//
// Documentation: RISC-V System on Chip Design Chapter 4 (Section 4.2.2.3)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module forward(
// Detect hazards
input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, // Source and destination registers
input logic MemReadE, MDUE, CSRReadE, // Execute stage instruction is a load (MemReadE), divide (MDUE), or CSR read (CSRReadE)
input logic RegWriteM, RegWriteW, // Instruction in Memory or Writeback stage writes register file
input logic FCvtIntE, // FPU convert float to int
input logic SCE, // Store Conditional instruction
// Forwarding controls
output logic [1:0] ForwardAE, ForwardBE, // Select signals for forwarding multiplexers
output logic FCvtIntStallD, LoadStallD, MDUStallD, CSRRdStallD // Stall due to conversion, load, multiply/divide, CSR read
);
logic MatchDE; // Match between a source register in Decode stage and destination register in Execute stage
always_comb begin
ForwardAE = 2'b00;
ForwardBE = 2'b00;
if (Rs1E != 5'b0)
if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10;
else if ((Rs1E == RdW) & RegWriteW) ForwardAE = 2'b01;
if (Rs2E != 5'b0)
if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10;
else if ((Rs2E == RdW) & RegWriteW) ForwardBE = 2'b01;
end
// Stall on dependent operations that finish in Mem Stage and can't bypass in time
assign MatchDE = ((Rs1D == RdE) | (Rs2D == RdE)) & (RdE != 5'b0); // Decode-stage instruction source depends on result from execute stage instruction
assign FCvtIntStallD = FCvtIntE & MatchDE; // FPU to Integer transfers have single-cycle latency except fcvt
assign LoadStallD = (MemReadE|SCE) & MatchDE;
assign MDUStallD = MDUE & MatchDE; // Int mult/div is at least two cycle latency, even when coming from the FDIV
assign CSRRdStallD = CSRReadE & MatchDE;
endmodule

View File

@ -73,8 +73,8 @@ module ieu import cvw::*; #(parameter cvw_t P) (
// Hazard unit signals
input logic StallD, StallE, StallM, StallW, // Stall signals from hazard unit
input logic FlushD, FlushE, FlushM, FlushW, // Flush signals
output logic FCvtIntStallD, LoadStallD, // Stall causes from IEU to hazard unit
output logic MDUStallD, CSRRdStallD, StoreStallD,
output logic StructuralStallD, // IEU detects structural hazard in Decode stage
output logic LoadStallD, // Structural stalls for load, sent to performance counters
output logic CSRReadM, CSRWriteM, PrivilegedM,// CSR read, CSR write, is privileged instruction
output logic CSRWriteFenceM // CSR write or fence instruction needs to flush subsequent instructions
);
@ -94,7 +94,7 @@ module ieu import cvw::*; #(parameter cvw_t P) (
logic SubArithE; // Subtraction or arithmetic shift
// Forwarding signals
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E; // Source and destination registers
logic [4:0] Rs1D, Rs2D; // Source registers
logic [1:0] ForwardAE, ForwardBE; // Select signals for forwarding multiplexers
logic RegWriteM, RegWriteW; // Register will be written in Memory, Writeback stages
logic MemReadE, CSRReadE; // Load, CSRRead instruction
@ -104,25 +104,23 @@ module ieu import cvw::*; #(parameter cvw_t P) (
controller #(P) c(
.clk, .reset, .StallD, .FlushD, .InstrD, .STATUS_FS, .ENVCFG_CBE, .ImmSrcD,
.IllegalIEUFPUInstrD, .IllegalBaseInstrD, .StallE, .FlushE, .FlagsE, .FWriteIntE,
.IllegalIEUFPUInstrD, .IllegalBaseInstrD,
.StructuralStallD, .LoadStallD, .Rs1D, .Rs2D,
.StallE, .FlushE, .FlagsE, .FWriteIntE,
.PCSrcE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .MemReadE, .CSRReadE,
.Funct3E, .IntDivE, .MDUE, .W64E, .SubArithE, .BranchD, .BranchE, .JumpD, .JumpE, .SCE,
.BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .MDUActiveE, .CMOpM, .IFUPrefetchE, .LSUPrefetchM,
.BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .MDUActiveE,
.FCvtIntE, .ForwardAE, .ForwardBE, .CMOpM, .IFUPrefetchE, .LSUPrefetchM,
.StallM, .FlushM, .MemRWE, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
.RegWriteM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM,
.StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD);
.StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM,
.RdW, .RdE, .RdM);
datapath #(P) dp(
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, .W64E, .SubArithE,
.clk, .reset, .ImmSrcD, .InstrD, .Rs1D, .Rs2D, .StallE, .FlushE, .ForwardAE, .ForwardBE, .W64E, .SubArithE,
.Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .JumpE, .BranchSignedE,
.PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE,
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW,
.StallW, .FlushW, .RegWriteW, .IntDivW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
.CSRReadValW, .MDUResultW, .FIntDivResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
forward fw(
.Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW,
.MemReadE, .MDUE, .CSRReadE, .RegWriteM, .RegWriteW,
.FCvtIntE, .SCE, .ForwardAE, .ForwardBE,
.FCvtIntStallD, .LoadStallD, .MDUStallD, .CSRRdStallD);
.CSRReadValW, .MDUResultW, .FIntDivResultW, .RdW);
endmodule

View File

@ -54,7 +54,6 @@ module csr import cvw::*; #(parameter cvw_t P) (
input logic SelHPTW, // hardware page table walker active, so base endianness on supervisor mode
// inputs for performance counters
input logic LoadStallD,
input logic StoreStallD,
input logic ICacheStallF,
input logic DCacheStallM,
input logic BPDirPredWrongM,
@ -275,7 +274,7 @@ module csr import cvw::*; #(parameter cvw_t P) (
if (P.ZICNTR_SUPPORTED) begin:counters
csrc #(P) counters(.clk, .reset, .StallE, .StallM, .FlushM,
.InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM,
.InstrValidNotFlushedM, .LoadStallD, .CSRWriteM, .CSRMWriteM,
.BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM,
.InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM,
.InterruptM, .ExceptionM, .InvalidateICacheM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE,

View File

@ -32,7 +32,7 @@ module csrc import cvw::*; #(parameter cvw_t P) (
input logic clk, reset,
input logic StallE, StallM,
input logic FlushM,
input logic InstrValidNotFlushedM, LoadStallD, StoreStallD,
input logic InstrValidNotFlushedM, LoadStallD,
input logic CSRMWriteM, CSRWriteM,
input logic BPDirPredWrongM,
input logic BTAWrongM,
@ -75,7 +75,6 @@ module csrc import cvw::*; #(parameter cvw_t P) (
logic [P.XLEN-1:0] HPMCOUNTER_REGW[P.COUNTERS-1:0];
logic [P.XLEN-1:0] HPMCOUNTERH_REGW[P.COUNTERS-1:0];
logic LoadStallE, LoadStallM;
logic StoreStallE, StoreStallM;
logic [P.COUNTERS-1:0] WriteHPMCOUNTERM;
logic [P.COUNTERS-1:0] CounterEvent;
logic [63:0] HPMCOUNTERPlusM[P.COUNTERS-1:0];
@ -83,8 +82,8 @@ module csrc import cvw::*; #(parameter cvw_t P) (
genvar i;
// Interface signals
flopenrc #(2) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d({StoreStallD, LoadStallD}), .q({StoreStallE, LoadStallE})); // don't flush the load stall during a load stall.
flopenrc #(2) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d({StoreStallE, LoadStallE}), .q({StoreStallM, LoadStallM}));
flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); // don't flush the load stall during a load stall.
flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM));
// Determine when to increment each counter
assign CounterEvent[0] = 1'b1; // MCYCLE always increments
@ -100,7 +99,7 @@ module csrc import cvw::*; #(parameter cvw_t P) (
assign CounterEvent[9] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address
assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong
assign CounterEvent[11] = LoadStallM; // Load Stalls. don't want to suppress on flush as this only happens if flushed.
assign CounterEvent[12] = StoreStallM; // Store Stall
assign CounterEvent[12] = 0; // depricated Store Stall
assign CounterEvent[13] = DCacheAccess; // data cache access
assign CounterEvent[14] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss
assign CounterEvent[15] = DCacheStallM; // d cache miss cycles

View File

@ -45,7 +45,6 @@ module privileged import cvw::*; #(parameter cvw_t P) (
// processor events for performance counter logging
input logic FRegWriteM, // instruction will write floating-point registers
input logic LoadStallD, // load instruction is stalling
input logic StoreStallD, // store instruction is stalling
input logic ICacheStallF, // I cache stalled
input logic DCacheStallM, // D cache stalled
input logic BPDirPredWrongM, // branch predictor guessed wrong direction
@ -133,7 +132,7 @@ module privileged import cvw::*; #(parameter cvw_t P) (
.InstrM, .InstrOrigM, .PCM, .SrcAM, .IEUAdrM,
.CSRReadM, .CSRWriteM, .TrapM, .mretM, .sretM, .InterruptM,
.MTimerInt, .MExtInt, .SExtInt, .MSwInt,
.MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD,
.MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD,
.BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .BPWrongM,
.sfencevmaM, .ExceptionM, .InvalidateICacheM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE,
.IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess,

View File

@ -75,7 +75,8 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
logic PCSrcE;
logic CSRWriteFenceM;
logic DivBusyE;
logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD;
logic StructuralStallD;
logic LoadStallD;
logic SquashSCW;
logic MDUActiveE; // Mul/Div instruction being executed
logic ENVCFG_ADUE; // HPTW A/D Update enable
@ -95,7 +96,6 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
logic FCvtIntW;
logic FDivBusyE;
logic FRegWriteM;
logic FCvtIntStallD;
logic FpLoadStoreM;
logic [4:0] SetFflagsM;
logic [P.XLEN-1:0] FIntDivResultW;
@ -211,8 +211,8 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
.InstrValidM, .InstrValidE, .InstrValidD, .FCvtIntResW, .FCvtIntW,
// hazards
.StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.FCvtIntStallD, .LoadStallD, .MDUStallD, .CSRRdStallD, .PCSrcE,
.CSRReadM, .CSRWriteM, .PrivilegedM, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD);
.StructuralStallD, .LoadStallD, .PCSrcE,
.CSRReadM, .CSRWriteM, .PrivilegedM, .CSRWriteFenceM, .InvalidateICacheM);
lsu #(P) lsu(
.clk, .reset, .StallM, .FlushM, .StallW, .FlushW,
@ -266,9 +266,9 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
// global stall and flush control
hazard #(P) hzu(
.BPWrongE, .CSRWriteFenceM, .RetM, .TrapM,
.LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD,
.StructuralStallD,
.LSUStallM, .IFUStallF,
.FCvtIntStallD, .FPUStallD,
.FPUStallD,
.DivBusyE, .FDivBusyE,
.wfiM, .IntPendingM,
// Stall & flush outputs
@ -284,7 +284,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
.InstrM, .InstrOrigM, .CSRReadValW, .EPCM, .TrapVectorM,
.RetM, .TrapM, .sfencevmaM, .InvalidateICacheM, .DCacheStallM, .ICacheStallF,
.InstrValidM, .CommittedM, .CommittedF,
.FRegWriteM, .LoadStallD, .StoreStallD,
.FRegWriteM, .LoadStallD,
.BPDirPredWrongM, .BTAWrongM, .BPWrongM,
.RASPredPCWrongM, .IClassWrongM, .DivBusyE, .FDivBusyE,
.InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM,