diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index 0bea0a5d0..3728ceb17 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -28,9 +28,9 @@ module hazard import cvw::*; #(parameter cvw_t P) ( input logic BPWrongE, CSRWriteFenceM, RetM, TrapM, - input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD, + input logic StructuralStallD, input logic LSUStallM, IFUStallF, - input logic FCvtIntStallD, FPUStallD, + input logic FPUStallD, input logic DivBusyE, FDivBusyE, input logic wfiM, IntPendingM, // Stall & flush outputs @@ -82,7 +82,7 @@ module hazard import cvw::*; #(parameter cvw_t P) ( // The IFU stalls the entire pipeline rather than just Fetch to avoid complications with instructions later in the pipeline causing Exceptions // A trap could be asserted at the start of a IFU/LSU stall, and should flush the memory operation assign StallFCause = '0; - assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | FPUStallD) & ~FlushDCause; + assign StallDCause = (StructuralStallD | FPUStallD) & ~FlushDCause; assign StallECause = (DivBusyE | FDivBusyE) & ~FlushECause; assign StallMCause = WFIStallM & ~FlushMCause; // Need to gate IFUStallF when the equivalent FlushFCause = FlushDCause = 1. diff --git a/src/ieu/alu.sv b/src/ieu/alu.sv index f4618bc97..b8a0933dc 100644 --- a/src/ieu/alu.sv +++ b/src/ieu/alu.sv @@ -27,8 +27,8 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module alu import cvw::*; #(parameter cvw_t P, parameter WIDTH) ( - input logic [WIDTH-1:0] A, B, // Operands +module alu import cvw::*; #(parameter cvw_t P) ( + input logic [P.XLEN-1:0] A, B, // Operands input logic W64, // W64-type instruction input logic SubArith, // Subtraction or arithmetic shift input logic [2:0] ALUSelect, // ALU mux select signal @@ -37,14 +37,14 @@ module alu import cvw::*; #(parameter cvw_t P, parameter WIDTH) ( input logic [2:0] Funct3, // For BMU decoding input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage input logic BMUActiveE, // Bit manipulation instruction being executed - output logic [WIDTH-1:0] ALUResult, // ALU result - output logic [WIDTH-1:0] Sum); // Sum of operands + output logic [P.XLEN-1:0] ALUResult, // ALU result + output logic [P.XLEN-1:0] Sum); // Sum of operands // CondInvB = ~B when subtracting, B otherwise. Shift = shift result. SLT/U = result of a slt/u instruction. // FullResult = ALU result before adjusting for a RV64 w-suffix instruction. 
- logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, PreALUResult; // Intermediate Signals - logic [WIDTH-1:0] CondMaskB; // Result of B mask select mux - logic [WIDTH-1:0] CondShiftA; // Result of A shifted select mux + logic [P.XLEN-1:0] CondMaskInvB, Shift, FullResult, PreALUResult; // Intermediate Signals + logic [P.XLEN-1:0] CondMaskB; // Result of B mask select mux + logic [P.XLEN-1:0] CondShiftA; // Result of A shifted select mux logic Carry, Neg; // Flags: carry out, negative logic LT, LTU; // Less than, Less than unsigned logic Asign, Bsign; // Sign bits of A, B @@ -53,7 +53,7 @@ module alu import cvw::*; #(parameter cvw_t P, parameter WIDTH) ( // CondMaskB is B for add/sub, or a masked version of B for certain bit manipulation instructions // CondShiftA is A for add/sub or a shifted version of A for shift-and-add BMU instructions assign CondMaskInvB = SubArith ? ~CondMaskB : CondMaskB; - assign {Carry, Sum} = CondShiftA + CondMaskInvB + {{(WIDTH-1){1'b0}}, SubArith}; + assign {Carry, Sum} = CondShiftA + CondMaskInvB + {{(P.XLEN-1){1'b0}}, SubArith}; // Shifts (configurable for rotation) shifter #(P) sh(.A, .Amt(B[P.LOG_XLEN-1:0]), .Right(Funct3[2]), .W64, .SubArith, .Y(Shift), .Rotate(BALUControl[2])); @@ -62,9 +62,9 @@ module alu import cvw::*; #(parameter cvw_t P, parameter WIDTH) ( // Overflow occurs when the numbers being subtracted have the opposite sign // and the result has the opposite sign of A. 
// LT is simplified from Overflow = Asign & Bsign & Asign & Neg; LT = Neg ^ Overflow - assign Neg = Sum[WIDTH-1]; - assign Asign = A[WIDTH-1]; - assign Bsign = B[WIDTH-1]; + assign Neg = Sum[P.XLEN-1]; + assign Asign = A[P.XLEN-1]; + assign Bsign = B[P.XLEN-1]; assign LT = Asign & ~Bsign | Asign & Neg | ~Bsign & Neg; assign LTU = ~Carry; @@ -73,21 +73,22 @@ module alu import cvw::*; #(parameter cvw_t P, parameter WIDTH) ( case (ALUSelect) 3'b000: FullResult = Sum; // add or sub (including address generation) 3'b001: FullResult = Shift; // sll, sra, or srl - 3'b010: FullResult = {{(WIDTH-1){1'b0}}, LT}; // slt - 3'b011: FullResult = {{(WIDTH-1){1'b0}}, LTU}; // sltu + 3'b010: FullResult = {{(P.XLEN-1){1'b0}}, LT}; // slt + 3'b011: FullResult = {{(P.XLEN-1){1'b0}}, LTU}; // sltu 3'b100: FullResult = A ^ CondMaskInvB; // xor, xnor, binv - 3'b101: FullResult = (P.ZBS_SUPPORTED | P.ZBB_SUPPORTED) ? {{(WIDTH-1){1'b0}},{|(A & CondMaskB)}} : Shift; // bext (or IEU shift when BMU not supported) + 3'b101: FullResult = (P.ZBS_SUPPORTED | P.ZBB_SUPPORTED) ? {{(P.XLEN-1){1'b0}},{|(A & CondMaskB)}} : Shift; // bext (or IEU shift when BMU not supported) 3'b110: FullResult = A | CondMaskInvB; // or, orn, bset 3'b111: FullResult = A & CondMaskInvB; // and, bclr endcase // Support RV64I W-type addw/subw/addiw/shifts that discard upper 32 bits and sign-extend 32-bit result to 64 bits - if (WIDTH == 64) assign PreALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult; + if (P.XLEN == 64) assign PreALUResult = W64 ? 
{{32{FullResult[31]}}, FullResult[31:0]} : FullResult; else assign PreALUResult = FullResult; // Final Result B instruction select mux if (P.ZBC_SUPPORTED | P.ZBS_SUPPORTED | P.ZBA_SUPPORTED | P.ZBB_SUPPORTED) begin : bitmanipalu - bitmanipalu #(P, WIDTH) balu(.A, .B, .W64, .BSelect, .ZBBSelect, .BMUActiveE, + bitmanipalu #(P) balu( + .A, .B, .W64, .BSelect, .ZBBSelect, .BMUActiveE, .Funct3, .LT,.LTU, .BALUControl, .PreALUResult, .FullResult, .CondMaskB, .CondShiftA, .ALUResult); end else begin diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv index 706ebdb21..44c66f795 100644 --- a/src/ieu/bmu/bitmanipalu.sv +++ b/src/ieu/bmu/bitmanipalu.sv @@ -27,9 +27,8 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module bitmanipalu import cvw::*; #(parameter cvw_t P, - parameter WIDTH=32) ( - input logic [WIDTH-1:0] A, B, // Operands +module bitmanipalu import cvw::*; #(parameter cvw_t P) ( + input logic [P.XLEN-1:0] A, B, // Operands input logic W64, // W64-type instruction input logic [1:0] BSelect, // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction input logic [2:0] ZBBSelect, // ZBB mux select signal @@ -38,37 +37,37 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P, input logic LTU, // less than unsigned flag input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage input logic BMUActiveE, // Bit manipulation instruction being executed - input logic [WIDTH-1:0] PreALUResult, FullResult,// PreALUResult, FullResult signals - output logic [WIDTH-1:0] CondMaskB, // B is conditionally masked for ZBS instructions - output logic [WIDTH-1:0] CondShiftA, // A is conditionally shifted for ShAdd instructions - output logic [WIDTH-1:0] ALUResult); // Result + input logic [P.XLEN-1:0] PreALUResult, FullResult,// PreALUResult, FullResult signals + output logic [P.XLEN-1:0] CondMaskB, // B is conditionally masked for ZBS 
instructions + output logic [P.XLEN-1:0] CondShiftA, // A is conditionally shifted for ShAdd instructions + output logic [P.XLEN-1:0] ALUResult); // Result - logic [WIDTH-1:0] ZBBResult, ZBCResult; // ZBB, ZBC Result - logic [WIDTH-1:0] MaskB; // BitMask of B - logic [WIDTH-1:0] RevA; // Bit-reversed A + logic [P.XLEN-1:0] ZBBResult, ZBCResult; // ZBB, ZBC Result + logic [P.XLEN-1:0] MaskB; // BitMask of B + logic [P.XLEN-1:0] RevA; // Bit-reversed A logic Rotate; // Indicates if it is Rotate instruction logic Mask; // Indicates if it is ZBS instruction logic PreShift; // Inidicates if it is sh1add, sh2add, sh3add instruction logic [1:0] PreShiftAmt; // Amount to Pre-Shift A - logic [WIDTH-1:0] CondZextA; // A Conditional Extend Intermediary Signal - logic [WIDTH-1:0] ABMU, BBMU; // Gated data inputs to reduce BMU activity + logic [P.XLEN-1:0] CondZextA; // A Conditional Extend Intermediary Signal + logic [P.XLEN-1:0] ABMU, BBMU; // Gated data inputs to reduce BMU activity // gate data inputs to BMU to only operate when BMU is active - assign ABMU = A & {WIDTH{BMUActiveE}}; - assign BBMU = B & {WIDTH{BMUActiveE}}; + assign ABMU = A & {P.XLEN{BMUActiveE}}; + assign BBMU = B & {P.XLEN{BMUActiveE}}; // Extract control signals from bitmanip ALUControl. 
assign {Mask, PreShift} = BALUControl[1:0]; // Mask Generation Mux if (P.ZBS_SUPPORTED) begin: zbsdec - decoder #($clog2(WIDTH)) maskgen(BBMU[$clog2(WIDTH)-1:0], MaskB); - mux2 #(WIDTH) maskmux(B, MaskB, Mask, CondMaskB); + decoder #($clog2(P.XLEN)) maskgen(BBMU[$clog2(P.XLEN)-1:0], MaskB); + mux2 #(P.XLEN) maskmux(B, MaskB, Mask, CondMaskB); end else assign CondMaskB = B; // 0-3 bit Pre-Shift Mux if (P.ZBA_SUPPORTED) begin: zbapreshift - if (WIDTH == 64) begin + if (P.XLEN == 64) begin mux2 #(64) zextmux(A, {{32{1'b0}}, A[31:0]}, W64, CondZextA); end else assign CondZextA = A; assign PreShiftAmt = Funct3[2:1] & {2{PreShift}}; @@ -80,17 +79,17 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P, // Bit reverse needed for some ZBB, ZBC instructions if (P.ZBC_SUPPORTED | P.ZBB_SUPPORTED) begin: bitreverse - bitreverse #(WIDTH) brA(.A(ABMU), .RevA); + bitreverse #(P.XLEN) brA(.A(ABMU), .RevA); end // ZBC Unit if (P.ZBC_SUPPORTED) begin: zbc - zbc #(WIDTH) ZBC(.A(ABMU), .RevA, .B(BBMU), .Funct3, .ZBCResult); + zbc #(P.XLEN) ZBC(.A(ABMU), .RevA, .B(BBMU), .Funct3, .ZBCResult); end else assign ZBCResult = 0; // ZBB Unit if (P.ZBB_SUPPORTED) begin: zbb - zbb #(WIDTH) ZBB(.A(ABMU), .RevA, .B(BBMU), .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect, .ZBBResult); + zbb #(P.XLEN) ZBB(.A(ABMU), .RevA, .B(BBMU), .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect, .ZBBResult); end else assign ZBBResult = 0; // Result Select Mux diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 4fc3ac9e7..1285ab4cc 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -39,10 +39,14 @@ module controller import cvw::*; #(parameter cvw_t P) ( output logic IllegalBaseInstrD, // Illegal I-type instruction, or illegal RV32 access to upper 16 registers output logic JumpD, // Jump instruction output logic BranchD, // Branch instruction - // Execute stage control signals + output logic StructuralStallD, // Structural stalls detected by controller + output logic LoadStallD, 
// Structural stalls for load, sent to performance counters + output logic [4:0] Rs1D, Rs2D, // Register sources to read in Decode or Execute stage + // Execute stage control signals input logic StallE, FlushE, // Stall, flush Execute stage input logic [1:0] FlagsE, // Comparison flags ({eq, lt}) input logic FWriteIntE, // Write integer register, coming from FPU controller + input logic FCvtIntE, // FPU convert float to int output logic PCSrcE, // Select signal to choose next PC (for datapath and Hazard unit) output logic ALUSrcAE, ALUSrcBE, // ALU operands output logic ALUResultSrcE, // Selects result to pass on to Memory stage @@ -65,7 +69,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( output logic [3:0] CMOpM, // 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero output logic IFUPrefetchE, // instruction prefetch output logic LSUPrefetchM, // data prefetch - + output logic [1:0] ForwardAE, ForwardBE, // Select signals for forwarding multiplexers // Memory stage control signals input logic StallM, FlushM, // Stall, flush Memory stage output logic [1:0] MemRWE, // Mem read/write: MemRWM[1] = 1 for read, MemRWM[0] = 1 for write @@ -83,14 +87,16 @@ module controller import cvw::*; #(parameter cvw_t P) ( output logic [2:0] ResultSrcW, // Select source of result to write back to register file // Stall during CSRs output logic CSRWriteFenceM, // CSR write or fence instruction; needs to flush the following instructions - output logic StoreStallD // Store (memory write) causes stall + output logic [4:0] RdE, RdM, // Pipelined destination registers + // Forwarding controls + output logic [4:0] RdW // Register destinations in Execute, Memory, or Writeback stage ); - + logic [4:0] Rs1E, Rs2E; // pipelined register sources logic [6:0] OpD; // Opcode in Decode stage logic [2:0] Funct3D; // Funct3 field in Decode stage logic [6:0] Funct7D; // Funct7 field in Decode stage - logic [4:0] Rs1D, Rs2D, RdD; // Rs1/2 source register / dest reg in Decode stage + logic 
[4:0] RdD; // Destination register in Decode stage `define CTRLW 24 @@ -146,6 +152,9 @@ module controller import cvw::*; #(parameter cvw_t P) ( logic [3:0] CMOpD, CMOpE; // which CMO instruction 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero logic IFUPrefetchD; // instruction prefetch logic LSUPrefetchD, LSUPrefetchE; // data prefetch + logic AMOStallD, CMOStallD; // Structural hazards from atomic and cache management ops + logic MatchDE; // Match between a source register in Decode stage and destination register in Execute stage + logic FCvtIntStallD, MDUStallD, CSRRdStallD; // Stall due to conversion, multiply/divide, CSR read // Extract fields assign OpD = InstrD[6:0]; @@ -394,6 +403,9 @@ module controller import cvw::*; #(parameter cvw_t P) ( flopenrc #(35) controlregE(clk, reset, FlushE, ~StallE, {ALUSelectD, RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, SubArithD, MDUD, AtomicD, InvalidateICacheD, FlushDCacheD, FenceD, CMOpD, IFUPrefetchD, LSUPrefetchD, InstrValidD}, {ALUSelectE, IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, SubArithE, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, CMOpE, IFUPrefetchE, LSUPrefetchE, InstrValidE}); + flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E); + flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); + flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); // Branch Logic // The comparator handles both signed and unsigned branches using BranchSignedE @@ -415,22 +427,45 @@ module controller import cvw::*; #(parameter cvw_t P) ( flopenrc #(25) controlregM(clk, reset, FlushM, ~StallM, {RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, InstrValidE, IntDivE, CMOpE, LSUPrefetchE}, {RegWriteM, ResultSrcM, MemRWM, 
CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FenceM, InstrValidM, IntDivM, CMOpM, LSUPrefetchM}); - + flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM); + // Writeback stage pipeline control register flopenrc #(5) controlregW(clk, reset, FlushW, ~StallW, {RegWriteM, ResultSrcM, IntDivM}, {RegWriteW, ResultSrcW, IntDivW}); + flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW); // Flush F, D, and E stages on a CSR write or Fence.I or SFence.VMA assign CSRWriteFenceM = CSRWriteM | FenceM; + // Forwarding logic + always_comb begin + ForwardAE = 2'b00; + ForwardBE = 2'b00; + if (Rs1E != 5'b0) + if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10; + else if ((Rs1E == RdW) & RegWriteW) ForwardAE = 2'b01; + + if (Rs2E != 5'b0) + if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10; + else if ((Rs2E == RdW) & RegWriteW) ForwardBE = 2'b01; + end + + // Stall on dependent operations that finish in Mem Stage and can't bypass in time + assign MatchDE = ((Rs1D == RdE) | (Rs2D == RdE)) & (RdE != 5'b0); // Decode-stage instruction source depends on result from execute stage instruction + assign FCvtIntStallD = FCvtIntE & MatchDE; // FPU to Integer transfers have single-cycle latency except fcvt + assign LoadStallD = (MemReadE|SCE) & MatchDE; + assign MDUStallD = MDUE & MatchDE; // Int mult/div is at least two cycle latency, even when coming from the FDIV + assign CSRRdStallD = CSRReadE & MatchDE; + // the synchronous DTIM cannot read immediately after write // a cache cannot read or write immediately after a write - // atomic operations are also detected as MemRWD[1] + // atomic operations are also detected as MemRWD[1] ***check; seems like & MemRWE // *** RT: Remove this after updating the cache. // *** RT: Check that atomic after atomic works correctly. 
- //assign StoreStallD = ((|CMOpE)) & ((|CMOpD)); - logic AMOHazard; - assign AMOHazard = &MemRWE & MemRWD[1]; - assign StoreStallD = ((|CMOpE) & (|CMOpD)) | AMOHazard; + assign AMOStallD = &MemRWE & MemRWD[1]; // Read after atomic operation causes structural hazard + assign CMOStallD = (|CMOpE) & (|CMOpD); // CMO op after CMO op causes structural hazard ***explain, why doesn't interact with read/write + + // Structural hazard causes stall if any of these events occur + assign StructuralStallD = LoadStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | AMOStallD | CMOStallD; endmodule diff --git a/src/ieu/datapath.sv b/src/ieu/datapath.sv index 8c366a2ef..126410238 100644 --- a/src/ieu/datapath.sv +++ b/src/ieu/datapath.sv @@ -32,6 +32,7 @@ module datapath import cvw::*; #(parameter cvw_t P) ( // Decode stage signals input logic [2:0] ImmSrcD, // Selects type of immediate extension input logic [31:0] InstrD, // Instruction in Decode stage + input logic [4:0] Rs1D, Rs2D, // Source registers // Execute stage signals input logic [P.XLEN-1:0] PCE, // PC in Execute stage input logic [P.XLEN-1:0] PCLinkE, // PC + 4 (of instruction in Execute stage) @@ -68,9 +69,8 @@ module datapath import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] CSRReadValW, // CSR read result input logic [P.XLEN-1:0] MDUResultW, // MDU (Multiply/divide unit) result input logic [P.XLEN-1:0] FIntDivResultW, // FPU's integer divide result + input logic [4:0] RdW // Destination register // Hazard Unit signals - output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, // Register sources to read in Decode or Execute stage - output logic [4:0] RdE, RdM, RdW // Register destinations in Execute, Memory, or Writeback stage ); // Fetch stage signals @@ -94,9 +94,6 @@ module datapath import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] MulDivResultW; // Multiply always comes from MDU. 
Divide could come from MDU or FPU (when using fdivsqrt for integer division) // Decode stage - assign Rs1D = InstrD[19:15]; - assign Rs2D = InstrD[24:20]; - assign RdD = InstrD[11:7]; regfile #(P.XLEN, P.E_SUPPORTED) regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, R1D, R2D); extend #(P) ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ImmExtD); @@ -104,28 +101,23 @@ module datapath import cvw::*; #(parameter cvw_t P) ( flopenrc #(P.XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E); flopenrc #(P.XLEN) RD2EReg(clk, reset, FlushE, ~StallE, R2D, R2E); flopenrc #(P.XLEN) ImmExtEReg(clk, reset, FlushE, ~StallE, ImmExtD, ImmExtE); - flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E); - flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); - flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); mux3 #(P.XLEN) faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE); mux3 #(P.XLEN) fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE); comparator #(P.XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE); mux2 #(P.XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(P.XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE); - alu #(P, P.XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, BMUActiveE, ALUResultE, IEUAdrE); + alu #(P) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, BMUActiveE, ALUResultE, IEUAdrE); mux2 #(P.XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE); mux2 #(P.XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE); // Memory stage pipeline register flopenrc #(P.XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM); flopenrc #(P.XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM); - flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM); flopenrc #(P.XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM); // Writeback stage pipeline register 
and logic flopenrc #(P.XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW); - flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW); // floating point inputs: FIntResM comes from fclass, fcmp, fmv; FCvtIntResW comes from fcvt if (P.F_SUPPORTED) begin:fpmux diff --git a/src/ieu/forward.sv b/src/ieu/forward.sv deleted file mode 100644 index ef3cd4b4b..000000000 --- a/src/ieu/forward.sv +++ /dev/null @@ -1,62 +0,0 @@ -/////////////////////////////////////////// -// forward.sv -// -// Written: David_Harris@hmc.edu, Sarah.Harris@unlv.edu -// Created: 9 January 2021 -// Modified: -// -// Purpose: Determine datapath forwarding -// -// Documentation: RISC-V System on Chip Design Chapter 4 (Section 4.2.2.3) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -module forward( - // Detect hazards - input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, // Source and destination registers - input logic MemReadE, MDUE, CSRReadE, // Execute stage instruction is a load (MemReadE), divide (MDUE), or CSR read (CSRReadE) - input logic RegWriteM, RegWriteW, // Instruction in Memory or Writeback stage writes register file - input logic FCvtIntE, // FPU convert float to int - input logic SCE, // Store Conditional instruction - // Forwarding controls - output logic [1:0] ForwardAE, ForwardBE, // Select signals for forwarding multiplexers - output logic FCvtIntStallD, LoadStallD, MDUStallD, CSRRdStallD // Stall due to conversion, load, multiply/divide, CSR read -); - - logic MatchDE; // Match between a source register in Decode stage and destination register in Execute stage - - always_comb begin - ForwardAE = 2'b00; - ForwardBE = 2'b00; - if (Rs1E != 5'b0) - if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10; - else if ((Rs1E == RdW) & RegWriteW) ForwardAE = 2'b01; - - if (Rs2E != 5'b0) - if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10; - else if ((Rs2E == RdW) & RegWriteW) ForwardBE = 2'b01; - end - - // Stall on dependent operations that finish in Mem Stage and can't bypass in time - assign MatchDE = ((Rs1D == RdE) | (Rs2D == RdE)) & (RdE != 5'b0); // Decode-stage instruction source depends on result from execute stage instruction - assign FCvtIntStallD = FCvtIntE & MatchDE; // FPU to Integer transfers have single-cycle latency except fcvt - assign LoadStallD = (MemReadE|SCE) & MatchDE; - assign MDUStallD = MDUE & MatchDE; // Int mult/div is at least two cycle latency, even when coming from the FDIV - assign CSRRdStallD = CSRReadE & MatchDE; -endmodule diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index 55cfdf854..b8b3c200d 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -73,8 +73,8 @@ module ieu import cvw::*; #(parameter 
cvw_t P) ( // Hazard unit signals input logic StallD, StallE, StallM, StallW, // Stall signals from hazard unit input logic FlushD, FlushE, FlushM, FlushW, // Flush signals - output logic FCvtIntStallD, LoadStallD, // Stall causes from IEU to hazard unit - output logic MDUStallD, CSRRdStallD, StoreStallD, + output logic StructuralStallD, // IEU detects structural hazard in Decode stage + output logic LoadStallD, // Structural stalls for load, sent to performance counters output logic CSRReadM, CSRWriteM, PrivilegedM,// CSR read, CSR write, is privileged instruction output logic CSRWriteFenceM // CSR write or fence instruction needs to flush subsequent instructions ); @@ -94,7 +94,7 @@ module ieu import cvw::*; #(parameter cvw_t P) ( logic SubArithE; // Subtraction or arithmetic shift // Forwarding signals - logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E; // Source and destination registers + logic [4:0] Rs1D, Rs2D; // Source registers logic [1:0] ForwardAE, ForwardBE; // Select signals for forwarding multiplexers logic RegWriteM, RegWriteW; // Register will be written in Memory, Writeback stages logic MemReadE, CSRReadE; // Load, CSRRead instruction @@ -104,25 +104,23 @@ module ieu import cvw::*; #(parameter cvw_t P) ( controller #(P) c( .clk, .reset, .StallD, .FlushD, .InstrD, .STATUS_FS, .ENVCFG_CBE, .ImmSrcD, - .IllegalIEUFPUInstrD, .IllegalBaseInstrD, .StallE, .FlushE, .FlagsE, .FWriteIntE, + .IllegalIEUFPUInstrD, .IllegalBaseInstrD, + .StructuralStallD, .LoadStallD, .Rs1D, .Rs2D, + .StallE, .FlushE, .FlagsE, .FWriteIntE, .PCSrcE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .MemReadE, .CSRReadE, .Funct3E, .IntDivE, .MDUE, .W64E, .SubArithE, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, - .BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .MDUActiveE, .CMOpM, .IFUPrefetchE, .LSUPrefetchM, + .BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .MDUActiveE, + .FCvtIntE, .ForwardAE, .ForwardBE, .CMOpM, .IFUPrefetchE, .LSUPrefetchM, .StallM, 
.FlushM, .MemRWE, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .RegWriteM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM, - .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD); + .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM, + .RdW, .RdE, .RdM); datapath #(P) dp( - .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, .W64E, .SubArithE, + .clk, .reset, .ImmSrcD, .InstrD, .Rs1D, .Rs2D, .StallE, .FlushE, .ForwardAE, .ForwardBE, .W64E, .SubArithE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .JumpE, .BranchSignedE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW, .StallW, .FlushW, .RegWriteW, .IntDivW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW, - .CSRReadValW, .MDUResultW, .FIntDivResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW); - - forward fw( - .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW, - .MemReadE, .MDUE, .CSRReadE, .RegWriteM, .RegWriteW, - .FCvtIntE, .SCE, .ForwardAE, .ForwardBE, - .FCvtIntStallD, .LoadStallD, .MDUStallD, .CSRRdStallD); + .CSRReadValW, .MDUResultW, .FIntDivResultW, .RdW); endmodule diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index edb27155c..e43712d81 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -54,7 +54,6 @@ module csr import cvw::*; #(parameter cvw_t P) ( input logic SelHPTW, // hardware page table walker active, so base endianness on supervisor mode // inputs for performance counters input logic LoadStallD, - input logic StoreStallD, input logic ICacheStallF, input logic DCacheStallM, input logic BPDirPredWrongM, @@ -275,7 +274,7 @@ module csr import cvw::*; #(parameter cvw_t P) ( if (P.ZICNTR_SUPPORTED) begin:counters csrc #(P) counters(.clk, .reset, .StallE, .StallM, 
.FlushM, - .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, + .InstrValidNotFlushedM, .LoadStallD, .CSRWriteM, .CSRMWriteM, .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM, .InterruptM, .ExceptionM, .InvalidateICacheM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE, diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 2944b1a66..c3dbf1f6b 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -32,7 +32,7 @@ module csrc import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, input logic StallE, StallM, input logic FlushM, - input logic InstrValidNotFlushedM, LoadStallD, StoreStallD, + input logic InstrValidNotFlushedM, LoadStallD, input logic CSRMWriteM, CSRWriteM, input logic BPDirPredWrongM, input logic BTAWrongM, @@ -75,7 +75,6 @@ module csrc import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] HPMCOUNTER_REGW[P.COUNTERS-1:0]; logic [P.XLEN-1:0] HPMCOUNTERH_REGW[P.COUNTERS-1:0]; logic LoadStallE, LoadStallM; - logic StoreStallE, StoreStallM; logic [P.COUNTERS-1:0] WriteHPMCOUNTERM; logic [P.COUNTERS-1:0] CounterEvent; logic [63:0] HPMCOUNTERPlusM[P.COUNTERS-1:0]; @@ -83,8 +82,8 @@ module csrc import cvw::*; #(parameter cvw_t P) ( genvar i; // Interface signals - flopenrc #(2) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d({StoreStallD, LoadStallD}), .q({StoreStallE, LoadStallE})); // don't flush the load stall during a load stall. - flopenrc #(2) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d({StoreStallE, LoadStallE}), .q({StoreStallM, LoadStallM})); + flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); // don't flush the load stall during a load stall. 
+ flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM)); // Determine when to increment each counter assign CounterEvent[0] = 1'b1; // MCYCLE always increments @@ -100,7 +99,7 @@ module csrc import cvw::*; #(parameter cvw_t P) ( assign CounterEvent[9] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong assign CounterEvent[11] = LoadStallM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. - assign CounterEvent[12] = StoreStallM; // Store Stall + assign CounterEvent[12] = 0; // deprecated Store Stall assign CounterEvent[13] = DCacheAccess; // data cache access assign CounterEvent[14] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss assign CounterEvent[15] = DCacheStallM; // d cache miss cycles diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 4c27df006..5692fc9f8 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -45,7 +45,6 @@ module privileged import cvw::*; #(parameter cvw_t P) ( // processor events for performance counter logging input logic FRegWriteM, // instruction will write floating-point registers input logic LoadStallD, // load instruction is stalling - input logic StoreStallD, // store instruction is stalling input logic ICacheStallF, // I cache stalled input logic DCacheStallM, // D cache stalled input logic BPDirPredWrongM, // branch predictor guessed wrong direction @@ -133,7 +132,7 @@ module privileged import cvw::*; #(parameter cvw_t P) ( .InstrM, .InstrOrigM, .PCM, .SrcAM, .IEUAdrM, .CSRReadM, .CSRWriteM, .TrapM, .mretM, .sretM, .InterruptM, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, - .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, + .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, 
.BPWrongM, .sfencevmaM, .ExceptionM, .InvalidateICacheM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index f861a08bc..78cec0665 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -75,7 +75,8 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( logic PCSrcE; logic CSRWriteFenceM; logic DivBusyE; - logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD; + logic StructuralStallD; + logic LoadStallD; logic SquashSCW; logic MDUActiveE; // Mul/Div instruction being executed logic ENVCFG_ADUE; // HPTW A/D Update enable @@ -95,7 +96,6 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( logic FCvtIntW; logic FDivBusyE; logic FRegWriteM; - logic FCvtIntStallD; logic FpLoadStoreM; logic [4:0] SetFflagsM; logic [P.XLEN-1:0] FIntDivResultW; @@ -211,8 +211,8 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .InstrValidM, .InstrValidE, .InstrValidD, .FCvtIntResW, .FCvtIntW, // hazards .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .FCvtIntStallD, .LoadStallD, .MDUStallD, .CSRRdStallD, .PCSrcE, - .CSRReadM, .CSRWriteM, .PrivilegedM, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD); + .StructuralStallD, .LoadStallD, .PCSrcE, + .CSRReadM, .CSRWriteM, .PrivilegedM, .CSRWriteFenceM, .InvalidateICacheM); lsu #(P) lsu( .clk, .reset, .StallM, .FlushM, .StallW, .FlushW, @@ -266,9 +266,9 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( // global stall and flush control hazard #(P) hzu( .BPWrongE, .CSRWriteFenceM, .RetM, .TrapM, - .LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD, + .StructuralStallD, .LSUStallM, .IFUStallF, - .FCvtIntStallD, .FPUStallD, + .FPUStallD, .DivBusyE, .FDivBusyE, .wfiM, .IntPendingM, // Stall & flush outputs @@ -284,7 +284,7 @@ module wallypipelinedcore import 
cvw::*; #(parameter cvw_t P) ( .InstrM, .InstrOrigM, .CSRReadValW, .EPCM, .TrapVectorM, .RetM, .TrapM, .sfencevmaM, .InvalidateICacheM, .DCacheStallM, .ICacheStallF, .InstrValidM, .CommittedM, .CommittedF, - .FRegWriteM, .LoadStallD, .StoreStallD, + .FRegWriteM, .LoadStallD, .BPDirPredWrongM, .BTAWrongM, .BPWrongM, .RASPredPCWrongM, .IClassWrongM, .DivBusyE, .FDivBusyE, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM,