diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 28d0eef27..c543348e5 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -62,8 +62,9 @@ module controller import cvw::*; #(parameter cvw_t P) ( output logic [2:0] BALUControlE, // ALU Control signals for B instructions in Execute Stage output logic BMUActiveE, // Bit manipulation instruction being executed output logic MDUActiveE, // Mul/Div instruction being executed - output logic [3:0] CMOpE, // 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero - output logic [2:0] PrefetchE, // which prefetch instruction 1: prefetch.i, 2: prefetch.r, 4: prefetch.w + output logic [3:0] CMOpM, // 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero + output logic IFUPrefetchE, // instruction prefetch + output logic LSUPrefetchM, // data prefetch // Memory stage control signals input logic StallM, FlushM, // Stall, flush Memory stage @@ -95,6 +96,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( // pipelined control signals logic RegWriteD, RegWriteE; // RegWrite (register will be written) logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; // Select which result to write back to register file + logic [2:0] PreImmSrcD; // Immediate source format (before amending for prefetches) logic [1:0] MemRWD, MemRWE; // Store (write to memory) logic ALUOpD; // 0 for address generation, 1 for all other operations (must use Funct3) logic BaseW64D; // W64 for Base instructions specifically @@ -142,8 +144,9 @@ module controller import cvw::*; #(parameter cvw_t P) ( logic FenceM; // Fence.I or sfence.VMA instruction in memory stage logic [2:0] ALUSelectD; // ALU Output selection mux control logic IWValidFunct3D; // Detects if Funct3 is valid for IW instructions - logic [3:0] CMOpD; // which CMO instruction 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero - logic [2:0] PrefetchD; // which prefetch instruction 1: prefetch.i, 2: prefetch.r, 4: prefetch.w + logic [3:0] CMOpD, CMOpE; // which CMO instruction 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero + logic IFUPrefetchD; // instruction prefetch + logic LSUPrefetchD, LSUPrefetchE; // data prefetch // Extract fields assign OpD = InstrD[6:0]; @@ -284,7 +287,7 @@ module controller import cvw::*; #(parameter cvw_t P) ( // On RV32E, can't write to upper 16 registers. Checking reads to upper 16 is more costly so disregard them. assign IllegalERegAdrD = P.E_SUPPORTED & P.ZICSR_SUPPORTED & ControlsD[`CTRLW-1] & InstrD[11]; //assign IllegalBaseInstrD = 1'b0; - assign {BaseRegWriteD, ImmSrcD, ALUSrcAD, BaseALUSrcBD, MemRWD, + assign {BaseRegWriteD, PreImmSrcD, ALUSrcAD, BaseALUSrcBD, MemRWD, ResultSrcD, BranchD, ALUOpD, JumpD, ALUResultSrcD, BaseW64D, CSRReadD, PrivilegedD, FenceXD, MDUD, AtomicD, CMOD, unused} = IllegalIEUFPUInstrD ? `CTRLW'b0 : ControlsD; @@ -367,25 +370,28 @@ module controller import cvw::*; #(parameter cvw_t P) ( // Prefetch Hints always_comb begin - PrefetchD = 3'b000; // default: not a prefetch hint + // default: not a prefetch hint + IFUPrefetchD = 1'b0; + LSUPrefetchD = 1'b0; + ImmSrcD = PreImmSrcD; if (P.ZICBOP_SUPPORTED & (InstrD[14:0] == 15'b110_00000_0010011)) begin // ori with destiation x0 is hint for Prefetch - case (Rs2D) // which type of prefectch? - 5'b00000: PrefetchD = 3'b001; // prefetch.i - 5'b00001: PrefetchD = 3'b010; // prefetch.r - 5'b00011: PrefetchD = 3'b100; // prefetch.w + case (Rs2D) // which type of prefectch? Note: prefetch.r and .w are handled the same in Wally + 5'b00000: IFUPrefetchD = 1'b1; // prefetch.i + 5'b00001: LSUPrefetchD = 1'b1; // prefetch.r + 5'b00011: LSUPrefetchD = 1'b1; // prefetch.w // default: not a prefetch hint endcase + if (IFUPrefetchD | LSUPrefetchD) ImmSrcD = 3'b001; // use S-type immediate format for prefetches end end - //assign AnyPrefetchD = |PrefetchD; // Decode stage pipeline control register flopenrc #(1) controlregD(clk, reset, FlushD, ~StallD, 1'b1, InstrValidD); // Execute stage pipeline control register and logic - flopenrc #(36) controlregE(clk, reset, FlushE, ~StallE, - {ALUSelectD, RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, SubArithD, MDUD, AtomicD, InvalidateICacheD, FlushDCacheD, FenceD, CMOpD, PrefetchD, InstrValidD}, - {ALUSelectE, IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, SubArithE, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, CMOpE, PrefetchE, InstrValidE}); + flopenrc #(35) controlregE(clk, reset, FlushE, ~StallE, + {ALUSelectD, RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, SubArithD, MDUD, AtomicD, InvalidateICacheD, FlushDCacheD, FenceD, CMOpD, IFUPrefetchD, LSUPrefetchD, InstrValidD}, + {ALUSelectE, IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, SubArithE, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, CMOpE, IFUPrefetchE, LSUPrefetchE, InstrValidE}); // Branch Logic // The comparator handles both signed and unsigned branches using BranchSignedE @@ -404,9 +410,9 @@ module controller import cvw::*; #(parameter cvw_t P) ( assign IntDivE = MDUE & Funct3E[2]; // Integer division operation // Memory stage pipeline control register - flopenrc #(20) controlregM(clk, reset, FlushM, ~StallM, - {RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, InstrValidE, IntDivE}, - {RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FenceM, InstrValidM, IntDivM}); + flopenrc #(25) controlregM(clk, reset, FlushM, ~StallM, + {RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FenceE, InstrValidE, IntDivE, CMOpE, LSUPrefetchE}, + {RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FenceM, InstrValidM, IntDivM, CMOpM, LSUPrefetchM}); // Writeback stage pipeline control register flopenrc #(5) controlregW(clk, reset, FlushW, ~StallW, diff --git a/src/ieu/datapath.sv b/src/ieu/datapath.sv index 799a25988..bb7638514 100644 --- a/src/ieu/datapath.sv +++ b/src/ieu/datapath.sv @@ -98,7 +98,7 @@ module datapath import cvw::*; #(parameter cvw_t P) ( assign Rs2D = InstrD[24:20]; assign RdD = InstrD[11:7]; regfile #(P.XLEN, P.E_SUPPORTED) regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, R1D, R2D); - extend #(P.XLEN, P.A_SUPPORTED) ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ImmExtD); + extend #(P) ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ImmExtD); // Execute stage pipeline register and logic flopenrc #(P.XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E); diff --git a/src/ieu/extend.sv b/src/ieu/extend.sv index e0551e9dc..bcda43e0a 100644 --- a/src/ieu/extend.sv +++ b/src/ieu/extend.sv @@ -27,28 +27,29 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module extend #(parameter XLEN, A_SUPPORTED) ( - input logic [31:7] InstrD, // All instruction bits except opcode (lower 7 bits) - input logic [2:0] ImmSrcD, // Select what kind of extension to perform - output logic [XLEN-1:0] ImmExtD); // Extended immediate +module extend import cvw::*; #(parameter cvw_t P) ( + input logic [31:7] InstrD, // All instruction bits except opcode (lower 7 bits) + input logic [2:0] ImmSrcD, // Select what kind of extension to perform + output logic [P.XLEN-1:0] ImmExtD); // Extended immediate - localparam [XLEN-1:0] undefined = {(XLEN){1'bx}}; // could change to 0 after debug + localparam [P.XLEN-1:0] undefined = {(P.XLEN){1'bx}}; // could change to 0 after debug always_comb - case(ImmSrcD) + case (ImmSrcD) // I-type - 3'b000: ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[31:20]}; + 3'b000: ImmExtD = {{(P.XLEN-12){InstrD[31]}}, InstrD[31:20]}; // S-type (stores) - 3'b001: ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[31:25], InstrD[11:7]}; + 3'b001: ImmExtD = {{(P.XLEN-12){InstrD[31]}}, InstrD[31:25], InstrD[11:7]}; // B-type (branches) - 3'b010: ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[7], InstrD[30:25], InstrD[11:8], 1'b0}; + 3'b010: ImmExtD = {{(P.XLEN-12){InstrD[31]}}, InstrD[7], InstrD[30:25], InstrD[11:8], 1'b0}; // J-type (jal) - 3'b011: ImmExtD = {{(XLEN-20){InstrD[31]}}, InstrD[19:12], InstrD[20], InstrD[30:21], 1'b0}; + 3'b011: ImmExtD = {{(P.XLEN-20){InstrD[31]}}, InstrD[19:12], InstrD[20], InstrD[30:21], 1'b0}; // U-type (lui, auipc) - 3'b100: ImmExtD = {{(XLEN-31){InstrD[31]}}, InstrD[30:12], 12'b0}; + 3'b100: ImmExtD = {{(P.XLEN-31){InstrD[31]}}, InstrD[30:12], 12'b0}; // Store Conditional: zero offset - 3'b101: if (A_SUPPORTED) ImmExtD = 0; + 3'b101: if (P.A_SUPPORTED) ImmExtD = 0; else ImmExtD = undefined; default: ImmExtD = undefined; // undefined endcase + endmodule diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index fe8fb4ef3..2656b003d 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -45,8 +45,9 @@ module ieu import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU src inputs before the mux choosing between them and PCE to put in srcA/B output logic [4:0] RdE, // Destination register output logic MDUActiveE, // Mul/Div instruction being executed - output logic [3:0] CMOpE, // 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero - output logic [2:0] PrefetchE, // which prefetch instruction 1: prefetch.i, 2: prefetch.r, 4: prefetch.w + output logic [3:0] CMOpM, // 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero + output logic IFUPrefetchE, // instruction prefetch + output logic LSUPrefetchM, // datata prefetch // Memory stage signals input logic SquashSCW, // Squash store conditional, from LSU output logic [1:0] MemRWM, // Read/write control goes to LSU @@ -105,7 +106,7 @@ module ieu import cvw::*; #(parameter cvw_t P) ( .IllegalIEUFPUInstrD, .IllegalBaseInstrD, .StallE, .FlushE, .FlagsE, .FWriteIntE, .PCSrcE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .MemReadE, .CSRReadE, .Funct3E, .IntDivE, .MDUE, .W64E, .SubArithE, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, - .BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .MDUActiveE, .CMOpE, .PrefetchE, + .BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .MDUActiveE, .CMOpM, .IFUPrefetchE, .LSUPrefetchM, .StallM, .FlushM, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .RegWriteM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM, .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD); diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 9c0d00543..9fda87cd0 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -39,6 +39,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( input logic [6:0] Funct7M, // Atomic memory operation function input logic [1:0] AtomicM, // Atomic memory operation input logic FlushDCacheM, // Flush D cache to next level of memory + input logic [3:0] CMOpM, // 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero + input logic LSUPrefetchM, // Prefetch output logic CommittedM, // Delay interrupts while memory operation in flight output logic SquashSCW, // Store conditional failed disable write to GPR output logic DCacheMiss, // D cache miss for performance counters @@ -224,7 +226,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [1:0] DTIMMemRWM; // The DTIM uses untranslated addresses, so it is not compatible with virtual memory. - mux2 #(P.PA_BITS) DTIMAdrMux(IEUAdrExtE[P.PA_BITS-1:0], IEUAdrExtM[P.PA_BITS-1:0], MemRWM[0], DTIMAdr); + mux2 #(P.PA_BITS) DTIMAdrMux(IEUAdrExtE[P.PA_BITS-1:0], IEUAdrExtM[P.PA_BITS-1:0], MemRWM[0], DTIMAdr); assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : '0; // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** create config to support DTIM with floating point. @@ -258,8 +260,10 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM; assign CacheRWM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0; assign CacheAtomicM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSUAtomicM : '0; - assign FlushDCache = FlushDCacheM & ~(IgnoreRequestTLB | SelHPTW); + assign FlushDCache = FlushDCacheM & ~(IgnoreRequestTLB | SelHPTW); + // *** need RT to add support for CMOpM and LSUPrefetchM (DH 7/2/23) + // *** prefetch can just act as a read operation cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(P.LLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 438214d2a..793ea5777 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -78,9 +78,9 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD; logic SquashSCW; logic MDUActiveE; // Mul/Div instruction being executed - logic [3:0] CMOpE; // 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero - logic [2:0] PrefetchE; // 1: prefetch.i, 2: prefetch.r, 4: prefetch.w - logic [3:0] ENVCFG_CBE; // Cache block operation enables + logic [3:0] ENVCFG_CBE; // Cache Block operation enables + logic [3:0] CMOpM; // 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero + logic IFUPrefetchE, LSUPrefetchM; // instruction / data prefetch hints // floating point unit signals logic [2:0] FRM_REGW; @@ -194,7 +194,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .InstrD, .STATUS_FS, .ENVCFG_CBE, .IllegalIEUFPUInstrD, .IllegalBaseInstrD, // Execute Stage interface .PCE, .PCLinkE, .FWriteIntE, .FCvtIntE, .IEUAdrE, .IntDivE, .W64E, - .Funct3E, .ForwardedSrcAE, .ForwardedSrcBE, .MDUActiveE, .CMOpE, .PrefetchE, + .Funct3E, .ForwardedSrcAE, .ForwardedSrcBE, .MDUActiveE, .CMOpM, .IFUPrefetchE, .LSUPrefetchM, // Memory stage interface .SquashSCW, // from LSU .MemRWM, // read/write control goes to LSU @@ -218,7 +218,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .MemRWM, .Funct3M, .Funct7M(InstrM[31:25]), .AtomicM, .CommittedM, .DCacheMiss, .DCacheAccess, .SquashSCW, .FpLoadStoreM, .FWriteDataM, .IEUAdrE, .IEUAdrM, .WriteDataM, - .ReadDataW, .FlushDCacheM, + .ReadDataW, .FlushDCacheM, .CMOpM, .LSUPrefetchM, // connected to ahb (all stay the same) .LSUHADDR, .HRDATA, .LSUHWDATA, .LSUHWSTRB, .LSUHSIZE, .LSUHBURST, .LSUHTRANS, .LSUHWRITE, .LSUHREADY,