Modified the LSU/IFU and caches to improve critical path. Arty A7 went from 15 to 17Mhz. I believe we can push all the way to 20+Mhz with relatively little effort. Along the way I'm fixing up the scripts build the linux images for the flash card.

This commit is contained in:
Ross Thompson 2023-07-21 13:06:27 -05:00
parent 7873d26678
commit 37078f3d9b
5 changed files with 22 additions and 12 deletions

View File

@ -106,7 +106,9 @@ set_output_delay -clock [get_clocks clk_out3_xlnx_mmcm] -max -add_delay 6.000 [g
set_output_delay -clock [get_clocks clk_out3_xlnx_mmcm] 0.000 [get_ports SDCCLK] set_output_delay -clock [get_clocks clk_out3_xlnx_mmcm] 0.000 [get_ports SDCCLK]
set_multicycle_path -from [get_pins xlnx_ddr3_c0/u_xlnx_ddr3_mig/u_memc_ui_top_axi/mem_intfc0/ddr_phy_top0/u_ddr_calib_top/init_calib_complete_reg/C] -to [get_pins xlnx_proc_sys_reset_0/U0/EXT_LPF/lpf_int_reg/D] 10 #set_multicycle_path -from [get_pins xlnx_ddr3_c0/u_xlnx_ddr3_mig/u_memc_ui_top_axi/mem_intfc0/ddr_phy_top0/u_ddr_calib_top/init_calib_complete_reg/C] -to [get_pins xlnx_proc_sys_reset_0/U0/EXT_LPF/lpf_int_reg/D] 10
set_max_delay -datapath_only -from [get_pins xlnx_ddr3_c0/u_xlnx_ddr3_mig/u_memc_ui_top_axi/mem_intfc0/ddr_phy_top0/u_ddr_calib_top/init_calib_complete_reg/C] -to [get_pins xlnx_proc_sys_reset_0/U0/EXT_LPF/lpf_int_reg/D] 20.000
# ********************************* # *********************************
#set_property DCI_CASCADE {64} [get_iobanks 65] #set_property DCI_CASCADE {64} [get_iobanks 65]

View File

@ -21,8 +21,8 @@
cpus { cpus {
#address-cells = <0x01>; #address-cells = <0x01>;
#size-cells = <0x00>; #size-cells = <0x00>;
clock-frequency = <0xE4E1C0>; clock-frequency = <0x1036640>;
timebase-frequency = <0xE4E1C0>; timebase-frequency = <0x1036640>;
cpu@0 { cpu@0 {
phandle = <0x01>; phandle = <0x01>;
@ -51,7 +51,7 @@
uart@10000000 { uart@10000000 {
interrupts = <0x0a>; interrupts = <0x0a>;
interrupt-parent = <0x03>; interrupt-parent = <0x03>;
clock-frequency = <0xE4E1C0>; clock-frequency = <0x1036640>;
reg = <0x00 0x10000000 0x00 0x100>; reg = <0x00 0x10000000 0x00 0x100>;
compatible = "ns16550a"; compatible = "ns16550a";
}; };

1
src/cache/cache.sv vendored
View File

@ -33,6 +33,7 @@ module cache import cvw::*; #(parameter cvw_t P,
input logic reset, input logic reset,
input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY
input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations) input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations)
input logic IgnoreRequestTLB, //
// cpu side // cpu side
input logic [1:0] CacheRW, // [1] Read, [0] Write input logic [1:0] CacheRW, // [1] Read, [0] Write
input logic [1:0] CacheAtomic, // Atomic operation input logic [1:0] CacheAtomic, // Atomic operation

View File

@ -229,13 +229,14 @@ module ifu import cvw::*; #(parameter cvw_t P) (
logic [P.PA_BITS-1:0] ICacheBusAdr; logic [P.PA_BITS-1:0] ICacheBusAdr;
logic ICacheBusAck; logic ICacheBusAck;
logic [1:0] CacheBusRW, BusRW, CacheRWF; logic [1:0] CacheBusRW, BusRW, CacheRWF;
logic [1:0] CacheBusRWTemp;
assign BusRW = ~ITLBMissF & ~CacheableF & ~SelIROM ? IFURWF : '0; assign BusRW = ~ITLBMissF & ~CacheableF & ~SelIROM ? IFURWF : '0;
assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? IFURWF : '0; assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? IFURWF : '0;
cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.ICACHE_LINELENINBITS), cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.ICACHE_LINELENINBITS),
.NUMLINES(P.ICACHE_WAYSIZEINBYTES*8/P.ICACHE_LINELENINBITS), .NUMLINES(P.ICACHE_WAYSIZEINBYTES*8/P.ICACHE_LINELENINBITS),
.NUMWAYS(P.ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .READ_ONLY_CACHE(1)) .NUMWAYS(P.ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .READ_ONLY_CACHE(1))
icache(.clk, .reset, .FlushStage(FlushD), .Stall(GatedStallD), icache(.clk, .reset, .FlushStage(FlushD), .IgnoreRequestTLB(1'b0), .Stall(GatedStallD),
.FetchBuffer, .CacheBusAck(ICacheBusAck), .FetchBuffer, .CacheBusAck(ICacheBusAck),
.CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF),
.CacheBusRW, .CacheBusRW,
@ -249,6 +250,7 @@ module ifu import cvw::*; #(parameter cvw_t P) (
.NextSet(PCSpillNextF[11:0]), .NextSet(PCSpillNextF[11:0]),
.PAdr(PCPF), .PAdr(PCPF),
.CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM)); .CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM));
ahbcacheinterface #(P.AHBW, P.LLEN, P.PA_BITS, WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW, 1) ahbcacheinterface #(P.AHBW, P.LLEN, P.PA_BITS, WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW, 1)
ahbcacheinterface(.HCLK(clk), .HRESETn(~reset), ahbcacheinterface(.HCLK(clk), .HRESETn(~reset),
.HRDATA, .HRDATA,

View File

@ -255,27 +255,32 @@ module lsu import cvw::*; #(parameter cvw_t P) (
logic [1:0] CacheRWM; // Cache read (10), write (01), AMO (11) logic [1:0] CacheRWM; // Cache read (10), write (01), AMO (11)
logic [1:0] CacheAtomicM; // Cache AMO logic [1:0] CacheAtomicM; // Cache AMO
logic FlushDCache; // Suppress d cache flush if there is an ITLB miss. logic FlushDCache; // Suppress d cache flush if there is an ITLB miss.
logic CacheStall;
logic [1:0] CacheBusRWTemp;
assign BusRW = ~CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0; assign BusRW = ~CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0;
assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM; assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM;
assign CacheRWM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0; assign CacheRWM = CacheableM & ~SelDTIM ? LSURWM : '0;
assign CacheAtomicM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSUAtomicM : '0; assign CacheAtomicM = CacheableM & ~SelDTIM ? LSUAtomicM : '0;
assign FlushDCache = FlushDCacheM & ~(IgnoreRequestTLB | SelHPTW); assign FlushDCache = FlushDCacheM & ~(SelHPTW);
// *** need RT to add support for CMOpM and LSUPrefetchM (DH 7/2/23) // *** need RT to add support for CMOpM and LSUPrefetchM (DH 7/2/23)
// *** prefetch can just act as a read operation // *** prefetch can just act as a read operation
cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(P.LLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(P.LLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache(
.clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .IgnoreRequestTLB, .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM),
.FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM), .FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM),
.ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]),
.CacheWriteData(LSUWriteDataM), .SelHPTW, .CacheWriteData(LSUWriteDataM), .SelHPTW,
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
.CacheCommitted(DCacheCommittedM), .CacheCommitted(DCacheCommittedM),
.CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM),
.FetchBuffer, .CacheBusRW, .FetchBuffer, .CacheBusRW(CacheBusRWTemp),
.CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0)); .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0));
assign DCacheStallM = CacheStall & ~IgnoreRequestTLB;
assign CacheBusRW = IgnoreRequestTLB ? 2'b0 : CacheBusRWTemp;
ahbcacheinterface #(.AHBW(P.AHBW), .LLEN(P.LLEN), .PA_BITS(P.PA_BITS), .BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW), .READ_ONLY_CACHE(0)) ahbcacheinterface( ahbcacheinterface #(.AHBW(P.AHBW), .LLEN(P.LLEN), .PA_BITS(P.PA_BITS), .BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW), .READ_ONLY_CACHE(0)) ahbcacheinterface(
.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HCLK(clk), .HRESETn(~reset), .Flush(FlushW),
.HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB),