This commit is contained in:
David Harris 2021-12-14 11:15:58 -08:00
commit 0e9fe6c214
11 changed files with 1349 additions and 918 deletions

5
.gitignore vendored
View File

@ -38,3 +38,8 @@ wally-pipelined/regression/slack-notifier/slack-webhook-url.txt
wally-pipelined/regression/logs
fpga/generator/IP
fpga/generator/vivado.*
fpga/generator/.Xil/*
fpga/generator/WallyFPGA*
fpga/generator/reports/
fpga/generator/*.log
fpga/generator/*.jou

@ -1 +1 @@
Subproject commit ddcfa6cc3d80818140a459e590296c3079c5a3ec
Subproject commit d22b280198e74b871e04fc0ddb622fb825fdae49

View File

@ -3,41 +3,8 @@
# mmcm_clkout0 is the clock output of the DDR4 memory interface / 4.
# This clock is not used by wally or the AHBLite Bus. However it is used by the AXI BUS on the DD4 IP.
# generate 1 clock for the slow speed SD Card hardware. However we need to time at the mmcm_clkout1
# clock speed.
#create_generated_clock -name r_fd_Q -source [get_pins wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/toggle_flip_flop/i_CLK] -divide_by 50 [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/toggle_flip_flop/r_fd_Q]
#create_clock -period 4.000 [get_ports default_250mhz_clk1_0_p]
# need to create a clock for mmcm_clkout1. In the gui flow this was auto generated somehow.
# turns out this clock is auto generated but has a different name. wtf
# 10 Mhz
#create_clock -name mmcm_clkout1 -period 100 [get_pins xlnx_ddr4_c0/addn_ui_clkout1]
#create_generated_clock -name mmcm_clkout1 -source [get_pins xlnx_ddr4_c0/c0_sys_clk_p] -edges {1 2 3} -edge_shift {0.000 48.000 96.000} [get_pins xlnx_ddr4_c0/addn_ui_clkout1]
#create_generated_clock -name mmcm_clkout1 xlnx_ddr4_c0/addn_ui_clkout1
#create_generated_clock -name mmcm_clkout1 mmcm_clkout1
create_generated_clock -name CLKDiv64_Gen -source [get_pins wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/I0] -multiply_by 1 [get_pins wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/O]
#create_generated_clock -name mmcm_clkout1_Gen -source [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/I0] -divide_by 1 -add -master_clock mmcm_clkout1 [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/O]
#create_generated_clock -name CLKDiv64_Gen -source [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/I1] -divide_by 1 -add -master_clock mmcm_clkout1_Gen [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/O]
#create_generated_clock -name mmcm_clkout1_Gen_slow -source [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/I0] -divide_by 8 -add -master_clock mmcm_clkout1 [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/O]
#create_generated_clock -name CLKDiv64_Gen_slow -source [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/I1] -divide_by 8 -add -master_clock mmcm_clkout1_Gen_slow [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/O]
#set_clock_groups -logically_exclusive -group [get_clocks -include_generated_clocks mmcm_clkout1_Gen] -group [get_clocks -include_generated_clocks CLKDiv64_Gen]
#set_clock_groups -logically_exclusive -group [get_clocks -include_generated_clocks mmcm_clkout1_Gen] -group [get_clocks -include_generated_clocks CLKDiv64_Gen_slow]
##### GPI ####
set_property PACKAGE_PIN BB24 [get_ports {GPI[0]}]
set_property PACKAGE_PIN BF22 [get_ports {GPI[1]}]

View File

@ -19,111 +19,18 @@ read_verilog {../src/fpgaTop.v}
set_property include_dirs {../../wally-pipelined/config/fpga ../../wally-pipelined/config/shared} [current_fileset]
# contrainsts generated by the IP blocks
#add_files -fileset constrs_1 -norecurse IP/xlnx_ahblite_axi_bridge.gen/sources_1/ip/xlnx_ahblite_axi_bridge/xlnx_ahblite_axi_bridge_ooc.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ahblite_axi_bridge.gen/sources_1/ip/xlnx_ahblite_axi_bridge/xlnx_ahblite_axi_bridge_ooc.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_axi_clock_converter.gen/sources_1/ip/xlnx_axi_clock_converter/xlnx_axi_clock_converter_ooc.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_axi_clock_converter.gen/sources_1/ip/xlnx_axi_clock_converter/xlnx_axi_clock_converter_ooc.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_1/par/xlnx_ddr4_phy_ooc.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_1/par/xlnx_ddr4_phy_ooc.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_0/bd_1ba7_microblaze_I_0.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_0/bd_1ba7_microblaze_I_0.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_1/bd_1ba7_rst_0_0_board.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_1/bd_1ba7_rst_0_0_board.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_2/bd_1ba7_ilmb_0.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_2/bd_1ba7_ilmb_0.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_3/bd_1ba7_dlmb_0.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_3/bd_1ba7_dlmb_0.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_1/bd_1ba7_rst_0_0.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_1/bd_1ba7_rst_0_0.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_6/bd_1ba7_lmb_bram_I_0_ooc.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_6/bd_1ba7_lmb_bram_I_0_ooc.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_9/bd_1ba7_second_lmb_bram_I_0_ooc.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_9/bd_1ba7_second_lmb_bram_I_0_ooc.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_10/bd_1ba7_iomodule_0_0_board.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_10/bd_1ba7_iomodule_0_0_board.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/bd_1ba7_ooc.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/bd_1ba7_ooc.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_0/xlnx_ddr4_microblaze_mcs_board.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_0/xlnx_ddr4_microblaze_mcs_board.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_0/xlnx_ddr4_microblaze_mcs_ooc.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_0/xlnx_ddr4_microblaze_mcs_ooc.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/xlnx_ddr4_board.xdc
#set_property PROCESSING_ORDER LATE [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/xlnx_ddr4_board.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/par/xlnx_ddr4.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/par/xlnx_ddr4.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset_board.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset_board.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset_ooc.xdc
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset_ooc.xdc]
add_files -fileset constrs_1 -norecurse ../constraints/constraints.xdc
set_property PROCESSING_ORDER NORMAL [get_files ../constraints/constraints.xdc]
# implementation only
#add_files -fileset constrs_1 -norecurse IP/xlnx_axi_clock_converter.gen/sources_1/ip/xlnx_axi_clock_converter/xlnx_axi_clock_converter_clocks.xdc
#set_property PROCESSING_ORDER LATE [get_files IP/xlnx_axi_clock_converter.gen/sources_1/ip/xlnx_axi_clock_converter/xlnx_axi_clock_converter_clocks.xdc]
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_0/bd_1ba7_microblaze_I_0_ooc_debug.xdc
#add_files -fileset constrs_1 -norecurse IP/xlnx_ahblite_axi_bridge.runs/xlnx_ahblite_axi_bridge_synth_1/dont_touch.xdc
#add_files -fileset constrs_1 -norecurse IP/xlnx_proc_sys_reset.runs/xlnx_proc_sys_reset_synth_1/dont_touch.xdc
#add_files -fileset constrs_1 -norecurse IP/xlnx_axi_clock_converter.runs/xlnx_axi_clock_converter_synth_1/.Xil/xlnx_axi_clock_converter_propImpl.xdc
#add_files -fileset constrs_1 -norecurse IP/xlnx_axi_clock_converter.runs/xlnx_axi_clock_converter_synth_1/dont_touch.xdc
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.runs/xlnx_ddr4_synth_1/.Xil/xlnx_ddr4_propImpl.xdc
#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.runs/xlnx_ddr4_synth_1/dont_touch.xdc
# constraints for wally top level
# define top level
set_property top fpgaTop [current_fileset]
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_0/xlnx_ddr4_microblaze_mcs_ooc.xdc]
#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset_ooc.xdc]
update_compile_order -fileset sources_1
# this line is wrong vvv
#update_compile_order -fileset constrs_1
# This is important as the ddr4 IP contains the generate clock constraint which the user constraints depend on.
exec mkdir -p reports/
exec rm -rf reports/*
report_compile_order -constraints > reports/compile_order.rpt
# this is elaboration not synthesis.
@ -137,8 +44,6 @@ launch_runs synth_1 -jobs 4
wait_on_run synth_1
open_run synth_1
exec mkdir -p reports/
exec rm -rf reports/*
check_timing -verbose -file reports/check_timing.rpt
report_timing -max_paths 10 -nworst 10 -delay_type max -sort_by slack -file reports/timing_WORST_10.rpt

View File

@ -41,7 +41,7 @@ set_property -dict [list CONFIG.C0.ControllerType {DDR4_SDRAM} \
CONFIG.C0.DDR4_CLKOUT0_DIVIDE {6} \
CONFIG.Reference_Clock {Differential} \
CONFIG.ADDN_UI_CLKOUT1.INSERT_VIP {0} \
CONFIG.ADDN_UI_CLKOUT1_FREQ_HZ {10} \
CONFIG.ADDN_UI_CLKOUT1_FREQ_HZ {23} \
CONFIG.ADDN_UI_CLKOUT2.INSERT_VIP {0} \
CONFIG.ADDN_UI_CLKOUT2_FREQ_HZ {208} \
CONFIG.ADDN_UI_CLKOUT3.INSERT_VIP {0} \

View File

@ -0,0 +1,10 @@
cd <some directory to download qemu>
git clone https://github.com/qemu/qemu
cd qemu
git checkout dbdc621be937d9efe3e4dff994e54e8eea051f7a
git apply wallyVirtIO.patch # located in riscv-wally/wally-pipelined/linux-testgen/wallyVirtIO.patch
sudo apt install ninja-build # or your equivalent
sudo apt install libglib2.0-dev # or your equivalent
sudo apt install libpixman-1-dev libcairo2-dev libpango1.0-dev libjpeg8-dev libgif-dev
./configure --target-list=riscv64-softmmu
make --jobs

View File

@ -0,0 +1,542 @@
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 4a3cd2599a..39b46e3122 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -20,6 +20,7 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
+#include "qemu/log.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "hw/boards.h"
@@ -44,19 +45,10 @@
#include "hw/display/ramfb.h"
static const MemMapEntry virt_memmap[] = {
- [VIRT_DEBUG] = { 0x0, 0x100 },
[VIRT_MROM] = { 0x1000, 0xf000 },
- [VIRT_TEST] = { 0x100000, 0x1000 },
- [VIRT_RTC] = { 0x101000, 0x1000 },
[VIRT_CLINT] = { 0x2000000, 0x10000 },
- [VIRT_PCIE_PIO] = { 0x3000000, 0x10000 },
[VIRT_PLIC] = { 0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
[VIRT_UART0] = { 0x10000000, 0x100 },
- [VIRT_VIRTIO] = { 0x10001000, 0x1000 },
- [VIRT_FW_CFG] = { 0x10100000, 0x18 },
- [VIRT_FLASH] = { 0x20000000, 0x4000000 },
- [VIRT_PCIE_ECAM] = { 0x30000000, 0x10000000 },
- [VIRT_PCIE_MMIO] = { 0x40000000, 0x40000000 },
[VIRT_DRAM] = { 0x80000000, 0x0 },
};
@@ -67,139 +59,23 @@ static const MemMapEntry virt_memmap[] = {
/* PCIe high mmio for RV64, size is fixed but base depends on top of RAM */
#define VIRT64_HIGH_PCIE_MMIO_SIZE (16 * GiB)
-static MemMapEntry virt_high_pcie_memmap;
-
#define VIRT_FLASH_SECTOR_SIZE (256 * KiB)
-static PFlashCFI01 *virt_flash_create1(RISCVVirtState *s,
- const char *name,
- const char *alias_prop_name)
-{
- /*
- * Create a single flash device. We use the same parameters as
- * the flash devices on the ARM virt board.
- */
- DeviceState *dev = qdev_new(TYPE_PFLASH_CFI01);
-
- qdev_prop_set_uint64(dev, "sector-length", VIRT_FLASH_SECTOR_SIZE);
- qdev_prop_set_uint8(dev, "width", 4);
- qdev_prop_set_uint8(dev, "device-width", 2);
- qdev_prop_set_bit(dev, "big-endian", false);
- qdev_prop_set_uint16(dev, "id0", 0x89);
- qdev_prop_set_uint16(dev, "id1", 0x18);
- qdev_prop_set_uint16(dev, "id2", 0x00);
- qdev_prop_set_uint16(dev, "id3", 0x00);
- qdev_prop_set_string(dev, "name", name);
-
- object_property_add_child(OBJECT(s), name, OBJECT(dev));
- object_property_add_alias(OBJECT(s), alias_prop_name,
- OBJECT(dev), "drive");
-
- return PFLASH_CFI01(dev);
-}
-
-static void virt_flash_create(RISCVVirtState *s)
-{
- s->flash[0] = virt_flash_create1(s, "virt.flash0", "pflash0");
- s->flash[1] = virt_flash_create1(s, "virt.flash1", "pflash1");
-}
-
-static void virt_flash_map1(PFlashCFI01 *flash,
- hwaddr base, hwaddr size,
- MemoryRegion *sysmem)
-{
- DeviceState *dev = DEVICE(flash);
-
- assert(QEMU_IS_ALIGNED(size, VIRT_FLASH_SECTOR_SIZE));
- assert(size / VIRT_FLASH_SECTOR_SIZE <= UINT32_MAX);
- qdev_prop_set_uint32(dev, "num-blocks", size / VIRT_FLASH_SECTOR_SIZE);
- sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
-
- memory_region_add_subregion(sysmem, base,
- sysbus_mmio_get_region(SYS_BUS_DEVICE(dev),
- 0));
-}
-
-static void virt_flash_map(RISCVVirtState *s,
- MemoryRegion *sysmem)
-{
- hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2;
- hwaddr flashbase = virt_memmap[VIRT_FLASH].base;
-
- virt_flash_map1(s->flash[0], flashbase, flashsize,
- sysmem);
- virt_flash_map1(s->flash[1], flashbase + flashsize, flashsize,
- sysmem);
-}
-
-static void create_pcie_irq_map(void *fdt, char *nodename,
- uint32_t plic_phandle)
-{
- int pin, dev;
- uint32_t
- full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS * FDT_INT_MAP_WIDTH] = {};
- uint32_t *irq_map = full_irq_map;
-
- /* This code creates a standard swizzle of interrupts such that
- * each device's first interrupt is based on it's PCI_SLOT number.
- * (See pci_swizzle_map_irq_fn())
- *
- * We only need one entry per interrupt in the table (not one per
- * possible slot) seeing the interrupt-map-mask will allow the table
- * to wrap to any number of devices.
- */
- for (dev = 0; dev < GPEX_NUM_IRQS; dev++) {
- int devfn = dev * 0x8;
-
- for (pin = 0; pin < GPEX_NUM_IRQS; pin++) {
- int irq_nr = PCIE_IRQ + ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS);
- int i = 0;
-
- irq_map[i] = cpu_to_be32(devfn << 8);
-
- i += FDT_PCI_ADDR_CELLS;
- irq_map[i] = cpu_to_be32(pin + 1);
-
- i += FDT_PCI_INT_CELLS;
- irq_map[i++] = cpu_to_be32(plic_phandle);
-
- i += FDT_PLIC_ADDR_CELLS;
- irq_map[i] = cpu_to_be32(irq_nr);
-
- irq_map += FDT_INT_MAP_WIDTH;
- }
- }
-
- qemu_fdt_setprop(fdt, nodename, "interrupt-map",
- full_irq_map, sizeof(full_irq_map));
-
- qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask",
- 0x1800, 0, 0, 0x7);
-}
-
static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
uint64_t mem_size, const char *cmdline, bool is_32_bit)
{
void *fdt;
- int i, cpu, socket;
+ //int i, cpu, socket;
+ int cpu, socket;
MachineState *mc = MACHINE(s);
uint64_t addr, size;
uint32_t *clint_cells, *plic_cells;
unsigned long clint_addr, plic_addr;
uint32_t plic_phandle[MAX_NODES];
- uint32_t cpu_phandle, intc_phandle, test_phandle;
+ uint32_t cpu_phandle, intc_phandle;
uint32_t phandle = 1, plic_mmio_phandle = 1;
- uint32_t plic_pcie_phandle = 1, plic_virtio_phandle = 1;
char *mem_name, *cpu_name, *core_name, *intc_name;
char *name, *clint_name, *plic_name, *clust_name;
- hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2;
- hwaddr flashbase = virt_memmap[VIRT_FLASH].base;
- static const char * const clint_compat[2] = {
- "sifive,clint0", "riscv,clint0"
- };
- static const char * const plic_compat[2] = {
- "sifive,plic-1.0.0", "riscv,plic0"
- };
if (mc->dtb) {
fdt = mc->fdt = load_device_tree(mc->dtb, &s->fdt_size);
@@ -305,8 +181,7 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
(memmap[VIRT_CLINT].size * socket);
clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr);
qemu_fdt_add_subnode(fdt, clint_name);
- qemu_fdt_setprop_string_array(fdt, clint_name, "compatible",
- (char **)&clint_compat, ARRAY_SIZE(clint_compat));
+ qemu_fdt_setprop_string(fdt, clint_name, "compatible", "riscv,clint0");
qemu_fdt_setprop_cells(fdt, clint_name, "reg",
0x0, clint_addr, 0x0, memmap[VIRT_CLINT].size);
qemu_fdt_setprop(fdt, clint_name, "interrupts-extended",
@@ -322,8 +197,7 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
"#address-cells", FDT_PLIC_ADDR_CELLS);
qemu_fdt_setprop_cell(fdt, plic_name,
"#interrupt-cells", FDT_PLIC_INT_CELLS);
- qemu_fdt_setprop_string_array(fdt, plic_name, "compatible",
- (char **)&plic_compat, ARRAY_SIZE(plic_compat));
+ qemu_fdt_setprop_string(fdt, plic_name, "compatible", "riscv,plic0");
qemu_fdt_setprop(fdt, plic_name, "interrupt-controller", NULL, 0);
qemu_fdt_setprop(fdt, plic_name, "interrupts-extended",
plic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4);
@@ -342,95 +216,11 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
for (socket = 0; socket < riscv_socket_count(mc); socket++) {
if (socket == 0) {
plic_mmio_phandle = plic_phandle[socket];
- plic_virtio_phandle = plic_phandle[socket];
- plic_pcie_phandle = plic_phandle[socket];
- }
- if (socket == 1) {
- plic_virtio_phandle = plic_phandle[socket];
- plic_pcie_phandle = plic_phandle[socket];
- }
- if (socket == 2) {
- plic_pcie_phandle = plic_phandle[socket];
}
}
riscv_socket_fdt_write_distance_matrix(mc, fdt);
- for (i = 0; i < VIRTIO_COUNT; i++) {
- name = g_strdup_printf("/soc/virtio_mmio@%lx",
- (long)(memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size));
- qemu_fdt_add_subnode(fdt, name);
- qemu_fdt_setprop_string(fdt, name, "compatible", "virtio,mmio");
- qemu_fdt_setprop_cells(fdt, name, "reg",
- 0x0, memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size,
- 0x0, memmap[VIRT_VIRTIO].size);
- qemu_fdt_setprop_cell(fdt, name, "interrupt-parent",
- plic_virtio_phandle);
- qemu_fdt_setprop_cell(fdt, name, "interrupts", VIRTIO_IRQ + i);
- g_free(name);
- }
-
- name = g_strdup_printf("/soc/pci@%lx",
- (long) memmap[VIRT_PCIE_ECAM].base);
- qemu_fdt_add_subnode(fdt, name);
- qemu_fdt_setprop_cell(fdt, name, "#address-cells", FDT_PCI_ADDR_CELLS);
- qemu_fdt_setprop_cell(fdt, name, "#interrupt-cells", FDT_PCI_INT_CELLS);
- qemu_fdt_setprop_cell(fdt, name, "#size-cells", 0x2);
- qemu_fdt_setprop_string(fdt, name, "compatible", "pci-host-ecam-generic");
- qemu_fdt_setprop_string(fdt, name, "device_type", "pci");
- qemu_fdt_setprop_cell(fdt, name, "linux,pci-domain", 0);
- qemu_fdt_setprop_cells(fdt, name, "bus-range", 0,
- memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1);
- qemu_fdt_setprop(fdt, name, "dma-coherent", NULL, 0);
- qemu_fdt_setprop_cells(fdt, name, "reg", 0,
- memmap[VIRT_PCIE_ECAM].base, 0, memmap[VIRT_PCIE_ECAM].size);
- qemu_fdt_setprop_sized_cells(fdt, name, "ranges",
- 1, FDT_PCI_RANGE_IOPORT, 2, 0,
- 2, memmap[VIRT_PCIE_PIO].base, 2, memmap[VIRT_PCIE_PIO].size,
- 1, FDT_PCI_RANGE_MMIO,
- 2, memmap[VIRT_PCIE_MMIO].base,
- 2, memmap[VIRT_PCIE_MMIO].base, 2, memmap[VIRT_PCIE_MMIO].size,
- 1, FDT_PCI_RANGE_MMIO_64BIT,
- 2, virt_high_pcie_memmap.base,
- 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.size);
-
- create_pcie_irq_map(fdt, name, plic_pcie_phandle);
- g_free(name);
-
- test_phandle = phandle++;
- name = g_strdup_printf("/soc/test@%lx",
- (long)memmap[VIRT_TEST].base);
- qemu_fdt_add_subnode(fdt, name);
- {
- static const char * const compat[3] = {
- "sifive,test1", "sifive,test0", "syscon"
- };
- qemu_fdt_setprop_string_array(fdt, name, "compatible", (char **)&compat,
- ARRAY_SIZE(compat));
- }
- qemu_fdt_setprop_cells(fdt, name, "reg",
- 0x0, memmap[VIRT_TEST].base,
- 0x0, memmap[VIRT_TEST].size);
- qemu_fdt_setprop_cell(fdt, name, "phandle", test_phandle);
- test_phandle = qemu_fdt_get_phandle(fdt, name);
- g_free(name);
-
- name = g_strdup_printf("/soc/reboot");
- qemu_fdt_add_subnode(fdt, name);
- qemu_fdt_setprop_string(fdt, name, "compatible", "syscon-reboot");
- qemu_fdt_setprop_cell(fdt, name, "regmap", test_phandle);
- qemu_fdt_setprop_cell(fdt, name, "offset", 0x0);
- qemu_fdt_setprop_cell(fdt, name, "value", FINISHER_RESET);
- g_free(name);
-
- name = g_strdup_printf("/soc/poweroff");
- qemu_fdt_add_subnode(fdt, name);
- qemu_fdt_setprop_string(fdt, name, "compatible", "syscon-poweroff");
- qemu_fdt_setprop_cell(fdt, name, "regmap", test_phandle);
- qemu_fdt_setprop_cell(fdt, name, "offset", 0x0);
- qemu_fdt_setprop_cell(fdt, name, "value", FINISHER_PASS);
- g_free(name);
-
name = g_strdup_printf("/soc/uart@%lx", (long)memmap[VIRT_UART0].base);
qemu_fdt_add_subnode(fdt, name);
qemu_fdt_setprop_string(fdt, name, "compatible", "ns16550a");
@@ -445,102 +235,12 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", name);
g_free(name);
- name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[VIRT_RTC].base);
- qemu_fdt_add_subnode(fdt, name);
- qemu_fdt_setprop_string(fdt, name, "compatible", "google,goldfish-rtc");
- qemu_fdt_setprop_cells(fdt, name, "reg",
- 0x0, memmap[VIRT_RTC].base,
- 0x0, memmap[VIRT_RTC].size);
- qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", plic_mmio_phandle);
- qemu_fdt_setprop_cell(fdt, name, "interrupts", RTC_IRQ);
- g_free(name);
-
- name = g_strdup_printf("/soc/flash@%" PRIx64, flashbase);
- qemu_fdt_add_subnode(mc->fdt, name);
- qemu_fdt_setprop_string(mc->fdt, name, "compatible", "cfi-flash");
- qemu_fdt_setprop_sized_cells(mc->fdt, name, "reg",
- 2, flashbase, 2, flashsize,
- 2, flashbase + flashsize, 2, flashsize);
- qemu_fdt_setprop_cell(mc->fdt, name, "bank-width", 4);
- g_free(name);
-
update_bootargs:
if (cmdline) {
qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline);
}
}
-static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
- hwaddr ecam_base, hwaddr ecam_size,
- hwaddr mmio_base, hwaddr mmio_size,
- hwaddr high_mmio_base,
- hwaddr high_mmio_size,
- hwaddr pio_base,
- DeviceState *plic)
-{
- DeviceState *dev;
- MemoryRegion *ecam_alias, *ecam_reg;
- MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg;
- qemu_irq irq;
- int i;
-
- dev = qdev_new(TYPE_GPEX_HOST);
-
- sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
-
- ecam_alias = g_new0(MemoryRegion, 1);
- ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
- memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam",
- ecam_reg, 0, ecam_size);
- memory_region_add_subregion(get_system_memory(), ecam_base, ecam_alias);
-
- mmio_alias = g_new0(MemoryRegion, 1);
- mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
- memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio",
- mmio_reg, mmio_base, mmio_size);
- memory_region_add_subregion(get_system_memory(), mmio_base, mmio_alias);
-
- /* Map high MMIO space */
- high_mmio_alias = g_new0(MemoryRegion, 1);
- memory_region_init_alias(high_mmio_alias, OBJECT(dev), "pcie-mmio-high",
- mmio_reg, high_mmio_base, high_mmio_size);
- memory_region_add_subregion(get_system_memory(), high_mmio_base,
- high_mmio_alias);
-
- sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
-
- for (i = 0; i < GPEX_NUM_IRQS; i++) {
- irq = qdev_get_gpio_in(plic, PCIE_IRQ + i);
-
- sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
- gpex_set_irq_num(GPEX_HOST(dev), i, PCIE_IRQ + i);
- }
-
- return dev;
-}
-
-static FWCfgState *create_fw_cfg(const MachineState *mc)
-{
- hwaddr base = virt_memmap[VIRT_FW_CFG].base;
- hwaddr size = virt_memmap[VIRT_FW_CFG].size;
- FWCfgState *fw_cfg;
- char *nodename;
-
- fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16,
- &address_space_memory);
- fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)mc->smp.cpus);
-
- nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
- qemu_fdt_add_subnode(mc->fdt, nodename);
- qemu_fdt_setprop_string(mc->fdt, nodename,
- "compatible", "qemu,fw-cfg-mmio");
- qemu_fdt_setprop_sized_cells(mc->fdt, nodename, "reg",
- 2, base, 2, size);
- qemu_fdt_setprop(mc->fdt, nodename, "dma-coherent", NULL, 0);
- g_free(nodename);
- return fw_cfg;
-}
-
static void virt_machine_init(MachineState *machine)
{
const MemMapEntry *memmap = virt_memmap;
@@ -554,7 +254,7 @@ static void virt_machine_init(MachineState *machine)
target_ulong firmware_end_addr, kernel_start_addr;
uint32_t fdt_load_addr;
uint64_t kernel_entry;
- DeviceState *mmio_plic, *virtio_plic, *pcie_plic;
+ DeviceState *mmio_plic;
int i, j, base_hartid, hart_count;
/* Check socket count limit */
@@ -565,7 +265,7 @@ static void virt_machine_init(MachineState *machine)
}
/* Initialize sockets */
- mmio_plic = virtio_plic = pcie_plic = NULL;
+ mmio_plic = NULL;
for (i = 0; i < riscv_socket_count(machine); i++) {
if (!riscv_socket_check_hartids(machine, i)) {
error_report("discontinuous hartids in socket%d", i);
@@ -634,15 +334,6 @@ static void virt_machine_init(MachineState *machine)
/* Try to use different PLIC instance based device type */
if (i == 0) {
mmio_plic = s->plic[i];
- virtio_plic = s->plic[i];
- pcie_plic = s->plic[i];
- }
- if (i == 1) {
- virtio_plic = s->plic[i];
- pcie_plic = s->plic[i];
- }
- if (i == 2) {
- pcie_plic = s->plic[i];
}
}
@@ -654,13 +345,6 @@ static void virt_machine_init(MachineState *machine)
error_report("Limiting RAM size to 10 GiB");
}
#endif
- virt_high_pcie_memmap.base = VIRT32_HIGH_PCIE_MMIO_BASE;
- virt_high_pcie_memmap.size = VIRT32_HIGH_PCIE_MMIO_SIZE;
- } else {
- virt_high_pcie_memmap.size = VIRT64_HIGH_PCIE_MMIO_SIZE;
- virt_high_pcie_memmap.base = memmap[VIRT_DRAM].base + machine->ram_size;
- virt_high_pcie_memmap.base =
- ROUND_UP(virt_high_pcie_memmap.base, virt_high_pcie_memmap.size);
}
/* register system main memory (actual RAM) */
@@ -681,10 +365,12 @@ static void virt_machine_init(MachineState *machine)
if (riscv_is_32bit(&s->soc[0])) {
firmware_end_addr = riscv_find_and_load_firmware(machine,
- RISCV32_BIOS_BIN, start_addr, NULL);
+ "opensbi-riscv32-generic-fw_dynamic.bin",
+ start_addr, NULL);
} else {
firmware_end_addr = riscv_find_and_load_firmware(machine,
- RISCV64_BIOS_BIN, start_addr, NULL);
+ "opensbi-riscv64-generic-fw_dynamic.bin",
+ start_addr, NULL);
}
if (machine->kernel_filename) {
@@ -712,21 +398,6 @@ static void virt_machine_init(MachineState *machine)
kernel_entry = 0;
}
- if (drive_get(IF_PFLASH, 0, 0)) {
- /*
- * Pflash was supplied, let's overwrite the address we jump to after
- * reset to the base of the flash.
- */
- start_addr = virt_memmap[VIRT_FLASH].base;
- }
-
- /*
- * Init fw_cfg. Must be done before riscv_load_fdt, otherwise the device
- * tree cannot be altered and we get FDT_ERR_NOSPACE.
- */
- s->fw_cfg = create_fw_cfg(machine);
- rom_set_fw(s->fw_cfg);
-
/* Compute the fdt load address in dram */
fdt_load_addr = riscv_load_fdt(memmap[VIRT_DRAM].base,
machine->ram_size, machine->fdt);
@@ -736,41 +407,10 @@ static void virt_machine_init(MachineState *machine)
virt_memmap[VIRT_MROM].size, kernel_entry,
fdt_load_addr, machine->fdt);
- /* SiFive Test MMIO device */
- sifive_test_create(memmap[VIRT_TEST].base);
-
- /* VirtIO MMIO devices */
- for (i = 0; i < VIRTIO_COUNT; i++) {
- sysbus_create_simple("virtio-mmio",
- memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size,
- qdev_get_gpio_in(DEVICE(virtio_plic), VIRTIO_IRQ + i));
- }
-
- gpex_pcie_init(system_memory,
- memmap[VIRT_PCIE_ECAM].base,
- memmap[VIRT_PCIE_ECAM].size,
- memmap[VIRT_PCIE_MMIO].base,
- memmap[VIRT_PCIE_MMIO].size,
- virt_high_pcie_memmap.base,
- virt_high_pcie_memmap.size,
- memmap[VIRT_PCIE_PIO].base,
- DEVICE(pcie_plic));
-
- serial_mm_init(system_memory, memmap[VIRT_UART0].base,
+ serial_mm_init(system_memory, memmap[VIRT_UART0].base,
0, qdev_get_gpio_in(DEVICE(mmio_plic), UART0_IRQ), 399193,
serial_hd(0), DEVICE_LITTLE_ENDIAN);
- sysbus_create_simple("goldfish_rtc", memmap[VIRT_RTC].base,
- qdev_get_gpio_in(DEVICE(mmio_plic), RTC_IRQ));
-
- virt_flash_create(s);
-
- for (i = 0; i < ARRAY_SIZE(s->flash); i++) {
- /* Map legacy -drive if=pflash to machine properties */
- pflash_cfi01_legacy_drive(s->flash[i],
- drive_get(IF_PFLASH, 0, i));
- }
- virt_flash_map(s, system_memory);
}
static void virt_machine_instance_init(Object *obj)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 991a6bb760..401028b8d9 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -269,6 +269,15 @@ static void riscv_cpu_dump_state(CPUState *cs, FILE *f, int flags)
qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mip ", env->mip);
qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mie ", env->mie);
qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mideleg ", env->mideleg);
+ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mcounteren ", env->mcounteren);
+ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "misa ", env->misa);
+ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mscratch ", env->mscratch);
+ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "satp ", env->satp);
+ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "scounteren ", env->scounteren);
+ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "sscratch ", env->sscratch);
+ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "stvec ", env->stvec);
+ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mhartid ", env->mhartid);
+
if (riscv_has_ext(env, RVH)) {
qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "hideleg ", env->hideleg);
}

View File

@ -1,8 +1,8 @@
Intractions betwen the dcache and hardware page table walker are complex.
In particular the complications arise when a fault occurs concurrently with a memory operation.
At the begining of very memory operation there are 8 combinations of three signals;
ITBL miss, DTLB miss, and memory operation. By looking at each combination we
At the begining of every memory operation there are 8 combinations of three signals;
ITBL miss, DTLB miss, and a memory operation. By looking at each combination we
can understand exactly the correct sequence of operations and if the operation
should continue.
@ -47,7 +47,7 @@ Dcache handles the operation.
Type 3a is a memory operation with a DTLB miss. The Dcache enters a special set of states
designed to handle the page table walker (HTPW). Secondly the HPTW takes control over the
LSU via a set of multiplexors in the LSU Arbiter, driving the Dcache with addresses into the
LSU via a set of multiplexors in the LSU Arbiter, driving the Dcache with addresses of the
page table. Interally to the HPTW an FSM checks each node of the Page Table and eventually
signals either a TLB write or a TLB Fault. In Type 3a the DTLB is written with the leaf
page table entry and returns control of the Dcache back to the IEU. Now the Dcache finishes
@ -70,7 +70,7 @@ Type 4b is also an ITLB miss. As with 4a the Dcache switches into page table wa
until it finds a leaf or in this case a fault. The fault is deteched and the Dcaches switches back
to normal mode.
Type 5a is a Type 4a with a current memory operation. The Dcache first switches to walker mode
Type 5a is a Type 4a with a current memory operation. The Dcache first switches to walker mode.
Other traps.
A new problem has emerged. What happens when an interrupt occurs during a page table walk?

File diff suppressed because it is too large Load Diff

View File

@ -27,95 +27,95 @@
module icachefsm
(// Inputs from pipeline
input logic clk, reset,
input logic clk, reset,
input logic StallF,
input logic StallF,
// inputs from mmu
input logic ITLBMissF,
input logic ITLBWriteF,
input logic WalkerInstrPageFaultF,
input logic ITLBMissF,
input logic ITLBWriteF,
input logic WalkerInstrPageFaultF,
input logic ExceptionM, PendingInterruptM,
input logic ExceptionM, PendingInterruptM,
// BUS interface
input logic InstrAckF,
input logic InstrAckF,
// icache internal inputs
input logic hit,
input logic FetchCountFlag,
input logic spill,
input logic hit,
input logic FetchCountFlag,
input logic spill,
// icache internal outputs
output logic ICacheReadEn,
output logic ICacheReadEn,
// Load data into the cache
output logic ICacheMemWriteEnable,
output logic ICacheMemWriteEnable,
// Outputs to pipeline control stuff
output logic ICacheStallF,
output logic ICacheStallF,
// Bus interface outputs
output logic InstrReadF,
output logic InstrReadF,
// icache internal outputs
output logic spillSave,
output logic CntEn,
output logic CntReset,
output logic spillSave,
output logic CntEn,
output logic CntReset,
output logic [1:0] SelAdr,
output logic LRUWriteEn
output logic LRUWriteEn
);
// FSM states
typedef enum {STATE_READY,
STATE_HIT_SPILL, // spill, block 0 hit
STATE_HIT_SPILL_MISS_FETCH_WDV, // block 1 miss, issue read to AHB and wait data.
STATE_HIT_SPILL_MISS_FETCH_DONE, // write data into SRAM/LUT
STATE_HIT_SPILL_MERGE, // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL.
typedef enum {STATE_READY,
STATE_HIT_SPILL, // spill, block 0 hit
STATE_HIT_SPILL_MISS_FETCH_WDV, // block 1 miss, issue read to AHB and wait data.
STATE_HIT_SPILL_MISS_FETCH_DONE, // write data into SRAM/LUT
STATE_HIT_SPILL_MERGE, // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL.
// a challenge is the spill signal gets us out of the ready state and moves us to
// 1 of the 2 spill branches. However the original fsm design had us return to
// the ready state when the spill + hits/misses were fully resolved. The problem
// is the spill signal is based on PCPF so when we return to READY to check if the
// cache has a hit it still expresses spill. We can fix in 1 of two ways.
// 1. we can add 1 extra state at the end of each spill branch to returns the instruction
// to the CPU advancing the CPU and icache to the next instruction.
// 2. We can assert a signal which is delayed 1 cycle to suppress the spill when we get
// to the READY state.
// The first first option is more robust and increases the number of states by 2. The
// second option is seams like it should work, but I worry there is a hidden interaction
// between CPU stalling and that register.
// Picking option 1.
// a challenge is the spill signal gets us out of the ready state and moves us to
// 1 of the 2 spill branches. However the original fsm design had us return to
// the ready state when the spill + hits/misses were fully resolved. The problem
// is the spill signal is based on PCPF so when we return to READY to check if the
// cache has a hit it still expresses spill. We can fix in 1 of two ways.
// 1. we can add 1 extra state at the end of each spill branch to returns the instruction
// to the CPU advancing the CPU and icache to the next instruction.
// 2. We can assert a signal which is delayed 1 cycle to suppress the spill when we get
// to the READY state.
// The first first option is more robust and increases the number of states by 2. The
// second option is seams like it should work, but I worry there is a hidden interaction
// between CPU stalling and that register.
// Picking option 1.
STATE_HIT_SPILL_FINAL, // this state replicates STATE_READY's replay of the
// spill access but does nto consider spill. It also does not do another operation.
STATE_HIT_SPILL_FINAL, // this state replicates STATE_READY's replay of the
// spill access but does nto consider spill. It also does not do another operation.
STATE_MISS_FETCH_WDV, // aligned miss, issue read to AHB and wait for data.
STATE_MISS_FETCH_DONE, // write data into SRAM/LUT
STATE_MISS_READ, // read block 1 from SRAM/LUT
STATE_MISS_READ_DELAY, // read block 1 from SRAM/LUT
STATE_MISS_FETCH_WDV, // aligned miss, issue read to AHB and wait for data.
STATE_MISS_FETCH_DONE, // write data into SRAM/LUT
STATE_MISS_READ, // read block 1 from SRAM/LUT
STATE_MISS_READ_DELAY, // read block 1 from SRAM/LUT
STATE_MISS_SPILL_FETCH_WDV, // spill, miss on block 0, issue read to AHB and wait
STATE_MISS_SPILL_FETCH_DONE, // write data into SRAM/LUT
STATE_MISS_SPILL_READ1, // read block 0 from SRAM/LUT
STATE_MISS_SPILL_2, // return to ready if hit or do second block update.
STATE_MISS_SPILL_2_START, // return to ready if hit or do second block update.
STATE_MISS_SPILL_MISS_FETCH_WDV, // miss on block 1, issue read to AHB and wait
STATE_MISS_SPILL_MISS_FETCH_DONE, // write data to SRAM/LUT
STATE_MISS_SPILL_MERGE, // read block 0 of CPU access,
STATE_MISS_SPILL_FETCH_WDV, // spill, miss on block 0, issue read to AHB and wait
STATE_MISS_SPILL_FETCH_DONE, // write data into SRAM/LUT
STATE_MISS_SPILL_READ1, // read block 0 from SRAM/LUT
STATE_MISS_SPILL_2, // return to ready if hit or do second block update.
STATE_MISS_SPILL_2_START, // return to ready if hit or do second block update.
STATE_MISS_SPILL_MISS_FETCH_WDV, // miss on block 1, issue read to AHB and wait
STATE_MISS_SPILL_MISS_FETCH_DONE, // write data to SRAM/LUT
STATE_MISS_SPILL_MERGE, // read block 0 of CPU access,
STATE_MISS_SPILL_FINAL, // this state replicates STATE_READY's replay of the
// spill access but does nto consider spill. It also does not do another operation.
STATE_MISS_SPILL_FINAL, // this state replicates STATE_READY's replay of the
// spill access but does nto consider spill. It also does not do another operation.
STATE_INVALIDATE, // *** not sure if invalidate or evict? invalidate by cache block or address?
STATE_TLB_MISS,
STATE_TLB_MISS_DONE,
STATE_INVALIDATE, // *** not sure if invalidate or evict? invalidate by cache block or address?
STATE_TLB_MISS,
STATE_TLB_MISS_DONE,
STATE_CPU_BUSY,
STATE_CPU_BUSY_SPILL
} statetype;
STATE_CPU_BUSY,
STATE_CPU_BUSY_SPILL
} statetype;
(* mark_debug = "true" *) statetype CurrState, NextState;
logic PreCntEn;
logic PreCntEn;
// the FSM is always runing, do not stall.
always_ff @(posedge clk)
@ -141,18 +141,18 @@ module icachefsm
NextState = STATE_TLB_MISS;
end else if (hit & ~spill) begin
ICacheStallF = 1'b0;
LRUWriteEn = 1'b1;
if(StallF) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
end else begin
LRUWriteEn = 1'b1;
if(StallF) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
end else begin
NextState = STATE_READY;
end
end
end else if (hit & spill) begin
spillSave = 1'b1;
SelAdr = 2'b10;
LRUWriteEn = 1'b1;
NextState = STATE_HIT_SPILL;
NextState = STATE_HIT_SPILL;
end else if (~hit & ~spill) begin
CntReset = 1'b1;
NextState = STATE_MISS_FETCH_WDV;
@ -161,12 +161,12 @@ module icachefsm
SelAdr = 2'b01;
NextState = STATE_MISS_SPILL_FETCH_WDV;
end else begin
if(StallF) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
end else begin
if(StallF) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
end else begin
NextState = STATE_READY;
end
end
end
end
// branch 1, hit spill and 2, miss spill hit
@ -204,15 +204,15 @@ module icachefsm
ICacheReadEn = 1'b1;
SelAdr = 2'b00;
ICacheStallF = 1'b0;
LRUWriteEn = 1'b1;
if(StallF) begin
NextState = STATE_CPU_BUSY_SPILL;
SelAdr = 2'b10;
end else begin
LRUWriteEn = 1'b1;
if(StallF) begin
NextState = STATE_CPU_BUSY_SPILL;
SelAdr = 2'b10;
end else begin
NextState = STATE_READY;
end
end
end
// branch 3 miss no spill
STATE_MISS_FETCH_WDV: begin
@ -238,15 +238,15 @@ module icachefsm
STATE_MISS_READ_DELAY: begin
//SelAdr = 2'b01;
ICacheReadEn = 1'b1;
ICacheStallF = 1'b0;
LRUWriteEn = 1'b1;
if(StallF) begin
SelAdr = 2'b01;
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
end else begin
ICacheStallF = 1'b0;
LRUWriteEn = 1'b1;
if(StallF) begin
SelAdr = 2'b01;
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
end else begin
NextState = STATE_READY;
end
end
end
// branch 4 miss spill hit, and 5 miss spill miss
STATE_MISS_SPILL_FETCH_WDV: begin
@ -267,7 +267,7 @@ module icachefsm
STATE_MISS_SPILL_READ1: begin // always be a hit as we just wrote that cache block.
SelAdr = 2'b01; // there is a 1 cycle delay after setting the address before the date arrives.
ICacheReadEn = 1'b1;
LRUWriteEn = 1'b1;
LRUWriteEn = 1'b1;
NextState = STATE_MISS_SPILL_2;
end
STATE_MISS_SPILL_2: begin
@ -284,13 +284,13 @@ module icachefsm
ICacheReadEn = 1'b1;
SelAdr = 2'b00;
ICacheStallF = 1'b0;
LRUWriteEn = 1'b1;
if(StallF) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
end else begin
LRUWriteEn = 1'b1;
if(StallF) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
end else begin
NextState = STATE_READY;
end
end
end
end
STATE_MISS_SPILL_MISS_FETCH_WDV: begin
@ -317,13 +317,13 @@ module icachefsm
ICacheReadEn = 1'b1;
SelAdr = 2'b00;
ICacheStallF = 1'b0;
LRUWriteEn = 1'b1;
if(StallF) begin
NextState = STATE_CPU_BUSY_SPILL;
SelAdr = 2'b10;
end else begin
LRUWriteEn = 1'b1;
if(StallF) begin
NextState = STATE_CPU_BUSY_SPILL;
SelAdr = 2'b10;
end else begin
NextState = STATE_READY;
end
end
end
STATE_TLB_MISS: begin
if (WalkerInstrPageFaultF) begin
@ -331,38 +331,40 @@ module icachefsm
ICacheStallF = 1'b0;
end else if (ITLBWriteF) begin
NextState = STATE_TLB_MISS_DONE;
ICacheStallF = 1'b1;
end else begin
NextState = STATE_TLB_MISS;
ICacheStallF = 1'b0;
end
end
STATE_TLB_MISS_DONE: begin
SelAdr = 2'b01;
SelAdr = 2'b01;
NextState = STATE_READY;
end
STATE_CPU_BUSY: begin
ICacheStallF = 1'b0;
if (ITLBMissF) begin
ICacheStallF = 1'b0;
if (ITLBMissF) begin
NextState = STATE_TLB_MISS;
end else if(StallF) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
end
else begin
NextState = STATE_READY;
end
end else if(StallF) begin
NextState = STATE_CPU_BUSY;
SelAdr = 2'b01;
end
else begin
NextState = STATE_READY;
end
end
STATE_CPU_BUSY_SPILL: begin
ICacheStallF = 1'b0;
ICacheReadEn = 1'b1;
if (ITLBMissF) begin
ICacheStallF = 1'b0;
ICacheReadEn = 1'b1;
if (ITLBMissF) begin
NextState = STATE_TLB_MISS;
end else if(StallF) begin
NextState = STATE_CPU_BUSY_SPILL;
SelAdr = 2'b10;
end
else begin
NextState = STATE_READY;
end
end else if(StallF) begin
NextState = STATE_CPU_BUSY_SPILL;
SelAdr = 2'b10;
end
else begin
NextState = STATE_READY;
end
end
default: begin
SelAdr = 2'b01;
@ -374,8 +376,8 @@ module icachefsm
assign CntEn = PreCntEn & InstrAckF;
assign InstrReadF = (CurrState == STATE_HIT_SPILL_MISS_FETCH_WDV) ||
(CurrState == STATE_MISS_FETCH_WDV) ||
(CurrState == STATE_MISS_SPILL_FETCH_WDV) ||
(CurrState == STATE_MISS_SPILL_MISS_FETCH_WDV);
(CurrState == STATE_MISS_FETCH_WDV) ||
(CurrState == STATE_MISS_SPILL_FETCH_WDV) ||
(CurrState == STATE_MISS_SPILL_MISS_FETCH_WDV);
endmodule

View File

@ -138,7 +138,7 @@ module uartPC16550D(
LSR <= #1 8'b01100000;
MSR <= #1 4'b0;
if (`FPGA) begin
DLL <= #1 8'd11;
DLL <= #1 8'd25;
DLM <= #1 8'b0;
end else begin
DLL <= #1 8'd1; // this cannot be zero with DLM also zer0.
@ -154,8 +154,8 @@ module uartPC16550D(
3'b000: if (DLAB) DLL <= #1 Din; // else TXHR <= #1 Din; // TX handled in TX register/FIFO section
3'b001: if (DLAB) DLM <= #1 Din; else IER <= #1 Din[3:0];
-----/\----- EXCLUDED -----/\----- */
// *** BUG FIX ME for now for the divider to be 11. Our clock is 10 Mhz. 10Mhz /(11 * 16) = 56818 baud, which is close enough to 57600 baud
3'b000: if (DLAB) DLL <= #1 8'd11; //else TXHR <= #1 Din; // TX handled in TX register/FIFO section
// *** BUG FIX ME for now for the divider to be 11. Our clock is 23 Mhz. 23Mhz /(25 * 16) = 57600 baud, which is close enough to 57600 baud
3'b000: if (DLAB) DLL <= #1 8'd25; //else TXHR <= #1 Din; // TX handled in TX register/FIFO section
3'b001: if (DLAB) DLM <= #1 8'b0; else IER <= #1 Din[3:0];
3'b010: FCR <= #1 {Din[7:6], 2'b0, Din[3], 2'b0, Din[0]}; // Write only FIFO Control Register; 4:5 reserved and 2:1 self-clearing