diff --git a/.gitignore b/.gitignore index b88dfa3e..cd827ed9 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,8 @@ wally-pipelined/regression/slack-notifier/slack-webhook-url.txt wally-pipelined/regression/logs fpga/generator/IP fpga/generator/vivado.* +fpga/generator/.Xil/* +fpga/generator/WallyFPGA* +fpga/generator/reports/ +fpga/generator/*.log +fpga/generator/*.jou diff --git a/addins/riscv-isa-sim b/addins/riscv-isa-sim index ddcfa6cc..d22b2801 160000 --- a/addins/riscv-isa-sim +++ b/addins/riscv-isa-sim @@ -1 +1 @@ -Subproject commit ddcfa6cc3d80818140a459e590296c3079c5a3ec +Subproject commit d22b280198e74b871e04fc0ddb622fb825fdae49 diff --git a/fpga/constraints/constraints.xdc b/fpga/constraints/constraints.xdc index 716136ec..49afd9c5 100644 --- a/fpga/constraints/constraints.xdc +++ b/fpga/constraints/constraints.xdc @@ -3,41 +3,8 @@ # mmcm_clkout0 is the clock output of the DDR4 memory interface / 4. # This clock is not used by wally or the AHBLite Bus. However it is used by the AXI BUS on the DD4 IP. -# generate 1 clock for the slow speed SD Card hardware. However we need to time at the mmcm_clkout1 -# clock speed. - -#create_generated_clock -name r_fd_Q -source [get_pins wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/toggle_flip_flop/i_CLK] -divide_by 50 [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/toggle_flip_flop/r_fd_Q] - -#create_clock -period 4.000 [get_ports default_250mhz_clk1_0_p] - -# need to create a clock for mmcm_clkout1. In the gui flow this was auto generated somehow. -# turns out this clock is auto generated but has a different name. 
wtf -# 10 Mhz -#create_clock -name mmcm_clkout1 -period 100 [get_pins xlnx_ddr4_c0/addn_ui_clkout1] - -#create_generated_clock -name mmcm_clkout1 -source [get_pins xlnx_ddr4_c0/c0_sys_clk_p] -edges {1 2 3} -edge_shift {0.000 48.000 96.000} [get_pins xlnx_ddr4_c0/addn_ui_clkout1] - -#create_generated_clock -name mmcm_clkout1 xlnx_ddr4_c0/addn_ui_clkout1 -#create_generated_clock -name mmcm_clkout1 mmcm_clkout1 - create_generated_clock -name CLKDiv64_Gen -source [get_pins wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/I0] -multiply_by 1 [get_pins wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/O] - - -#create_generated_clock -name mmcm_clkout1_Gen -source [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/I0] -divide_by 1 -add -master_clock mmcm_clkout1 [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/O] - -#create_generated_clock -name CLKDiv64_Gen -source [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/I1] -divide_by 1 -add -master_clock mmcm_clkout1_Gen [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/O] - - - -#create_generated_clock -name mmcm_clkout1_Gen_slow -source [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/I0] -divide_by 8 -add -master_clock mmcm_clkout1 [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/O] - -#create_generated_clock -name CLKDiv64_Gen_slow -source [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/I1] -divide_by 8 -add -master_clock mmcm_clkout1_Gen_slow [get_pins wrapper_i/wallypipelinedsocwra_0/inst/wallypipelinedsoc/uncore/sdc.SDC/sd_top/slow_clk_divider/clkMux/O] - 
-#set_clock_groups -logically_exclusive -group [get_clocks -include_generated_clocks mmcm_clkout1_Gen] -group [get_clocks -include_generated_clocks CLKDiv64_Gen] -#set_clock_groups -logically_exclusive -group [get_clocks -include_generated_clocks mmcm_clkout1_Gen] -group [get_clocks -include_generated_clocks CLKDiv64_Gen_slow] - - ##### GPI #### set_property PACKAGE_PIN BB24 [get_ports {GPI[0]}] set_property PACKAGE_PIN BF22 [get_ports {GPI[1]}] diff --git a/fpga/generator/wally.tcl b/fpga/generator/wally.tcl index b9f55c0e..26b3babf 100644 --- a/fpga/generator/wally.tcl +++ b/fpga/generator/wally.tcl @@ -19,111 +19,18 @@ read_verilog {../src/fpgaTop.v} set_property include_dirs {../../wally-pipelined/config/fpga ../../wally-pipelined/config/shared} [current_fileset] -# contrainsts generated by the IP blocks - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ahblite_axi_bridge.gen/sources_1/ip/xlnx_ahblite_axi_bridge/xlnx_ahblite_axi_bridge_ooc.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ahblite_axi_bridge.gen/sources_1/ip/xlnx_ahblite_axi_bridge/xlnx_ahblite_axi_bridge_ooc.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_axi_clock_converter.gen/sources_1/ip/xlnx_axi_clock_converter/xlnx_axi_clock_converter_ooc.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_axi_clock_converter.gen/sources_1/ip/xlnx_axi_clock_converter/xlnx_axi_clock_converter_ooc.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_1/par/xlnx_ddr4_phy_ooc.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_1/par/xlnx_ddr4_phy_ooc.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_0/bd_1ba7_microblaze_I_0.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_0/bd_1ba7_microblaze_I_0.xdc] - -#add_files -fileset constrs_1 -norecurse 
IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_1/bd_1ba7_rst_0_0_board.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_1/bd_1ba7_rst_0_0_board.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_2/bd_1ba7_ilmb_0.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_2/bd_1ba7_ilmb_0.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_3/bd_1ba7_dlmb_0.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_3/bd_1ba7_dlmb_0.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_1/bd_1ba7_rst_0_0.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_1/bd_1ba7_rst_0_0.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_6/bd_1ba7_lmb_bram_I_0_ooc.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_6/bd_1ba7_lmb_bram_I_0_ooc.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_9/bd_1ba7_second_lmb_bram_I_0_ooc.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_9/bd_1ba7_second_lmb_bram_I_0_ooc.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_10/bd_1ba7_iomodule_0_0_board.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_10/bd_1ba7_iomodule_0_0_board.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/bd_1ba7_ooc.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/bd_1ba7_ooc.xdc] - -#add_files -fileset constrs_1 -norecurse 
IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_0/xlnx_ddr4_microblaze_mcs_board.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_0/xlnx_ddr4_microblaze_mcs_board.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_0/xlnx_ddr4_microblaze_mcs_ooc.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_0/xlnx_ddr4_microblaze_mcs_ooc.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/xlnx_ddr4_board.xdc -#set_property PROCESSING_ORDER LATE [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/xlnx_ddr4_board.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/par/xlnx_ddr4.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/par/xlnx_ddr4.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset_board.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset_board.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset.xdc] - -#add_files -fileset constrs_1 -norecurse IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset_ooc.xdc -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset_ooc.xdc] - - - add_files -fileset constrs_1 -norecurse ../constraints/constraints.xdc set_property PROCESSING_ORDER NORMAL [get_files ../constraints/constraints.xdc] -# implementation only -#add_files -fileset constrs_1 -norecurse 
IP/xlnx_axi_clock_converter.gen/sources_1/ip/xlnx_axi_clock_converter/xlnx_axi_clock_converter_clocks.xdc -#set_property PROCESSING_ORDER LATE [get_files IP/xlnx_axi_clock_converter.gen/sources_1/ip/xlnx_axi_clock_converter/xlnx_axi_clock_converter_clocks.xdc] - - - - - - - - - - - - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/bd_0/ip/ip_0/bd_1ba7_microblaze_I_0_ooc_debug.xdc - -#add_files -fileset constrs_1 -norecurse IP/xlnx_ahblite_axi_bridge.runs/xlnx_ahblite_axi_bridge_synth_1/dont_touch.xdc - -#add_files -fileset constrs_1 -norecurse IP/xlnx_proc_sys_reset.runs/xlnx_proc_sys_reset_synth_1/dont_touch.xdc - - - -#add_files -fileset constrs_1 -norecurse IP/xlnx_axi_clock_converter.runs/xlnx_axi_clock_converter_synth_1/.Xil/xlnx_axi_clock_converter_propImpl.xdc -#add_files -fileset constrs_1 -norecurse IP/xlnx_axi_clock_converter.runs/xlnx_axi_clock_converter_synth_1/dont_touch.xdc -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.runs/xlnx_ddr4_synth_1/.Xil/xlnx_ddr4_propImpl.xdc -#add_files -fileset constrs_1 -norecurse IP/xlnx_ddr4.runs/xlnx_ddr4_synth_1/dont_touch.xdc - -# constraints for wally top level - # define top level set_property top fpgaTop [current_fileset] -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_ddr4.gen/sources_1/ip/xlnx_ddr4/ip_0/xlnx_ddr4_microblaze_mcs_ooc.xdc] -#set_property PROCESSING_ORDER EARLY [get_files IP/xlnx_proc_sys_reset.gen/sources_1/ip/xlnx_proc_sys_reset/xlnx_proc_sys_reset_ooc.xdc] - update_compile_order -fileset sources_1 -# this line is wrong vvv -#update_compile_order -fileset constrs_1 # This is important as the ddr4 IP contains the generate clock constraint which the user constraints depend on. +exec mkdir -p reports/ +exec rm -rf reports/* + report_compile_order -constraints > reports/compile_order.rpt # this is elaboration not synthesis. 
@@ -137,8 +44,6 @@ launch_runs synth_1 -jobs 4 wait_on_run synth_1 open_run synth_1 -exec mkdir -p reports/ -exec rm -rf reports/* check_timing -verbose -file reports/check_timing.rpt report_timing -max_paths 10 -nworst 10 -delay_type max -sort_by slack -file reports/timing_WORST_10.rpt diff --git a/fpga/generator/xlnx_ddr4.tcl b/fpga/generator/xlnx_ddr4.tcl index 5602ca50..0b5e1a7e 100644 --- a/fpga/generator/xlnx_ddr4.tcl +++ b/fpga/generator/xlnx_ddr4.tcl @@ -41,7 +41,7 @@ set_property -dict [list CONFIG.C0.ControllerType {DDR4_SDRAM} \ CONFIG.C0.DDR4_CLKOUT0_DIVIDE {6} \ CONFIG.Reference_Clock {Differential} \ CONFIG.ADDN_UI_CLKOUT1.INSERT_VIP {0} \ - CONFIG.ADDN_UI_CLKOUT1_FREQ_HZ {10} \ + CONFIG.ADDN_UI_CLKOUT1_FREQ_HZ {23} \ CONFIG.ADDN_UI_CLKOUT2.INSERT_VIP {0} \ CONFIG.ADDN_UI_CLKOUT2_FREQ_HZ {208} \ CONFIG.ADDN_UI_CLKOUT3.INSERT_VIP {0} \ diff --git a/wally-pipelined/linux-testgen/qemu-build-instructions.md b/wally-pipelined/linux-testgen/qemu-build-instructions.md new file mode 100644 index 00000000..8347417b --- /dev/null +++ b/wally-pipelined/linux-testgen/qemu-build-instructions.md @@ -0,0 +1,10 @@ +cd +git clone https://github.com/qemu/qemu +cd qemu +git checkout dbdc621be937d9efe3e4dff994e54e8eea051f7a +git apply wallyVirtIO.patch # located in riscv-wally/wally-pipelined/linux-testgen/wallyVirtIO.patch +sudo apt install ninja-build # or your equivalent +sudo apt install libglib2.0-dev # or your equivalent +sudo apt install libpixman-1-dev libcairo2-dev libpango1.0-dev libjpeg8-dev libgif-dev +./configure --target-list=riscv64-softmmu +make --jobs diff --git a/wally-pipelined/linux-testgen/wallyVirtIO.patch b/wally-pipelined/linux-testgen/wallyVirtIO.patch new file mode 100644 index 00000000..76a1d240 --- /dev/null +++ b/wally-pipelined/linux-testgen/wallyVirtIO.patch @@ -0,0 +1,542 @@ +diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c +index 4a3cd2599a..39b46e3122 100644 +--- a/hw/riscv/virt.c ++++ b/hw/riscv/virt.c +@@ -20,6 +20,7 @@ + + #include 
"qemu/osdep.h" + #include "qemu/units.h" ++#include "qemu/log.h" + #include "qemu/error-report.h" + #include "qapi/error.h" + #include "hw/boards.h" +@@ -44,19 +45,10 @@ + #include "hw/display/ramfb.h" + + static const MemMapEntry virt_memmap[] = { +- [VIRT_DEBUG] = { 0x0, 0x100 }, + [VIRT_MROM] = { 0x1000, 0xf000 }, +- [VIRT_TEST] = { 0x100000, 0x1000 }, +- [VIRT_RTC] = { 0x101000, 0x1000 }, + [VIRT_CLINT] = { 0x2000000, 0x10000 }, +- [VIRT_PCIE_PIO] = { 0x3000000, 0x10000 }, + [VIRT_PLIC] = { 0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) }, + [VIRT_UART0] = { 0x10000000, 0x100 }, +- [VIRT_VIRTIO] = { 0x10001000, 0x1000 }, +- [VIRT_FW_CFG] = { 0x10100000, 0x18 }, +- [VIRT_FLASH] = { 0x20000000, 0x4000000 }, +- [VIRT_PCIE_ECAM] = { 0x30000000, 0x10000000 }, +- [VIRT_PCIE_MMIO] = { 0x40000000, 0x40000000 }, + [VIRT_DRAM] = { 0x80000000, 0x0 }, + }; + +@@ -67,139 +59,23 @@ static const MemMapEntry virt_memmap[] = { + /* PCIe high mmio for RV64, size is fixed but base depends on top of RAM */ + #define VIRT64_HIGH_PCIE_MMIO_SIZE (16 * GiB) + +-static MemMapEntry virt_high_pcie_memmap; +- + #define VIRT_FLASH_SECTOR_SIZE (256 * KiB) + +-static PFlashCFI01 *virt_flash_create1(RISCVVirtState *s, +- const char *name, +- const char *alias_prop_name) +-{ +- /* +- * Create a single flash device. We use the same parameters as +- * the flash devices on the ARM virt board. 
+- */ +- DeviceState *dev = qdev_new(TYPE_PFLASH_CFI01); +- +- qdev_prop_set_uint64(dev, "sector-length", VIRT_FLASH_SECTOR_SIZE); +- qdev_prop_set_uint8(dev, "width", 4); +- qdev_prop_set_uint8(dev, "device-width", 2); +- qdev_prop_set_bit(dev, "big-endian", false); +- qdev_prop_set_uint16(dev, "id0", 0x89); +- qdev_prop_set_uint16(dev, "id1", 0x18); +- qdev_prop_set_uint16(dev, "id2", 0x00); +- qdev_prop_set_uint16(dev, "id3", 0x00); +- qdev_prop_set_string(dev, "name", name); +- +- object_property_add_child(OBJECT(s), name, OBJECT(dev)); +- object_property_add_alias(OBJECT(s), alias_prop_name, +- OBJECT(dev), "drive"); +- +- return PFLASH_CFI01(dev); +-} +- +-static void virt_flash_create(RISCVVirtState *s) +-{ +- s->flash[0] = virt_flash_create1(s, "virt.flash0", "pflash0"); +- s->flash[1] = virt_flash_create1(s, "virt.flash1", "pflash1"); +-} +- +-static void virt_flash_map1(PFlashCFI01 *flash, +- hwaddr base, hwaddr size, +- MemoryRegion *sysmem) +-{ +- DeviceState *dev = DEVICE(flash); +- +- assert(QEMU_IS_ALIGNED(size, VIRT_FLASH_SECTOR_SIZE)); +- assert(size / VIRT_FLASH_SECTOR_SIZE <= UINT32_MAX); +- qdev_prop_set_uint32(dev, "num-blocks", size / VIRT_FLASH_SECTOR_SIZE); +- sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); +- +- memory_region_add_subregion(sysmem, base, +- sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), +- 0)); +-} +- +-static void virt_flash_map(RISCVVirtState *s, +- MemoryRegion *sysmem) +-{ +- hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2; +- hwaddr flashbase = virt_memmap[VIRT_FLASH].base; +- +- virt_flash_map1(s->flash[0], flashbase, flashsize, +- sysmem); +- virt_flash_map1(s->flash[1], flashbase + flashsize, flashsize, +- sysmem); +-} +- +-static void create_pcie_irq_map(void *fdt, char *nodename, +- uint32_t plic_phandle) +-{ +- int pin, dev; +- uint32_t +- full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS * FDT_INT_MAP_WIDTH] = {}; +- uint32_t *irq_map = full_irq_map; +- +- /* This code creates a standard swizzle of 
interrupts such that +- * each device's first interrupt is based on it's PCI_SLOT number. +- * (See pci_swizzle_map_irq_fn()) +- * +- * We only need one entry per interrupt in the table (not one per +- * possible slot) seeing the interrupt-map-mask will allow the table +- * to wrap to any number of devices. +- */ +- for (dev = 0; dev < GPEX_NUM_IRQS; dev++) { +- int devfn = dev * 0x8; +- +- for (pin = 0; pin < GPEX_NUM_IRQS; pin++) { +- int irq_nr = PCIE_IRQ + ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS); +- int i = 0; +- +- irq_map[i] = cpu_to_be32(devfn << 8); +- +- i += FDT_PCI_ADDR_CELLS; +- irq_map[i] = cpu_to_be32(pin + 1); +- +- i += FDT_PCI_INT_CELLS; +- irq_map[i++] = cpu_to_be32(plic_phandle); +- +- i += FDT_PLIC_ADDR_CELLS; +- irq_map[i] = cpu_to_be32(irq_nr); +- +- irq_map += FDT_INT_MAP_WIDTH; +- } +- } +- +- qemu_fdt_setprop(fdt, nodename, "interrupt-map", +- full_irq_map, sizeof(full_irq_map)); +- +- qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask", +- 0x1800, 0, 0, 0x7); +-} +- + static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, + uint64_t mem_size, const char *cmdline, bool is_32_bit) + { + void *fdt; +- int i, cpu, socket; ++ //int i, cpu, socket; ++ int cpu, socket; + MachineState *mc = MACHINE(s); + uint64_t addr, size; + uint32_t *clint_cells, *plic_cells; + unsigned long clint_addr, plic_addr; + uint32_t plic_phandle[MAX_NODES]; +- uint32_t cpu_phandle, intc_phandle, test_phandle; ++ uint32_t cpu_phandle, intc_phandle; + uint32_t phandle = 1, plic_mmio_phandle = 1; +- uint32_t plic_pcie_phandle = 1, plic_virtio_phandle = 1; + char *mem_name, *cpu_name, *core_name, *intc_name; + char *name, *clint_name, *plic_name, *clust_name; +- hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2; +- hwaddr flashbase = virt_memmap[VIRT_FLASH].base; +- static const char * const clint_compat[2] = { +- "sifive,clint0", "riscv,clint0" +- }; +- static const char * const plic_compat[2] = { +- "sifive,plic-1.0.0", "riscv,plic0" +- }; + + if 
(mc->dtb) { + fdt = mc->fdt = load_device_tree(mc->dtb, &s->fdt_size); +@@ -305,8 +181,7 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, + (memmap[VIRT_CLINT].size * socket); + clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); + qemu_fdt_add_subnode(fdt, clint_name); +- qemu_fdt_setprop_string_array(fdt, clint_name, "compatible", +- (char **)&clint_compat, ARRAY_SIZE(clint_compat)); ++ qemu_fdt_setprop_string(fdt, clint_name, "compatible", "riscv,clint0"); + qemu_fdt_setprop_cells(fdt, clint_name, "reg", + 0x0, clint_addr, 0x0, memmap[VIRT_CLINT].size); + qemu_fdt_setprop(fdt, clint_name, "interrupts-extended", +@@ -322,8 +197,7 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, + "#address-cells", FDT_PLIC_ADDR_CELLS); + qemu_fdt_setprop_cell(fdt, plic_name, + "#interrupt-cells", FDT_PLIC_INT_CELLS); +- qemu_fdt_setprop_string_array(fdt, plic_name, "compatible", +- (char **)&plic_compat, ARRAY_SIZE(plic_compat)); ++ qemu_fdt_setprop_string(fdt, plic_name, "compatible", "riscv,plic0"); + qemu_fdt_setprop(fdt, plic_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop(fdt, plic_name, "interrupts-extended", + plic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); +@@ -342,95 +216,11 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, + for (socket = 0; socket < riscv_socket_count(mc); socket++) { + if (socket == 0) { + plic_mmio_phandle = plic_phandle[socket]; +- plic_virtio_phandle = plic_phandle[socket]; +- plic_pcie_phandle = plic_phandle[socket]; +- } +- if (socket == 1) { +- plic_virtio_phandle = plic_phandle[socket]; +- plic_pcie_phandle = plic_phandle[socket]; +- } +- if (socket == 2) { +- plic_pcie_phandle = plic_phandle[socket]; + } + } + + riscv_socket_fdt_write_distance_matrix(mc, fdt); + +- for (i = 0; i < VIRTIO_COUNT; i++) { +- name = g_strdup_printf("/soc/virtio_mmio@%lx", +- (long)(memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size)); +- 
qemu_fdt_add_subnode(fdt, name); +- qemu_fdt_setprop_string(fdt, name, "compatible", "virtio,mmio"); +- qemu_fdt_setprop_cells(fdt, name, "reg", +- 0x0, memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size, +- 0x0, memmap[VIRT_VIRTIO].size); +- qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", +- plic_virtio_phandle); +- qemu_fdt_setprop_cell(fdt, name, "interrupts", VIRTIO_IRQ + i); +- g_free(name); +- } +- +- name = g_strdup_printf("/soc/pci@%lx", +- (long) memmap[VIRT_PCIE_ECAM].base); +- qemu_fdt_add_subnode(fdt, name); +- qemu_fdt_setprop_cell(fdt, name, "#address-cells", FDT_PCI_ADDR_CELLS); +- qemu_fdt_setprop_cell(fdt, name, "#interrupt-cells", FDT_PCI_INT_CELLS); +- qemu_fdt_setprop_cell(fdt, name, "#size-cells", 0x2); +- qemu_fdt_setprop_string(fdt, name, "compatible", "pci-host-ecam-generic"); +- qemu_fdt_setprop_string(fdt, name, "device_type", "pci"); +- qemu_fdt_setprop_cell(fdt, name, "linux,pci-domain", 0); +- qemu_fdt_setprop_cells(fdt, name, "bus-range", 0, +- memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1); +- qemu_fdt_setprop(fdt, name, "dma-coherent", NULL, 0); +- qemu_fdt_setprop_cells(fdt, name, "reg", 0, +- memmap[VIRT_PCIE_ECAM].base, 0, memmap[VIRT_PCIE_ECAM].size); +- qemu_fdt_setprop_sized_cells(fdt, name, "ranges", +- 1, FDT_PCI_RANGE_IOPORT, 2, 0, +- 2, memmap[VIRT_PCIE_PIO].base, 2, memmap[VIRT_PCIE_PIO].size, +- 1, FDT_PCI_RANGE_MMIO, +- 2, memmap[VIRT_PCIE_MMIO].base, +- 2, memmap[VIRT_PCIE_MMIO].base, 2, memmap[VIRT_PCIE_MMIO].size, +- 1, FDT_PCI_RANGE_MMIO_64BIT, +- 2, virt_high_pcie_memmap.base, +- 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.size); +- +- create_pcie_irq_map(fdt, name, plic_pcie_phandle); +- g_free(name); +- +- test_phandle = phandle++; +- name = g_strdup_printf("/soc/test@%lx", +- (long)memmap[VIRT_TEST].base); +- qemu_fdt_add_subnode(fdt, name); +- { +- static const char * const compat[3] = { +- "sifive,test1", "sifive,test0", "syscon" +- }; +- qemu_fdt_setprop_string_array(fdt, name, 
"compatible", (char **)&compat, +- ARRAY_SIZE(compat)); +- } +- qemu_fdt_setprop_cells(fdt, name, "reg", +- 0x0, memmap[VIRT_TEST].base, +- 0x0, memmap[VIRT_TEST].size); +- qemu_fdt_setprop_cell(fdt, name, "phandle", test_phandle); +- test_phandle = qemu_fdt_get_phandle(fdt, name); +- g_free(name); +- +- name = g_strdup_printf("/soc/reboot"); +- qemu_fdt_add_subnode(fdt, name); +- qemu_fdt_setprop_string(fdt, name, "compatible", "syscon-reboot"); +- qemu_fdt_setprop_cell(fdt, name, "regmap", test_phandle); +- qemu_fdt_setprop_cell(fdt, name, "offset", 0x0); +- qemu_fdt_setprop_cell(fdt, name, "value", FINISHER_RESET); +- g_free(name); +- +- name = g_strdup_printf("/soc/poweroff"); +- qemu_fdt_add_subnode(fdt, name); +- qemu_fdt_setprop_string(fdt, name, "compatible", "syscon-poweroff"); +- qemu_fdt_setprop_cell(fdt, name, "regmap", test_phandle); +- qemu_fdt_setprop_cell(fdt, name, "offset", 0x0); +- qemu_fdt_setprop_cell(fdt, name, "value", FINISHER_PASS); +- g_free(name); +- + name = g_strdup_printf("/soc/uart@%lx", (long)memmap[VIRT_UART0].base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "ns16550a"); +@@ -445,102 +235,12 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, + qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", name); + g_free(name); + +- name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[VIRT_RTC].base); +- qemu_fdt_add_subnode(fdt, name); +- qemu_fdt_setprop_string(fdt, name, "compatible", "google,goldfish-rtc"); +- qemu_fdt_setprop_cells(fdt, name, "reg", +- 0x0, memmap[VIRT_RTC].base, +- 0x0, memmap[VIRT_RTC].size); +- qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", plic_mmio_phandle); +- qemu_fdt_setprop_cell(fdt, name, "interrupts", RTC_IRQ); +- g_free(name); +- +- name = g_strdup_printf("/soc/flash@%" PRIx64, flashbase); +- qemu_fdt_add_subnode(mc->fdt, name); +- qemu_fdt_setprop_string(mc->fdt, name, "compatible", "cfi-flash"); +- qemu_fdt_setprop_sized_cells(mc->fdt, 
name, "reg", +- 2, flashbase, 2, flashsize, +- 2, flashbase + flashsize, 2, flashsize); +- qemu_fdt_setprop_cell(mc->fdt, name, "bank-width", 4); +- g_free(name); +- + update_bootargs: + if (cmdline) { + qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline); + } + } + +-static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, +- hwaddr ecam_base, hwaddr ecam_size, +- hwaddr mmio_base, hwaddr mmio_size, +- hwaddr high_mmio_base, +- hwaddr high_mmio_size, +- hwaddr pio_base, +- DeviceState *plic) +-{ +- DeviceState *dev; +- MemoryRegion *ecam_alias, *ecam_reg; +- MemoryRegion *mmio_alias, *high_mmio_alias, *mmio_reg; +- qemu_irq irq; +- int i; +- +- dev = qdev_new(TYPE_GPEX_HOST); +- +- sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); +- +- ecam_alias = g_new0(MemoryRegion, 1); +- ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); +- memory_region_init_alias(ecam_alias, OBJECT(dev), "pcie-ecam", +- ecam_reg, 0, ecam_size); +- memory_region_add_subregion(get_system_memory(), ecam_base, ecam_alias); +- +- mmio_alias = g_new0(MemoryRegion, 1); +- mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1); +- memory_region_init_alias(mmio_alias, OBJECT(dev), "pcie-mmio", +- mmio_reg, mmio_base, mmio_size); +- memory_region_add_subregion(get_system_memory(), mmio_base, mmio_alias); +- +- /* Map high MMIO space */ +- high_mmio_alias = g_new0(MemoryRegion, 1); +- memory_region_init_alias(high_mmio_alias, OBJECT(dev), "pcie-mmio-high", +- mmio_reg, high_mmio_base, high_mmio_size); +- memory_region_add_subregion(get_system_memory(), high_mmio_base, +- high_mmio_alias); +- +- sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base); +- +- for (i = 0; i < GPEX_NUM_IRQS; i++) { +- irq = qdev_get_gpio_in(plic, PCIE_IRQ + i); +- +- sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq); +- gpex_set_irq_num(GPEX_HOST(dev), i, PCIE_IRQ + i); +- } +- +- return dev; +-} +- +-static FWCfgState *create_fw_cfg(const MachineState *mc) +-{ +- hwaddr base = 
virt_memmap[VIRT_FW_CFG].base; +- hwaddr size = virt_memmap[VIRT_FW_CFG].size; +- FWCfgState *fw_cfg; +- char *nodename; +- +- fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, +- &address_space_memory); +- fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)mc->smp.cpus); +- +- nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); +- qemu_fdt_add_subnode(mc->fdt, nodename); +- qemu_fdt_setprop_string(mc->fdt, nodename, +- "compatible", "qemu,fw-cfg-mmio"); +- qemu_fdt_setprop_sized_cells(mc->fdt, nodename, "reg", +- 2, base, 2, size); +- qemu_fdt_setprop(mc->fdt, nodename, "dma-coherent", NULL, 0); +- g_free(nodename); +- return fw_cfg; +-} +- + static void virt_machine_init(MachineState *machine) + { + const MemMapEntry *memmap = virt_memmap; +@@ -554,7 +254,7 @@ static void virt_machine_init(MachineState *machine) + target_ulong firmware_end_addr, kernel_start_addr; + uint32_t fdt_load_addr; + uint64_t kernel_entry; +- DeviceState *mmio_plic, *virtio_plic, *pcie_plic; ++ DeviceState *mmio_plic; + int i, j, base_hartid, hart_count; + + /* Check socket count limit */ +@@ -565,7 +265,7 @@ static void virt_machine_init(MachineState *machine) + } + + /* Initialize sockets */ +- mmio_plic = virtio_plic = pcie_plic = NULL; ++ mmio_plic = NULL; + for (i = 0; i < riscv_socket_count(machine); i++) { + if (!riscv_socket_check_hartids(machine, i)) { + error_report("discontinuous hartids in socket%d", i); +@@ -634,15 +334,6 @@ static void virt_machine_init(MachineState *machine) + /* Try to use different PLIC instance based device type */ + if (i == 0) { + mmio_plic = s->plic[i]; +- virtio_plic = s->plic[i]; +- pcie_plic = s->plic[i]; +- } +- if (i == 1) { +- virtio_plic = s->plic[i]; +- pcie_plic = s->plic[i]; +- } +- if (i == 2) { +- pcie_plic = s->plic[i]; + } + } + +@@ -654,13 +345,6 @@ static void virt_machine_init(MachineState *machine) + error_report("Limiting RAM size to 10 GiB"); + } + #endif +- virt_high_pcie_memmap.base = VIRT32_HIGH_PCIE_MMIO_BASE; +- 
virt_high_pcie_memmap.size = VIRT32_HIGH_PCIE_MMIO_SIZE; +- } else { +- virt_high_pcie_memmap.size = VIRT64_HIGH_PCIE_MMIO_SIZE; +- virt_high_pcie_memmap.base = memmap[VIRT_DRAM].base + machine->ram_size; +- virt_high_pcie_memmap.base = +- ROUND_UP(virt_high_pcie_memmap.base, virt_high_pcie_memmap.size); + } + + /* register system main memory (actual RAM) */ +@@ -681,10 +365,12 @@ static void virt_machine_init(MachineState *machine) + + if (riscv_is_32bit(&s->soc[0])) { + firmware_end_addr = riscv_find_and_load_firmware(machine, +- RISCV32_BIOS_BIN, start_addr, NULL); ++ "opensbi-riscv32-generic-fw_dynamic.bin", ++ start_addr, NULL); + } else { + firmware_end_addr = riscv_find_and_load_firmware(machine, +- RISCV64_BIOS_BIN, start_addr, NULL); ++ "opensbi-riscv64-generic-fw_dynamic.bin", ++ start_addr, NULL); + } + + if (machine->kernel_filename) { +@@ -712,21 +398,6 @@ static void virt_machine_init(MachineState *machine) + kernel_entry = 0; + } + +- if (drive_get(IF_PFLASH, 0, 0)) { +- /* +- * Pflash was supplied, let's overwrite the address we jump to after +- * reset to the base of the flash. +- */ +- start_addr = virt_memmap[VIRT_FLASH].base; +- } +- +- /* +- * Init fw_cfg. Must be done before riscv_load_fdt, otherwise the device +- * tree cannot be altered and we get FDT_ERR_NOSPACE. 
+- */ +- s->fw_cfg = create_fw_cfg(machine); +- rom_set_fw(s->fw_cfg); +- + /* Compute the fdt load address in dram */ + fdt_load_addr = riscv_load_fdt(memmap[VIRT_DRAM].base, + machine->ram_size, machine->fdt); +@@ -736,41 +407,10 @@ static void virt_machine_init(MachineState *machine) + virt_memmap[VIRT_MROM].size, kernel_entry, + fdt_load_addr, machine->fdt); + +- /* SiFive Test MMIO device */ +- sifive_test_create(memmap[VIRT_TEST].base); +- +- /* VirtIO MMIO devices */ +- for (i = 0; i < VIRTIO_COUNT; i++) { +- sysbus_create_simple("virtio-mmio", +- memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size, +- qdev_get_gpio_in(DEVICE(virtio_plic), VIRTIO_IRQ + i)); +- } +- +- gpex_pcie_init(system_memory, +- memmap[VIRT_PCIE_ECAM].base, +- memmap[VIRT_PCIE_ECAM].size, +- memmap[VIRT_PCIE_MMIO].base, +- memmap[VIRT_PCIE_MMIO].size, +- virt_high_pcie_memmap.base, +- virt_high_pcie_memmap.size, +- memmap[VIRT_PCIE_PIO].base, +- DEVICE(pcie_plic)); +- +- serial_mm_init(system_memory, memmap[VIRT_UART0].base, ++ serial_mm_init(system_memory, memmap[VIRT_UART0].base, + 0, qdev_get_gpio_in(DEVICE(mmio_plic), UART0_IRQ), 399193, + serial_hd(0), DEVICE_LITTLE_ENDIAN); + +- sysbus_create_simple("goldfish_rtc", memmap[VIRT_RTC].base, +- qdev_get_gpio_in(DEVICE(mmio_plic), RTC_IRQ)); +- +- virt_flash_create(s); +- +- for (i = 0; i < ARRAY_SIZE(s->flash); i++) { +- /* Map legacy -drive if=pflash to machine properties */ +- pflash_cfi01_legacy_drive(s->flash[i], +- drive_get(IF_PFLASH, 0, i)); +- } +- virt_flash_map(s, system_memory); + } + + static void virt_machine_instance_init(Object *obj) +diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c +index 991a6bb760..401028b8d9 100644 +--- a/target/riscv/cpu.c ++++ b/target/riscv/cpu.c +@@ -269,6 +269,15 @@ static void riscv_cpu_dump_state(CPUState *cs, FILE *f, int flags) + qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mip ", env->mip); + qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mie ", env->mie); + qemu_fprintf(f, " %s " 
TARGET_FMT_lx "\n", "mideleg ", env->mideleg); ++ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mcounteren ", env->mcounteren); ++ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "misa ", env->misa); ++ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mscratch ", env->mscratch); ++ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "satp ", env->satp); ++ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "scounteren ", env->scounteren); ++ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "sscratch ", env->sscratch); ++ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "stvec ", env->stvec); ++ qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mhartid ", env->mhartid); ++ + if (riscv_has_ext(env, RVH)) { + qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "hideleg ", env->hideleg); + } diff --git a/wally-pipelined/src/cache/dcache_ptw_interaction_README.txt b/wally-pipelined/src/cache/dcache_ptw_interaction_README.txt index cc7a96cc..47e2af5d 100644 --- a/wally-pipelined/src/cache/dcache_ptw_interaction_README.txt +++ b/wally-pipelined/src/cache/dcache_ptw_interaction_README.txt @@ -1,8 +1,8 @@ Intractions betwen the dcache and hardware page table walker are complex. In particular the complications arise when a fault occurs concurrently with a memory operation. -At the begining of very memory operation there are 8 combinations of three signals; -ITBL miss, DTLB miss, and memory operation. By looking at each combination we +At the begining of every memory operation there are 8 combinations of three signals; +ITBL miss, DTLB miss, and a memory operation. By looking at each combination we can understand exactly the correct sequence of operations and if the operation should continue. @@ -47,7 +47,7 @@ Dcache handles the operation. Type 3a is a memory operation with a DTLB miss. The Dcache enters a special set of states designed to handle the page table walker (HTPW). 
Secondly the HPTW takes control over the -LSU via a set of multiplexors in the LSU Arbiter, driving the Dcache with addresses into the +LSU via a set of multiplexors in the LSU Arbiter, driving the Dcache with addresses of the page table. Interally to the HPTW an FSM checks each node of the Page Table and eventually signals either a TLB write or a TLB Fault. In Type 3a the DTLB is written with the leaf page table entry and returns control of the Dcache back to the IEU. Now the Dcache finishes @@ -70,7 +70,7 @@ Type 4b is also an ITLB miss. As with 4a the Dcache switches into page table wa until it finds a leaf or in this case a fault. The fault is deteched and the Dcaches switches back to normal mode. -Type 5a is a Type 4a with a current memory operation. The Dcache first switches to walker mode +Type 5a is a Type 4a with a current memory operation. The Dcache first switches to walker mode. Other traps. A new problem has emerged. What happens when an interrupt occurs during a page table walk? 
diff --git a/wally-pipelined/src/cache/dcachefsm.sv b/wally-pipelined/src/cache/dcachefsm.sv index cbfd2091..63e712f1 100644 --- a/wally-pipelined/src/cache/dcachefsm.sv +++ b/wally-pipelined/src/cache/dcachefsm.sv @@ -27,116 +27,116 @@ module dcachefsm (input logic clk, - input logic reset, + input logic reset, // inputs from IEU input logic [1:0] MemRWM, input logic [1:0] AtomicM, - input logic FlushDCacheM, + input logic FlushDCacheM, // hazard inputs - input logic ExceptionM, - input logic PendingInterruptM, - input logic StallWtoDCache, + input logic ExceptionM, + input logic PendingInterruptM, + input logic StallWtoDCache, // mmu inputs - input logic DTLBMissM, - input logic ITLBMissF, - input logic CacheableM, - input logic DTLBWriteM, - input logic ITLBWriteF, - input logic WalkerInstrPageFaultF, + input logic DTLBMissM, + input logic ITLBMissF, + input logic CacheableM, + input logic DTLBWriteM, + input logic ITLBWriteF, + input logic WalkerInstrPageFaultF, // hptw inputs - input logic SelPTW, - input logic WalkerPageFaultM, + input logic SelPTW, + input logic WalkerPageFaultM, // Bus inputs - input logic AHBAck, // from ahb + input logic AHBAck, // from ahb // dcache internals - input logic CacheHit, - input logic FetchCountFlag, - input logic VictimDirty, - input logic FlushAdrFlag, - + input logic CacheHit, + input logic FetchCountFlag, + input logic VictimDirty, + input logic FlushAdrFlag, + // hazard outputs - output logic DCacheStall, - output logic CommittedM, + output logic DCacheStall, + output logic CommittedM, // counter outputs - output logic DCacheMiss, - output logic DCacheAccess, + output logic DCacheMiss, + output logic DCacheAccess, // hptw outputs - output logic MemAfterIWalkDone, + output logic MemAfterIWalkDone, // Bus outputs - output logic AHBRead, - output logic AHBWrite, + output logic AHBRead, + output logic AHBWrite, // dcache internals output logic [1:0] SelAdrM, - output logic CntEn, - output logic SetValid, - output logic 
ClearValid, - output logic SetDirty, - output logic ClearDirty, - output logic SRAMWordWriteEnableM, - output logic SRAMBlockWriteEnableM, - output logic CntReset, - output logic SelUncached, - output logic SelEvict, - output logic LRUWriteEn, - output logic SelFlush, - output logic FlushAdrCntEn, - output logic FlushWayCntEn, - output logic FlushAdrCntRst, - output logic FlushWayCntRst, - output logic VDWriteEnable + output logic CntEn, + output logic SetValid, + output logic ClearValid, + output logic SetDirty, + output logic ClearDirty, + output logic SRAMWordWriteEnableM, + output logic SRAMBlockWriteEnableM, + output logic CntReset, + output logic SelUncached, + output logic SelEvict, + output logic LRUWriteEn, + output logic SelFlush, + output logic FlushAdrCntEn, + output logic FlushWayCntEn, + output logic FlushAdrCntRst, + output logic FlushWayCntRst, + output logic VDWriteEnable ); - logic PreCntEn; - logic AnyCPUReqM; + logic PreCntEn; + logic AnyCPUReqM; - typedef enum {STATE_READY, + typedef enum {STATE_READY, - STATE_MISS_FETCH_WDV, - STATE_MISS_FETCH_DONE, - STATE_MISS_EVICT_DIRTY, - STATE_MISS_WRITE_CACHE_BLOCK, - STATE_MISS_READ_WORD, - STATE_MISS_READ_WORD_DELAY, - STATE_MISS_WRITE_WORD, + STATE_MISS_FETCH_WDV, + STATE_MISS_FETCH_DONE, + STATE_MISS_EVICT_DIRTY, + STATE_MISS_WRITE_CACHE_BLOCK, + STATE_MISS_READ_WORD, + STATE_MISS_READ_WORD_DELAY, + STATE_MISS_WRITE_WORD, - STATE_PTW_READY, - STATE_PTW_READ_MISS_FETCH_WDV, - STATE_PTW_READ_MISS_FETCH_DONE, - STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK, - STATE_PTW_READ_MISS_EVICT_DIRTY, - STATE_PTW_READ_MISS_READ_WORD, - STATE_PTW_READ_MISS_READ_WORD_DELAY, - STATE_PTW_ACCESS_AFTER_WALK, + STATE_PTW_READY, + STATE_PTW_READ_MISS_FETCH_WDV, + STATE_PTW_READ_MISS_FETCH_DONE, + STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK, + STATE_PTW_READ_MISS_EVICT_DIRTY, + STATE_PTW_READ_MISS_READ_WORD, + STATE_PTW_READ_MISS_READ_WORD_DELAY, + STATE_PTW_ACCESS_AFTER_WALK, - STATE_UNCACHED_WRITE, - STATE_UNCACHED_WRITE_DONE, - 
STATE_UNCACHED_READ, - STATE_UNCACHED_READ_DONE, + STATE_UNCACHED_WRITE, + STATE_UNCACHED_WRITE_DONE, + STATE_UNCACHED_READ, + STATE_UNCACHED_READ_DONE, - STATE_PTW_FAULT_READY, - STATE_PTW_FAULT_CPU_BUSY, - STATE_PTW_FAULT_MISS_FETCH_WDV, - STATE_PTW_FAULT_MISS_FETCH_DONE, - STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK, - STATE_PTW_FAULT_MISS_READ_WORD, - STATE_PTW_FAULT_MISS_READ_WORD_DELAY, - STATE_PTW_FAULT_MISS_WRITE_WORD, - STATE_PTW_FAULT_MISS_WRITE_WORD_DELAY, - STATE_PTW_FAULT_MISS_EVICT_DIRTY, + STATE_PTW_FAULT_READY, + STATE_PTW_FAULT_CPU_BUSY, + STATE_PTW_FAULT_MISS_FETCH_WDV, + STATE_PTW_FAULT_MISS_FETCH_DONE, + STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK, + STATE_PTW_FAULT_MISS_READ_WORD, + STATE_PTW_FAULT_MISS_READ_WORD_DELAY, + STATE_PTW_FAULT_MISS_WRITE_WORD, + STATE_PTW_FAULT_MISS_WRITE_WORD_DELAY, + STATE_PTW_FAULT_MISS_EVICT_DIRTY, - STATE_PTW_FAULT_UNCACHED_WRITE, - STATE_PTW_FAULT_UNCACHED_WRITE_DONE, - STATE_PTW_FAULT_UNCACHED_READ, - STATE_PTW_FAULT_UNCACHED_READ_DONE, + STATE_PTW_FAULT_UNCACHED_WRITE, + STATE_PTW_FAULT_UNCACHED_WRITE_DONE, + STATE_PTW_FAULT_UNCACHED_READ, + STATE_PTW_FAULT_UNCACHED_READ_DONE, - STATE_CPU_BUSY, - STATE_CPU_BUSY_FINISH_AMO, - - STATE_FLUSH, - STATE_FLUSH_WRITE_BACK, - STATE_FLUSH_CLEAR_DIRTY} statetype; + STATE_CPU_BUSY, + STATE_CPU_BUSY_FINISH_AMO, + + STATE_FLUSH, + STATE_FLUSH_WRITE_BACK, + STATE_FLUSH_CLEAR_DIRTY} statetype; (* mark_debug = "true" *) statetype CurrState, NextState; @@ -181,118 +181,118 @@ module dcachefsm case (CurrState) STATE_READY: begin - CntReset = 1'b0; - DCacheStall = 1'b0; - AHBRead = 1'b0; - AHBWrite = 1'b0; - SelAdrM = 2'b00; - SRAMWordWriteEnableM = 1'b0; - SetDirty = 1'b0; - LRUWriteEn = 1'b0; - CommittedM = 1'b0; + CntReset = 1'b0; + DCacheStall = 1'b0; + AHBRead = 1'b0; + AHBWrite = 1'b0; + SelAdrM = 2'b00; + SRAMWordWriteEnableM = 1'b0; + SetDirty = 1'b0; + LRUWriteEn = 1'b0; + CommittedM = 1'b0; - // TLB Miss - if(((AnyCPUReqM & DTLBMissM) | ITLBMissF) & ~(ExceptionM | 
PendingInterruptM)) begin - // the LSU arbiter has not yet selected the PTW. - // The CPU needs to be stalled until that happens. - // If we set DCacheStall for 1 cycle before going to - // PTW ready the CPU will stall. - // The page table walker asserts it's control 1 cycle - // after the TLBs miss. - CommittedM = 1'b1; - DCacheStall = 1'b1; - NextState = STATE_PTW_READY; - end + // TLB Miss + if(((AnyCPUReqM & DTLBMissM) | ITLBMissF) & ~(ExceptionM | PendingInterruptM)) begin + // the LSU arbiter has not yet selected the PTW. + // The CPU needs to be stalled until that happens. + // If we set DCacheStall for 1 cycle before going to + // PTW ready the CPU will stall. + // The page table walker asserts it's control 1 cycle + // after the TLBs miss. + CommittedM = 1'b1; + DCacheStall = 1'b1; + NextState = STATE_PTW_READY; + end - // Flush dcache to next level of memory - else if(FlushDCacheM & ~(ExceptionM | PendingInterruptM)) begin - NextState = STATE_FLUSH; - DCacheStall = 1'b1; - SelAdrM = 2'b11; - FlushAdrCntRst = 1'b1; - FlushWayCntRst = 1'b1; - end - - // amo hit - else if(AtomicM[1] & (&MemRWM) & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin - SelAdrM = 2'b10; - DCacheStall = 1'b0; - - if(StallWtoDCache) begin - NextState = STATE_CPU_BUSY_FINISH_AMO; - SelAdrM = 2'b10; - end - else begin - SRAMWordWriteEnableM = 1'b1; - SetDirty = 1'b1; - LRUWriteEn = 1'b1; - NextState = STATE_READY; - end - end - // read hit valid cached - else if(MemRWM[1] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin - DCacheStall = 1'b0; - LRUWriteEn = 1'b1; - - if(StallWtoDCache) begin - NextState = STATE_CPU_BUSY; + // Flush dcache to next level of memory + else if(FlushDCacheM & ~(ExceptionM | PendingInterruptM)) begin + NextState = STATE_FLUSH; + DCacheStall = 1'b1; + SelAdrM = 2'b11; + FlushAdrCntRst = 1'b1; + FlushWayCntRst = 1'b1; + end + + // amo hit + else if(AtomicM[1] & (&MemRWM) & CacheableM & ~(ExceptionM | 
PendingInterruptM) & CacheHit & ~DTLBMissM) begin + SelAdrM = 2'b10; + DCacheStall = 1'b0; + + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY_FINISH_AMO; + SelAdrM = 2'b10; + end + else begin + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_READY; + end + end + // read hit valid cached + else if(MemRWM[1] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin + DCacheStall = 1'b0; + LRUWriteEn = 1'b1; + + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; SelAdrM = 2'b10; - end - else begin - NextState = STATE_READY; - end - end - // write hit valid cached - else if (MemRWM[0] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin - SelAdrM = 2'b10; - DCacheStall = 1'b0; - SRAMWordWriteEnableM = 1'b1; - SetDirty = 1'b1; - LRUWriteEn = 1'b1; - - if(StallWtoDCache) begin - NextState = STATE_CPU_BUSY; - SelAdrM = 2'b10; - end - else begin - NextState = STATE_READY; - end - end - // read or write miss valid cached - else if((|MemRWM) & CacheableM & ~(ExceptionM | PendingInterruptM) & ~CacheHit & ~DTLBMissM) begin - NextState = STATE_MISS_FETCH_WDV; - CntReset = 1'b1; - DCacheStall = 1'b1; - end - // uncached write - else if(MemRWM[0] & ~CacheableM & ~(ExceptionM | PendingInterruptM) & ~DTLBMissM) begin - NextState = STATE_UNCACHED_WRITE; - CntReset = 1'b1; - DCacheStall = 1'b1; - AHBWrite = 1'b1; - end - // uncached read - else if(MemRWM[1] & ~CacheableM & ~(ExceptionM | PendingInterruptM) & ~DTLBMissM) begin - NextState = STATE_UNCACHED_READ; - CntReset = 1'b1; - DCacheStall = 1'b1; - AHBRead = 1'b1; - end - // fault - else if(AnyCPUReqM & (ExceptionM | PendingInterruptM) & ~DTLBMissM) begin - NextState = STATE_READY; - end - else NextState = STATE_READY; + end + else begin + NextState = STATE_READY; + end + end + // write hit valid cached + else if (MemRWM[0] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin + SelAdrM = 2'b10; + 
DCacheStall = 1'b0; + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + NextState = STATE_READY; + end + end + // read or write miss valid cached + else if((|MemRWM) & CacheableM & ~(ExceptionM | PendingInterruptM) & ~CacheHit & ~DTLBMissM) begin + NextState = STATE_MISS_FETCH_WDV; + CntReset = 1'b1; + DCacheStall = 1'b1; + end + // uncached write + else if(MemRWM[0] & ~CacheableM & ~(ExceptionM | PendingInterruptM) & ~DTLBMissM) begin + NextState = STATE_UNCACHED_WRITE; + CntReset = 1'b1; + DCacheStall = 1'b1; + AHBWrite = 1'b1; + end + // uncached read + else if(MemRWM[1] & ~CacheableM & ~(ExceptionM | PendingInterruptM) & ~DTLBMissM) begin + NextState = STATE_UNCACHED_READ; + CntReset = 1'b1; + DCacheStall = 1'b1; + AHBRead = 1'b1; + end + // fault + else if(AnyCPUReqM & (ExceptionM | PendingInterruptM) & ~DTLBMissM) begin + NextState = STATE_READY; + end + else NextState = STATE_READY; end STATE_MISS_FETCH_WDV: begin - DCacheStall = 1'b1; + DCacheStall = 1'b1; PreCntEn = 1'b1; - AHBRead = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - + AHBRead = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + if (FetchCountFlag & AHBAck) begin NextState = STATE_MISS_FETCH_DONE; end else begin @@ -301,170 +301,170 @@ module dcachefsm end STATE_MISS_FETCH_DONE: begin - DCacheStall = 1'b1; - SelAdrM = 2'b10; + DCacheStall = 1'b1; + SelAdrM = 2'b10; CntReset = 1'b1; - CommittedM = 1'b1; - if(VictimDirty) begin - NextState = STATE_MISS_EVICT_DIRTY; - end else begin - NextState = STATE_MISS_WRITE_CACHE_BLOCK; - end + CommittedM = 1'b1; + if(VictimDirty) begin + NextState = STATE_MISS_EVICT_DIRTY; + end else begin + NextState = STATE_MISS_WRITE_CACHE_BLOCK; + end end STATE_MISS_WRITE_CACHE_BLOCK: begin - SRAMBlockWriteEnableM = 1'b1; - DCacheStall = 1'b1; - NextState = STATE_MISS_READ_WORD; - SelAdrM = 2'b10; - SetValid = 1'b1; - ClearDirty = 1'b1; - CommittedM = 
1'b1; - //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. Wait for subsequent read/write + SRAMBlockWriteEnableM = 1'b1; + DCacheStall = 1'b1; + NextState = STATE_MISS_READ_WORD; + SelAdrM = 2'b10; + SetValid = 1'b1; + ClearDirty = 1'b1; + CommittedM = 1'b1; + //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. Wait for subsequent read/write end STATE_MISS_READ_WORD: begin - SelAdrM = 2'b10; - DCacheStall = 1'b1; - CommittedM = 1'b1; - if (MemRWM[0] & ~AtomicM[1]) begin // handles stores and amo write. - NextState = STATE_MISS_WRITE_WORD; - end else begin - NextState = STATE_MISS_READ_WORD_DELAY; - // delay state is required as the read signal MemRWM[1] is still high when we - // return to the ready state because the cache is stalling the cpu. - end + SelAdrM = 2'b10; + DCacheStall = 1'b1; + CommittedM = 1'b1; + if (MemRWM[0] & ~AtomicM[1]) begin // handles stores and amo write. + NextState = STATE_MISS_WRITE_WORD; + end else begin + NextState = STATE_MISS_READ_WORD_DELAY; + // delay state is required as the read signal MemRWM[1] is still high when we + // return to the ready state because the cache is stalling the cpu. 
+ end end STATE_MISS_READ_WORD_DELAY: begin - //SelAdrM = 2'b10; - CommittedM = 1'b1; - SRAMWordWriteEnableM = 1'b0; - SetDirty = 1'b0; - LRUWriteEn = 1'b0; - if(&MemRWM & AtomicM[1]) begin // amo write - SelAdrM = 2'b10; - if(StallWtoDCache) begin - NextState = STATE_CPU_BUSY_FINISH_AMO; - end - else begin - SRAMWordWriteEnableM = 1'b1; - SetDirty = 1'b1; - LRUWriteEn = 1'b1; - NextState = STATE_READY; - end - end else begin - LRUWriteEn = 1'b1; - if(StallWtoDCache) begin - NextState = STATE_CPU_BUSY; - SelAdrM = 2'b10; - end - else begin - NextState = STATE_READY; - end - end + //SelAdrM = 2'b10; + CommittedM = 1'b1; + SRAMWordWriteEnableM = 1'b0; + SetDirty = 1'b0; + LRUWriteEn = 1'b0; + if(&MemRWM & AtomicM[1]) begin // amo write + SelAdrM = 2'b10; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY_FINISH_AMO; + end + else begin + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_READY; + end + end else begin + LRUWriteEn = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + NextState = STATE_READY; + end + end end STATE_MISS_WRITE_WORD: begin - SRAMWordWriteEnableM = 1'b1; - SetDirty = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - LRUWriteEn = 1'b1; - if(StallWtoDCache) begin - NextState = STATE_CPU_BUSY; - SelAdrM = 2'b10; - end - else begin - NextState = STATE_READY; - end + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + LRUWriteEn = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + NextState = STATE_READY; + end end STATE_MISS_EVICT_DIRTY: begin - DCacheStall = 1'b1; + DCacheStall = 1'b1; PreCntEn = 1'b1; - AHBWrite = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - SelEvict = 1'b1; - if(FetchCountFlag & AHBAck) begin - NextState = STATE_MISS_WRITE_CACHE_BLOCK; - end else begin - NextState = STATE_MISS_EVICT_DIRTY; - end + AHBWrite = 1'b1; + SelAdrM = 2'b10; + 
CommittedM = 1'b1; + SelEvict = 1'b1; + if(FetchCountFlag & AHBAck) begin + NextState = STATE_MISS_WRITE_CACHE_BLOCK; + end else begin + NextState = STATE_MISS_EVICT_DIRTY; + end end STATE_PTW_READY: begin - // now all output connect to PTW instead of CPU. - CommittedM = 1'b1; - SelAdrM = 2'b00; - DCacheStall = 1'b0; - LRUWriteEn = 1'b0; - CntReset = 1'b0; + // now all output connect to PTW instead of CPU. + CommittedM = 1'b1; + SelAdrM = 2'b00; + DCacheStall = 1'b0; + LRUWriteEn = 1'b0; + CntReset = 1'b0; - // In this branch we remove stall and go back to ready. There is no request for memory from the - // datapath or the walker had a fault. - // types 3b, 4a, 4b, and 7c. - if ((DTLBMissM & WalkerPageFaultM) | // 3b - (ITLBMissF & (WalkerInstrPageFaultF | ITLBWriteF) & ~AnyCPUReqM & ~DTLBMissM) | // 4a and 4b - (DTLBMissM & ITLBMissF & WalkerPageFaultM)) begin // 7c - NextState = STATE_READY; - DCacheStall = 1'b0; - end - // in this branch we go back to ready, but there is a memory operation from - // the datapath so we MUST stall and replay the operation. - // types 3a and 5a - else if ((DTLBMissM & DTLBWriteM) | // 3a - (ITLBMissF & ITLBWriteF & AnyCPUReqM)) begin // 5a - NextState = STATE_READY; - DCacheStall = 1'b1; - SelAdrM = 2'b01; - end + // In this branch we remove stall and go back to ready. There is no request for memory from the + // datapath or the walker had a fault. + // types 3b, 4a, 4b, and 7c. + if ((DTLBMissM & WalkerPageFaultM) | // 3b or 7c (can have either itlb miss or not) + (ITLBMissF & (WalkerInstrPageFaultF | ITLBWriteF) & ~AnyCPUReqM & ~DTLBMissM) | // 4a and 4b + (DTLBMissM & ITLBMissF & WalkerPageFaultM)) begin // 7c *** BUG redundant with first condiction. + NextState = STATE_READY; + DCacheStall = 1'b0; + end + // in this branch we go back to ready, but there is a memory operation from + // the datapath so we MUST stall and replay the operation. 
+ // types 3a and 5a + else if ((DTLBMissM & DTLBWriteM) | // 3a + (ITLBMissF & ITLBWriteF & AnyCPUReqM)) begin // 5a + NextState = STATE_READY; + DCacheStall = 1'b1; + SelAdrM = 2'b01; + end - // like 5a we want to stall and go to the ready state, but we also have to save - // the WalkerInstrPageFaultF so it is held until the end of the memory operation - // from the datapath. - // types 5b - else if (ITLBMissF & WalkerInstrPageFaultF & AnyCPUReqM) begin // 5b - NextState = STATE_PTW_FAULT_READY; - DCacheStall = 1'b1; - SelAdrM = 2'b01; - end + // like 5a we want to stall and go to the ready state, but we also have to save + // the WalkerInstrPageFaultF so it is held until the end of the memory operation + // from the datapath. + // types 5b + else if (ITLBMissF & WalkerInstrPageFaultF & AnyCPUReqM) begin // 5b + NextState = STATE_PTW_FAULT_READY; + DCacheStall = 1'b1; + SelAdrM = 2'b01; + end - // in this branch we stay in ptw_ready because we are doing an itlb walk - // after a dtlb walk. - // types 7a and 7b. - else if (DTLBMissM & DTLBWriteM & ITLBMissF)begin - NextState = STATE_PTW_READY; - DCacheStall = 1'b0; - - // read hit valid cached - end else if(MemRWM[1] & CacheableM & ~ExceptionM & CacheHit) begin - NextState = STATE_PTW_READY; - DCacheStall = 1'b0; - LRUWriteEn = 1'b1; - end + // in this branch we stay in ptw_ready because we are doing an itlb walk + // after a dtlb walk. + // types 7a and 7b. 
+ else if (DTLBMissM & DTLBWriteM & ITLBMissF)begin + NextState = STATE_PTW_READY; + DCacheStall = 1'b0; + + // read hit valid cached + end else if(MemRWM[1] & CacheableM & ~ExceptionM & CacheHit) begin + NextState = STATE_PTW_READY; + DCacheStall = 1'b0; + LRUWriteEn = 1'b1; + end - // read miss valid cached - else if(SelPTW & MemRWM[1] & CacheableM & ~ExceptionM & ~CacheHit) begin - NextState = STATE_PTW_READ_MISS_FETCH_WDV; - CntReset = 1'b1; - DCacheStall = 1'b1; - end + // read miss valid cached + else if(SelPTW & MemRWM[1] & CacheableM & ~ExceptionM & ~CacheHit) begin + NextState = STATE_PTW_READ_MISS_FETCH_WDV; + CntReset = 1'b1; + DCacheStall = 1'b1; + end - else begin - NextState = STATE_PTW_READY; - DCacheStall = 1'b0; - end + else begin + NextState = STATE_PTW_READY; + DCacheStall = 1'b0; + end end STATE_PTW_READ_MISS_FETCH_WDV: begin - DCacheStall = 1'b1; + DCacheStall = 1'b1; PreCntEn = 1'b1; - AHBRead = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - + AHBRead = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + if(FetchCountFlag & AHBAck) begin NextState = STATE_PTW_READ_MISS_FETCH_DONE; end else begin @@ -473,238 +473,238 @@ module dcachefsm end STATE_PTW_READ_MISS_FETCH_DONE: begin - DCacheStall = 1'b1; - SelAdrM = 2'b10; + DCacheStall = 1'b1; + SelAdrM = 2'b10; CntReset = 1'b1; - CommittedM = 1'b1; + CommittedM = 1'b1; CntReset = 1'b1; - if(VictimDirty) begin - NextState = STATE_PTW_READ_MISS_EVICT_DIRTY; - end else begin - NextState = STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK; - end + if(VictimDirty) begin + NextState = STATE_PTW_READ_MISS_EVICT_DIRTY; + end else begin + NextState = STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK; + end end STATE_PTW_READ_MISS_EVICT_DIRTY: begin - DCacheStall = 1'b1; + DCacheStall = 1'b1; PreCntEn = 1'b1; - AHBWrite = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - SelEvict = 1'b1; - if(FetchCountFlag & AHBAck) begin - NextState = STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK; - end else begin - NextState = 
STATE_PTW_READ_MISS_EVICT_DIRTY; - end + AHBWrite = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + SelEvict = 1'b1; + if(FetchCountFlag & AHBAck) begin + NextState = STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK; + end else begin + NextState = STATE_PTW_READ_MISS_EVICT_DIRTY; + end end STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK: begin - SRAMBlockWriteEnableM = 1'b1; - DCacheStall = 1'b1; - NextState = STATE_PTW_READ_MISS_READ_WORD; - SelAdrM = 2'b10; - SetValid = 1'b1; - ClearDirty = 1'b1; - CommittedM = 1'b1; - //LRUWriteEn = 1'b1; + SRAMBlockWriteEnableM = 1'b1; + DCacheStall = 1'b1; + NextState = STATE_PTW_READ_MISS_READ_WORD; + SelAdrM = 2'b10; + SetValid = 1'b1; + ClearDirty = 1'b1; + CommittedM = 1'b1; + //LRUWriteEn = 1'b1; end STATE_PTW_READ_MISS_READ_WORD: begin - SelAdrM = 2'b10; - DCacheStall = 1'b1; - CommittedM = 1'b1; - NextState = STATE_PTW_READ_MISS_READ_WORD_DELAY; + SelAdrM = 2'b10; + DCacheStall = 1'b1; + CommittedM = 1'b1; + NextState = STATE_PTW_READ_MISS_READ_WORD_DELAY; end STATE_PTW_READ_MISS_READ_WORD_DELAY: begin - SelAdrM = 2'b10; - NextState = STATE_PTW_READY; - CommittedM = 1'b1; + SelAdrM = 2'b10; + NextState = STATE_PTW_READY; + CommittedM = 1'b1; end STATE_PTW_ACCESS_AFTER_WALK: begin - DCacheStall = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - LRUWriteEn = 1'b1; - NextState = STATE_READY; + DCacheStall = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_READY; end STATE_CPU_BUSY: begin - CommittedM = 1'b1; - SelAdrM = 2'b00; - if(StallWtoDCache) begin - NextState = STATE_CPU_BUSY; - SelAdrM = 2'b10; - end - else begin - NextState = STATE_READY; - end + CommittedM = 1'b1; + SelAdrM = 2'b00; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + NextState = STATE_READY; + end end STATE_CPU_BUSY_FINISH_AMO: begin - CommittedM = 1'b1; - SelAdrM = 2'b10; - SRAMWordWriteEnableM = 1'b0; - SetDirty = 1'b0; - LRUWriteEn = 1'b0; - if(StallWtoDCache) begin - NextState = 
STATE_CPU_BUSY_FINISH_AMO; - end - else begin - SRAMWordWriteEnableM = 1'b1; - SetDirty = 1'b1; - LRUWriteEn = 1'b1; - NextState = STATE_READY; - end + CommittedM = 1'b1; + SelAdrM = 2'b10; + SRAMWordWriteEnableM = 1'b0; + SetDirty = 1'b0; + LRUWriteEn = 1'b0; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY_FINISH_AMO; + end + else begin + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_READY; + end end STATE_UNCACHED_WRITE : begin - DCacheStall = 1'b1; - AHBWrite = 1'b1; - CommittedM = 1'b1; - if(AHBAck) begin - NextState = STATE_UNCACHED_WRITE_DONE; - end else begin - NextState = STATE_UNCACHED_WRITE; - end + DCacheStall = 1'b1; + AHBWrite = 1'b1; + CommittedM = 1'b1; + if(AHBAck) begin + NextState = STATE_UNCACHED_WRITE_DONE; + end else begin + NextState = STATE_UNCACHED_WRITE; + end end STATE_UNCACHED_READ: begin - DCacheStall = 1'b1; - AHBRead = 1'b1; - CommittedM = 1'b1; - if(AHBAck) begin - NextState = STATE_UNCACHED_READ_DONE; - end else begin - NextState = STATE_UNCACHED_READ; - end + DCacheStall = 1'b1; + AHBRead = 1'b1; + CommittedM = 1'b1; + if(AHBAck) begin + NextState = STATE_UNCACHED_READ_DONE; + end else begin + NextState = STATE_UNCACHED_READ; + end end STATE_UNCACHED_WRITE_DONE: begin - CommittedM = 1'b1; - SelAdrM = 2'b00; - if(StallWtoDCache) begin - NextState = STATE_CPU_BUSY; - SelAdrM = 2'b10; - end - else begin - NextState = STATE_READY; - end + CommittedM = 1'b1; + SelAdrM = 2'b00; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + NextState = STATE_READY; + end end STATE_UNCACHED_READ_DONE: begin - CommittedM = 1'b1; - SelUncached = 1'b1; - SelAdrM = 2'b00; - if(StallWtoDCache) begin - NextState = STATE_CPU_BUSY; - SelAdrM = 2'b10; - end - else begin - NextState = STATE_READY; - end + CommittedM = 1'b1; + SelUncached = 1'b1; + SelAdrM = 2'b00; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + 
NextState = STATE_READY; + end end // itlb => instruction page fault states with memory request. STATE_PTW_FAULT_READY: begin - DCacheStall = 1'b0; - LRUWriteEn = 1'b0; - SelAdrM = 2'b00; - MemAfterIWalkDone = 1'b0; - SetDirty = 1'b0; - LRUWriteEn = 1'b0; - CntReset = 1'b0; - AHBWrite = 1'b0; - AHBRead = 1'b0; - CommittedM = 1'b1; - NextState = STATE_READY; - - - // read hit valid cached - if(MemRWM[1] & CacheableM & CacheHit & ~DTLBMissM) begin - DCacheStall = 1'b0; - LRUWriteEn = 1'b1; - - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; + DCacheStall = 1'b0; + LRUWriteEn = 1'b0; + SelAdrM = 2'b00; + MemAfterIWalkDone = 1'b0; + SetDirty = 1'b0; + LRUWriteEn = 1'b0; + CntReset = 1'b0; + AHBWrite = 1'b0; + AHBRead = 1'b0; + CommittedM = 1'b1; + NextState = STATE_READY; + + + // read hit valid cached + if(MemRWM[1] & CacheableM & CacheHit & ~DTLBMissM) begin + DCacheStall = 1'b0; + LRUWriteEn = 1'b1; + + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; SelAdrM = 2'b10; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - end - end - - // write hit valid cached - else if (MemRWM[0] & CacheableM & CacheHit & ~DTLBMissM) begin - SelAdrM = 2'b10; - DCacheStall = 1'b0; - SRAMWordWriteEnableM = 1'b1; - SetDirty = 1'b1; - LRUWriteEn = 1'b1; - - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - SelAdrM = 2'b10; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - end - end - // read or write miss valid cached - else if((|MemRWM) & CacheableM & ~CacheHit & ~DTLBMissM) begin - NextState = STATE_PTW_FAULT_MISS_FETCH_WDV; - CntReset = 1'b1; - DCacheStall = 1'b1; - end - // uncached write - else if(MemRWM[0] & ~CacheableM & ~DTLBMissM) begin - NextState = STATE_PTW_FAULT_UNCACHED_WRITE; - CntReset = 1'b1; - DCacheStall = 1'b1; - AHBWrite = 1'b1; - end - // uncached read - else if(MemRWM[1] & ~CacheableM & ~DTLBMissM) begin - NextState = STATE_PTW_FAULT_UNCACHED_READ; - CntReset = 1'b1; 
- DCacheStall = 1'b1; - AHBRead = 1'b1; - MemAfterIWalkDone = 1'b0; - end - // fault - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - end + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + end + end + + // write hit valid cached + else if (MemRWM[0] & CacheableM & CacheHit & ~DTLBMissM) begin + SelAdrM = 2'b10; + DCacheStall = 1'b0; + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + end + end + // read or write miss valid cached + else if((|MemRWM) & CacheableM & ~CacheHit & ~DTLBMissM) begin + NextState = STATE_PTW_FAULT_MISS_FETCH_WDV; + CntReset = 1'b1; + DCacheStall = 1'b1; + end + // uncached write + else if(MemRWM[0] & ~CacheableM & ~DTLBMissM) begin + NextState = STATE_PTW_FAULT_UNCACHED_WRITE; + CntReset = 1'b1; + DCacheStall = 1'b1; + AHBWrite = 1'b1; + end + // uncached read + else if(MemRWM[1] & ~CacheableM & ~DTLBMissM) begin + NextState = STATE_PTW_FAULT_UNCACHED_READ; + CntReset = 1'b1; + DCacheStall = 1'b1; + AHBRead = 1'b1; + MemAfterIWalkDone = 1'b0; + end + // fault + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + end end STATE_PTW_FAULT_CPU_BUSY: begin - CommittedM = 1'b1; - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - MemAfterIWalkDone = 1'b0; - SelAdrM = 2'b10; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - SelAdrM = 2'b00; - end + CommittedM = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; + MemAfterIWalkDone = 1'b0; + SelAdrM = 2'b10; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + SelAdrM = 2'b00; + end end STATE_PTW_FAULT_MISS_FETCH_WDV: begin - DCacheStall = 1'b1; + DCacheStall = 1'b1; PreCntEn = 1'b1; - AHBRead = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - + AHBRead = 1'b1; + SelAdrM = 
2'b10; + CommittedM = 1'b1; + if(FetchCountFlag & AHBAck) begin NextState = STATE_PTW_FAULT_MISS_FETCH_DONE; end else begin @@ -713,201 +713,201 @@ module dcachefsm end STATE_PTW_FAULT_MISS_FETCH_DONE: begin - DCacheStall = 1'b1; - SelAdrM = 2'b10; + DCacheStall = 1'b1; + SelAdrM = 2'b10; CntReset = 1'b1; - CommittedM = 1'b1; - if(VictimDirty) begin - NextState = STATE_PTW_FAULT_MISS_EVICT_DIRTY; - end else begin - NextState = STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK; - end + CommittedM = 1'b1; + if(VictimDirty) begin + NextState = STATE_PTW_FAULT_MISS_EVICT_DIRTY; + end else begin + NextState = STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK; + end end STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK: begin - SRAMBlockWriteEnableM = 1'b1; - DCacheStall = 1'b1; - NextState = STATE_PTW_FAULT_MISS_READ_WORD; - SelAdrM = 2'b10; - SetValid = 1'b1; - ClearDirty = 1'b1; - CommittedM = 1'b1; - //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. Wait for subsequent read/write + SRAMBlockWriteEnableM = 1'b1; + DCacheStall = 1'b1; + NextState = STATE_PTW_FAULT_MISS_READ_WORD; + SelAdrM = 2'b10; + SetValid = 1'b1; + ClearDirty = 1'b1; + CommittedM = 1'b1; + //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. Wait for subsequent read/write end STATE_PTW_FAULT_MISS_READ_WORD: begin - SelAdrM = 2'b10; - DCacheStall = 1'b1; - CommittedM = 1'b1; - if(MemRWM[1]) begin - NextState = STATE_PTW_FAULT_MISS_READ_WORD_DELAY; - // delay state is required as the read signal MemRWM[1] is still high when we - // return to the ready state because the cache is stalling the cpu. - end else begin - NextState = STATE_PTW_FAULT_MISS_WRITE_WORD; - end + SelAdrM = 2'b10; + DCacheStall = 1'b1; + CommittedM = 1'b1; + if(MemRWM[1]) begin + NextState = STATE_PTW_FAULT_MISS_READ_WORD_DELAY; + // delay state is required as the read signal MemRWM[1] is still high when we + // return to the ready state because the cache is stalling the cpu. 
+ end else begin + NextState = STATE_PTW_FAULT_MISS_WRITE_WORD; + end end STATE_PTW_FAULT_MISS_READ_WORD_DELAY: begin - CommittedM = 1'b1; - LRUWriteEn = 1'b1; - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - SelAdrM = 2'b10; - MemAfterIWalkDone = 1'b0; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - SelAdrM = 2'b00; - end + CommittedM = 1'b1; + LRUWriteEn = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; + SelAdrM = 2'b10; + MemAfterIWalkDone = 1'b0; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + SelAdrM = 2'b00; + end end STATE_PTW_FAULT_MISS_WRITE_WORD: begin - SRAMWordWriteEnableM = 1'b1; - SetDirty = 1'b1; - SelAdrM = 2'b10; - DCacheStall = 1'b1; - CommittedM = 1'b1; - LRUWriteEn = 1'b1; - NextState = STATE_PTW_FAULT_MISS_WRITE_WORD_DELAY; + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + SelAdrM = 2'b10; + DCacheStall = 1'b1; + CommittedM = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_PTW_FAULT_MISS_WRITE_WORD_DELAY; end STATE_PTW_FAULT_MISS_WRITE_WORD_DELAY: begin - CommittedM = 1'b1; - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - MemAfterIWalkDone = 1'b0; - SelAdrM = 2'b10; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - SelAdrM = 2'b00; - end + CommittedM = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; + MemAfterIWalkDone = 1'b0; + SelAdrM = 2'b10; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + SelAdrM = 2'b00; + end end STATE_PTW_FAULT_MISS_EVICT_DIRTY: begin - DCacheStall = 1'b1; + DCacheStall = 1'b1; PreCntEn = 1'b1; - AHBWrite = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - SelEvict = 1'b1; - if(FetchCountFlag & AHBAck) begin - NextState = STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK; - end else begin - NextState = STATE_PTW_FAULT_MISS_EVICT_DIRTY; - end + AHBWrite = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + SelEvict = 1'b1; + 
if(FetchCountFlag & AHBAck) begin + NextState = STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK; + end else begin + NextState = STATE_PTW_FAULT_MISS_EVICT_DIRTY; + end end STATE_PTW_FAULT_UNCACHED_WRITE : begin - DCacheStall = 1'b1; - AHBWrite = 1'b1; - CommittedM = 1'b1; - if(AHBAck) begin - NextState = STATE_PTW_FAULT_UNCACHED_WRITE_DONE; - end else begin - NextState = STATE_PTW_FAULT_UNCACHED_WRITE; - end + DCacheStall = 1'b1; + AHBWrite = 1'b1; + CommittedM = 1'b1; + if(AHBAck) begin + NextState = STATE_PTW_FAULT_UNCACHED_WRITE_DONE; + end else begin + NextState = STATE_PTW_FAULT_UNCACHED_WRITE; + end end STATE_PTW_FAULT_UNCACHED_READ : begin - DCacheStall = 1'b1; - AHBRead = 1'b1; - CommittedM = 1'b1; - if(AHBAck) begin - NextState = STATE_PTW_FAULT_UNCACHED_READ_DONE; - end else begin - NextState = STATE_PTW_FAULT_UNCACHED_READ; - end + DCacheStall = 1'b1; + AHBRead = 1'b1; + CommittedM = 1'b1; + if(AHBAck) begin + NextState = STATE_PTW_FAULT_UNCACHED_READ_DONE; + end else begin + NextState = STATE_PTW_FAULT_UNCACHED_READ; + end end STATE_PTW_FAULT_UNCACHED_WRITE_DONE: begin - CommittedM = 1'b1; - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - MemAfterIWalkDone = 1'b0; - SelAdrM = 2'b10; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - SelAdrM = 2'b00; - end + CommittedM = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; + MemAfterIWalkDone = 1'b0; + SelAdrM = 2'b10; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + SelAdrM = 2'b00; + end end STATE_PTW_FAULT_UNCACHED_READ_DONE: begin - CommittedM = 1'b1; - SelUncached = 1'b1; - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - SelAdrM = 2'b10; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - end + CommittedM = 1'b1; + SelUncached = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState 
= STATE_READY; + end end STATE_FLUSH: begin - DCacheStall = 1'b1; - CommittedM = 1'b1; - SelAdrM = 2'b11; - SelFlush = 1'b1; - FlushAdrCntEn = 1'b1; - FlushWayCntEn = 1'b1; - CntReset = 1'b1; - if(VictimDirty) begin - NextState = STATE_FLUSH_WRITE_BACK; - FlushAdrCntEn = 1'b0; - FlushWayCntEn = 1'b0; - end else if (FlushAdrFlag) begin - NextState = STATE_READY; - DCacheStall = 1'b0; - FlushAdrCntEn = 1'b0; - FlushWayCntEn = 1'b0; - end else begin - NextState = STATE_FLUSH; - end + DCacheStall = 1'b1; + CommittedM = 1'b1; + SelAdrM = 2'b11; + SelFlush = 1'b1; + FlushAdrCntEn = 1'b1; + FlushWayCntEn = 1'b1; + CntReset = 1'b1; + if(VictimDirty) begin + NextState = STATE_FLUSH_WRITE_BACK; + FlushAdrCntEn = 1'b0; + FlushWayCntEn = 1'b0; + end else if (FlushAdrFlag) begin + NextState = STATE_READY; + DCacheStall = 1'b0; + FlushAdrCntEn = 1'b0; + FlushWayCntEn = 1'b0; + end else begin + NextState = STATE_FLUSH; + end end STATE_FLUSH_WRITE_BACK: begin - DCacheStall = 1'b1; - AHBWrite = 1'b1; - SelAdrM = 2'b11; - CommittedM = 1'b1; - SelFlush = 1'b1; + DCacheStall = 1'b1; + AHBWrite = 1'b1; + SelAdrM = 2'b11; + CommittedM = 1'b1; + SelFlush = 1'b1; PreCntEn = 1'b1; - if(FetchCountFlag & AHBAck) begin - NextState = STATE_FLUSH_CLEAR_DIRTY; - end else begin - NextState = STATE_FLUSH_WRITE_BACK; - end + if(FetchCountFlag & AHBAck) begin + NextState = STATE_FLUSH_CLEAR_DIRTY; + end else begin + NextState = STATE_FLUSH_WRITE_BACK; + end end STATE_FLUSH_CLEAR_DIRTY: begin - DCacheStall = 1'b1; - ClearDirty = 1'b1; - VDWriteEnable = 1'b1; - SelFlush = 1'b1; - SelAdrM = 2'b11; - FlushAdrCntEn = 1'b0; - FlushWayCntEn = 1'b0; - if(FlushAdrFlag) begin - NextState = STATE_READY; - DCacheStall = 1'b0; - SelAdrM = 2'b00; - end else begin - NextState = STATE_FLUSH; - FlushAdrCntEn = 1'b1; - FlushWayCntEn = 1'b1; - end + DCacheStall = 1'b1; + ClearDirty = 1'b1; + VDWriteEnable = 1'b1; + SelFlush = 1'b1; + SelAdrM = 2'b11; + FlushAdrCntEn = 1'b0; + FlushWayCntEn = 1'b0; + if(FlushAdrFlag) 
begin + NextState = STATE_READY; + DCacheStall = 1'b0; + SelAdrM = 2'b00; + end else begin + NextState = STATE_FLUSH; + FlushAdrCntEn = 1'b1; + FlushWayCntEn = 1'b1; + end end default: begin - NextState = STATE_READY; + NextState = STATE_READY; end endcase end diff --git a/wally-pipelined/src/cache/icachefsm.sv b/wally-pipelined/src/cache/icachefsm.sv index 433328ab..b104de4e 100644 --- a/wally-pipelined/src/cache/icachefsm.sv +++ b/wally-pipelined/src/cache/icachefsm.sv @@ -27,95 +27,95 @@ module icachefsm (// Inputs from pipeline - input logic clk, reset, + input logic clk, reset, - input logic StallF, + input logic StallF, // inputs from mmu - input logic ITLBMissF, - input logic ITLBWriteF, - input logic WalkerInstrPageFaultF, + input logic ITLBMissF, + input logic ITLBWriteF, + input logic WalkerInstrPageFaultF, - input logic ExceptionM, PendingInterruptM, + input logic ExceptionM, PendingInterruptM, // BUS interface - input logic InstrAckF, + input logic InstrAckF, // icache internal inputs - input logic hit, - input logic FetchCountFlag, - input logic spill, + input logic hit, + input logic FetchCountFlag, + input logic spill, // icache internal outputs - output logic ICacheReadEn, + output logic ICacheReadEn, // Load data into the cache - output logic ICacheMemWriteEnable, + output logic ICacheMemWriteEnable, // Outputs to pipeline control stuff - output logic ICacheStallF, + output logic ICacheStallF, // Bus interface outputs - output logic InstrReadF, + output logic InstrReadF, // icache internal outputs - output logic spillSave, - output logic CntEn, - output logic CntReset, + output logic spillSave, + output logic CntEn, + output logic CntReset, output logic [1:0] SelAdr, - output logic LRUWriteEn + output logic LRUWriteEn ); // FSM states - typedef enum {STATE_READY, - STATE_HIT_SPILL, // spill, block 0 hit - STATE_HIT_SPILL_MISS_FETCH_WDV, // block 1 miss, issue read to AHB and wait data. 
- STATE_HIT_SPILL_MISS_FETCH_DONE, // write data into SRAM/LUT - STATE_HIT_SPILL_MERGE, // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL. + typedef enum {STATE_READY, + STATE_HIT_SPILL, // spill, block 0 hit + STATE_HIT_SPILL_MISS_FETCH_WDV, // block 1 miss, issue read to AHB and wait data. + STATE_HIT_SPILL_MISS_FETCH_DONE, // write data into SRAM/LUT + STATE_HIT_SPILL_MERGE, // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL. - // a challenge is the spill signal gets us out of the ready state and moves us to - // 1 of the 2 spill branches. However the original fsm design had us return to - // the ready state when the spill + hits/misses were fully resolved. The problem - // is the spill signal is based on PCPF so when we return to READY to check if the - // cache has a hit it still expresses spill. We can fix in 1 of two ways. - // 1. we can add 1 extra state at the end of each spill branch to returns the instruction - // to the CPU advancing the CPU and icache to the next instruction. - // 2. We can assert a signal which is delayed 1 cycle to suppress the spill when we get - // to the READY state. - // The first first option is more robust and increases the number of states by 2. The - // second option is seams like it should work, but I worry there is a hidden interaction - // between CPU stalling and that register. - // Picking option 1. + // a challenge is the spill signal gets us out of the ready state and moves us to + // 1 of the 2 spill branches. However the original fsm design had us return to + // the ready state when the spill + hits/misses were fully resolved. The problem + // is the spill signal is based on PCPF so when we return to READY to check if the + // cache has a hit it still expresses spill. We can fix in 1 of two ways. + // 1. we can add 1 extra state at the end of each spill branch to returns the instruction + // to the CPU advancing the CPU and icache to the next instruction. 
+ // 2. We can assert a signal which is delayed 1 cycle to suppress the spill when we get + // to the READY state. + // The first option is more robust and increases the number of states by 2. The + // second option seems like it should work, but I worry there is a hidden interaction + // between CPU stalling and that register. + // Picking option 1. - STATE_HIT_SPILL_FINAL, // this state replicates STATE_READY's replay of the - // spill access but does nto consider spill. It also does not do another operation. + STATE_HIT_SPILL_FINAL, // this state replicates STATE_READY's replay of the + // spill access but does not consider spill. It also does not do another operation. - STATE_MISS_FETCH_WDV, // aligned miss, issue read to AHB and wait for data. - STATE_MISS_FETCH_DONE, // write data into SRAM/LUT - STATE_MISS_READ, // read block 1 from SRAM/LUT - STATE_MISS_READ_DELAY, // read block 1 from SRAM/LUT + STATE_MISS_FETCH_WDV, // aligned miss, issue read to AHB and wait for data. + STATE_MISS_FETCH_DONE, // write data into SRAM/LUT + STATE_MISS_READ, // read block 1 from SRAM/LUT + STATE_MISS_READ_DELAY, // read block 1 from SRAM/LUT - STATE_MISS_SPILL_FETCH_WDV, // spill, miss on block 0, issue read to AHB and wait - STATE_MISS_SPILL_FETCH_DONE, // write data into SRAM/LUT - STATE_MISS_SPILL_READ1, // read block 0 from SRAM/LUT - STATE_MISS_SPILL_2, // return to ready if hit or do second block update. - STATE_MISS_SPILL_2_START, // return to ready if hit or do second block update. - STATE_MISS_SPILL_MISS_FETCH_WDV, // miss on block 1, issue read to AHB and wait - STATE_MISS_SPILL_MISS_FETCH_DONE, // write data to SRAM/LUT - STATE_MISS_SPILL_MERGE, // read block 0 of CPU access, + STATE_MISS_SPILL_FETCH_WDV, // spill, miss on block 0, issue read to AHB and wait + STATE_MISS_SPILL_FETCH_DONE, // write data into SRAM/LUT + STATE_MISS_SPILL_READ1, // read block 0 from SRAM/LUT + STATE_MISS_SPILL_2, // return to ready if hit or do second block update.
+ STATE_MISS_SPILL_2_START, // return to ready if hit or do second block update. + STATE_MISS_SPILL_MISS_FETCH_WDV, // miss on block 1, issue read to AHB and wait + STATE_MISS_SPILL_MISS_FETCH_DONE, // write data to SRAM/LUT + STATE_MISS_SPILL_MERGE, // read block 0 of CPU access, - STATE_MISS_SPILL_FINAL, // this state replicates STATE_READY's replay of the - // spill access but does nto consider spill. It also does not do another operation. + STATE_MISS_SPILL_FINAL, // this state replicates STATE_READY's replay of the + // spill access but does not consider spill. It also does not do another operation. - STATE_INVALIDATE, // *** not sure if invalidate or evict? invalidate by cache block or address? - STATE_TLB_MISS, - STATE_TLB_MISS_DONE, + STATE_INVALIDATE, // *** not sure if invalidate or evict? invalidate by cache block or address? + STATE_TLB_MISS, + STATE_TLB_MISS_DONE, - STATE_CPU_BUSY, - STATE_CPU_BUSY_SPILL - } statetype; + STATE_CPU_BUSY, + STATE_CPU_BUSY_SPILL + } statetype; (* mark_debug = "true" *) statetype CurrState, NextState; - logic PreCntEn; + logic PreCntEn; // the FSM is always runing, do not stall.
always_ff @(posedge clk) @@ -141,18 +141,18 @@ module icachefsm NextState = STATE_TLB_MISS; end else if (hit & ~spill) begin ICacheStallF = 1'b0; - LRUWriteEn = 1'b1; - if(StallF) begin - NextState = STATE_CPU_BUSY; - SelAdr = 2'b01; - end else begin + LRUWriteEn = 1'b1; + if(StallF) begin + NextState = STATE_CPU_BUSY; + SelAdr = 2'b01; + end else begin NextState = STATE_READY; - end + end end else if (hit & spill) begin spillSave = 1'b1; SelAdr = 2'b10; LRUWriteEn = 1'b1; - NextState = STATE_HIT_SPILL; + NextState = STATE_HIT_SPILL; end else if (~hit & ~spill) begin CntReset = 1'b1; NextState = STATE_MISS_FETCH_WDV; @@ -161,12 +161,12 @@ module icachefsm SelAdr = 2'b01; NextState = STATE_MISS_SPILL_FETCH_WDV; end else begin - if(StallF) begin - NextState = STATE_CPU_BUSY; - SelAdr = 2'b01; - end else begin + if(StallF) begin + NextState = STATE_CPU_BUSY; + SelAdr = 2'b01; + end else begin NextState = STATE_READY; - end + end end end // branch 1, hit spill and 2, miss spill hit @@ -204,15 +204,15 @@ module icachefsm ICacheReadEn = 1'b1; SelAdr = 2'b00; ICacheStallF = 1'b0; - LRUWriteEn = 1'b1; - - if(StallF) begin - NextState = STATE_CPU_BUSY_SPILL; - SelAdr = 2'b10; - end else begin + LRUWriteEn = 1'b1; + + if(StallF) begin + NextState = STATE_CPU_BUSY_SPILL; + SelAdr = 2'b10; + end else begin NextState = STATE_READY; - end - + end + end // branch 3 miss no spill STATE_MISS_FETCH_WDV: begin @@ -238,15 +238,15 @@ module icachefsm STATE_MISS_READ_DELAY: begin //SelAdr = 2'b01; ICacheReadEn = 1'b1; - ICacheStallF = 1'b0; - LRUWriteEn = 1'b1; - if(StallF) begin - SelAdr = 2'b01; - NextState = STATE_CPU_BUSY; - SelAdr = 2'b01; - end else begin + ICacheStallF = 1'b0; + LRUWriteEn = 1'b1; + if(StallF) begin + SelAdr = 2'b01; + NextState = STATE_CPU_BUSY; + SelAdr = 2'b01; + end else begin NextState = STATE_READY; - end + end end // branch 4 miss spill hit, and 5 miss spill miss STATE_MISS_SPILL_FETCH_WDV: begin @@ -267,7 +267,7 @@ module icachefsm STATE_MISS_SPILL_READ1: 
begin // always be a hit as we just wrote that cache block. SelAdr = 2'b01; // there is a 1 cycle delay after setting the address before the date arrives. ICacheReadEn = 1'b1; - LRUWriteEn = 1'b1; + LRUWriteEn = 1'b1; NextState = STATE_MISS_SPILL_2; end STATE_MISS_SPILL_2: begin @@ -284,13 +284,13 @@ module icachefsm ICacheReadEn = 1'b1; SelAdr = 2'b00; ICacheStallF = 1'b0; - LRUWriteEn = 1'b1; - if(StallF) begin - NextState = STATE_CPU_BUSY; - SelAdr = 2'b01; - end else begin + LRUWriteEn = 1'b1; + if(StallF) begin + NextState = STATE_CPU_BUSY; + SelAdr = 2'b01; + end else begin NextState = STATE_READY; - end + end end end STATE_MISS_SPILL_MISS_FETCH_WDV: begin @@ -317,13 +317,13 @@ module icachefsm ICacheReadEn = 1'b1; SelAdr = 2'b00; ICacheStallF = 1'b0; - LRUWriteEn = 1'b1; - if(StallF) begin - NextState = STATE_CPU_BUSY_SPILL; - SelAdr = 2'b10; - end else begin + LRUWriteEn = 1'b1; + if(StallF) begin + NextState = STATE_CPU_BUSY_SPILL; + SelAdr = 2'b10; + end else begin NextState = STATE_READY; - end + end end STATE_TLB_MISS: begin if (WalkerInstrPageFaultF) begin @@ -331,38 +331,40 @@ module icachefsm ICacheStallF = 1'b0; end else if (ITLBWriteF) begin NextState = STATE_TLB_MISS_DONE; + ICacheStallF = 1'b1; end else begin NextState = STATE_TLB_MISS; + ICacheStallF = 1'b0; end end STATE_TLB_MISS_DONE: begin - SelAdr = 2'b01; + SelAdr = 2'b01; NextState = STATE_READY; end STATE_CPU_BUSY: begin - ICacheStallF = 1'b0; - if (ITLBMissF) begin + ICacheStallF = 1'b0; + if (ITLBMissF) begin NextState = STATE_TLB_MISS; - end else if(StallF) begin - NextState = STATE_CPU_BUSY; - SelAdr = 2'b01; - end - else begin - NextState = STATE_READY; - end + end else if(StallF) begin + NextState = STATE_CPU_BUSY; + SelAdr = 2'b01; + end + else begin + NextState = STATE_READY; + end end STATE_CPU_BUSY_SPILL: begin - ICacheStallF = 1'b0; - ICacheReadEn = 1'b1; - if (ITLBMissF) begin + ICacheStallF = 1'b0; + ICacheReadEn = 1'b1; + if (ITLBMissF) begin NextState = STATE_TLB_MISS; - 
end else if(StallF) begin - NextState = STATE_CPU_BUSY_SPILL; - SelAdr = 2'b10; - end - else begin - NextState = STATE_READY; - end + end else if(StallF) begin + NextState = STATE_CPU_BUSY_SPILL; + SelAdr = 2'b10; + end + else begin + NextState = STATE_READY; + end end default: begin SelAdr = 2'b01; @@ -374,8 +376,8 @@ module icachefsm assign CntEn = PreCntEn & InstrAckF; assign InstrReadF = (CurrState == STATE_HIT_SPILL_MISS_FETCH_WDV) || - (CurrState == STATE_MISS_FETCH_WDV) || - (CurrState == STATE_MISS_SPILL_FETCH_WDV) || - (CurrState == STATE_MISS_SPILL_MISS_FETCH_WDV); + (CurrState == STATE_MISS_FETCH_WDV) || + (CurrState == STATE_MISS_SPILL_FETCH_WDV) || + (CurrState == STATE_MISS_SPILL_MISS_FETCH_WDV); endmodule diff --git a/wally-pipelined/src/uncore/uartPC16550D.sv b/wally-pipelined/src/uncore/uartPC16550D.sv index c6f3d5c1..09d89a2c 100644 --- a/wally-pipelined/src/uncore/uartPC16550D.sv +++ b/wally-pipelined/src/uncore/uartPC16550D.sv @@ -138,7 +138,7 @@ module uartPC16550D( LSR <= #1 8'b01100000; MSR <= #1 4'b0; if (`FPGA) begin - DLL <= #1 8'd11; + DLL <= #1 8'd25; DLM <= #1 8'b0; end else begin DLL <= #1 8'd1; // this cannot be zero with DLM also zer0. @@ -154,8 +154,8 @@ module uartPC16550D( 3'b000: if (DLAB) DLL <= #1 Din; // else TXHR <= #1 Din; // TX handled in TX register/FIFO section 3'b001: if (DLAB) DLM <= #1 Din; else IER <= #1 Din[3:0]; -----/\----- EXCLUDED -----/\----- */ - // *** BUG FIX ME for now for the divider to be 11. Our clock is 10 Mhz. 10Mhz /(11 * 16) = 56818 baud, which is close enough to 57600 baud - 3'b000: if (DLAB) DLL <= #1 8'd11; //else TXHR <= #1 Din; // TX handled in TX register/FIFO section + // *** BUG FIX ME for now for the divider to be 25. Our clock is 23 Mhz.
23Mhz /(25 * 16) = 57500 baud, which is close enough to 57600 baud + 3'b000: if (DLAB) DLL <= #1 8'd25; //else TXHR <= #1 Din; // TX handled in TX register/FIFO section 3'b001: if (DLAB) DLM <= #1 8'b0; else IER <= #1 Din[3:0]; 3'b010: FCR <= #1 {Din[7:6], 2'b0, Din[3], 2'b0, Din[0]}; // Write only FIFO Control Register; 4:5 reserved and 2:1 self-clearing