diff --git a/wally-pipelined/config/buildroot/wally-config.vh b/wally-pipelined/config/buildroot/wally-config.vh index e1e4f300d..cb59bb69e 100644 --- a/wally-pipelined/config/buildroot/wally-config.vh +++ b/wally-pipelined/config/buildroot/wally-config.vh @@ -49,8 +49,8 @@ `define MEM_VIRTMEM 0 `define VECTORED_INTERRUPTS_SUPPORTED 1 // Domenico Ottolia 4/15: Support for vectored interrupts in _tvec csrs. Just implemented in src/privileged/trap.sv around line 75. Pretty sure this should be 1. -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 @@ -62,10 +62,8 @@ // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 56'h00003FFF -//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 56'h00000FFF +`define BOOTTIM_BASE 56'h00001000 +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 `define TIM_BASE 56'h80000000 `define TIM_RANGE 56'h07FFFFFF diff --git a/wally-pipelined/config/busybear/wally-config.vh b/wally-pipelined/config/busybear/wally-config.vh index 58c1c8a0a..26e37fa6c 100644 --- a/wally-pipelined/config/busybear/wally-config.vh +++ b/wally-pipelined/config/busybear/wally-config.vh @@ -50,8 +50,8 @@ `define MEM_VIRTMEM 0 `define VECTORED_INTERRUPTS_SUPPORTED 1 // Domenico Ottolia 4/15: Support for vectored interrupts in _tvec csrs. Just implemented in src/privileged/trap.sv around line 75. Pretty sure this should be 1. 
-`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 @@ -64,10 +64,10 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 56'h00003FFF -//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 56'h00000FFF +//`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +//`define BOOTTIM_RANGE 56'h00003FFF +`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 `define TIM_BASE 56'h80000000 `define TIM_RANGE 56'h07FFFFFF diff --git a/wally-pipelined/config/coremark-64i/wally-config.vh b/wally-pipelined/config/coremark-64i/wally-config.vh index f72b4f616..ae3100c6f 100644 --- a/wally-pipelined/config/coremark-64i/wally-config.vh +++ b/wally-pipelined/config/coremark-64i/wally-config.vh @@ -55,25 +55,23 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 
0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 56'h00001000 +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 56'h80000000 +`define TIM_RANGE 56'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 56'h10012000 +`define GPIO_RANGE 56'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF // Test modes diff --git a/wally-pipelined/config/coremark/wally-config.vh b/wally-pipelined/config/coremark/wally-config.vh index ccf0a64b8..e4e3376db 100644 --- a/wally-pipelined/config/coremark/wally-config.vh +++ b/wally-pipelined/config/coremark/wally-config.vh @@ -49,8 +49,8 @@ `define MEM_VIRTMEM 0 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Address space `define RESET_VECTOR 64'h00000000000100b0 @@ -63,25 +63,23 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and 
this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 34'h00001000 +`define BOOTTIM_RANGE 34'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 34'h80000000 +`define TIM_RANGE 34'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 34'h02000000 +`define CLINT_RANGE 34'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 34'h10012000 +`define GPIO_RANGE 34'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 34'h10000000 +`define UART_RANGE 34'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 34'h0C000000 +`define PLIC_RANGE 34'h03FFFFFF // Test modes diff --git a/wally-pipelined/config/coremark_bare/wally-config.vh b/wally-pipelined/config/coremark_bare/wally-config.vh index d55200b48..95441f8f0 100644 --- a/wally-pipelined/config/coremark_bare/wally-config.vh +++ b/wally-pipelined/config/coremark_bare/wally-config.vh @@ -49,8 +49,8 @@ `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 @@ -66,25 +66,23 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for 
decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 34'h00001000 +`define BOOTTIM_RANGE 34'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 34'h80000000 +`define TIM_RANGE 34'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 34'h02000000 +`define CLINT_RANGE 34'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 34'h10012000 +`define GPIO_RANGE 34'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 34'h10000000 +`define UART_RANGE 34'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 34'h0C000000 +`define PLIC_RANGE 34'h03FFFFFF // Test modes diff --git a/wally-pipelined/config/rv32ic/wally-config.vh b/wally-pipelined/config/rv32ic/wally-config.vh index 29cd973ae..090da8d62 100644 --- a/wally-pipelined/config/rv32ic/wally-config.vh +++ b/wally-pipelined/config/rv32ic/wally-config.vh @@ -48,8 +48,8 @@ `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 @@ -63,10 +63,8 @@ // *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? 
`define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 34'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 34'h00003FFF -//`define BOOTTIM_BASE 34'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 34'h00000FFF +`define BOOTTIM_BASE 34'h00001000 +`define BOOTTIM_RANGE 34'h00000FFF `define TIM_SUPPORTED 1'b1 `define TIM_BASE 34'h80000000 `define TIM_RANGE 34'h07FFFFFF diff --git a/wally-pipelined/config/rv64BP/wally-config.vh b/wally-pipelined/config/rv64BP/wally-config.vh index 2e5eaf378..01680b9d8 100644 --- a/wally-pipelined/config/rv64BP/wally-config.vh +++ b/wally-pipelined/config/rv64BP/wally-config.vh @@ -50,8 +50,8 @@ `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Address space `define RESET_VECTOR 64'h0000000000000000 @@ -64,25 +64,23 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 56'h00001000 +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 56'h80000000 +`define TIM_RANGE 56'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 
32'h0000FFFF +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 56'h10012000 +`define GPIO_RANGE 56'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF // Test modes diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index a15ef18b7..44a90e1c2 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -49,8 +49,8 @@ `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 @@ -67,10 +67,10 @@ // *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? 
`define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_RANGE 56'h00003FFF -`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 56'h00000FFF +//`define BOOTTIM_RANGE 56'h00003FFF +//`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 `define TIM_BASE 56'h80000000 `define TIM_RANGE 56'h07FFFFFF diff --git a/wally-pipelined/config/rv64icfd/wally-config.vh b/wally-pipelined/config/rv64icfd/wally-config.vh index 293222104..25b8cbca9 100644 --- a/wally-pipelined/config/rv64icfd/wally-config.vh +++ b/wally-pipelined/config/rv64icfd/wally-config.vh @@ -49,8 +49,8 @@ `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 @@ -66,25 +66,23 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all 
zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 56'h00001000 +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 56'h80000000 +`define TIM_RANGE 56'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 56'h10012000 +`define GPIO_RANGE 56'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF // Test modes diff --git a/wally-pipelined/config/rv64imc/wally-config.vh b/wally-pipelined/config/rv64imc/wally-config.vh index 5ecb9bef5..a554a612b 100644 --- a/wally-pipelined/config/rv64imc/wally-config.vh +++ b/wally-pipelined/config/rv64imc/wally-config.vh @@ -48,8 +48,8 @@ `define MEM_VIRTMEM 0 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Address space `define RESET_VECTOR 64'h0000000080000000 @@ -62,25 +62,23 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define 
BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 56'h00001000 +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 56'h80000000 +`define TIM_RANGE 56'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 56'h10012000 +`define GPIO_RANGE 56'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF // Test modes diff --git a/wally-pipelined/config/shared/wally-constants.vh b/wally-pipelined/config/shared/wally-constants.vh index 706997b93..99269ae5b 100644 --- a/wally-pipelined/config/shared/wally-constants.vh +++ b/wally-pipelined/config/shared/wally-constants.vh @@ -39,7 +39,9 @@ `define VPN_BITS (`XLEN==32 ? (2*`VPN_SEGMENT_BITS) : (4*`VPN_SEGMENT_BITS)) `define PPN_BITS (`XLEN==32 ? 22 : 44) `define PA_BITS (`XLEN==32 ? 34 : 56) -`define SVMODE_BITS (`XLEN == 32 ? 1 : 4) +`define SVMODE_BITS (`XLEN==32 ? 1 : 4) +`define ASID_BASE (`XLEN==32 ? 22 : 44) +`define ASID_BITS (`XLEN==32 ? 
9 : 16) // constants to check SATP_MODE against // defined in Table 4.3 of the privileged spec diff --git a/wally-pipelined/linux-testgen/logAllBuildroot.sh b/wally-pipelined/linux-testgen/logAllBuildroot.sh index 740fa8c4b..86bba6af4 100755 --- a/wally-pipelined/linux-testgen/logAllBuildroot.sh +++ b/wally-pipelined/linux-testgen/logAllBuildroot.sh @@ -21,11 +21,12 @@ # - Logs parse_qemu.py's simulated gdb output to qemu_in_gdb_format.txt #cat qemu_output.txt | ./parse_qemu.py >qemu_in_gdb_format.txt #cat qemu_output.txt | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/" + # Uncomment this version in case you just want to have qemu_in_gdb_format.txt around # It is often helpful for general debugging -#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py >qemu_in_gdb_format.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog - +(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py >/courses/e190ax/buildroot_boot/qemu_in_gdb_format.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog # Split qemu_in_gdb_format.txt into chunks of 100,000 instructions for easier inspection +#cd /courses/e190ax/buildroot_boot #split -d -l 5600000 qemu_in_gdb_format.txt --verbose # Uncomment this version for 
parse_gdb_output.py debugging @@ -36,4 +37,4 @@ # =========== Just Do the Thing ========== # Uncomment this version for the whole thing # - Logs info needed by buildroot testbench -(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog +#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog diff --git a/wally-pipelined/linux-testgen/parse_qemu.py b/wally-pipelined/linux-testgen/parse_qemu.py index ac5d95f0b..4da168aba 100755 --- a/wally-pipelined/linux-testgen/parse_qemu.py +++ b/wally-pipelined/linux-testgen/parse_qemu.py @@ -9,9 +9,10 @@ pageFaultCSRs = {} regs = {} pageFaultRegs = {} instrs = {} +instrCount = 0 def printPC(l): - global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs + global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs, instrCount if not inPageFault: inst = l.split() if len(inst) > 3: @@ -19,6 +20,9 @@ def printPC(l): else: print(f'=> {inst[1]}:\t{inst[2]}') print(f'{inst[0]} 0x{inst[1]}') + instrCount += 1 + if ((instrCount % 100000) == 
0): + sys.stderr.write("QEMU parser reached "+str(instrCount)+" instrs\n") def printCSRs(): global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs diff --git a/wally-pipelined/regression/wave-dos/linux-waves.do b/wally-pipelined/regression/wave-dos/linux-waves.do index a891c206b..843ed2745 100644 --- a/wally-pipelined/regression/wave-dos/linux-waves.do +++ b/wally-pipelined/regression/wave-dos/linux-waves.do @@ -122,11 +122,10 @@ add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UEPC_REGW add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UTVEC_REGW add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UIP_REGW add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UIE_REGW -#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG01_REGW -#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG23_REGW -#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW -#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW -#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW +add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG_ARRAY_REGW +add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW +add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW +add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW add wave -divider add wave -hex -r /testbench/* diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 1304b40c6..a42bfbd43 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -9,7 +9,8 @@ add wave /testbench/clk add wave /testbench/reset add wave -divider -add wave /testbench/dut/hart/DataStall +#add wave /testbench/dut/hart/DataStall +add wave /testbench/debug add wave /testbench/dut/hart/StallF add wave 
/testbench/dut/hart/StallD add wave /testbench/dut/hart/StallE diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 213b5ceea..42da60938 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -7,19 +7,19 @@ add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/Func add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/EcallFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StorePageFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InterruptM +add 
wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM @@ -118,18 +118,18 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW -add wave -noupdate 
-expand -group alu /testbench/dut/hart/ieu/dp/alu/a -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags -add wave -noupdate -expand -group alu -divider internals -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/b +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags +add wave -noupdate -group alu -divider internals +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E @@ -243,7 +243,6 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW add wave -noupdate -expand -group lsu -color Gold 
/testbench/dut/hart/lsu/CurrState add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DisableTranslation add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemRWM -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DataStall add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAdrM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemPAdrM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/ReadDataW @@ -294,42 +293,7 @@ add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIME add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/SwIntM -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUTranslate -add wave -noupdate -expand -group ptwalker -color Gold /testbench/dut/hart/pagetablewalker/WalkerState -add wave -noupdate -expand -group ptwalker -color Salmon /testbench/dut/hart/pagetablewalker/HPTWStall -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/HPTWRead -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/EndWalk -add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/MMUReadPTE -add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/PRegEn -add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/CurrentPTE add wave -noupdate -expand -group ptwalker -divider data -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/TranslationPAdr -add wave -noupdate -expand -group ptwalker 
/testbench/dut/hart/pagetablewalker/ValidPTE -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/LeafPTE -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/TranslationPAdr -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageTableEntry -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageType -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/ITLBWriteF -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/DTLBWriteM -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerInstrPageFaultF -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerLoadPageFaultM -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerStorePageFaultM -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/MMUStall -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/EndWalk -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr -add wave -noupdate -expand -group {LSU ARB} -color Gold /testbench/dut/hart/arbiter/CurrState -add wave -noupdate -expand -group {LSU ARB} -color {Medium Orchid} /testbench/dut/hart/arbiter/SelPTW -add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/pagetablewalker/MMUStall -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWTranslate -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWRead -add wave -noupdate -expand -group 
{LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWPAdr -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReadPTE -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReady -add wave -noupdate -expand -group {LSU ARB} -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU -add wave -noupdate /testbench/dut/hart/lsu/DataStall add wave -noupdate -group csr /testbench/dut/hart/priv/csr/MIP_REGW add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HCLK add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESETn @@ -356,7 +320,6 @@ add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genb add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/tlb/TLBWrite add wave -noupdate -group itlb /testbench/dut/hart/ifu/ITLBMissF -add wave -noupdate /testbench/dut/hart/pagetablewalker/StartWalk add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/DisableTranslation add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/VirtualAddress add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/CAMHit @@ -367,8 +330,8 @@ add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/TLBWr add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/PTEWriteVal add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/WriteLines TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 8} {4545 ns} 0} {{Cursor 3} {3377 ns} 0} {{Cursor 4} {3215 ns} 0} -quietly wave cursor active 1 +WaveRestoreCursors {{Cursor 8} {4545 ns} 0} {{Cursor 3} {2540 ns} 0} {{Cursor 4} {681 ns} 0} +quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 configure wave -justifyvalue left @@ -383,4 +346,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits 
ns update -WaveRestoreZoom {4209 ns} {4657 ns} +WaveRestoreZoom {2313 ns} {2789 ns} diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index 748b3f5e5..e7098d755 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -115,8 +115,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) localparam STATE_INVALIDATE = 'h12; // *** not sure if invalidate or evict? invalidate by cache block or address? localparam STATE_TLB_MISS = 'h13; localparam STATE_TLB_MISS_DONE = 'h14; - - + localparam STATE_INSTR_PAGE_FAULT = 'h15; + localparam AHBByteLength = `XLEN / 8; localparam AHBOFFETWIDTH = $clog2(AHBByteLength); @@ -370,13 +370,20 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) NextState = STATE_READY; end STATE_TLB_MISS: begin - if (ITLBWriteF | WalkerInstrPageFaultF) begin + if (WalkerInstrPageFaultF) begin + NextState = STATE_INSTR_PAGE_FAULT; + ICacheStallF = 1'b0; + end else if (ITLBWriteF) begin NextState = STATE_TLB_MISS_DONE; end else begin NextState = STATE_TLB_MISS; end end - STATE_TLB_MISS_DONE : begin + STATE_TLB_MISS_DONE: begin + NextState = STATE_READY; + end + STATE_INSTR_PAGE_FAULT: begin + ICacheStallF = 1'b0; NextState = STATE_READY; end default: begin @@ -425,8 +432,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) // store read data from memory interface before writing into SRAM. 
genvar i; generate - for (i = 0; i < WORDSPERLINE; i++) begin - flopenr #(`XLEN) flop(.clk(clk), + for (i = 0; i < WORDSPERLINE; i++) begin:storebuffer + flopenr #(`XLEN) sb(.clk(clk), .reset(reset), .en(InstrAckF & (i == FetchCount)), .d(InstrInF), diff --git a/wally-pipelined/src/cache/dmapped.sv b/wally-pipelined/src/cache/dmapped.sv index f40da412a..426697529 100644 --- a/wally-pipelined/src/cache/dmapped.sv +++ b/wally-pipelined/src/cache/dmapped.sv @@ -106,7 +106,7 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par assign DataWord = ReadLineTransformed[ReadOffset]; genvar i; generate - for (i=0; i < LINESIZE/WORDSIZE; i++) begin + for (i=0; i < LINESIZE/WORDSIZE; i++) begin:readline assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; end endgenerate @@ -214,7 +214,7 @@ module wtdirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par assign DataWord = ReadLineTransformed[ReadOffset]; genvar i; generate - for (i=0; i < LINESIZE/WORDSIZE; i++) begin + for (i=0; i < LINESIZE/WORDSIZE; i++) begin:readline assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; end endgenerate diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 84e8f3b67..4bd079e96 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -53,13 +53,6 @@ module ahblite ( input logic [1:0] MemSizeM, //output logic DataStall, // Signals from MMU -/* -----\/----- EXCLUDED -----\/----- - input logic MMUStall, - input logic [`XLEN-1:0] MMUPAdr, - input logic MMUTranslate, - output logic [`XLEN-1:0] MMUReadPTE, - output logic MMUReady, - -----/\----- EXCLUDED -----/\----- */ // Signals from PMA checker input logic DSquashBusAccessM, ISquashBusAccessF, // Signals to PMA checker (metadata of proposed access) @@ -159,8 +152,6 @@ module ahblite ( -----/\----- EXCLUDED -----/\----- */ - //assign #1 InstrStall = ((NextBusState == INSTRREAD) || (NextBusState 
== INSTRREADC) || - // MMUStall); // bus outputs assign #1 GrantData = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == MEMWRITE) || @@ -225,11 +216,9 @@ module ahblite ( subwordread swr(.*); // Handle AMO instructions if applicable - generate + generate if (`A_SUPPORTED) begin logic [`XLEN-1:0] AMOResult; -// amoalu amoalu(.a(HRDATA), .b(WriteDataM), .funct(Funct7M), .width(MemSizeM), -// .result(AMOResult)); amoalu amoalu(.srca(HRDATAW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM), .result(AMOResult)); mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicMaskedM[1], WriteData); diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index ff29dfd70..59f5e4392 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -43,90 +43,93 @@ module fpu ( output logic [4:0] SetFflagsM, // FPU flags output logic [`XLEN-1:0] FPUResultW); // FPU result // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS - // control logic signal instantiation - logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division - logic FWriteIntD; // Write to integer register - logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal - logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; - logic [4:0] Adr1E, Adr2E, Adr3E; - - // regfile signals - logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining - logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - 
logic [`XLEN-1:0] SrcXMAligned; - logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding) - logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding) - logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) - - // div/sqrt signals - logic [63:0] FDivResultM, FDivResultW; - logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; - logic FDivSqrtDoneE; - logic [63:0] DivInput1E, DivInput2E; - logic HoldInputs; // keep forwarded inputs arround durring division - - // FMA signals - logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units - logic [161:0] AlignedAddendE, AlignedAddendM; - logic [12:0] ProdExpE, ProdExpM; - logic AddendStickyE, AddendStickyM; - logic KillProdE, KillProdM; - logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM; - logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM; - logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM; - logic [63:0] FMAResM, FMAResW; - logic [4:0] FMAFlgM, FMAFlgW; - // add/cvt signals - logic [63:0] AddSumE, AddSumM; - logic [63:0] AddSumTcE, AddSumTcM; - logic [3:0] AddSelInvE, AddSelInvM; - logic [10:0] AddExpPostSumE,AddExpPostSumM; - logic AddCorrSignE, AddCorrSignM; - logic AddOp1NormE, AddOp1NormM; - logic AddOp2NormE, AddOp2NormM; - logic AddOpANormE, AddOpANormM; - logic AddOpBNormE, AddOpBNormM; - logic AddInvalidE, AddInvalidM; - logic AddDenormInE, AddDenormInM; - logic AddSwapE, AddSwapM; - logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2 - logic AddSignAE, AddSignAM; - logic AddConvertE, AddConvertM; - logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M; - logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; - logic [10:0] AddExponentE, AddExponentM; - logic [63:0] FAddResM, FAddResW; - logic [4:0] FAddFlgM, FAddFlgW; - - // cmp signals - logic CmpNVE, CmpNVM, CmpNVW; - logic [63:0] CmpResE, CmpResM, CmpResW; - - // fsgn signals - logic [63:0] SgnResE, SgnResM; - logic SgnNVE, SgnNVM, SgnNVW; - logic 
[63:0] FResM, FResW; - logic FFlgM, FFlgW; - - // instantiation of W stage regfile signals - logic [63:0] AlignedSrcAM; - - // classify signals - logic [63:0] ClassResE, ClassResM; - - // 64-bit FPU result - logic [63:0] FPUResult64W; - logic [4:0] FPUFlagsW; - - + generate + if (`F_SUPPORTED) begin + // control logic signal instantiation + logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable + logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode + logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double + logic FDivStartD, FDivStartE; // Start division + logic FWriteIntD; // Write to integer register + logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal + logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelD, FResSelE, FResSelM; + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; + logic [4:0] Adr1E, Adr2E, Adr3E; + + // regfile signals + logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining + logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [`XLEN-1:0] SrcXMAligned; + logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding) + logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding) + logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) + + // div/sqrt signals + logic [63:0] FDivResultM, FDivResultW; + logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; + logic FDivSqrtDoneE; + logic [63:0] DivInput1E, DivInput2E; + logic HoldInputs; // keep forwarded inputs arround durring division + + // FMA signals + logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units + logic [161:0] AlignedAddendE, AlignedAddendM; + logic [12:0] 
ProdExpE, ProdExpM; + logic AddendStickyE, AddendStickyM; + logic KillProdE, KillProdM; + logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM; + logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM; + logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM; + logic [63:0] FMAResM, FMAResW; + logic [4:0] FMAFlgM, FMAFlgW; + + // add/cvt signals + logic [63:0] AddSumE, AddSumM; + logic [63:0] AddSumTcE, AddSumTcM; + logic [3:0] AddSelInvE, AddSelInvM; + logic [10:0] AddExpPostSumE,AddExpPostSumM; + logic AddCorrSignE, AddCorrSignM; + logic AddOp1NormE, AddOp1NormM; + logic AddOp2NormE, AddOp2NormM; + logic AddOpANormE, AddOpANormM; + logic AddOpBNormE, AddOpBNormM; + logic AddInvalidE, AddInvalidM; + logic AddDenormInE, AddDenormInM; + logic AddSwapE, AddSwapM; + logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2 + logic AddSignAE, AddSignAM; + logic AddConvertE, AddConvertM; + logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M; + logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; + logic [10:0] AddExponentE, AddExponentM; + logic [63:0] FAddResM, FAddResW; + logic [4:0] FAddFlgM, FAddFlgW; + + // cmp signals + logic CmpNVE, CmpNVM, CmpNVW; + logic [63:0] CmpResE, CmpResM, CmpResW; + + // fsgn signals + logic [63:0] SgnResE, SgnResM; + logic SgnNVE, SgnNVM, SgnNVW; + logic [63:0] FResM, FResW; + logic FFlgM, FFlgW; + + // instantiation of W stage regfile signals + logic [63:0] AlignedSrcAM; + + // classify signals + logic [63:0] ClassResE, ClassResM; + + // 64-bit FPU result + logic [63:0] FPUResult64W; + logic [4:0] FPUFlagsW; + + @@ -134,189 +137,19 @@ module fpu ( - //DECODE STAGE - - - // top-level controller for FPU - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), - .FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, - .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - - // regfile instantiation - fregfile fregfile (clk, reset, 
FWriteEnW, - InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, - FPUResult64W, - FRD1D, FRD2D, FRD3D); - - - - - - - - - - //***************** - // D/E pipe registers - //***************** - flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); - flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, - {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); - - - - - - - - - - - - - - - //EXECUTION STAGE - - // Hazard unit for FPU - fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD, - .ForwardXE, .ForwardYE, .ForwardZE); - - // forwarding muxs - mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); - mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); - mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); - - - // first of two-stage instance of floating-point fused multiply-add unit - fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE, - .ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, - .XNaNE, .YNaNE, .ZNaNE ); - - // first and only instance of floating-point divider - logic fpdivClk; - - clockgater fpdivclkg(.E(FDivStartE), - .SE(1'b0), - .CLK(clk), - .ECLK(fpdivClk)); - - // capture the inputs for div/sqrt - flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E), - .en(~HoldInputs), .clear(FDivSqrtDoneE), - .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E), - 
.en(~HoldInputs), .clear(FDivSqrtDoneE), - .reset(reset), .clk(clk)); - - fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, - .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, - .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); - - - - // first of two-stage instance of floating-point add/cvt unit - fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, - .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, - .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, - .AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE); - - // first and only instance of floating-point comparator - fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE); - - // first and only instance of floating-point sign converter - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE); - - // first and only instance of floating-point classify unit - fclassify fclassify (.SrcXE, .FmtE, .ClassResE); - - // output for store instructions - assign FWriteDataE = FmtE ? 
SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; - //***swap to mux - - - - - - - - - - - //***************** - // E/M pipe registers - //***************** - flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); - flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); - flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); - - flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); - flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); - flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE}, - {AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM}); - - flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); - flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); - flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); - flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); - flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); - flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); - flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM, - {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE}, - {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM}); - - flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); - flopenrc 
#(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); - - flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); - flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); - - flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, - {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}, - {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); - - flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - - - - - - - - - //BEGIN MEMORY STAGE - - mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM); - mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM); - - //***change to mux - assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; - mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM); - - // second instance of two-stage FMA unit - fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM, - .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, - .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, - .FMAResM, .FMAFlgM); - - // second instance of two-stage floating-point add/cvt unit - fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, - .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, - .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, - .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM); - - // Align SrcA to MSB when single precicion - mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); + //DECODE STAGE + + + // top-level controller for FPU + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), + .FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, 
.FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); + + // regfile instantiation + fregfile fregfile (clk, reset, FWriteEnW, + InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, + FPUResult64W, + FRD1D, FRD2D, FRD3D); @@ -326,77 +159,260 @@ module fpu ( + //***************** + // D/E pipe registers + //***************** + flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); + flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); + flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); + + + + + + + + + + + + + + + //EXECUTION STAGE + + // Hazard unit for FPU + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD, + .ForwardXE, .ForwardYE, .ForwardZE); + + // forwarding muxs + mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); + mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); + mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); + + + // first of two-stage instance of floating-point fused multiply-add unit + fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE, + .ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, + .XNaNE, .YNaNE, .ZNaNE ); + + // first and only instance of floating-point divider + logic fpdivClk; + + clockgater fpdivclkg(.E(FDivStartE), + .SE(1'b0), + .CLK(clk), + .ECLK(fpdivClk)); + + // capture the inputs for div/sqrt + flopenrc #(64) reg_input1 (.d(SrcXE), 
.q(DivInput1E), + .en(~HoldInputs), .clear(FDivSqrtDoneE), + .reset(reset), .clk(clk)); + flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E), + .en(~HoldInputs), .clear(FDivSqrtDoneE), + .reset(reset), .clk(clk)); + + fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, + .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, + .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); + + + + // first of two-stage instance of floating-point add/cvt unit + fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, + .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, + .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, + .AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE); + + // first and only instance of floating-point comparator + fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE); + + // first and only instance of floating-point sign converter + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE); + + // first and only instance of floating-point classify unit + fclassify fclassify (.SrcXE, .FmtE, .ClassResE); + + // output for store instructions + assign FWriteDataE = FmtE ? 
SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; + //***swap to mux + + + + + + + + + + + //***************** + // E/M pipe registers + //***************** + flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); + flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); + flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); + + flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); + flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); + flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); + flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE}, + {AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM}); + + flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); + flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); + flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); + flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); + flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); + flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); + flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); + flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); + flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM, + {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE}, + {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM}); + + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); + flopenrc 
#(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); + + flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); + flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); + + flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}, + {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); + + flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); + + + + + + + + + //BEGIN MEMORY STAGE + + mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM); + mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM); + + //***change to mux + assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; + mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM); + + // second instance of two-stage FMA unit + fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM, + .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, + .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, + .FMAResM, .FMAFlgM); + + // second instance of two-stage floating-point add/cvt unit + fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, + .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, + .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, + .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM); + + // Align SrcA to MSB when single precicion + mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); - //***************** - // M/W pipe registers - //***************** - flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); - flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW); - - flopenrc #(64) 
MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); - flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW); - - flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); - flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW); - - flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW); - flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); - - flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); - - flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, - {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); - - - - - - - //######################################### - // BEGIN WRITEBACK STAGE - //######################################### -//***turn into muxs - always_comb begin - case (FResultSelW) - 3'b000 : FPUFlagsW = 5'b0; - 3'b001 : FPUFlagsW = FMAFlgW; - 3'b010 : FPUFlagsW = FAddFlgW; - 3'b011 : FPUFlagsW = FDivSqrtFlgW; - 3'b100 : FPUFlagsW = {4'b0,FFlgW}; - default : FPUFlagsW = 5'bxxxxx; - endcase - end - - always_comb begin - case (FResultSelW) - 3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; - 3'b001 : FPUResult64W = FMAResW; - 3'b010 : FPUResult64W = FAddResW; - 3'b011 : FPUResult64W = FDivResultW; - 3'b100 : FPUResult64W = FResW; - default : FPUResult64W = 64'bxxxxx; - endcase - end - - - // interface between XLEN size datapath and double-precision sized - // floating-point results - // - // define offsets for LSB zero extension or truncation - always_comb begin - // zero extension -//***turn into mux - FPUResultW = FmtW ? 
FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; - //*** put into mem stage - SetFflagsM = FPUFlagsW; + + + + + //***************** + // M/W pipe registers + //***************** + flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW); + + flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); + flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW); + + flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); + flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW); + + flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW); + flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); + + flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); + + flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, + {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); + + + + + + + //######################################### + // BEGIN WRITEBACK STAGE + //######################################### + + + + + + //***turn into muxs + always_comb begin + case (FResultSelW) + 3'b000 : FPUFlagsW = 5'b0; + 3'b001 : FPUFlagsW = FMAFlgW; + 3'b010 : FPUFlagsW = FAddFlgW; + 3'b011 : FPUFlagsW = FDivSqrtFlgW; + 3'b100 : FPUFlagsW = {4'b0,FFlgW}; + default : FPUFlagsW = 5'bxxxxx; + endcase + end + + always_comb begin + case (FResultSelW) + 3'b000 : FPUResult64W = FmtW ? 
{ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; + 3'b001 : FPUResult64W = FMAResW; + 3'b010 : FPUResult64W = FAddResW; + 3'b011 : FPUResult64W = FDivResultW; + 3'b100 : FPUResult64W = FResW; + default : FPUResult64W = 64'bxxxxx; + endcase + end + + + // interface between XLEN size datapath and double-precision sized + // floating-point results + // + // define offsets for LSB zero extension or truncation + always_comb begin + // zero extension + //***turn into mux + FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; + //*** put into mem stage + SetFflagsM = FPUFlagsW; + end + end else begin // no F_SUPPORTED; tie outputs low + assign FStallD = 0; + assign FWriteIntE = 0; + assign FWriteIntM = 0; + assign FWriteIntW = 0; + assign FWriteDataE = 0; + assign FIntResM = 0; + assign FDivBusyE = 0; + assign IllegalFPUInstrD = 1; + assign SetFflagsM = 0; + assign FPUResultW = 0; end + endgenerate endmodule // fpu diff --git a/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv b/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv index 7b4d3f64e..eca5fadf6 100644 --- a/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv +++ b/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv @@ -2,6 +2,7 @@ // It is unsigned and uses Radix-4 Booth encoding. // This file was automatically generated by tdm.pl. 
+/* module mult64 (x, y, P); input [63:0] x; @@ -18,7 +19,8 @@ module mult64 (x, y, P); //assign P = Pt[127:0]; ldf128 cpa (cout, P, Sum, Carry, 1'b0); -endmodule // mult64 +endmodule // mult64 +*/ module multiplier( y, x, Sum, Carry ); @@ -11612,7 +11614,7 @@ module r4be(x0,x1,x2,sing,doub,neg); endmodule // r4be - +/* // Use maj and two xor2's, with cin being late module fullAdd_xc(cout, s, a, b, cin); @@ -11629,7 +11631,7 @@ module fullAdd_xc(cout, s, a, b, cin); maj MAJ_0_112(cout,a,b,cin); endmodule // fullAdd_xc - +*/ module maj(y, a, b, c); @@ -11645,6 +11647,7 @@ module maj(y, a, b, c); endmodule // maj +/* // 4:2 Weinberger compressor module fourtwo_x(t, S, C, X, Y, Z, W, t_1); @@ -11664,6 +11667,7 @@ module fourtwo_x(t, S, C, X, Y, Z, W, t_1); fullAdd_xc secondCSA_0_160(C,S,W,t_1,intermediate); endmodule // fourtwo_x +*/ module inverter(egress, in); @@ -11767,6 +11771,7 @@ module fullAdd_x(cout,sum,a,b,c); endmodule // fullAdd_x +/* module nand2(egress,in1,in2); output egress; @@ -11800,7 +11805,7 @@ module and3(y,a,b,c); assign y = a&b&c; endmodule // and3 - +*/ module and2(y,a,b); output y; @@ -11810,7 +11815,7 @@ module and2(y,a,b); assign y = a&b; endmodule // and2 - +/* module nor2(egress,in1,in2); output egress; @@ -11902,6 +11907,7 @@ module oai(egress,in1,in2,in3); assign egress = ~(in3 & (in1|in2)); endmodule // oai +*/ module aoi(egress,in1,in2,in3); @@ -11949,7 +11955,7 @@ module fullAdd_i(cout_b,sum_b,a,b,c); sum_b sum_0_32(sum_b,a,b,c,cout_b); endmodule // fullAdd_i - +/* module fullAdd(cout,s,a,b,c); output cout; @@ -11979,7 +11985,7 @@ module blackCell(g_i_j, p_i_j, g_i_k, p_i_k, g_kneg1_j, p_kneg1_j); and2 and_0_48(p_i_j, p_i_k, p_kneg1_j); endmodule // blackCell - +*/ module grayCell(g_i_j, g_i_k, p_i_k, g_kneg1_j); output g_i_j; diff --git a/wally-pipelined/src/fpu/shifter_denorm.sv b/wally-pipelined/src/fpu/shifter_denorm.sv index ed2083816..e56b00729 100755 --- a/wally-pipelined/src/fpu/shifter_denorm.sv +++ 
b/wally-pipelined/src/fpu/shifter_denorm.sv @@ -118,6 +118,7 @@ module barrel_shifter_r57 (Z, Sticky, A, Shift); endmodule // barrel_shifter_r57 +/* module barrel_shifter_r64 (Z, Sticky, A, Shift); input [63:0] A; @@ -160,3 +161,4 @@ module barrel_shifter_r64 (Z, Sticky, A, Shift); assign Sticky = (S != sixtythreezeros); endmodule // barrel_shifter_r64 +*/ \ No newline at end of file diff --git a/wally-pipelined/src/generic/flop.sv b/wally-pipelined/src/generic/flop.sv index a5636c6f4..cb583de2e 100644 --- a/wally-pipelined/src/generic/flop.sv +++ b/wally-pipelined/src/generic/flop.sv @@ -77,7 +77,7 @@ module flopenr #(parameter WIDTH = 8) ( output logic [WIDTH-1:0] q); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 0; + if (reset) q <= #1 0; else if (en) q <= #1 d; endmodule diff --git a/wally-pipelined/src/generic/shift.sv b/wally-pipelined/src/generic/shift.sv index 881525882..70e1076d6 100755 --- a/wally-pipelined/src/generic/shift.sv +++ b/wally-pipelined/src/generic/shift.sv @@ -38,13 +38,12 @@ module shift_right #(parameter WIDTH=8) assign stage[0] = A; generate - for (i=0;i<$clog2(WIDTH);i=i+1) - begin : genbit - mux2 #(WIDTH) mux_inst (stage[i], + for (i=0;i<$clog2(WIDTH);i=i+1) begin : genbit + mux2 #(WIDTH) mux_inst (stage[i], {{(WIDTH/(2**(i+1))){1'b0}}, stage[i][WIDTH-1:WIDTH/(2**(i+1))]}, Shift[$clog2(WIDTH)-i-1], stage[i+1]); - end + end endgenerate assign Z = stage[$clog2(WIDTH)]; @@ -60,13 +59,12 @@ module shift_left #(parameter WIDTH=8) assign stage[0] = A; generate - for (i=0;i<$clog2(WIDTH);i=i+1) - begin : genbit - mux2 #(WIDTH) mux_inst (stage[i], + for (i=0;i<$clog2(WIDTH);i=i+1) begin : genbit + mux2 #(WIDTH) mux_inst (stage[i], {stage[i][WIDTH-1-WIDTH/(2**(i+1)):0], {(WIDTH/(2**(i+1))){1'b0}}}, Shift[$clog2(WIDTH)-i-1], stage[i+1]); - end + end endgenerate assign Z = stage[$clog2(WIDTH)]; diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv index 102fbbedf..ac2c06dd5 100644 --- 
a/wally-pipelined/src/ieu/alu.sv +++ b/wally-pipelined/src/ieu/alu.sv @@ -42,7 +42,7 @@ module alu #(parameter WIDTH=32) ( assign {carry, presum} = a + condinvb + {{(WIDTH-1){1'b0}},alucontrol[3]}; // support W-type RV64I ADDW/SUBW/ADDIW that sign-extend 32-bit result to 64 bits - generate + generate if (WIDTH==64) assign sum = w64 ? {{32{presum[31]}}, presum[31:0]} : presum; else diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 44a40045a..f041fce63 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -129,7 +129,7 @@ module datapath ( flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW); // handle Store Conditional result if atomic extension supported - generate + generate if (`A_SUPPORTED) assign SCResultW = SquashSCW ? {{(`XLEN-1){1'b0}}, 1'b1} : {{(`XLEN-1){1'b0}}, 1'b0}; else diff --git a/wally-pipelined/src/ifu/SRAM2P1R1W.sv b/wally-pipelined/src/ifu/SRAM2P1R1W.sv index d71f8bc4c..046aacc63 100644 --- a/wally-pipelined/src/ifu/SRAM2P1R1W.sv +++ b/wally-pipelined/src/ifu/SRAM2P1R1W.sv @@ -97,11 +97,11 @@ module SRAM2P1R1W // write port generate - for (index = 0; index < Width; index = index + 1) begin + for (index = 0; index < Width; index = index + 1) begin:mem always_ff @ (posedge clk) begin - if (WEN1Q & BitWEN1[index]) begin - memory[WA1Q][index] <= WD1Q[index]; - end + if (WEN1Q & BitWEN1[index]) begin + memory[WA1Q][index] <= WD1Q[index]; + end end end endgenerate diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index ddfd88cc5..24952edfa 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -70,15 +70,16 @@ module ifu ( input logic [`XLEN-1:0] PageTableEntryF, input logic [1:0] PageTypeF, input logic [`XLEN-1:0] SATP_REGW, - input logic STATUS_MXR, STATUS_SUM, + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, input logic ITLBWriteF, ITLBFlushF, input logic 
WalkerInstrPageFaultF, output logic ITLBMissF, ITLBHitF, // pmp/pma (inside mmu) signals. *** temporarily from AHB bus but eventually replace with internal versions pre H - input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF, output logic ISquashBusAccessF @@ -115,7 +116,7 @@ module ifu ( end endgenerate - mmu #(.ENTRY_BITS(`ITLB_ENTRY_BITS), .IMMU(1)) + mmu #(.TLB_ENTRIES(`ITLB_ENTRIES), .IMMU(1)) itlb(.TLBAccessType(2'b10), .VirtualAddress(PCF), .Size(2'b10), @@ -187,7 +188,7 @@ module ifu ( flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF); // branch and jump predictor - generate + generate if (`BPRED_ENABLED == 1) begin : bpred // I am making the port connection explicit for now as I want to see them and they will be changing. 
bpred bpred(.*, diff --git a/wally-pipelined/src/ifu/localHistoryPredictor.sv b/wally-pipelined/src/ifu/localHistoryPredictor.sv index 8aaa85c0d..6c5c94783 100644 --- a/wally-pipelined/src/ifu/localHistoryPredictor.sv +++ b/wally-pipelined/src/ifu/localHistoryPredictor.sv @@ -67,7 +67,7 @@ module localHistoryPredictor genvar index; generate - for (index = 0; index < 2**m; index = index +1) begin + for (index = 0; index < 2**m; index = index +1) begin:localhist flopenr #(k) LocalHistoryRegister(.clk(clk), .reset(reset), diff --git a/wally-pipelined/src/lsu/dcache.sv b/wally-pipelined/src/lsu/dcache.sv index fec70ef4b..e8dfeb5cd 100644 --- a/wally-pipelined/src/lsu/dcache.sv +++ b/wally-pipelined/src/lsu/dcache.sv @@ -151,7 +151,7 @@ module dcachecontroller #(parameter LINESIZE = 256) ( genvar i; generate - for (i=0; i < WORDSPERLINE; i++) begin + for (i=0; i < WORDSPERLINE; i++) begin:sb flopenr #(`XLEN) flop(clk, reset, FetchState & (i == FetchWordNum), ReadDataW, DCacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]); end endgenerate diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 36d4715fe..a2bcf52bb 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -31,8 +31,7 @@ module lsu ( input logic clk, reset, input logic StallM, FlushM, StallW, FlushW, - output logic DataStall, - output logic HPTWReady, + output logic DCacheStall, // Memory Stage // connected to cpu (controls) @@ -65,22 +64,25 @@ module lsu ( output logic [1:0] AtomicMaskedM, input logic MemAckW, // from ahb input logic [`XLEN-1:0] HRDATAW, // from ahb - output logic [2:0] Funct3MfromLSU, - output logic StallWfromLSU, + output logic [2:0] SizeFromLSU, + output logic StallWfromLSU, // mmu management // page table walker - input logic [`XLEN-1:0] PageTableEntryM, - input logic [1:0] PageTypeM, input logic [`XLEN-1:0] SATP_REGW, // from csr - input logic STATUS_MXR, STATUS_SUM, // from csr - input logic DTLBWriteM, - output logic DTLBMissM, - input 
logic DisableTranslation, // used to stop intermediate PTE physical addresses being saved to TLB. - + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, + input logic [`XLEN-1:0] PCF, + input logic ITLBMissF, + output logic [`XLEN-1:0] PageTableEntryF, + output logic [1:0] PageTypeF, + output logic ITLBWriteF, + output logic WalkerInstrPageFaultF, + output logic WalkerLoadPageFaultM, + output logic WalkerStorePageFaultM, output logic DTLBHitM, // not connected @@ -88,8 +90,8 @@ module lsu ( input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well. input logic [2:0] HSIZE, HBURST, input logic HWRITE, - input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker. + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker. output logic PMALoadAccessFaultM, PMAStoreAccessFaultM, output logic PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa. @@ -119,14 +121,106 @@ module lsu ( logic PMPInstrAccessFaultF, PMAInstrAccessFaultF; // *** these are just so that the mmu has somewhere to put these outputs since they aren't used in dmem // *** if you're allowed to parameterize outputs/ inputs existence, these are an easy delete. 
+ logic DTLBMissM; + logic [`XLEN-1:0] PageTableEntryM; + logic [1:0] PageTypeM; + logic DTLBWriteM; + logic [`XLEN-1:0] MMUReadPTE; + logic MMUReady; + logic HPTWStall; + logic [`XLEN-1:0] MMUPAdr; + logic MMUTranslate; + logic HPTWRead; + logic [1:0] MemRWMtoLSU; + logic [2:0] SizeToLSU; + logic [1:0] AtomicMtoLSU; + logic [`XLEN-1:0] MemAdrMtoLSU; + logic [`XLEN-1:0] WriteDataMtoLSU; + logic [`XLEN-1:0] ReadDataWFromLSU; + logic StallWtoLSU; + logic CommittedMfromLSU; + logic SquashSCWfromLSU; + logic DataMisalignedMfromLSU; + logic HPTWReady; + logic LSUStall; + logic DisableTranslation; // used to stop intermediate PTE physical addresses being saved to TLB. + + + + // for time being until we have a dcache the AHB Lite read bus HRDATAW will be connected to the // CPU's read data input ReadDataW. - assign ReadDataW = HRDATAW; + assign ReadDataWFromLSU = HRDATAW; + + + pagetablewalker pagetablewalker( + .clk(clk), + .reset(reset), + .SATP_REGW(SATP_REGW), + .PCF(PCF), + .MemAdrM(MemAdrM), + .ITLBMissF(ITLBMissF), + .DTLBMissM(DTLBMissM), + .MemRWM(MemRWM), + .PageTableEntryF(PageTableEntryF), + .PageTableEntryM(PageTableEntryM), + .PageTypeF(PageTypeF), + .PageTypeM(PageTypeM), + .ITLBWriteF(ITLBWriteF), + .DTLBWriteM(DTLBWriteM), + .MMUReadPTE(MMUReadPTE), + .MMUReady(HPTWReady), + .HPTWStall(HPTWStall), + .MMUPAdr(MMUPAdr), + .MMUTranslate(MMUTranslate), + .HPTWRead(HPTWRead), + .WalkerInstrPageFaultF(WalkerInstrPageFaultF), + .WalkerLoadPageFaultM(WalkerLoadPageFaultM), + .WalkerStorePageFaultM(WalkerStorePageFaultM)); + + + + // arbiter between IEU and pagetablewalker + lsuArb arbiter(.clk(clk), + .reset(reset), + // HPTW connection + .HPTWTranslate(MMUTranslate), + .HPTWRead(HPTWRead), + .HPTWPAdr(MMUPAdr), + .HPTWReadPTE(MMUReadPTE), + .HPTWStall(HPTWStall), + // CPU connection + .MemRWM(MemRWM), + .Funct3M(Funct3M), + .AtomicM(AtomicM), + .MemAdrM(MemAdrM), + .WriteDataM(WriteDataM), // *** Need to remove this. 
+ .StallW(StallW), + .ReadDataW(ReadDataW), + .CommittedM(CommittedM), + .SquashSCW(SquashSCW), + .DataMisalignedM(DataMisalignedM), + .DCacheStall(DCacheStall), + // LSU + .DisableTranslation(DisableTranslation), + .MemRWMtoLSU(MemRWMtoLSU), + .SizeToLSU(SizeToLSU), + .AtomicMtoLSU(AtomicMtoLSU), + .MemAdrMtoLSU(MemAdrMtoLSU), + .WriteDataMtoLSU(WriteDataMtoLSU), // *** ?????????????? + .StallWtoLSU(StallWtoLSU), + .CommittedMfromLSU(CommittedMfromLSU), + .SquashSCWfromLSU(SquashSCWfromLSU), + .DataMisalignedMfromLSU(DataMisalignedMfromLSU), + .ReadDataWFromLSU(ReadDataWFromLSU), + .DataStall(LSUStall)); + + - mmu #(.ENTRY_BITS(`DTLB_ENTRY_BITS), .IMMU(0)) - dmmu(.TLBAccessType(MemRWM), - .VirtualAddress(MemAdrM), - .Size(Funct3M[1:0]), + mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) + dmmu(.TLBAccessType(MemRWMtoLSU), + .VirtualAddress(MemAdrMtoLSU), + .Size(SizeToLSU[1:0]), .PTEWriteVal(PageTableEntryM), .PageTypeWriteVal(PageTypeM), .TLBWrite(DTLBWriteM), @@ -137,45 +231,46 @@ module lsu ( .TLBPageFault(DTLBPageFaultM), .ExecuteAccessF(1'b0), .AtomicAccessM(AtomicMaskedM[1]), - .WriteAccessM(MemRWM[0]), - .ReadAccessM(MemRWM[1]), + .WriteAccessM(MemRWMtoLSU[0]), + .ReadAccessM(MemRWMtoLSU[1]), .SquashBusAccess(DSquashBusAccessM), + .DisableTranslation(DisableTranslation), // .SelRegions(DHSELRegionsM), .*); // *** the pma/pmp instruction acess faults don't really matter here. is it possible to parameterize which outputs exist? 
// Specify which type of page fault is occurring - assign DTLBLoadPageFaultM = DTLBPageFaultM & MemRWM[1]; - assign DTLBStorePageFaultM = DTLBPageFaultM & MemRWM[0]; + assign DTLBLoadPageFaultM = DTLBPageFaultM & MemRWMtoLSU[1]; + assign DTLBStorePageFaultM = DTLBPageFaultM & MemRWMtoLSU[0]; // Determine if an Unaligned access is taking place always_comb - case(Funct3M[1:0]) - 2'b00: DataMisalignedM = 0; // lb, sb, lbu - 2'b01: DataMisalignedM = MemAdrM[0]; // lh, sh, lhu - 2'b10: DataMisalignedM = MemAdrM[1] | MemAdrM[0]; // lw, sw, flw, fsw, lwu - 2'b11: DataMisalignedM = |MemAdrM[2:0]; // ld, sd, fld, fsd + case(SizeToLSU[1:0]) + 2'b00: DataMisalignedMfromLSU = 0; // lb, sb, lbu + 2'b01: DataMisalignedMfromLSU = MemAdrMtoLSU[0]; // lh, sh, lhu + 2'b10: DataMisalignedMfromLSU = MemAdrMtoLSU[1] | MemAdrMtoLSU[0]; // lw, sw, flw, fsw, lwu + 2'b11: DataMisalignedMfromLSU = |MemAdrMtoLSU[2:0]; // ld, sd, fld, fsd endcase // Squash unaligned data accesses and failed store conditionals // *** this is also the place to squash if the cache is hit - // Changed DataMisalignedM to a larger combination of trap sources + // Changed DataMisalignedMfromLSU to a larger combination of trap sources // NonBusTrapM is anything that the bus doesn't contribute to producing // By contrast, using TrapM results in circular logic errors - assign MemReadM = MemRWM[1] & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED; - assign MemWriteM = MemRWM[0] & ~NonBusTrapM & ~DTLBMissM & ~SquashSCM & CurrState != STATE_STALLED; - assign AtomicMaskedM = CurrState != STATE_STALLED ? AtomicM : 2'b00 ; + assign MemReadM = MemRWMtoLSU[1] & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED; + assign MemWriteM = MemRWMtoLSU[0] & ~NonBusTrapM & ~DTLBMissM & ~SquashSCM & CurrState != STATE_STALLED; + assign AtomicMaskedM = CurrState != STATE_STALLED ? AtomicMtoLSU : 2'b00 ; assign MemAccessM = MemReadM | MemWriteM; // Determine if M stage committed // Reset whenever unstalled. 
Set when access successfully occurs - flopr #(1) committedMreg(clk,reset,(CommittedM | CommitM) & StallM,preCommittedM); - assign CommittedM = preCommittedM | CommitM; + flopr #(1) committedMreg(clk,reset,(CommittedMfromLSU | CommitM) & StallM,preCommittedM); + assign CommittedMfromLSU = preCommittedM | CommitM; // Determine if address is valid - assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; - assign LoadAccessFaultM = MemRWM[1]; - assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0]; - assign StoreAccessFaultM = MemRWM[0]; + assign LoadMisalignedFaultM = DataMisalignedMfromLSU & MemRWMtoLSU[1]; + assign LoadAccessFaultM = MemRWMtoLSU[1]; + assign StoreMisalignedFaultM = DataMisalignedMfromLSU & MemRWMtoLSU[0]; + assign StoreAccessFaultM = MemRWMtoLSU[0]; // Handle atomic load reserved / store conditional generate @@ -184,9 +279,9 @@ module lsu ( logic ReservationValidM, ReservationValidW; logic lrM, scM, WriteAdrMatchM; - assign lrM = MemReadM && AtomicM[0]; - assign scM = MemRWM[0] && AtomicM[0]; - assign WriteAdrMatchM = MemRWM[0] && (MemPAdrM[`PA_BITS-1:2] == ReservationPAdrW) && ReservationValidW; + assign lrM = MemReadM && AtomicMtoLSU[0]; + assign scM = MemRWMtoLSU[0] && AtomicMtoLSU[0]; + assign WriteAdrMatchM = MemRWMtoLSU[0] && (MemPAdrM[`PA_BITS-1:2] == ReservationPAdrW) && ReservationValidW; assign SquashSCM = scM && ~WriteAdrMatchM; always_comb begin // ReservationValidM (next value of valid reservation) if (lrM) ReservationValidM = 1; // set valid on load reserve @@ -195,15 +290,15 @@ module lsu ( end flopenrc #(`PA_BITS-2) resadrreg(clk, reset, FlushW, lrM, MemPAdrM[`PA_BITS-1:2], ReservationPAdrW); // could drop clear on this one but not valid flopenrc #(1) resvldreg(clk, reset, FlushW, lrM, ReservationValidM, ReservationValidW); - flopenrc #(1) squashreg(clk, reset, FlushW, ~StallW, SquashSCM, SquashSCW); + flopenrc #(1) squashreg(clk, reset, FlushW, ~StallWtoLSU, SquashSCM, SquashSCWfromLSU); end else begin // Atomic operations 
not supported assign SquashSCM = 0; - assign SquashSCW = 0; + assign SquashSCWfromLSU = 0; end endgenerate // Data stall - //assign DataStall = (NextState == STATE_FETCH) || (NextState == STATE_FETCH_AMO_1) || (NextState == STATE_FETCH_AMO_2); + //assign LSUStall = (NextState == STATE_FETCH) || (NextState == STATE_FETCH_AMO_1) || (NextState == STATE_FETCH_AMO_2); assign HPTWReady = (CurrState == STATE_READY); @@ -224,22 +319,22 @@ module lsu ( STATE_READY: if (DTLBMissM) begin NextState = STATE_PTW_READY; - DataStall = 1'b1; + LSUStall = 1'b1; end else if (AtomicMaskedM[1]) begin NextState = STATE_FETCH_AMO_1; // *** should be some misalign check - DataStall = 1'b1; - end else if((MemReadM & AtomicM[0]) | (MemWriteM & AtomicM[0])) begin + LSUStall = 1'b1; + end else if((MemReadM & AtomicMtoLSU[0]) | (MemWriteM & AtomicMtoLSU[0])) begin NextState = STATE_FETCH_AMO_2; - DataStall = 1'b1; - end else if (MemAccessM & ~DataMisalignedM) begin + LSUStall = 1'b1; + end else if (MemAccessM & ~DataMisalignedMfromLSU) begin NextState = STATE_FETCH; - DataStall = 1'b1; + LSUStall = 1'b1; end else begin NextState = STATE_READY; - DataStall = 1'b0; + LSUStall = 1'b0; end STATE_FETCH_AMO_1: begin - DataStall = 1'b1; + LSUStall = 1'b1; if (MemAckW) begin NextState = STATE_FETCH_AMO_2; end else begin @@ -247,45 +342,45 @@ module lsu ( end end STATE_FETCH_AMO_2: begin - DataStall = 1'b1; - if (MemAckW & ~StallW) begin + LSUStall = 1'b1; + if (MemAckW & ~StallWtoLSU) begin NextState = STATE_FETCH_AMO_2; - end else if (MemAckW & StallW) begin + end else if (MemAckW & StallWtoLSU) begin NextState = STATE_STALLED; end else begin NextState = STATE_FETCH_AMO_2; end end STATE_FETCH: begin - DataStall = 1'b1; - if (MemAckW & ~StallW) begin + LSUStall = 1'b1; + if (MemAckW & ~StallWtoLSU) begin NextState = STATE_READY; - end else if (MemAckW & StallW) begin + end else if (MemAckW & StallWtoLSU) begin NextState = STATE_STALLED; end else begin NextState = STATE_FETCH; end end STATE_STALLED: 
begin - DataStall = 1'b0; - if (~StallW) begin + LSUStall = 1'b0; + if (~StallWtoLSU) begin NextState = STATE_READY; end else begin NextState = STATE_STALLED; end end STATE_PTW_READY: begin - DataStall = 1'b0; + LSUStall = 1'b0; if (DTLBWriteM) begin NextState = STATE_READY; - end else if (MemReadM & ~DataMisalignedM) begin + end else if (MemReadM & ~DataMisalignedMfromLSU) begin NextState = STATE_PTW_FETCH; end else begin NextState = STATE_PTW_READY; end end STATE_PTW_FETCH : begin - DataStall = 1'b1; + LSUStall = 1'b1; if (MemAckW & ~DTLBWriteM) begin NextState = STATE_PTW_READY; end else if (MemAckW & DTLBWriteM) begin @@ -298,15 +393,15 @@ module lsu ( NextState = STATE_READY; end default: begin - DataStall = 1'b0; + LSUStall = 1'b0; NextState = STATE_READY; end endcase end // always_comb // *** for now just pass through size - assign Funct3MfromLSU = Funct3M; - assign StallWfromLSU = StallW; + assign SizeFromLSU = SizeToLSU; + assign StallWfromLSU = StallWtoLSU; endmodule diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 76d89798a..23e88970f 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -35,7 +35,6 @@ module lsuArb input logic [`XLEN-1:0] HPTWPAdr, // to page table walker. 
output logic [`XLEN-1:0] HPTWReadPTE, - output logic HPTWReady, output logic HPTWStall, // from CPU @@ -55,7 +54,7 @@ module lsuArb // to LSU output logic DisableTranslation, output logic [1:0] MemRWMtoLSU, - output logic [2:0] Funct3MtoLSU, + output logic [2:0] SizeToLSU, output logic [1:0] AtomicMtoLSU, output logic [`XLEN-1:0] MemAdrMtoLSU, output logic [`XLEN-1:0] WriteDataMtoLSU, @@ -65,7 +64,6 @@ module lsuArb input logic SquashSCWfromLSU, input logic DataMisalignedMfromLSU, input logic [`XLEN-1:0] ReadDataWFromLSU, - input logic HPTWReadyfromLSU, input logic DataStall ); @@ -89,6 +87,7 @@ module lsuArb statetype CurrState, NextState; logic SelPTW; logic HPTWStallD; + logic [2:0] PTWSize; flopenl #(.TYPE(statetype)) StateReg(.clk(clk), @@ -140,12 +139,9 @@ module lsuArb assign MemRWMtoLSU = SelPTW ? {HPTWRead, 1'b0} : MemRWM; generate - if (`XLEN == 32) begin - assign Funct3MtoLSU = SelPTW ? 3'b010 : Funct3M; - end else begin - assign Funct3MtoLSU = SelPTW ? 3'b011 : Funct3M; - end + assign PTWSize = (`XLEN==32 ? 3'b010 : 3'b011); // 32 or 64-bit access from hptw endgenerate + mux2 #(3) sizemux(Funct3M, PTWSize, SelPTW, SizeToLSU); assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM; assign MemAdrMtoLSU = SelPTW ? HPTWPAdr : MemAdrM; @@ -159,7 +155,6 @@ module lsuArb assign CommittedM = SelPTW ? 1'b0 : CommittedMfromLSU; assign SquashSCW = SelPTW ? 1'b0 : SquashSCWfromLSU; assign DataMisalignedM = SelPTW ? 1'b0 : DataMisalignedMfromLSU; - assign HPTWReady = HPTWReadyfromLSU; // *** need to rename DcacheStall and Datastall. // not clear at all. I think it should be LSUStall from the LSU, // which is demuxed to HPTWStall and CPUDataStall? (not sure on this last one). 
diff --git a/wally-pipelined/src/mmu/adrdecs.sv b/wally-pipelined/src/mmu/adrdecs.sv index 8585a4ee7..94951aad7 100644 --- a/wally-pipelined/src/mmu/adrdecs.sv +++ b/wally-pipelined/src/mmu/adrdecs.sv @@ -24,12 +24,13 @@ /////////////////////////////////////////// `include "wally-config.vh" + // verilator lint_off UNOPTFLAT module adrdecs ( input logic [`PA_BITS-1:0] PhysicalAddress, input logic AccessRW, AccessRX, AccessRWX, input logic [1:0] Size, - output logic [5:0] SelRegions + output logic [6:0] SelRegions ); // Determine which region of physical memory (if any) is being accessed @@ -41,5 +42,8 @@ module adrdecs ( adrdec uartdec(PhysicalAddress, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[1]); adrdec plicdec(PhysicalAddress, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[0]); + assign SelRegions[6] = ~|(SelRegions[5:0]); + endmodule + // verilator lint_on UNOPTFLAT diff --git a/wally-pipelined/src/mmu/mmu.sv b/wally-pipelined/src/mmu/mmu.sv index 8082d01ad..4faac7bc1 100644 --- a/wally-pipelined/src/mmu/mmu.sv +++ b/wally-pipelined/src/mmu/mmu.sv @@ -26,15 +26,14 @@ `include "wally-config.vh" -// The TLB will have 2**ENTRY_BITS total entries - -module mmu #(parameter ENTRY_BITS = 3, +module mmu #(parameter TLB_ENTRIES = 8, // number of TLB Entries parameter IMMU = 0) ( input logic clk, reset, // Current value of satp CSR (from privileged unit) input logic [`XLEN-1:0] SATP_REGW, - input logic STATUS_MXR, STATUS_SUM, + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, // Current privilege level of the processeor input logic [1:0] PrivilegeModeW, @@ -68,7 +67,7 @@ module mmu #(parameter ENTRY_BITS = 3, // PMA checker signals input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, - input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW 
[`PMP_ENTRIES-1:0], output logic SquashBusAccess, // *** send to privileged unit @@ -82,7 +81,7 @@ module mmu #(parameter ENTRY_BITS = 3, logic Cacheable, Idempotent, AtomicAllowed; // *** here so that the pmachecker has somewhere to put these outputs. *** I'm leaving them as outputs to pma checker, but I'm stopping them here. // Translation lookaside buffer - tlb #(.ENTRY_BITS(ENTRY_BITS), .ITLB(IMMU)) tlb(.*); + tlb #(.TLB_ENTRIES(TLB_ENTRIES), .ITLB(IMMU)) tlb(.*); /////////////////////////////////////////// // Check physical memory accesses diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index d0d2152f6..45479d4ab 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -64,11 +64,6 @@ module pagetablewalker output logic HPTWRead, - - - // Stall signal - output logic MMUStall, - // Faults output logic WalkerInstrPageFaultF, output logic WalkerLoadPageFaultM, @@ -190,7 +185,6 @@ module pagetablewalker PRegEn = 1'b0; TranslationPAdr = '0; HPTWRead = 1'b0; - MMUStall = 1'b1; PageTableEntry = '0; PageType = '0; DTLBWriteM = '0; @@ -209,7 +203,6 @@ module pagetablewalker end else begin NextWalkerState = IDLE; TranslationPAdr = '0; - MMUStall = 1'b0; end end @@ -271,14 +264,12 @@ module pagetablewalker LEAF: begin NextWalkerState = IDLE; - MMUStall = 1'b0; end FAULT: begin NextWalkerState = IDLE; WalkerInstrPageFaultF = ~DTLBMissMQ; WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; WalkerStorePageFaultM = DTLBMissMQ && MemStore; - MMUStall = 1'b0; end // Default case should never happen, but is included for linter. 
@@ -293,8 +284,6 @@ module pagetablewalker assign VPN1 = TranslationVAdrQ[31:22]; assign VPN0 = TranslationVAdrQ[21:12]; - //assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || - // WalkerState == LEVEL2 || WalkerState == LEVEL1; // Capture page table entry from data cache @@ -335,7 +324,6 @@ module pagetablewalker PRegEn = 1'b0; TranslationPAdr = '0; HPTWRead = 1'b0; - MMUStall = 1'b1; PageTableEntry = '0; PageType = '0; DTLBWriteM = '0; @@ -358,7 +346,6 @@ module pagetablewalker end else begin NextWalkerState = IDLE; TranslationPAdr = '0; - MMUStall = 1'b0; end end @@ -499,7 +486,6 @@ module pagetablewalker LEAF: begin NextWalkerState = IDLE; - MMUStall = 1'b0; end FAULT: begin @@ -507,7 +493,6 @@ module pagetablewalker WalkerInstrPageFaultF = ~DTLBMissMQ; WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; WalkerStorePageFaultM = DTLBMissMQ && MemStore; - MMUStall = 1'b0; end // Default case should never happen diff --git a/wally-pipelined/src/mmu/pmachecker.sv b/wally-pipelined/src/mmu/pmachecker.sv index 26d8ac875..86abcb3f6 100644 --- a/wally-pipelined/src/mmu/pmachecker.sv +++ b/wally-pipelined/src/mmu/pmachecker.sv @@ -45,7 +45,7 @@ module pmachecker ( logic PMAAccessFault; logic AccessRW, AccessRWX, AccessRX; - logic [5:0] SelRegions; + logic [6:0] SelRegions; // Determine what type of access is being made assign AccessRW = ReadAccessM | WriteAccessM; diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv index 50d399aed..0a14d8320 100644 --- a/wally-pipelined/src/mmu/pmpadrdec.sv +++ b/wally-pipelined/src/mmu/pmpadrdec.sv @@ -76,8 +76,9 @@ module pmpadrdec ( generate assign Mask[1:0] = 2'b11; assign Mask[2] = (AdrMode == NAPOT); // mask has 0s in upper bis for NA4 region - for (i=3; i < `PA_BITS; i=i+1) + for (i=3; i < `PA_BITS; i=i+1) begin:mask assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore + end endgenerate // verilator lint_on UNOPTFLAT diff --git 
a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index 27c7e508c..9c7f11da4 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -39,7 +39,7 @@ module pmpchecker ( // this will be understood as a var. However, if we don't supply the `var` // keyword, the compiler warns us that it's interpreting the signal as a var, // which we might not intend. - input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic ExecuteAccessF, WriteAccessM, ReadAccessM, @@ -51,42 +51,28 @@ module pmpchecker ( output logic PMPStoreAccessFaultM ); - // verilator lint_off UNOPTFLAT // Bit i is high when the address falls in PMP region i logic EnforcePMP; - logic [7:0] PMPCFG [`PMP_ENTRIES-1:0]; + logic [7:0] PMPCfg[`PMP_ENTRIES-1:0]; logic [`PMP_ENTRIES-1:0] Match; // PMP Entry matches logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set - logic [`PMP_ENTRIES:0] NoLowerMatch; // None of the lower PMP entries match - logic [`PMP_ENTRIES:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] + // verilator lint_off UNOPTFLAT + logic [`PMP_ENTRIES-1:0] NoLowerMatch; // None of the lower PMP entries match + // verilator lint_on UNOPTFLAT + logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] genvar i,j; - assign PAgePMPAdr[0] = 1'b1; - assign NoLowerMatch[0] = 1'b1; - - generate - // verilator lint_off WIDTH - for (j=0; j<`PMP_ENTRIES; j = j+8) - assign {PMPCFG[j+7], PMPCFG[j+6], PMPCFG[j+5], PMPCFG[j+4], - PMPCFG[j+3], PMPCFG[j+2], PMPCFG[j+1], PMPCFG[j]} = PMPCFG_ARRAY_REGW[j/8]; - // verilator lint_on WIDTH - for (i=0; i<`PMP_ENTRIES; i++) - pmpadrdec pmpadrdec(.PhysicalAddress, - .PMPCfg(PMPCFG[i]), - .PMPAdr(PMPADDR_ARRAY_REGW[i]), - 
.PAgePMPAdrIn(PAgePMPAdr[i]), - .PAgePMPAdrOut(PAgePMPAdr[i+1]), - .NoLowerMatchIn(NoLowerMatch[i]), - .NoLowerMatchOut(NoLowerMatch[i+1]), - .Match(Match[i]), - .Active(Active[i]), - .L(L[i]), .X(X[i]), .W(W[i]), .R(R[i]) - ); - - // verilator lint_on UNOPTFLAT - endgenerate + pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0]( + .PhysicalAddress, + .PMPCfg(PMPCFG_ARRAY_REGW), + .PMPAdr(PMPADDR_ARRAY_REGW), + .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}), + .PAgePMPAdrOut(PAgePMPAdr), + .NoLowerMatchIn({NoLowerMatch[`PMP_ENTRIES-2:0], 1'b1}), + .NoLowerMatchOut(NoLowerMatch), + .Match, .Active, .L, .X, .W, .R); // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active; diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 1cf639064..34400647d 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -49,13 +49,14 @@ `include "wally-config.vh" // The TLB will have 2**ENTRY_BITS total entries -module tlb #(parameter ENTRY_BITS = 3, +module tlb #(parameter TLB_ENTRIES = 8, parameter ITLB = 0) ( input logic clk, reset, // Current value of satp CSR (from privileged unit) input logic [`XLEN-1:0] SATP_REGW, - input logic STATUS_MXR, STATUS_SUM, + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, // Current privilege level of the processeor input logic [1:0] PrivilegeModeW, @@ -92,35 +93,34 @@ module tlb #(parameter ENTRY_BITS = 3, // Store current virtual memory mode (SV32, SV39, SV48, ect...) 
logic [`SVMODE_BITS-1:0] SvMode; + logic [1:0] EffectivePrivilegeMode; // privilege mode, possibly modified by MPRV - // Index (currently random) to write the next TLB entry - logic [ENTRY_BITS-1:0] WriteIndex; - logic [(2**ENTRY_BITS)-1:0] WriteLines; // used as the one-hot encoding of WriteIndex + logic [TLB_ENTRIES-1:0] ReadLines, WriteLines, WriteEnables, PTE_G; // used as the one-hot encoding of WriteIndex // Sections of the virtual and physical addresses logic [`VPN_BITS-1:0] VirtualPageNumber; logic [`PPN_BITS-1:0] PhysicalPageNumber, PhysicalPageNumberMixed; logic [`PA_BITS-1:0] PhysicalAddressFull; + logic [`XLEN+1:0] VAExt; // Sections of the page table entry logic [7:0] PTEAccessBits; logic [11:0] PageOffset; - // Useful PTE Control Bits - logic PTE_U, PTE_X, PTE_W, PTE_R; - - // Pattern location in the CAM and type of page hit - logic [ENTRY_BITS-1:0] VPNIndex; + logic PTE_D, PTE_A, PTE_U, PTE_X, PTE_W, PTE_R; // Useful PTE Control Bits logic [1:0] HitPageType; - - // Whether the virtual address has a match in the CAM logic CAMHit; + logic [`ASID_BITS-1:0] ASID; + logic DAFault; - // Grab the sv mode from SATP + // Grab the sv mode from SATP and determine whether translation should occur assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; + assign ASID = SATP_REGW[`ASID_BASE+`ASID_BITS-1:`ASID_BASE]; + assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1 + assign Translate = (SvMode != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~ DisableTranslation; - // Decode the integer encoded WriteIndex into the one-hot encoded WriteLines - decoder #(ENTRY_BITS) writedecoder(WriteIndex, WriteLines); + // Determine whether to write TLB + assign WriteEnables = WriteLines & {(TLB_ENTRIES){TLBWrite}}; // The bus width is always the largest it could be for that XLEN. 
For example, vpn will be 36 bits wide in rv64 // this, even though it could be 27 bits (SV39) or 36 bits (SV48) wide. When the value of VPN is narrower, @@ -135,79 +135,67 @@ module tlb #(parameter ENTRY_BITS = 3, end endgenerate - // Whether translation should occur - assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE) & ~ DisableTranslation; - // Determine how the TLB is currently being used // Note that we use ReadAccess for both loads and instruction fetches assign ReadAccess = TLBAccessType[1]; assign WriteAccess = TLBAccessType[0]; assign TLBAccess = ReadAccess || WriteAccess; - - assign PageOffset = VirtualAddress[11:0]; - // TLB entries are evicted according to the LRU algorithm - tlblru #(ENTRY_BITS) lru(.*); + tlblru #(TLB_ENTRIES) lru(.*); - tlbram #(ENTRY_BITS) tlbram(.*); - tlbcam #(ENTRY_BITS, `VPN_BITS, `VPN_SEGMENT_BITS) tlbcam(.*); + // TLB memory + tlbram #(TLB_ENTRIES) tlbram(.*); + tlbcam #(TLB_ENTRIES, `VPN_BITS + `ASID_BITS, `VPN_SEGMENT_BITS) tlbcam(.*); + + // Replace segments of the virtual page number with segments of the physical + // page number. For 4 KB pages, the entire virtual page number is replaced. + // For superpages, some segments are considered offsets into a larger page. + tlbphysicalpagemask PageMask(VirtualPageNumber, PhysicalPageNumber, HitPageType, PhysicalPageNumberMixed); // unswizzle useful PTE bits - assign PTE_U = PTEAccessBits[4]; - assign PTE_X = PTEAccessBits[3]; - assign PTE_W = PTEAccessBits[2]; - assign PTE_R = PTEAccessBits[1]; - + assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; + assign {PTE_U, PTE_X, PTE_W, PTE_R} = PTEAccessBits[4:1]; + // Check whether the access is allowed, page faulting if not. - // *** We might not have S mode. generate if (ITLB == 1) begin logic ImproperPrivilege; // User mode may only execute user mode pages, and supervisor mode may // only execute non-user mode pages. 
- assign ImproperPrivilege = ((PrivilegeModeW == `U_MODE) && ~PTE_U) || - ((PrivilegeModeW == `S_MODE) && PTE_U); - assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || ~PTE_X); + assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) && ~PTE_U) || + ((EffectivePrivilegeMode == `S_MODE) && PTE_U); + // fault for software handling if access bit is off + assign DAFault = ~PTE_A; + assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || ~PTE_X || DAFault); end else begin logic ImproperPrivilege, InvalidRead, InvalidWrite; // User mode may only load/store from user mode pages, and supervisor mode // may only access user mode pages when STATUS_SUM is low. - assign ImproperPrivilege = ((PrivilegeModeW == `U_MODE) && ~PTE_U) || - ((PrivilegeModeW == `S_MODE) && PTE_U && ~STATUS_SUM); + assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) && ~PTE_U) || + ((EffectivePrivilegeMode == `S_MODE) && PTE_U && ~STATUS_SUM); // Check for read error. Reads are invalid when the page is not readable // (and executable pages are not readable) or when the page is neither // readable nor executable (and executable pages are readable). - assign InvalidRead = ReadAccess && - ((~STATUS_MXR && ~PTE_R) || (STATUS_MXR && ~PTE_R && PTE_X)); + assign InvalidRead = ReadAccess && ~PTE_R && (~STATUS_MXR | ~PTE_X); // Check for write error. Writes are invalid when the page's write bit is // low. assign InvalidWrite = WriteAccess && ~PTE_W; - assign TLBPageFault = Translate && TLBHit && - (ImproperPrivilege || InvalidRead || InvalidWrite); + // Fault for software handling if access bit is off or writing a page with dirty bit off + assign DAFault = ~PTE_A | WriteAccess & ~PTE_D; + assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || InvalidRead || InvalidWrite || DAFault); end endgenerate - // Replace segments of the virtual page number with segments of the physical - // page number. For 4 KB pages, the entire virtual page number is replaced. 
- // For superpages, some segments are considered offsets into a larger page. - physicalpagemask PageNumberMixer(VirtualPageNumber, PhysicalPageNumber, HitPageType, PhysicalPageNumberMixed); - - // Provide physical address only on TLBHits to cause catastrophic errors if - // garbage address is used. - assign PhysicalAddressFull = (TLBHit) ? - {PhysicalPageNumberMixed, PageOffset} : '0; // Output the hit physical address if translation is currently on. - generate - if (`XLEN == 32) begin - mux2 #(`PA_BITS) addressmux({2'b0, VirtualAddress}, PhysicalAddressFull, Translate, PhysicalAddress); - end else begin - mux2 #(`PA_BITS) addressmux(VirtualAddress[`PA_BITS-1:0], PhysicalAddressFull, Translate, PhysicalAddress); - end - endgenerate + // Provide physical address of zero if not TLBHits, to cause segmentation error if miss somehow percolated through signal + assign VAExt = {2'b00, VirtualAddress}; // extend length of virtual address if necessary for RV32 + assign PageOffset = VirtualAddress[11:0]; + assign PhysicalAddressFull = TLBHit ? {PhysicalPageNumberMixed, PageOffset} : '0; + mux2 #(`PA_BITS) addressmux(VAExt[`PA_BITS-1:0], PhysicalAddressFull, Translate, PhysicalAddress); assign TLBHit = CAMHit & TLBAccess; assign TLBMiss = ~TLBHit & ~TLBFlush & Translate & TLBAccess; diff --git a/wally-pipelined/src/mmu/tlbcam.sv b/wally-pipelined/src/mmu/tlbcam.sv index bd64afea9..0ad81605d 100644 --- a/wally-pipelined/src/mmu/tlbcam.sv +++ b/wally-pipelined/src/mmu/tlbcam.sv @@ -28,50 +28,36 @@ `include "wally-config.vh" -module tlbcam #(parameter ENTRY_BITS = 3, - parameter KEY_BITS = 20, - parameter SEGMENT_BITS = 10) ( +module tlbcam #(parameter TLB_ENTRIES = 8, + parameter KEY_BITS = 20, + parameter SEGMENT_BITS = 10) ( input logic clk, reset, - input logic [KEY_BITS-1:0] VirtualPageNumber, + input logic [`VPN_BITS-1:0] VirtualPageNumber, input logic [1:0] PageTypeWriteVal, -// input logic [`SVMODE_BITS-1:0] SvMode, // *** may not need to be used. 
- input logic TLBWrite, input logic TLBFlush, - input logic [2**ENTRY_BITS-1:0] WriteLines, - - output logic [ENTRY_BITS-1:0] VPNIndex, + input logic [TLB_ENTRIES-1:0] WriteEnables, + input logic [TLB_ENTRIES-1:0] PTE_G, + input logic [`ASID_BITS-1:0] ASID, + output logic [TLB_ENTRIES-1:0] ReadLines, output logic [1:0] HitPageType, output logic CAMHit ); - localparam NENTRIES = 2**ENTRY_BITS; + logic [1:0] PageTypeRead [TLB_ENTRIES-1:0]; + logic [TLB_ENTRIES-1:0] Matches; - - logic [1:0] PageTypeList [NENTRIES-1:0]; - logic [NENTRIES-1:0] Matches; - - // Create NENTRIES CAM lines, each of which will independently consider + // Create TLB_ENTRIES CAM lines, each of which will independently consider // whether the requested virtual address is a match. Each line stores the // original virtual page number from when the address was written, regardless // of page type. However, matches are determined based on a subset of the // page number segments. - generate - genvar i; - for (i = 0; i < NENTRIES; i++) begin - camline #(KEY_BITS, SEGMENT_BITS) camline( - .CAMLineWrite(WriteLines[i] && TLBWrite), - .PageType(PageTypeList[i]), - .Match(Matches[i]), - .*); - end - endgenerate - // In case there are multiple matches in the CAM, select only one - // *** it might be guaranteed that the CAM will never have multiple matches. 
- // If so, this is just an encoder - priorityencoder #(ENTRY_BITS) matchencoder(Matches, VPNIndex); - - assign CAMHit = |Matches & ~TLBFlush; - assign HitPageType = PageTypeList[VPNIndex]; + tlbcamline #(KEY_BITS, SEGMENT_BITS) camlines[TLB_ENTRIES-1:0]( + .WriteEnable(WriteEnables), + .PageTypeRead, // *** change name to agree + .Match(ReadLines), // *** change name to agree + .*); + assign CAMHit = |ReadLines & ~TLBFlush; + assign HitPageType = PageTypeRead.or; // applies OR to elements of the (TLB_ENTRIES x 2) array to get 2-bit result endmodule diff --git a/wally-pipelined/src/mmu/camline.sv b/wally-pipelined/src/mmu/tlbcamline.sv similarity index 69% rename from wally-pipelined/src/mmu/camline.sv rename to wally-pipelined/src/mmu/tlbcamline.sv index 6e3f705c3..ebb9ce3f5 100644 --- a/wally-pipelined/src/mmu/camline.sv +++ b/wally-pipelined/src/mmu/tlbcamline.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// camline.sv +// tlbcamline.sv // // Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021 // Modified: kmacsaigoren@hmc.edu 1 June 2021 @@ -28,45 +28,41 @@ `include "wally-config.vh" -module camline #(parameter KEY_BITS = 20, - parameter SEGMENT_BITS = 10) ( - input logic clk, reset, +module tlbcamline #(parameter KEY_BITS = 20, + parameter SEGMENT_BITS = 10) ( + input logic clk, reset, + input logic [`VPN_BITS-1:0] VirtualPageNumber, // The requested page number to compare against the key + input logic [`ASID_BITS-1:0] ASID, + input logic WriteEnable, // Write a new entry to this line + input logic PTE_G, + input logic [1:0] PageTypeWriteVal, + input logic TLBFlush, // Flush this line (set valid to 0) + output logic [1:0] PageTypeRead, // *** should this be the stored version or the always updated one? + output logic Match +); - // input to check which SvMode is running -// input logic [`SVMODE_BITS-1:0] SvMode, // *** may no longer be needed. 
- - // The requested page number to compare against the key - input logic [KEY_BITS-1:0] VirtualPageNumber, - - // Signals to write a new entry to this line - input logic CAMLineWrite, - input logic [1:0] PageTypeWriteVal, - - // Flush this line (set valid to 0) - input logic TLBFlush, - - // This entry is a key for a tera, giga, mega, or kilopage. + // PageTypeRead is a key for a tera, giga, mega, or kilopage. // PageType == 2'b00 --> kilopage // PageType == 2'b01 --> megapage // PageType == 2'b10 --> gigapage // PageType == 2'b11 --> terapage - output logic [1:0] PageType, // *** should this be the stored version or the always updated one? - output logic Match -); // This entry has KEY_BITS for the key plus one valid bit. logic Valid; logic [KEY_BITS-1:0] Key; + logic [1:0] PageType; - // Split up key and query into sections for each page table level. + logic [`ASID_BITS-1:0] Key_ASID; logic [SEGMENT_BITS-1:0] Key0, Key1, Query0, Query1; - logic Match0, Match1; + logic MatchASID, Match0, Match1; + + assign MatchASID = (ASID == Key_ASID) | PTE_G; generate if (`XLEN == 32) begin - assign {Key1, Key0} = Key; + assign {Key_ASID, Key1, Key0} = Key; assign {Query1, Query0} = VirtualPageNumber; // Calculate the actual match value based on the input vpn and the page type. @@ -82,29 +78,28 @@ module camline #(parameter KEY_BITS = 20, logic Match2, Match3; assign {Query3, Query2, Query1, Query0} = VirtualPageNumber; - assign {Key3, Key2, Key1, Key0} = Key; + assign {Key_ASID, Key3, Key2, Key1, Key0} = Key; // Calculate the actual match value based on the input vpn and the page type. - // For example, a gigapage in SV only cares about VPN[2], so VPN[0] and VPN[1] + // For example, a gigapage in SV39 only cares about VPN[2], so VPN[0] and VPN[1] // should automatically match. 
assign Match0 = (Query0 == Key0) || (PageType > 2'd0); // least signifcant section assign Match1 = (Query1 == Key1) || (PageType > 2'd1); assign Match2 = (Query2 == Key2) || (PageType > 2'd2); - assign Match3 = (Query3 == Key3); // *** this should always match in sv39 since both vPN3 and key3 are zeroed by the pagetable walker before getting to the cam + assign Match3 = (Query3 == Key3); // this should always match in sv39 since both vPN3 and key3 are zeroed by the pagetable walker before getting to the cam assign Match = Match0 & Match1 & Match2 & Match3 & Valid; end endgenerate // On a write, update the type of the page referred to by this line. - flopenr #(2) pagetypeflop(clk, reset, CAMLineWrite, PageTypeWriteVal, PageType); - //mux2 #(2) pagetypemux(StoredPageType, PageTypeWrite, CAMLineWrite, PageType); + flopenr #(2) pagetypeflop(clk, reset, WriteEnable, PageTypeWriteVal, PageType); + assign PageTypeRead = PageType & {2{Match}}; // On a write, set the valid bit high and update the stored key. // On a flush, zero the valid bit and leave the key unchanged. // *** Might we want to update stored key right away to output match on the // write cycle? (using a mux) - flopenrc #(1) validbitflop(clk, reset, TLBFlush, CAMLineWrite, 1'b1, Valid); - flopenr #(KEY_BITS) keyflop(clk, reset, CAMLineWrite, VirtualPageNumber, Key); - + flopenrc #(1) validbitflop(clk, reset, TLBFlush, WriteEnable, 1'b1, Valid); + flopenr #(KEY_BITS) keyflop(clk, reset, WriteEnable, {ASID, VirtualPageNumber}, Key); endmodule diff --git a/wally-pipelined/src/mmu/tlblru.sv b/wally-pipelined/src/mmu/tlblru.sv index e86598f71..ae933f805 100644 --- a/wally-pipelined/src/mmu/tlblru.sv +++ b/wally-pipelined/src/mmu/tlblru.sv @@ -24,46 +24,27 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/////////////////////////////////////////// -module tlblru #(parameter ENTRY_BITS = 3) ( - input logic clk, reset, - input logic TLBWrite, - input logic TLBFlush, - input logic [ENTRY_BITS-1:0] VPNIndex, - input logic CAMHit, - input logic [2**ENTRY_BITS-1:0] WriteLines, - - output logic [ENTRY_BITS-1:0] WriteIndex +module tlblru #(parameter TLB_ENTRIES = 8) ( + input logic clk, reset, + input logic TLBWrite, + input logic TLBFlush, + input logic [TLB_ENTRIES-1:0] ReadLines, + input logic CAMHit, + output logic [TLB_ENTRIES-1:0] WriteLines ); - localparam NENTRIES = 2**ENTRY_BITS; - - // Keep a "recently-used" record for each TLB entry. On access, set to 1 - logic [NENTRIES-1:0] RUBits, RUBitsNext, RUBitsAccessed; - - // One-hot encodings of which line is being accessed - logic [NENTRIES-1:0] ReadLineOneHot, AccessLineOneHot; - - // High if the next access causes all RU bits to be 1 - logic AllUsed; - - // Convert indices to one-hot encodings - decoder #(ENTRY_BITS) readdecoder(VPNIndex, ReadLineOneHot); + logic [TLB_ENTRIES-1:0] RUBits, RUBitsNext, RUBitsAccessed; + logic [TLB_ENTRIES-1:0] AccessLines; // One-hot encodings of which line is being accessed + logic AllUsed; // High if the next access causes all RU bits to be 1 // Find the first line not recently used - priorityencoder #(ENTRY_BITS) firstnru(~RUBits, WriteIndex); + tlbpriority #(TLB_ENTRIES) nru(~RUBits, WriteLines); - // Access either the hit line or written line - assign AccessLineOneHot = (TLBWrite) ? WriteLines : ReadLineOneHot; - - // Raise the bit of the recently accessed line - assign RUBitsAccessed = AccessLineOneHot | RUBits; - - // Determine whether we need to reset the RU bits to all zeroes - assign AllUsed = &(RUBitsAccessed); - assign RUBitsNext = (AllUsed) ? 
AccessLineOneHot : RUBitsAccessed; - - // Update LRU state on any TLB hit or write - flopenrc #(NENTRIES) lrustate(clk, reset, TLBFlush, (CAMHit || TLBWrite), - RUBitsNext, RUBits); + // Track recently used lines, updating on a CAM Hit or TLB write + assign AccessLines = TLBWrite ? WriteLines : ReadLines; + assign RUBitsAccessed = AccessLines | RUBits; + assign AllUsed = &RUBitsAccessed; // if all recently used, then clear to none + assign RUBitsNext = AllUsed ? 0 : RUBitsAccessed; + flopenrc #(TLB_ENTRIES) lrustate(clk, reset, TLBFlush, (CAMHit || TLBWrite), RUBitsNext, RUBits); endmodule diff --git a/wally-pipelined/src/mmu/physicalpagemask.sv b/wally-pipelined/src/mmu/tlbphysicalpagemask.sv similarity index 88% rename from wally-pipelined/src/mmu/physicalpagemask.sv rename to wally-pipelined/src/mmu/tlbphysicalpagemask.sv index b1f77e2d6..b45237c51 100644 --- a/wally-pipelined/src/mmu/physicalpagemask.sv +++ b/wally-pipelined/src/mmu/tlbphysicalpagemask.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// physicalpagemask.sv +// tlbphysicalpagemask.sv // // Written: David Harris and kmacsaigoren@hmc.edu 7 June 2021 // Modified: @@ -28,11 +28,10 @@ `include "wally-config.vh" -module physicalpagemask ( - input logic [`VPN_BITS-1:0] VPN, - input logic [`PPN_BITS-1:0] PPN, - input logic [1:0] PageType, - +module tlbphysicalpagemask ( + input logic [`VPN_BITS-1:0] VPN, + input logic [`PPN_BITS-1:0] PPN, + input logic [1:0] PageType, output logic [`PPN_BITS-1:0] MixedPageNumber ); @@ -40,13 +39,11 @@ module physicalpagemask ( logic [`PPN_BITS-1:0] ZeroExtendedVPN; logic [`PPN_BITS-1:0] PageNumberMask; - assign ZeroExtendedVPN = {{EXTRA_BITS{1'b0}}, VPN}; // forces the VPN to be the same width as PPN. - generate if (`XLEN == 32) begin always_comb case (PageType[0]) - // *** the widths of these constansts are hardocded here to match `PPN_BITS in the wally-constants file. 
+ // the widths of these constants are hardcoded here to match `PPN_BITS in the wally-constants file. 0: PageNumberMask = 22'h3FFFFF; // kilopage: 22 bits of PPN, 0 bits of VPN 1: PageNumberMask = 22'h3FFC00; // megapage: 12 bits of PPN, 10 bits of VPN endcase @@ -57,7 +54,7 @@ module physicalpagemask ( 1: PageNumberMask = 44'hFFFFFFFFE00; // megapage: 35 bits of PPN, 9 bits of VPN 2: PageNumberMask = 44'hFFFFFFC0000; // gigapage: 26 bits of PPN, 18 bits of VPN 3: PageNumberMask = 44'hFFFF8000000; // terapage: 17 bits of PPN, 27 bits of VPN - // *** make sure that this doesnt break when using sv39. In that case, all of these + // Bus widths accommodate SV48. In SV39, all of these // busses are the widths for sv48, but extra bits should be zeroed out by the mux // in the tlb when it generates VPN from the full virtualadress. endcase @@ -65,6 +62,7 @@ module physicalpagemask ( endgenerate // merge low segments of VPN with high segments of PPN decided by the pagetype. + assign ZeroExtendedVPN = {{EXTRA_BITS{1'b0}}, VPN}; // forces the VPN to be the same width as PPN. 
assign MixedPageNumber = (ZeroExtendedVPN & ~PageNumberMask) | (PPN & PageNumberMask); endmodule diff --git a/wally-pipelined/src/mmu/priorityencoder.sv b/wally-pipelined/src/mmu/tlbpriority.sv similarity index 62% rename from wally-pipelined/src/mmu/priorityencoder.sv rename to wally-pipelined/src/mmu/tlbpriority.sv index d56da3d65..5096cae60 100644 --- a/wally-pipelined/src/mmu/priorityencoder.sv +++ b/wally-pipelined/src/mmu/tlbpriority.sv @@ -1,16 +1,15 @@ /////////////////////////////////////////// -// priorityencoder.sv +// tlbpriority.sv // // Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021 -// Based on implementation from https://www.allaboutcircuits.com/ip-cores/communication-controller/priority-encoder/ -// *** Give proper LGPL attribution for above source // Modified: Teo Ene 15 Apr 2021: // Temporarily removed paramterized priority encoder for non-parameterized one // To get synthesis working quickly // Kmacsaigoren@hmc.edu 28 May 2021: // Added working version of parameterized priority encoder. +// David_Harris@Hmc.edu switched to one-hot output // -// Purpose: One-hot encoding to binary encoder +// Purpose: Priority circuit to choose most significant one-hot output // // A component of the Wally configurable RISC-V project. 
// @@ -31,35 +30,21 @@ `include "wally-config.vh" -module priorityencoder #(parameter BINARY_BITS = 3) ( - input logic [2**BINARY_BITS - 1:0] onehot, - output logic [BINARY_BITS - 1:0] binary +module tlbpriority #(parameter ENTRIES = 8) ( + input logic [ENTRIES-1:0] a, + output logic [ENTRIES-1:0] y ); + // verilator lint_off UNOPTFLAT + logic [ENTRIES-1:0] nolower; - integer i; - always_comb begin - binary = 0; - for (i = 0; i < 2**BINARY_BITS; i++) begin - // verilator lint_off WIDTH - if (onehot[i]) binary = i; // prioritizes the most significant bit - // verilator lint_on WIDTH + // generate thermometer code mask + genvar i; + generate + assign nolower[0] = 1; + for (i=1; i= PMPADDR0 && CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry CSRMReadValM = PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0]; else if (CSRAdrM >= PMPCFG0 && CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4) begin - if (~CSRAdrM[0]) CSRMReadValM = PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0)/2][`XLEN-1:0]; - else CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0-1)/2][63:32]}; + if (`XLEN==64) begin + entry = ({CSRAdrM[11:1], 1'b0} - PMPCFG0)*4; // disregard odd entries in RV64 + CSRMReadValM = {PMPCFG_ARRAY_REGW[entry+7],PMPCFG_ARRAY_REGW[entry+6],PMPCFG_ARRAY_REGW[entry+5],PMPCFG_ARRAY_REGW[entry+4], + PMPCFG_ARRAY_REGW[entry+3],PMPCFG_ARRAY_REGW[entry+2],PMPCFG_ARRAY_REGW[entry+1],PMPCFG_ARRAY_REGW[entry]}; + end else begin + entry = (CSRAdrM - PMPCFG0)*4; + CSRMReadValM = {PMPCFG_ARRAY_REGW[entry+3],PMPCFG_ARRAY_REGW[entry+2],PMPCFG_ARRAY_REGW[entry+1],PMPCFG_ARRAY_REGW[entry]}; + end + + /* + if (~CSRAdrM[0]) CSRMReadValM = {PMPCFG_ARRAY_REGW[]}; + else CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0-1)/2][63:32]};*/ end else case (CSRAdrM) MISA_ADR: CSRMReadValM = MISA_REGW; @@ -212,26 +210,7 @@ module csrm #(parameter MTVAL: CSRMReadValM = MTVAL_REGW; MCOUNTEREN:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTEREN_REGW}; MCOUNTINHIBIT:CSRMReadValM = 
{{(`XLEN-32){1'b0}}, MCOUNTINHIBIT_REGW}; -/* PMPCFG0: CSRMReadValM = PMPCFG01_REGW[`XLEN-1:0]; - PMPCFG1: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG01_REGW[63:32]}; - PMPCFG2: CSRMReadValM = PMPCFG23_REGW[`XLEN-1:0]; - PMPCFG3: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG23_REGW[63:32]}; - PMPADDR0: CSRMReadValM = PMPADDR_ARRAY_REGW[0]; // *** make configurable - PMPADDR1: CSRMReadValM = PMPADDR_ARRAY_REGW[1]; - PMPADDR2: CSRMReadValM = PMPADDR_ARRAY_REGW[2]; - PMPADDR3: CSRMReadValM = PMPADDR_ARRAY_REGW[3]; - PMPADDR4: CSRMReadValM = PMPADDR_ARRAY_REGW[4]; - PMPADDR5: CSRMReadValM = PMPADDR_ARRAY_REGW[5]; - PMPADDR6: CSRMReadValM = PMPADDR_ARRAY_REGW[6]; - PMPADDR7: CSRMReadValM = PMPADDR_ARRAY_REGW[7]; - PMPADDR8: CSRMReadValM = PMPADDR_ARRAY_REGW[8]; - PMPADDR9: CSRMReadValM = PMPADDR_ARRAY_REGW[9]; - PMPADDR10: CSRMReadValM = PMPADDR_ARRAY_REGW[10]; - PMPADDR11: CSRMReadValM = PMPADDR_ARRAY_REGW[11]; - PMPADDR12: CSRMReadValM = PMPADDR_ARRAY_REGW[12]; - PMPADDR13: CSRMReadValM = PMPADDR_ARRAY_REGW[13]; - PMPADDR14: CSRMReadValM = PMPADDR_ARRAY_REGW[14]; - PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15]; */ + default: begin CSRMReadValM = 0; IllegalCSRMAccessM = 1; diff --git a/wally-pipelined/src/privileged/csrn.sv b/wally-pipelined/src/privileged/csrn.sv index e82ff59f3..16d5df8a3 100644 --- a/wally-pipelined/src/privileged/csrn.sv +++ b/wally-pipelined/src/privileged/csrn.sv @@ -49,7 +49,7 @@ module csrn #(parameter ); // User mode CSRs below only needed when user mode traps are supported - generate + generate if (`N_SUPPORTED) begin logic WriteUTVECM; logic WriteUSCRATCHM, WriteUEPCM; diff --git a/wally-pipelined/src/privileged/csrs.sv b/wally-pipelined/src/privileged/csrs.sv index 0afe70912..ca64b053a 100644 --- a/wally-pipelined/src/privileged/csrs.sv +++ b/wally-pipelined/src/privileged/csrs.sv @@ -66,7 +66,7 @@ module csrs #(parameter //logic [`XLEN-1:0] SEDELEG_MASK = ~(zero | 3'b111 << 9); // sedeleg[11:9] hardwired to zero per Privileged Spec 3.1.8 // 
Supervisor mode CSRs sometimes supported - generate + generate if (`S_SUPPORTED) begin logic WriteSTVECM; logic WriteSSCRATCHM, WriteSEPCM; diff --git a/wally-pipelined/src/privileged/csru.sv b/wally-pipelined/src/privileged/csru.sv index 2e48731da..08e682bfd 100644 --- a/wally-pipelined/src/privileged/csru.sv +++ b/wally-pipelined/src/privileged/csru.sv @@ -43,7 +43,7 @@ module csru #(parameter ); // Floating Point CSRs in User Mode only needed if Floating Point is supported - generate + generate if (`F_SUPPORTED | `D_SUPPORTED) begin logic [4:0] FFLAGS_REGW; logic WriteFFLAGSM, WriteFRMM; //, WriteFCSRM; diff --git a/wally-pipelined/src/privileged/privdec.sv b/wally-pipelined/src/privileged/privdec.sv index 1330a62bc..621ef9a2c 100644 --- a/wally-pipelined/src/privileged/privdec.sv +++ b/wally-pipelined/src/privileged/privdec.sv @@ -38,9 +38,9 @@ module privdec ( // xRET defined in Privileged Spect 3.2.2 assign uretM = PrivilegedM & (InstrM[31:20] == 12'b000000000010) & `N_SUPPORTED; - assign sretM = PrivilegedM & (InstrM[31:20] == 12'b000100000010) & `S_SUPPORTED && + assign sretM = PrivilegedM & (InstrM[31:20] == 12'b000100000010) & `S_SUPPORTED & PrivilegeModeW[0] & ~STATUS_TSR; - assign mretM = PrivilegedM & (InstrM[31:20] == 12'b001100000010) && (PrivilegeModeW == `M_MODE); + assign mretM = PrivilegedM & (InstrM[31:20] == 12'b001100000010) & (PrivilegeModeW == `M_MODE); assign ecallM = PrivilegedM & (InstrM[31:20] == 12'b000000000000); assign ebreakM = PrivilegedM & (InstrM[31:20] == 12'b000000000001); diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index 5ed8c8807..90830137b 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -64,11 +64,12 @@ module privileged ( input logic PMALoadAccessFaultM, PMPLoadAccessFaultM, input logic PMAStoreAccessFaultM, PMPStoreAccessFaultM, - output logic IllegalFPUInstrE, + output logic IllegalFPUInstrE, output 
logic [1:0] PrivilegeModeW, output logic [`XLEN-1:0] SATP_REGW, - output logic STATUS_MXR, STATUS_SUM, - output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], + output logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + output logic [1:0] STATUS_MPP, + output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic [2:0] FRM_REGW ); @@ -94,8 +95,7 @@ module privileged ( logic MTrapM, STrapM, UTrapM; logic InterruptM; - logic [1:0] STATUS_MPP; - logic STATUS_SPP, STATUS_TSR, STATUS_MPRV; // **** status mprv is unused outside of the csr module as of 4 June 2021. should it be deleted alltogether from the module, or should I leav the pin here in case someone needs it? + logic STATUS_SPP, STATUS_TSR; logic STATUS_MIE, STATUS_SIE; logic [11:0] MIP_REGW, MIE_REGW, SIP_REGW, SIE_REGW; logic md, sd; diff --git a/wally-pipelined/src/uncore/clint.sv b/wally-pipelined/src/uncore/clint.sv index d2014468a..80cb28a1d 100644 --- a/wally-pipelined/src/uncore/clint.sv +++ b/wally-pipelined/src/uncore/clint.sv @@ -94,7 +94,7 @@ module clint ( if (~HRESETn) begin MTIME <= 0; // MTIMECMP is not reset - end else if (memwrite && entryd == 16'hBFF8) begin + end else if (memwrite & entryd == 16'hBFF8) begin // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed MTIME <= HWDATA; end else MTIME <= MTIME + 1; @@ -125,9 +125,9 @@ module clint ( if (~HRESETn) begin MTIME <= 0; // MTIMECMP is not reset - end else if (memwrite && (entryd == 16'hBFF8)) begin + end else if (memwrite & (entryd == 16'hBFF8)) begin MTIME[31:0] <= HWDATA; - end else if (memwrite && (entryd == 16'hBFFC)) begin + end else if (memwrite & (entryd == 16'hBFFC)) begin // MTIME Counter. Eventually change this to run off separate clock. 
Synchronization then needed MTIME[63:32]<= HWDATA; end else MTIME <= MTIME + 1; diff --git a/wally-pipelined/src/uncore/dtim.sv b/wally-pipelined/src/uncore/dtim.sv index 408645675..c195c5516 100644 --- a/wally-pipelined/src/uncore/dtim.sv +++ b/wally-pipelined/src/uncore/dtim.sv @@ -102,13 +102,13 @@ module dtim #(parameter BASE=0, RANGE = 65535) ( always_ff @(posedge HCLK) begin HWADDR <= #1 A; HREADTim0 <= #1 RAM[A[31:3]]; - if (memwrite && risingHREADYTim) RAM[HWADDR[31:3]] <= #1 HWDATA; + if (memwrite & risingHREADYTim) RAM[HWADDR[31:3]] <= #1 HWDATA; end end else begin always_ff @(posedge HCLK) begin HWADDR <= #1 A; HREADTim0 <= #1 RAM[A[31:2]]; - if (memwrite && risingHREADYTim) RAM[HWADDR[31:2]] <= #1 HWDATA; + if (memwrite & risingHREADYTim) RAM[HWADDR[31:2]] <= #1 HWDATA; end end endgenerate diff --git a/wally-pipelined/src/uncore/gpio.sv b/wally-pipelined/src/uncore/gpio.sv index bddec8e63..0100c9c56 100644 --- a/wally-pipelined/src/uncore/gpio.sv +++ b/wally-pipelined/src/uncore/gpio.sv @@ -131,33 +131,32 @@ module gpio ( default: Dout <= #1 0; endcase // interrupts - if (memwrite && (entryd == 8'h1C)) - rise_ip <= rise_ip & ~Din | (input2d & ~input3d); + if (memwrite & (entryd == 8'h1C)) + rise_ip <= rise_ip & ~Din; else rise_ip <= rise_ip | (input2d & ~input3d); - if (memwrite && (entryd == 8'h24)) - fall_ip <= fall_ip & ~Din | (~input2d & input3d); + if (memwrite & (entryd == 8'h24)) + fall_ip <= fall_ip & ~Din; else fall_ip <= fall_ip | (~input2d & input3d); - if (memwrite && (entryd == 8'h2C)) - high_ip <= high_ip & ~Din | input3d; + if (memwrite & (entryd == 8'h2C)) + high_ip <= high_ip & ~Din; else high_ip <= high_ip | input3d; - if (memwrite && (entryd == 8'h34)) - low_ip <= low_ip & ~Din | ~input3d; + if (memwrite & (entryd == 8'h34)) + low_ip <= low_ip & ~Din; else low_ip <= low_ip | ~input3d; end end // chip i/o - generate + generate if (`GPIO_LOOPBACK_TEST) // connect OUT to IN for loopback testing assign input0d = GPIOPinsOut & input_en & 
output_en; else assign input0d = GPIOPinsIn & input_en; endgenerate - // *** this costs lots of flops; I suspect they don't need to be resettable, do they? flop #(32) sync1(HCLK,input0d,input1d); flop #(32) sync2(HCLK,input1d,input2d); flop #(32) sync3(HCLK,input2d,input3d); diff --git a/wally-pipelined/src/uncore/imem.sv b/wally-pipelined/src/uncore/imem.sv deleted file mode 100644 index 85362edf7..000000000 --- a/wally-pipelined/src/uncore/imem.sv +++ /dev/null @@ -1,71 +0,0 @@ -/////////////////////////////////////////// -// imem.sv -// -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: -// -// Purpose: -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-/////////////////////////////////////////// - -`include "wally-config.vh" - -module imem ( - input logic [`XLEN-1:1] AdrF, - output logic [31:0] InstrF, - output logic [15:0] rd2, // bogus, delete when real multicycle fetch works - output logic InstrAccessFaultF); - - /* verilator lint_off UNDRIVEN */ - logic [`XLEN-1:0] RAM[`TIM_BASE>>(1+`XLEN/32):(`TIM_RANGE+`TIM_BASE)>>(1+`XLEN/32)]; - logic [`XLEN-1:0] bootram[`BOOTTIM_BASE>>(1+`XLEN/32):(`BOOTTIM_RANGE+`BOOTTIM_BASE)>>(1+`XLEN/32)]; - /* verilator lint_on UNDRIVEN */ - logic [31:0] adrbits; // needs to be 32 bits to index RAM - logic [`XLEN-1:0] rd; -// logic [15:0] rd2; - - generate - if (`XLEN==32) assign adrbits = AdrF[31:2]; - else assign adrbits = AdrF[31:3]; - endgenerate - - assign #2 rd = (AdrF < (`TIM_BASE >> 1)) ? bootram[adrbits] : RAM[adrbits]; // busybear: 2 memory options - - // hack right now for unaligned 32-bit instructions - // eventually this will need to cause a stall like a cache miss - // when the instruction wraps around a cache line - // could be optimized to only stall when the instruction wrapping is 32 bits - assign #2 rd2 = (AdrF < (`TIM_BASE >> 1)) ? bootram[adrbits+1][15:0] : RAM[adrbits+1][15:0]; //busybear: 2 memory options - generate - if (`XLEN==32) begin - assign InstrF = AdrF[1] ? {rd2[15:0], rd[31:16]} : rd; - // First, AdrF needs to get its last bit appended back onto it - // Then not-XORing it with TIM_BASE checks if it matches TIM_BASE exactly - // Then ORing it with TIM_RANGE introduces some leeway into the previous check, by allowing the lower bits to be either high or low - - assign InstrAccessFaultF = (~&(({AdrF,1'b0} ~^ `TIM_BASE) | `TIM_RANGE)) & (~&(({AdrF,1'b0} ~^ `BOOTTIM_BASE) | `BOOTTIM_RANGE)); - - end else begin - assign InstrF = AdrF[2] ? (AdrF[1] ? {rd2[15:0], rd[63:48]} : rd[63:32]) - : (AdrF[1] ? 
rd[47:16] : rd[31:0]); - // - assign InstrAccessFaultF = (|AdrF[`XLEN-1:32] | ~&({AdrF[31:1],1'b0} ~^ `TIM_BASE | `TIM_RANGE)) & (|AdrF[`XLEN-1:32] | ~&({AdrF[31:1],1'b0} ~^ `BOOTTIM_BASE | `BOOTTIM_RANGE)); - end - endgenerate -endmodule - diff --git a/wally-pipelined/src/uncore/plic.sv b/wally-pipelined/src/uncore/plic.sv index dc50eb4f1..ef7ecdd5b 100644 --- a/wally-pipelined/src/uncore/plic.sv +++ b/wally-pipelined/src/uncore/plic.sv @@ -164,17 +164,13 @@ module plic ( flopr #(N) intPendingFlop(HCLK,~HRESETn,nextIntPending,intPending); // pending array - indexed by priority_lvl x source_ID - genvar i; + genvar i, j; generate - for (i=1; i<=N; i=i+1) begin - // *** make sure that this synthesizes into N decoders, not 7*N 3-bit equality comparators (right?) - assign pendingArray[7][i] = (intPriority[i]==7) & intEn[i] & intPending[i]; - assign pendingArray[6][i] = (intPriority[i]==6) & intEn[i] & intPending[i]; - assign pendingArray[5][i] = (intPriority[i]==5) & intEn[i] & intPending[i]; - assign pendingArray[4][i] = (intPriority[i]==4) & intEn[i] & intPending[i]; - assign pendingArray[3][i] = (intPriority[i]==3) & intEn[i] & intPending[i]; - assign pendingArray[2][i] = (intPriority[i]==2) & intEn[i] & intPending[i]; - assign pendingArray[1][i] = (intPriority[i]==1) & intEn[i] & intPending[i]; + for (j=1; j<=7; j++) begin: pending + for (i=1; i<=N; i=i+1) begin: pendingbit + // *** make sure that this synthesizes into N decoders, not 7*N 3-bit equality comparators (right?) 
+ assign pendingArray[j][i] = (intPriority[i]==j) & intEn[i] & intPending[i]; + end end endgenerate // pending array, except grouped by priority @@ -184,7 +180,9 @@ module plic ( |pendingArray[4], |pendingArray[3], |pendingArray[2], - |pendingArray[1]}; + |pendingArray[1]}; + //assign pendingPGrouped = pendingArray.or; + // pendingPGrouped, except only topmost priority is active assign pendingMaxP[7:1] = {pendingPGrouped[7], pendingPGrouped[6] & ~|pendingPGrouped[7], @@ -202,24 +200,24 @@ module plic ( | ({N{pendingMaxP[2]}} & pendingArray[2]) | ({N{pendingMaxP[1]}} & pendingArray[1]); // find the lowest ID amongst active interrupts at the highest priority - integer j; - // *** verify that this synthesizes to a reasonable priority encoder and that j doesn't actually exist in hardware + int k; + // *** verify that this synthesizes to a reasonable priority encoder and that k doesn't actually exist in hardware always_comb begin intClaim = 6'b0; - for(j=N; j>0; j=j-1) begin - if(pendingRequestsAtMaxP[j]) intClaim = j[5:0]; + for(k=N; k>0; k=k-1) begin + if(pendingRequestsAtMaxP[k]) intClaim = k[5:0]; end end // create threshold mask - always_comb begin - threshMask[7] = ~(7==intThreshold); - threshMask[6] = ~(6==intThreshold) & threshMask[7]; - threshMask[5] = ~(5==intThreshold) & threshMask[6]; - threshMask[4] = ~(4==intThreshold) & threshMask[5]; - threshMask[3] = ~(3==intThreshold) & threshMask[4]; - threshMask[2] = ~(2==intThreshold) & threshMask[3]; - threshMask[1] = ~(1==intThreshold) & threshMask[2]; + always_comb begin + threshMask[7] = (intThreshold != 7); + threshMask[6] = (intThreshold != 6) & threshMask[7]; + threshMask[5] = (intThreshold != 5) & threshMask[6]; + threshMask[4] = (intThreshold != 4) & threshMask[5]; + threshMask[3] = (intThreshold != 3) & threshMask[4]; + threshMask[2] = (intThreshold != 2) & threshMask[3]; + threshMask[1] = (intThreshold != 1) & threshMask[2]; end // is the max priority > threshold? 
// *** would it be any better to first priority encode maxPriority into binary and then ">" with threshold? diff --git a/wally-pipelined/src/uncore/uartPC16550D.sv b/wally-pipelined/src/uncore/uartPC16550D.sv index 4ead3f679..badc41973 100644 --- a/wally-pipelined/src/uncore/uartPC16550D.sv +++ b/wally-pipelined/src/uncore/uartPC16550D.sv @@ -224,11 +224,11 @@ module uartPC16550D( else rxstate <= #1 UART_IDLE; end // timeout counting - if (~MEMRb && A == 3'b000 && ~DLAB) rxtimeoutcnt <= #1 0; // reset timeout on read + if (~MEMRb & A == 3'b000 & ~DLAB) rxtimeoutcnt <= #1 0; // reset timeout on read else if (fifoenabled & ~rxfifoempty & rxbaudpulse & ~rxfifotimeout) rxtimeoutcnt <= #1 rxtimeoutcnt+1; // *** not right end - assign rxcentered = rxbaudpulse && (rxoversampledcnt == 4'b1000); // implies rxstate = UART_ACTIVE + assign rxcentered = rxbaudpulse & (rxoversampledcnt == 4'b1000); // implies rxstate = UART_ACTIVE assign rxbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1; // start bit + data bits + (parity bit) + stop bit /////////////////////////////////////////// @@ -267,12 +267,12 @@ module uartPC16550D( rxfifohead <= #1 rxfifohead + 1; end rxdataready <= #1 1; - end else if (~MEMRb && A == 3'b000 && ~DLAB) begin // reading RBR updates ready / pops fifo + end else if (~MEMRb & A == 3'b000 & ~DLAB) begin // reading RBR updates ready / pops fifo if (fifoenabled) begin rxfifotail <= #1 rxfifotail + 1; if (rxfifohead == rxfifotail +1) rxdataready <= #1 0; end else rxdataready <= #1 0; - end else if (~MEMWb && A == 3'b010) // writes to FIFO Control Register + end else if (~MEMWb & A == 3'b010) // writes to FIFO Control Register if (Din[1] | ~Din[0]) begin // rx FIFO reset or FIFO disable clears FIFO contents rxfifohead <= #1 0; rxfifotail <= #1 0; end @@ -291,7 +291,7 @@ module uartPC16550D( // although rxfullbit looks like a combinational loop, in one bit rxfifotail == i and breaks the loop generate genvar i; - for (i=0; i<16; i++) begin 
+ for (i=0; i<16; i++) begin:rx assign RXerrbit[i] = |rxfifo[i][10:8]; // are any of the error conditions set? if (i > 0) assign rxfullbit[i] = ((rxfifohead==i) | rxfullbit[i-1]) & (rxfifotail != i); @@ -326,7 +326,7 @@ module uartPC16550D( txoversampledcnt <= #1 0; txstate <= #1 UART_IDLE; txbitssent <= #1 0; - end else if ((txstate == UART_IDLE) && txsrfull) begin // start transmitting + end else if ((txstate == UART_IDLE) & txsrfull) begin // start transmitting txstate <= #1 UART_ACTIVE; txoversampledcnt <= #1 1; txbitssent <= #1 0; @@ -341,7 +341,7 @@ module uartPC16550D( end assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s) - assign txnextbit = txbaudpulse && (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE + assign txnextbit = txbaudpulse & (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE /////////////////////////////////////////// // transmit holding register, shift register, FIFO @@ -372,7 +372,7 @@ module uartPC16550D( if (~HRESETn) begin txfifohead <= #1 0; txfifotail <= #1 0; txhrfull <= #1 0; txsrfull <= #1 0; TXHR <= #1 0; txsr <= #1 12'hfff; end else begin - if (~MEMWb && A == 3'b000 && ~DLAB) begin // writing transmit holding register or fifo + if (~MEMWb & A == 3'b000 & ~DLAB) begin // writing transmit holding register or fifo if (fifoenabled) begin txfifo[txfifohead] <= #1 Din; txfifohead <= #1 txfifohead + 1; @@ -395,8 +395,8 @@ module uartPC16550D( txsrfull <= #1 1; end end else if (txstate == UART_DONE) txsrfull <= #1 0; // done transmitting shift register - else if (txstate == UART_ACTIVE && txnextbit) txsr <= #1 {txsr[10:0], 1'b1}; // shift txhr - if (!MEMWb && A == 3'b010) // writes to FIFO control register + else if (txstate == UART_ACTIVE & txnextbit) txsr <= #1 {txsr[10:0], 1'b1}; // shift txhr + if (!MEMWb & A == 3'b010) // writes to FIFO control register if (Din[2] | ~Din[0]) begin // tx FIFO 
reste or FIFO disable clears FIFO contents txfifohead <= #1 0; txfifotail <= #1 0; end diff --git a/wally-pipelined/src/uncore/uncore.sv b/wally-pipelined/src/uncore/uncore.sv index 79f7a0e86..e5f5fdd7d 100644 --- a/wally-pipelined/src/uncore/uncore.sv +++ b/wally-pipelined/src/uncore/uncore.sv @@ -62,13 +62,14 @@ module uncore ( logic [`XLEN-1:0] HWDATA; logic [`XLEN-1:0] HREADTim, HREADCLINT, HREADPLIC, HREADGPIO, HREADUART; - logic [5:0] HSELRegions; + logic [6:0] HSELRegions; logic HSELTim, HSELCLINT, HSELPLIC, HSELGPIO, PreHSELUART, HSELUART; logic HSELTimD, HSELCLINTD, HSELPLICD, HSELGPIOD, HSELUARTD; logic HRESPTim, HRESPCLINT, HRESPPLIC, HRESPGPIO, HRESPUART; logic HREADYTim, HREADYCLINT, HREADYPLIC, HREADYGPIO, HREADYUART; logic [`XLEN-1:0] HREADBootTim; logic HSELBootTim, HSELBootTimD, HRESPBootTim, HREADYBootTim; + logic HSELNoneD; logic [1:0] MemRWboottim; logic UARTIntr,GPIOIntr; @@ -78,7 +79,7 @@ module uncore ( adrdecs adrdecs({{(`PA_BITS-32){1'b0}}, HADDR}, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions); // unswizzle HSEL signals - assign {HSELBootTim, HSELTim, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC} = HSELRegions; + assign {HSELBootTim, HSELTim, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC} = HSELRegions[5:0]; // subword accesses: converts HWDATAIN to HWDATA subwordwrite sww(.*); @@ -134,19 +135,10 @@ module uncore ( HSELPLICD & HREADYPLIC | HSELGPIOD & HREADYGPIO | HSELBootTimD & HREADYBootTim | - HSELUARTD & HREADYUART; - - /* PMA checker now handles access faults. 
*** This can be deleted - // Faults - assign DataAccessFaultM = ~(HSELTimD | HSELCLINTD | HSELPLICD | HSELGPIOD | HSELBootTimD | HSELUARTD); - */ + HSELUARTD & HREADYUART | + HSELNoneD; // don't lock up the bus if no region is being accessed // Address Decoder Delay (figure 4-2 in spec) - flopr #(1) hseltimreg(HCLK, ~HRESETn, HSELTim, HSELTimD); - flopr #(1) hselclintreg(HCLK, ~HRESETn, HSELCLINT, HSELCLINTD); - flopr #(1) hselplicreg(HCLK, ~HRESETn, HSELPLIC, HSELPLICD); - flopr #(1) hselgpioreg(HCLK, ~HRESETn, HSELGPIO, HSELGPIOD); - flopr #(1) hseluartreg(HCLK, ~HRESETn, HSELUART, HSELUARTD); - flopr #(1) hselboottimreg(HCLK, ~HRESETn, HSELBootTim, HSELBootTimD); + flopr #(7) hseldelayreg(HCLK, ~HRESETn, HSELRegions, {HSELNoneD, HSELBootTimD, HSELTimD, HSELCLINTD, HSELGPIOD, HSELUARTD, HSELPLICD}); endmodule diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 978f747fa..f18d5af43 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -112,7 +112,8 @@ module wallypipelinedhart logic ITLBMissF, ITLBHitF; logic DTLBMissM, DTLBHitM; logic [`XLEN-1:0] SATP_REGW; - logic STATUS_MXR, STATUS_SUM; + logic STATUS_MXR, STATUS_SUM, STATUS_MPRV; + logic [1:0] STATUS_MPP; logic [1:0] PrivilegeModeW; logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM; logic [1:0] PageTypeF, PageTypeM; @@ -123,17 +124,12 @@ module wallypipelinedhart logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM; logic DSquashBusAccessM, ISquashBusAccessF; var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0]; - var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0]; + var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0]; // IMem stalls logic ICacheStallF; logic DCacheStall; - logic [`XLEN-1:0] MMUPAdr, MMUReadPTE; - logic MMUStall; - logic MMUTranslate, MMUReady; - logic HPTWRead; - logic HPTWReadyfromLSU; - logic HPTWStall; + // bus interface to dmem @@ 
-146,7 +142,6 @@ module wallypipelinedhart logic [`PA_BITS-1:0] InstrPAdrF; logic [`XLEN-1:0] InstrRData; logic InstrReadF; - logic DataStall; logic InstrAckF, MemAckW; logic CommitM, CommittedM; @@ -163,9 +158,8 @@ module wallypipelinedhart logic [`XLEN-1:0] HRDATAW; // IEU vs HPTW arbitration signals to send to LSU - logic DisableTranslation; logic [1:0] MemRWMtoLSU; - logic [2:0] Funct3MtoLSU; + logic [2:0] SizeToLSU; logic [1:0] AtomicMtoLSU; logic [`XLEN-1:0] MemAdrMtoLSU; logic [`XLEN-1:0] WriteDataMtoLSU; @@ -175,7 +169,7 @@ module wallypipelinedhart logic DataMisalignedMfromLSU; logic StallWtoLSU; logic StallWfromLSU; - logic [2:0] Funct3MfromLSU; + logic [2:0] SizeFromLSU; ifu ifu(.InstrInF(InstrRData), @@ -187,71 +181,87 @@ module wallypipelinedhart // mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); - pagetablewalker pagetablewalker(.HPTWRead(HPTWRead), - .*); // can send addresses to ahblite, send out pagetablestall - // arbiter between IEU and pagetablewalker - lsuArb arbiter(// HPTW connection - .HPTWTranslate(MMUTranslate), - .HPTWRead(HPTWRead), - .HPTWPAdr(MMUPAdr), - .HPTWReadPTE(MMUReadPTE), - .HPTWReady(MMUReady), - .HPTWStall(HPTWStall), - // CPU connection - .MemRWM(MemRWM), - .Funct3M(Funct3M), - .AtomicM(AtomicM), - .MemAdrM(MemAdrM), - .StallW(StallW), - .WriteDataM(WriteDataM), - .ReadDataW(ReadDataW), - .CommittedM(CommittedM), - .SquashSCW(SquashSCW), - .DataMisalignedM(DataMisalignedM), - .DCacheStall(DCacheStall), - // LSU - .DisableTranslation(DisableTranslation), - .MemRWMtoLSU(MemRWMtoLSU), - .Funct3MtoLSU(Funct3MtoLSU), - .AtomicMtoLSU(AtomicMtoLSU), - .MemAdrMtoLSU(MemAdrMtoLSU), - .WriteDataMtoLSU(WriteDataMtoLSU), - .StallWtoLSU(StallWtoLSU), - .CommittedMfromLSU(CommittedMfromLSU), - .SquashSCWfromLSU(SquashSCWfromLSU), - .DataMisalignedMfromLSU(DataMisalignedMfromLSU), - .ReadDataWFromLSU(ReadDataWFromLSU), - .HPTWReadyfromLSU(HPTWReadyfromLSU), - .DataStall(DataStall), - .*); + lsu 
lsu(.clk(clk), + .reset(reset), + .StallM(StallM), + .FlushM(FlushM), + .StallW(StallW), + .FlushW(FlushW), + // connected to arbiter (reconnect to CPU) + .MemRWM(MemRWM), + .Funct3M(Funct3M), + .AtomicM(AtomicM), + .CommittedM(CommittedM), + .SquashSCW(SquashSCW), + .DataMisalignedM(DataMisalignedM), + .MemAdrM(MemAdrM), + .WriteDataM(WriteDataM), + .ReadDataW(ReadDataW), - lsu lsu(.MemRWM(MemRWMtoLSU), - .Funct3M(Funct3MtoLSU), - .AtomicM(AtomicMtoLSU), - .MemAdrM(MemAdrMtoLSU), - .WriteDataM(WriteDataMtoLSU), - .ReadDataW(ReadDataWFromLSU), - .StallW(StallWtoLSU), + // connected to ahb (all stay the same) + .CommitM(CommitM), + .MemPAdrM(MemPAdrM), + .MemReadM(MemReadM), + .MemWriteM(MemWriteM), + .AtomicMaskedM(AtomicMaskedM), + .MemAckW(MemAckW), + .HRDATAW(HRDATAW), + .SizeFromLSU(SizeFromLSU), // stays the same + .StallWfromLSU(StallWfromLSU), // stays the same + .DSquashBusAccessM(DSquashBusAccessM), // probalby removed after dcache implemenation? + // currently not connected (but will need to be used for lsu talking to ahb. + .HADDR(HADDR), + .HSIZE(HSIZE), + .HBURST(HBURST), + .HWRITE(HWRITE), - .CommittedM(CommittedMfromLSU), - .SquashSCW(SquashSCWfromLSU), - .DataMisalignedM(DataMisalignedMfromLSU), - .DisableTranslation(DisableTranslation), + // connect to csr or privilege and stay the same. 
+ .PrivilegeModeW(PrivilegeModeW), // connects to csr + .PMPCFG_ARRAY_REGW(PMPCFG_ARRAY_REGW), // connects to csr + .PMPADDR_ARRAY_REGW(PMPADDR_ARRAY_REGW), // connects to csr + // hptw keep i/o + .SATP_REGW(SATP_REGW), // from csr + .STATUS_MXR(STATUS_MXR), // from csr + .STATUS_SUM(STATUS_SUM), // from csr + .STATUS_MPRV(STATUS_MPRV), // from csr + .STATUS_MPP(STATUS_MPP), // from csr + + .DTLBFlushM(DTLBFlushM), // connects to privilege + .NonBusTrapM(NonBusTrapM), // connects to privilege + .DTLBLoadPageFaultM(DTLBLoadPageFaultM), // connects to privilege + .DTLBStorePageFaultM(DTLBStorePageFaultM), // connects to privilege + .LoadMisalignedFaultM(LoadMisalignedFaultM), // connects to privilege + .LoadAccessFaultM(LoadAccessFaultM), // connects to privilege + .StoreMisalignedFaultM(StoreMisalignedFaultM), // connects to privilege + .StoreAccessFaultM(StoreAccessFaultM), // connects to privilege + .PMALoadAccessFaultM(PMALoadAccessFaultM), + .PMAStoreAccessFaultM(PMAStoreAccessFaultM), + .PMPLoadAccessFaultM(PMPLoadAccessFaultM), + .PMPStoreAccessFaultM(PMPStoreAccessFaultM), + + // connected to hptw. Move to internal. 
+ .PCF(PCF), + .ITLBMissF(ITLBMissF), + .PageTableEntryF(PageTableEntryF), + .PageTypeF(PageTypeF), + .ITLBWriteF(ITLBWriteF), + .WalkerInstrPageFaultF(WalkerInstrPageFaultF), + .WalkerLoadPageFaultM(WalkerLoadPageFaultM), + .WalkerStorePageFaultM(WalkerStorePageFaultM), + + .DTLBHitM(DTLBHitM), // not connected remove + + .DCacheStall(DCacheStall)) // change to DCacheStall + ; - .DataStall(DataStall), - .HPTWReady(HPTWReadyfromLSU), - .Funct3MfromLSU(Funct3MfromLSU), - .StallWfromLSU(StallWfromLSU), -// .DataStall(LSUStall), - .* ); // data cache unit ahblite ebu( //.InstrReadF(1'b0), //.InstrRData(InstrF), // hook up InstrF later .ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking .WriteDataM(WriteDataM), - .MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]), + .MemSizeM(SizeFromLSU[1:0]), .UnsignedLoadM(SizeFromLSU[2]), .Funct7M(InstrM[31:25]), .HRDATAW(HRDATAW), .StallW(StallWfromLSU), diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 11b8e5620..2cf37c17f 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -514,12 +514,16 @@ string tests32f[] = '{ logic HMASTLOCK; logic HCLK, HRESETn; logic [`XLEN-1:0] PCW; + + logic [`XLEN-1:0] debug; + assign debug = dut.uncore.dtim.RAM[536872960]; flopenr #(`XLEN) PCWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.PCM, PCW); flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW); // check assertions for a legal configuration riscvassertions riscvassertions(); + logging logging(clk, reset, dut.uncore.HADDR, dut.uncore.HTRANS); // pick tests based on modes supported initial begin @@ -655,10 +659,7 @@ string tests32f[] = '{ // Check errors errors = (i == SIGNATURESIZE+1); // error if file is empty i = 0; - if (`XLEN == 32) - testadr = (`TIM_BASE+tests[test+1].atohex())/4; - else - testadr = 
(`TIM_BASE+tests[test+1].atohex())/8; + testadr = (`TIM_BASE+tests[test+1].atohex())/(`XLEN/8); /* verilator lint_off INFINITELOOP */ while (signature[i] !== 'bx) begin //$display("signature[%h] = %h", i, signature[i]); @@ -668,14 +669,16 @@ string tests32f[] = '{ // kind of hacky test for garbage right now errors = errors+1; $display(" Error on test %s result %d: adr = %h sim = %h, signature = %h", - tests[test], i, (testadr+i)*`XLEN/8, dut.uncore.dtim.RAM[testadr+i], signature[i]); + tests[test], i, (testadr+i)*(`XLEN/8), dut.uncore.dtim.RAM[testadr+i], signature[i]); $stop;//***debug end end i = i + 1; end /* verilator lint_on INFINITELOOP */ - if (errors == 0) $display("%s succeeded. Brilliant!!!", tests[test]); + if (errors == 0) begin + $display("%s succeeded. Brilliant!!!", tests[test]); + end else begin $display("%s failed with %d errors. :(", tests[test], errors); totalerrors = totalerrors+1; @@ -722,6 +725,7 @@ module riscvassertions(); // Legal number of PMP entries are 0, 16, or 64 initial begin assert (`PMP_ENTRIES == 0 || `PMP_ENTRIES==16 || `PMP_ENTRIES==64) else $error("Illegal number of PMP entries"); + assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double without supporting float"); end endmodule @@ -949,3 +953,13 @@ module instrNameDecTB( default: name = "ILLEGAL"; endcase endmodule + +module logging( + input logic clk, reset, + input logic [31:0] HADDR, + input logic [1:0] HTRANS); + + always @(posedge clk) + if (HTRANS != 2'b00 && HADDR == 0) + $display("Warning: access to memory address 0\n"); +endmodule diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 8f8a5d442..18ef74ffd 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -334,6 +334,8 @@ module testbench(); `SCAN_PC(data_file_PCM, scan_file_PCM, trashString, trashString, InstrMExpected, PCMexpected); end + logging logging(clk, reset, dut.uncore.HADDR, 
dut.uncore.HTRANS); + // ------------------- // Additional Hardware // ------------------- @@ -718,6 +720,16 @@ module testbench(); endfunction endmodule +module logging( + input logic clk, reset, + input logic [31:0] HADDR, + input logic [1:0] HTRANS); + + always @(posedge clk) + if (HTRANS != 2'b00 && HADDR == 0) + $display("Warning: access to memory address 0\n"); +endmodule + module instrTrackerTB( input logic clk, reset,