diff --git a/pipelined/config/rv32i/wally-config.vh b/pipelined/config/rv32i/wally-config.vh index a0ea4607e..03a58d1a9 100644 --- a/pipelined/config/rv32i/wally-config.vh +++ b/pipelined/config/rv32i/wally-config.vh @@ -38,11 +38,11 @@ `define IEEE754 0 // I -`define MISA (32'h00000100 | 1 << 20 | 1 << 18 ) +`define MISA (32'h00000104) `define ZICSR_SUPPORTED 1 -`define ZIFENCEI_SUPPORTED 1 +`define ZIFENCEI_SUPPORTED 0 `define COUNTERS 32 -`define ZICOUNTERS_SUPPORTED 1 +`define ZICOUNTERS_SUPPORTED 0 `define ZFH_SUPPORTED 0 // Microarchitectural Features @@ -50,11 +50,11 @@ `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 // LSU microarchitectural Features -`define BUS 1 -`define DCACHE 1 -`define ICACHE 1 -`define VIRTMEM_SUPPORTED 1 -`define VECTORED_INTERRUPTS_SUPPORTED 1 +`define BUS 0 +`define DCACHE 0 +`define ICACHE 0 +`define VIRTMEM_SUPPORTED 0 +`define VECTORED_INTERRUPTS_SUPPORTED 1 `define BIGENDIAN_SUPPORTED 0 // TLB configuration. Entries should be a power of 2 @@ -86,31 +86,31 @@ // Peripheral Addresses // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits -`define DTIM_SUPPORTED 1'b0 +`define DTIM_SUPPORTED 1'b1 `define DTIM_BASE 34'h80000000 -`define DTIM_RANGE 34'h00001FFF -`define IROM_SUPPORTED 1'b0 +`define DTIM_RANGE 34'h000007FF +`define IROM_SUPPORTED 1'b1 `define IROM_BASE 34'h80000000 -`define IROM_RANGE 34'h00001FFF -`define BOOTROM_SUPPORTED 1'b1 +`define IROM_RANGE 34'h000007FF +`define BOOTROM_SUPPORTED 1'b0 `define BOOTROM_BASE 34'h00001000 `define BOOTROM_RANGE 34'h00000FFF -`define UNCORE_RAM_SUPPORTED 1'b1 +`define UNCORE_RAM_SUPPORTED 1'b0 `define UNCORE_RAM_BASE 34'h80000000 `define UNCORE_RAM_RANGE 34'h07FFFFFF `define EXT_MEM_SUPPORTED 1'b0 `define EXT_MEM_BASE 34'h80000000 `define EXT_MEM_RANGE 34'h07FFFFFF -`define CLINT_SUPPORTED 1'b1 +`define CLINT_SUPPORTED 1'b0 `define CLINT_BASE 34'h02000000 `define CLINT_RANGE 
34'h0000FFFF -`define GPIO_SUPPORTED 1'b1 +`define GPIO_SUPPORTED 1'b0 `define GPIO_BASE 34'h10060000 `define GPIO_RANGE 34'h000000FF -`define UART_SUPPORTED 1'b1 +`define UART_SUPPORTED 1'b0 `define UART_BASE 34'h10000000 `define UART_RANGE 34'h00000007 -`define PLIC_SUPPORTED 1'b1 +`define PLIC_SUPPORTED 1'b0 `define PLIC_BASE 34'h0C000000 `define PLIC_RANGE 34'h03FFFFFF `define SDC_SUPPORTED 1'b0 diff --git a/pipelined/config/rv32ic/wally-config.vh b/pipelined/config/rv32ic/wally-config.vh index d865623d6..752425782 100644 --- a/pipelined/config/rv32ic/wally-config.vh +++ b/pipelined/config/rv32ic/wally-config.vh @@ -37,11 +37,11 @@ // IEEE 754 compliance `define IEEE754 0 -`define MISA (32'h00000104) +`define MISA (32'h00000104 | 1 << 20 | 1 << 18 ) `define ZICSR_SUPPORTED 1 -`define ZIFENCEI_SUPPORTED 0 +`define ZIFENCEI_SUPPORTED 1 `define COUNTERS 32 -`define ZICOUNTERS_SUPPORTED 0 +`define ZICOUNTERS_SUPPORTED 1 `define ZFH_SUPPORTED 0 // Microarchitectural Features @@ -49,7 +49,7 @@ `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 // LSU microarchitectural Features -`define BUS 0 +`define BUS 1 `define DCACHE 0 `define ICACHE 0 `define VIRTMEM_SUPPORTED 0 @@ -87,10 +87,10 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define DTIM_SUPPORTED 1'b1 `define DTIM_BASE 34'h80000000 -`define DTIM_RANGE 34'h007FFFFF +`define DTIM_RANGE 34'h00000FFF `define IROM_SUPPORTED 1'b1 `define IROM_BASE 34'h80000000 -`define IROM_RANGE 34'h007FFFFF +`define IROM_RANGE 34'h00003FFF `define BOOTROM_SUPPORTED 1'b0 `define BOOTROM_BASE 34'h00001000 `define BOOTROM_RANGE 34'h00000FFF @@ -103,13 +103,13 @@ `define CLINT_SUPPORTED 1'b1 `define CLINT_BASE 34'h02000000 `define CLINT_RANGE 34'h0000FFFF -`define GPIO_SUPPORTED 1'b0 +`define GPIO_SUPPORTED 1'b1 `define GPIO_BASE 34'h10060000 `define GPIO_RANGE 34'h000000FF -`define UART_SUPPORTED 1'b0 +`define UART_SUPPORTED 1'b1 `define UART_BASE 34'h10000000 `define UART_RANGE 
34'h00000007 -`define PLIC_SUPPORTED 1'b0 +`define PLIC_SUPPORTED 1'b1 `define PLIC_BASE 34'h0C000000 `define PLIC_RANGE 34'h03FFFFFF `define SDC_SUPPORTED 1'b0 diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh deleted file mode 100644 index b708bbb59..000000000 --- a/pipelined/config/rv64fp/wally-config.vh +++ /dev/null @@ -1,147 +0,0 @@ -////////////////////////////////////////// -// wally-config.vh -// -// Written: David_Harris@hmc.edu 4 January 2021 -// Modified: -// -// Purpose: Specify which features are configured -// Macros to determine which modes are supported based on MISA -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-/////////////////////////////////////////// - -// include shared configuration -`include "wally-shared.vh" - -`define FPGA 0 -`define QEMU 0 -`define DESIGN_COMPILER 0 - -// RV32 or RV64: XLEN = 32 or 64 -`define XLEN 32 - -// IEEE 754 compliance -`define IEEE754 0 - -// MISA RISC-V configuration per specification -// ZYXWVUTSRQPONMLKJIHGFEDCBA -`define MISA 32'b0000000000101000001000100101101 -`define ZICSR_SUPPORTED 1 -`define ZIFENCEI_SUPPORTED 1 -`define COUNTERS 32 -`define ZICOUNTERS_SUPPORTED 1 -`define ZFH_SUPPORTED 0 - -/// Microarchitectural Features -`define UARCH_PIPELINED 1 -`define UARCH_SUPERSCALR 0 -`define UARCH_SINGLECYCLE 0 - -// LSU microarchitectural Features -`define BUS 1 -`define DCACHE 1 -`define ICACHE 1 -`define VIRTMEM_SUPPORTED 1 -`define VECTORED_INTERRUPTS_SUPPORTED 1 -`define BIGENDIAN_SUPPORTED 1 - -// TLB configuration. Entries should be a power of 2 -`define ITLB_ENTRIES 32 -`define DTLB_ENTRIES 32 - -// Cache configuration. Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines -`define DCACHE_NUMWAYS 4 -`define DCACHE_WAYSIZEINBYTES 4096 -`define DCACHE_LINELENINBITS 512 -`define ICACHE_NUMWAYS 4 -`define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_LINELENINBITS 512 - -// Integer Divider Configuration -// DIV_BITSPERCYCLE must be 1, 2, or 4 -`define DIV_BITSPERCYCLE 4 - -// Legal number of PMP entries are 0, 16, or 64 -`define PMP_ENTRIES 64 - -// Address space -`define RESET_VECTOR 64'h0000000080000000 - -// Bus Interface width -`define AHBW 64 - -// WFI Timeout Wait -`define WFI_TIMEOUT_BIT 16 - -// Peripheral Physiccal Addresses -// Peripheral memory space extends from BASE to BASE+RANGE -// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits - -// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? 
-`define DTIM_SUPPORTED 1'b0 -`define DTIM_BASE 56'h80000000 -`define DTIM_RANGE 56'h00001FFF -`define IROM_SUPPORTED 1'b0 -`define IROM_BASE 56'h80000000 -`define IROM_RANGE 56'h00001FFF -`define BOOTROM_SUPPORTED 1'b1 -`define BOOTROM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTROM_RANGE 56'h00000FFF -`define UNCORE_RAM_SUPPORTED 1'b1 -`define UNCORE_RAM_BASE 56'h80000000 -`define UNCORE_RAM_RANGE 56'h7FFFFFFF -`define EXT_MEM_SUPPORTED 1'b0 -`define EXT_MEM_BASE 56'h80000000 -`define EXT_MEM_RANGE 56'h07FFFFFF -`define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 56'h02000000 -`define CLINT_RANGE 56'h0000FFFF -`define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 56'h10060000 -`define GPIO_RANGE 56'h000000FF -`define UART_SUPPORTED 1'b1 -`define UART_BASE 56'h10000000 -`define UART_RANGE 56'h00000007 -`define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 56'h0C000000 -`define PLIC_RANGE 56'h03FFFFFF -`define SDC_SUPPORTED 1'b0 -`define SDC_BASE 56'h00012100 -`define SDC_RANGE 56'h0000001F - -// Test modes - -// Tie GPIO outputs back to inputs -`define GPIO_LOOPBACK_TEST 1 - -// Hardware configuration -`define UART_PRESCALE 1 - -// Interrupt configuration -`define PLIC_NUM_SRC 10 -// comment out the following if >=32 sources -`define PLIC_NUM_SRC_LT_32 -`define PLIC_GPIO_ID 3 -`define PLIC_UART_ID 10 - -`define BPRED_ENABLED 1 -`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE -`define TESTSBP 0 -`define BPRED_SIZE 10 - -`define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/rv64i/wally-config.vh b/pipelined/config/rv64i/wally-config.vh index 05af9011f..b3b547cc9 100644 --- a/pipelined/config/rv64i/wally-config.vh +++ b/pipelined/config/rv64i/wally-config.vh @@ -37,30 +37,30 @@ // IEEE 754 compliance `define IEEE754 0 -// MISA RISC-V configuration per specification I -`define MISA (32'h00000100 | 1 << 20 | 1 << 18 ) 
-`define ZICSR_SUPPORTED 1 -`define ZIFENCEI_SUPPORTED 1 +// MISA RISC-V configuration per specification +`define MISA (32'h00000104) +`define ZICSR_SUPPORTED 0 +`define ZIFENCEI_SUPPORTED 0 `define COUNTERS 32 -`define ZICOUNTERS_SUPPORTED 1 +`define ZICOUNTERS_SUPPORTED 0 `define ZFH_SUPPORTED 0 -/// Microarchitectural Features +// Microarchitectural Features `define UARCH_PIPELINED 1 `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 // LSU microarchitectural Features -`define BUS 1 -`define DCACHE 1 -`define ICACHE 1 -`define VIRTMEM_SUPPORTED 1 +`define BUS 0 +`define DCACHE 0 +`define ICACHE 0 +`define VIRTMEM_SUPPORTED 0 `define VECTORED_INTERRUPTS_SUPPORTED 1 `define BIGENDIAN_SUPPORTED 0 // TLB configuration. Entries should be a power of 2 -`define ITLB_ENTRIES 32 -`define DTLB_ENTRIES 32 +`define ITLB_ENTRIES 0 +`define DTLB_ENTRIES 0 // Cache configuration. Sizes should be a power of two // typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines @@ -76,13 +76,13 @@ `define DIV_BITSPERCYCLE 4 // Legal number of PMP entries are 0, 16, or 64 -`define PMP_ENTRIES 64 +`define PMP_ENTRIES 0 // Address space `define RESET_VECTOR 64'h0000000080000000 // Bus Interface width -`define AHBW 64 +`define AHBW (`XLEN) // WFI Timeout Wait `define WFI_TIMEOUT_BIT 16 @@ -92,31 +92,31 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits // *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? 
-`define DTIM_SUPPORTED 1'b0 +`define DTIM_SUPPORTED 1'b1 `define DTIM_BASE 56'h80000000 -`define DTIM_RANGE 56'h00001FFF -`define IROM_SUPPORTED 1'b0 +`define DTIM_RANGE 56'h000007FF +`define IROM_SUPPORTED 1'b1 `define IROM_BASE 56'h80000000 -`define IROM_RANGE 56'h00001FFF -`define BOOTROM_SUPPORTED 1'b1 +`define IROM_RANGE 56'h000007FF +`define BOOTROM_SUPPORTED 1'b0 `define BOOTROM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder `define BOOTROM_RANGE 56'h00000FFF -`define UNCORE_RAM_SUPPORTED 1'b1 +`define UNCORE_RAM_SUPPORTED 1'b0 `define UNCORE_RAM_BASE 56'h80000000 `define UNCORE_RAM_RANGE 56'h7FFFFFFF `define EXT_MEM_SUPPORTED 1'b0 `define EXT_MEM_BASE 56'h80000000 `define EXT_MEM_RANGE 56'h07FFFFFF -`define CLINT_SUPPORTED 1'b1 +`define CLINT_SUPPORTED 1'b0 `define CLINT_BASE 56'h02000000 `define CLINT_RANGE 56'h0000FFFF -`define GPIO_SUPPORTED 1'b1 +`define GPIO_SUPPORTED 1'b0 `define GPIO_BASE 56'h10060000 `define GPIO_RANGE 56'h000000FF -`define UART_SUPPORTED 1'b1 +`define UART_SUPPORTED 1'b0 `define UART_BASE 56'h10000000 `define UART_RANGE 56'h00000007 -`define PLIC_SUPPORTED 1'b1 +`define PLIC_SUPPORTED 1'b0 `define PLIC_BASE 56'h0C000000 `define PLIC_RANGE 56'h03FFFFFF `define SDC_SUPPORTED 1'b0 @@ -138,7 +138,7 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define BPRED_ENABLED 1 +`define BPRED_ENABLED 0 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 `define BPRED_SIZE 10 diff --git a/pipelined/config/rv64ic/wally-config.vh b/pipelined/config/rv64ic/wally-config.vh deleted file mode 100644 index e820e57cf..000000000 --- a/pipelined/config/rv64ic/wally-config.vh +++ /dev/null @@ -1,146 +0,0 @@ -////////////////////////////////////////// -// wally-config.vh -// -// Written: David_Harris@hmc.edu 4 January 2021 -// Modified: -// -// Purpose: Specify which features are configured -// 
Macros to determine which modes are supported based on MISA -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-/////////////////////////////////////////// - -// include shared configuration -`include "wally-shared.vh" - -`define FPGA 0 -`define QEMU 0 -`define DESIGN_COMPILER 0 - -// RV32 or RV64: XLEN = 32 or 64 -`define XLEN 64 - -// IEEE 754 compliance -`define IEEE754 0 - -// MISA RISC-V configuration per specification -`define MISA (32'h00000104) -`define ZICSR_SUPPORTED 1 -`define ZIFENCEI_SUPPORTED 0 -`define COUNTERS 32 -`define ZICOUNTERS_SUPPORTED 0 -`define ZFH_SUPPORTED 0 - -// Microarchitectural Features -`define UARCH_PIPELINED 1 -`define UARCH_SUPERSCALR 0 -`define UARCH_SINGLECYCLE 0 - -// LSU microarchitectural Features -`define BUS 0 -`define DCACHE 0 -`define ICACHE 0 -`define VIRTMEM_SUPPORTED 0 -`define VECTORED_INTERRUPTS_SUPPORTED 1 -`define BIGENDIAN_SUPPORTED 0 - -// TLB configuration. Entries should be a power of 2 -`define ITLB_ENTRIES 0 -`define DTLB_ENTRIES 0 - -// Cache configuration. Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines -`define DCACHE_NUMWAYS 4 -`define DCACHE_WAYSIZEINBYTES 4096 -`define DCACHE_LINELENINBITS 512 -`define ICACHE_NUMWAYS 4 -`define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_LINELENINBITS 512 - -// Integer Divider Configuration -// DIV_BITSPERCYCLE must be 1, 2, or 4 -`define DIV_BITSPERCYCLE 4 - -// Legal number of PMP entries are 0, 16, or 64 -`define PMP_ENTRIES 0 - -// Address space -`define RESET_VECTOR 64'h0000000080000000 - -// Bus Interface width -`define AHBW 64 - -// WFI Timeout Wait -`define WFI_TIMEOUT_BIT 16 - -// Peripheral Physiccal Addresses -// Peripheral memory space extends from BASE to BASE+RANGE -// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits - -// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? 
-`define DTIM_SUPPORTED 1'b1 -`define DTIM_BASE 56'h80000000 -`define DTIM_RANGE 56'h007FFFFF -`define IROM_SUPPORTED 1'b1 -`define IROM_BASE 56'h80000000 -`define IROM_RANGE 56'h007FFFFF -`define BOOTROM_SUPPORTED 1'b0 -`define BOOTROM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTROM_RANGE 56'h00000FFF -`define UNCORE_RAM_SUPPORTED 1'b0 -`define UNCORE_RAM_BASE 56'h80000000 -`define UNCORE_RAM_RANGE 56'h7FFFFFFF -`define EXT_MEM_SUPPORTED 1'b0 -`define EXT_MEM_BASE 56'h80000000 -`define EXT_MEM_RANGE 56'h07FFFFFF -`define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 56'h02000000 -`define CLINT_RANGE 56'h0000FFFF -`define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 56'h10060000 -`define GPIO_RANGE 56'h000000FF -`define UART_SUPPORTED 1'b1 -`define UART_BASE 56'h10000000 -`define UART_RANGE 56'h00000007 -`define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 56'h0C000000 -`define PLIC_RANGE 56'h03FFFFFF -`define SDC_SUPPORTED 1'b0 -`define SDC_BASE 56'h00012100 -`define SDC_RANGE 56'h0000001F - -// Test modes - -// Tie GPIO outputs back to inputs -`define GPIO_LOOPBACK_TEST 1 - -// Hardware configuration -`define UART_PRESCALE 1 - -// Interrupt configuration -`define PLIC_NUM_SRC 10 -// comment out the following if >=32 sources -`define PLIC_NUM_SRC_LT_32 -`define PLIC_GPIO_ID 3 -`define PLIC_UART_ID 10 - -`define BPRED_ENABLED 1 -`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE -`define TESTSBP 0 -`define BPRED_SIZE 10 - -`define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index cd5bb05e3..b4fc2ceab 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -73,11 +73,18 @@ `define H_FMT 2'd2 // Floating point length FLEN and number of exponent (NE) and fraction (NF) bits +`define FLEN (`Q_SUPPORTED ? 
`Q_LEN : `D_SUPPORTED ? `D_LEN : `S_LEN) +`define NE (`Q_SUPPORTED ? `Q_NE : `D_SUPPORTED ? `D_NE : `S_NE) +`define NF (`Q_SUPPORTED ? `Q_NF : `D_SUPPORTED ? `D_NF : `S_NF) +`define FMT (`Q_SUPPORTED ? 2'd3 : `D_SUPPORTED ? 2'd1 : 2'd0) +`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `S_BIAS) +/* Delete once tested dh 10/10/22 + `define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `F_SUPPORTED ? `S_LEN : `H_LEN) `define NE (`Q_SUPPORTED ? `Q_NE : `D_SUPPORTED ? `D_NE : `F_SUPPORTED ? `S_NE : `H_NE) -`define NF (`Q_SUPPORTED ? `Q_NF : `D_SUPPORTED ? `D_NF : `F_SUPPORTED ? `S_NF : `H_NF) +`define NF (`Q_SUPPORTED ? `Q_NF : `D_SUPPORTED ? `D_NF : `F_SUPPORTED ? `S_NF : `H_NF) `define FMT (`Q_SUPPORTED ? 2'd3 : `D_SUPPORTED ? 2'd1 : `F_SUPPORTED ? 2'd0 : 2'd2) -`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS) +`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS)*/ // Floating point constants needed for FPU paramerterization `define FPSIZES ((32)'(`Q_SUPPORTED)+(32)'(`D_SUPPORTED)+(32)'(`F_SUPPORTED)+(32)'(`ZFH_SUPPORTED)) diff --git a/pipelined/regression/lint-wally b/pipelined/regression/lint-wally index 750486c4e..705fbd61e 100755 --- a/pipelined/regression/lint-wally +++ b/pipelined/regression/lint-wally @@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/ verilator=`which verilator` basepath=$(dirname $0)/.. -for config in rv32e rv64gc rv32gc rv32ic rv64fpquad; do +for config in rv32e rv64gc rv32gc rv32ic rv32i rv64i rv64fpquad; do echo "$config linting..." 
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then echo "Exiting after $config lint due to errors or warnings" diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally index 48dd7c26c..5318a0f76 100755 --- a/pipelined/regression/regression-wally +++ b/pipelined/regression/regression-wally @@ -73,6 +73,15 @@ for test in tests64gc: grepstr="All tests ran without failures") configs.append(tc) +tests64i = ["arch64i"] +for test in tests64i: + tc = TestCase( + name=test, + variant="rv64i", + cmd="vsim > {} -c <= $signed(mk2)) udigit = 4'b1000; // choose 2 + else if ($signed(Wmsbs) >= $signed(mk1)) udigit = 4'b0100; // choose 1 + else if ($signed(Wmsbs) >= $signed(mk0)) udigit = 4'b0000; // choose 0 + else if ($signed(Wmsbs) >= $signed(mkm1)) udigit = 4'b0010; // choose -1 + else udigit = 4'b0001; // choose -2 +endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv index 987f23576..8ed1664af 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -41,7 +41,7 @@ module fdivsqrtstage2 ( output logic un, output logic [`DIVb+1:0] CNext, output logic [`DIVb:0] UNext, UMNext, - output logic [`DIVb+3:0] WSA, WCA + output logic [`DIVb+3:0] WSNext, WCNext ); /* verilator lint_on UNOPTFLAT */ @@ -49,8 +49,7 @@ module fdivsqrtstage2 ( logic up, uz; logic [`DIVb+3:0] F; logic [`DIVb+3:0] AddIn; - - assign CNext = {1'b1, C[`DIVb+1:1]}; + logic [`DIVb+3:0] WSA, WCA; // Qmient Selection logic // Given partial remainder, select digit of +1, 0, or -1 (up, uz, un) @@ -61,8 +60,11 @@ module fdivsqrtstage2 ( // 0010 = -1 // 0001 = -2 fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un); + + // Sqrt F generation fdivsqrtfgen2 fgen2(.up, .uz, .C(CNext), .U, .UM, .F); + // Divisor multiple 
always_comb if (up) Dsel = DBar; else if (uz) Dsel = '0; // qz @@ -72,7 +74,13 @@ module fdivsqrtstage2 ( // WSA, WCA = WS + WC - qD assign AddIn = SqrtM ? F : Dsel; csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtM, WSA, WCA); + assign WSNext = WSA << 1; + assign WCNext = WCA << 1; + // Shift thermometer code C + assign CNext = {1'b1, C[`DIVb+1:1]}; + + // Unified On-The-Fly Converter to accumulate result fdivsqrtuotfc2 uotfc2(.up, .uz, .C(CNext), .U, .UM, .UNext, .UMNext); endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv index e463762a2..05792293c 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -30,7 +30,6 @@ `include "wally-config.vh" -/* verilator lint_off UNOPTFLAT */ module fdivsqrtstage4 ( input logic [`DIVN-2:0] D, input logic [`DIVb+3:0] DBar, D2, DBar2, @@ -41,17 +40,18 @@ module fdivsqrtstage4 ( input logic SqrtM, j1, output logic un, output logic [`DIVb:0] UNext, UMNext, - output logic [`DIVb+3:0] WSA, WCA + output logic [`DIVb+3:0] WSNext, WCNext ); - /* verilator lint_on UNOPTFLAT */ logic [`DIVb+3:0] Dsel; logic [3:0] udigit; logic [`DIVb+3:0] F; logic [`DIVb+3:0] AddIn; logic [4:0] Smsbs; + logic [2:0] Dmsbs; + logic [7:0] WCmsbs, WSmsbs; logic CarryIn; - assign CNext = {2'b11, C[`DIVb+1:2]}; + logic [`DIVb+3:0] WSA, WCA; // Digit Selection logic // u encoding: @@ -61,28 +61,40 @@ module fdivsqrtstage4 ( // 0010 = -1 // 0001 = -2 assign Smsbs = U[`DIVb:`DIVb-4]; - fdivsqrtqsel4 qsel4(.D, .Smsbs, .WS, .WC, .Sqrt(SqrtM), .j1, .udigit); + assign Dmsbs = D[`DIVN-2:`DIVN-4]; + assign WCmsbs = WC[`DIVb+3:`DIVb-4]; + assign WSmsbs = WS[`DIVb+3:`DIVb-4]; + + fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .Sqrt(SqrtM), .j1, .udigit); + assign un = 0; // unused for radix 4 + + // F generation logic fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); + // Divisor multiple logic always_comb - 
case (udigit) - 4'b1000: Dsel = DBar2; - 4'b0100: Dsel = DBar; - 4'b0000: Dsel = '0; - 4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; - 4'b0001: Dsel = D2; - default: Dsel = 'x; - endcase + case (udigit) + 4'b1000: Dsel = DBar2; + 4'b0100: Dsel = DBar; + 4'b0000: Dsel = '0; + 4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; + 4'b0001: Dsel = D2; + default: Dsel = 'x; + endcase - // Partial Product Generation - // WSA, WCA = WS + WC - qD + // Residual Update + // {WS, WC}Next = (WS + WC - qD or F) << 2 assign AddIn = SqrtM ? F : Dsel; assign CarryIn = ~SqrtM & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA); - - fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); + assign WSNext = WSA << 2; + assign WCNext = WCA << 2; - assign un = 0; // unused for radix 4 + // Shift thermometer code C + assign CNext = {2'b11, C[`DIVb+1:2]}; + + // On-the-fly converter to accumulate result + fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); endmodule diff --git a/pipelined/src/hazard/hazard.sv b/pipelined/src/hazard/hazard.sv index b9a6d9575..550688b4c 100644 --- a/pipelined/src/hazard/hazard.sv +++ b/pipelined/src/hazard/hazard.sv @@ -69,7 +69,7 @@ module hazard( assign StallECause = (DivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?) // WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. It could also terminate with TW trap // assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)); // | FDivBusyE; - assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)) | FDivBusyE; + assign StallMCause = ((wfiM | FDivBusyE) & (~TrapM & ~IntPendingM)); //*** Ross: should FDivBusyE trigger StallECause rather than StallMCause similar to DivBusyE? 
assign StallWCause = LSUStallM | IFUStallF; assign #1 StallF = StallFCause | StallD; diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 8bda30b0d..b5163a46e 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -91,7 +91,7 @@ module ifu ( logic [`XLEN-1:0] PCPlus2or4F, PCLinkD; logic [`XLEN-3:0] PCPlusUpperF; logic CompressedF; - logic [31:0] InstrRawD, InstrRawF; + logic [31:0] InstrRawD, InstrRawF, IROMInstrF, ICacheInstrF; logic [31:0] FinalInstrRawF; logic [1:0] IFURWF; @@ -118,6 +118,8 @@ module ifu ( // branch predictor signal logic [`XLEN-1:0] PCNext1F, PCNext2F, PCNext0F; logic BusCommittedF, CacheCommittedF; + logic SelIROM; + assign PCFExt = {2'b00, PCFSpill}; @@ -128,7 +130,7 @@ module ifu ( if(`C_SUPPORTED) begin : SpillSupport - spillsupport #(`ICACHE) spillsupport(.clk, .reset, .StallF, .PCF, .PCPlusUpperF, .PCNextF, .InstrRawF, + spillsupport #(`ICACHE) spillsupport(.clk, .reset, .StallF, .PCF, .PCPlusUpperF, .PCNextF, .InstrRawF(InstrRawF), .InstrDAPageFaultF, .IFUCacheBusStallF, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpillSupport @@ -166,7 +168,7 @@ module ifu ( .TLBFlush, .PhysicalAddress(PCPF), .TLBMiss(ITLBMissF), - .Cacheable(CacheableF), .Idempotent(), .AtomicAllowed(), + .Cacheable(CacheableF), .Idempotent(), .AtomicAllowed(), .SelTIM(SelIROM), .InstrAccessFaultF, .LoadAccessFaultM(), .StoreAmoAccessFaultM(), .InstrPageFaultF, .LoadPageFaultM(), .StoreAmoPageFaultM(), .LoadMisalignedFaultM(), .StoreAmoMisalignedFaultM(), @@ -178,6 +180,7 @@ module ifu ( assign {ITLBMissF, InstrAccessFaultF, InstrPageFaultF, InstrDAPageFaultF} = '0; assign PCPF = PCFExt[`PA_BITS-1:0]; assign CacheableF = '1; + assign SelIROM = '0; end //////////////////////////////////////////////////////////////////////////////////////////////// @@ -190,13 +193,13 @@ module ifu ( // delay the interrupt until the LSU is in a clean state. 
assign CommittedF = CacheCommittedF | BusCommittedF; -// logic [`XLEN-1:0] InstrRawF; -// assign InstrRawF = InstrRawF[31:0]; + logic IgnoreRequest; + assign IgnoreRequest = ITLBMissF | TrapM; // The IROM uses untranslated addresses, so it is not compatible with virtual memory. if (`IROM_SUPPORTED) begin : irom assign IFURWF = 2'b10; - irom irom(.clk, .reset, .ce(~CPUBusy), .Adr(PCNextFSpill[`XLEN-1:0]), .ReadData(FinalInstrRawF)); + irom irom(.clk, .reset, .ce(~CPUBusy | reset), .Adr(PCNextFSpill[`XLEN-1:0]), .ReadData(IROMInstrF)); end else begin assign IFURWF = 2'b10; @@ -209,12 +212,11 @@ module ifu ( logic [LINELEN-1:0] FetchBuffer; logic [`PA_BITS-1:0] ICacheBusAdr; logic ICacheBusAck; - logic SelUncachedAdr; logic [1:0] CacheBusRW, BusRW; - logic IgnoreRequest; + - assign IgnoreRequest = ITLBMissF | TrapM; - assign BusRW = IFURWF & ~{IgnoreRequest, IgnoreRequest} & ~{CacheableF, CacheableF}; + //assign BusRW = IFURWF & ~{IgnoreRequest, IgnoreRequest} & ~{CacheableF, CacheableF} & ~{SelIROM, SelIROM}; + assign BusRW = ~IgnoreRequest & ~CacheableF & ~SelIROM ? 
IFURWF : '0; cache #(.LINELEN(`ICACHE_LINELENINBITS), .NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS), .NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0)) @@ -222,7 +224,7 @@ module ifu ( .FetchBuffer, .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), .CacheBusRW, - .ReadDataWord(FinalInstrRawF), + .ReadDataWord(ICacheInstrF), .Cacheable(CacheableF), .SelReplay('0), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), @@ -238,26 +240,30 @@ module ifu ( .HRDATA, .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .Funct3(3'b010), .HADDR(IFUHADDR), .HREADY(IFUHREADY), .HWRITE(IFUHWRITE), .CacheBusAdr(ICacheBusAdr), - .WordCount(), .SelUncachedAdr, .SelBusWord(), + .WordCount(), .Cacheable(CacheableF), .SelBusWord(), .CacheBusAck(ICacheBusAck), .FetchBuffer, .PAdr(PCPF), .BusRW, .CPUBusy, .BusStall, .BusCommitted(BusCommittedF)); - mux2 #(32) UnCachedDataMux(.d0(FinalInstrRawF), .d1(FetchBuffer[32-1:0]), - .s(SelUncachedAdr), .y(InstrRawF[31:0])); + mux3 #(32) UnCachedDataMux(.d0(ICacheInstrF), .d1(FetchBuffer[32-1:0]), .d2(IROMInstrF), + .s({SelIROM, ~CacheableF}), .y(InstrRawF[31:0])); end else begin : passthrough assign IFUHADDR = PCPF; logic CaptureEn; + logic [31:0] FetchBuffer; logic [1:0] BusRW; - assign BusRW = IFURWF & ~{ITLBMissF, ITLBMissF} & ~{TrapM, TrapM}; + assign BusRW = ~IgnoreRequest & ~SelIROM ? 
IFURWF : '0; +// assign BusRW = IFURWF & ~{IgnoreRequest, IgnoreRequest} & ~{SelIROM, SelIROM}; assign IFUHSIZE = 3'b010; ahbinterface #(0) ahbinterface(.HCLK(clk), .HRESETn(~reset), .HREADY(IFUHREADY), .HRDATA(HRDATA), .HTRANS(IFUHTRANS), .HWRITE(IFUHWRITE), .HWDATA(), .HWSTRB(), .BusRW, .ByteMask(), .WriteData('0), - .CPUBusy, .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(InstrRawF[31:0])); + .CPUBusy, .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer)); + if(`IROM_SUPPORTED) mux2 #(32) UnCachedDataMux2(FetchBuffer, IROMInstrF, SelIROM, InstrRawF); + else assign InstrRawF = FetchBuffer; assign IFUHBURST = 3'b0; assign {ICacheFetchLine, ICacheStallF, FinalInstrRawF} = '0; assign {ICacheMiss, ICacheAccess} = '0; @@ -265,7 +271,7 @@ module ifu ( end else begin : nobus // block: bus assign BusStall = '0; assign {ICacheStallF, ICacheMiss, ICacheAccess} = '0; - assign InstrRawF = FinalInstrRawF; + assign InstrRawF = IROMInstrF; end assign IFUCacheBusStallF = ICacheStallF | BusStall; diff --git a/pipelined/src/ifu/irom.sv b/pipelined/src/ifu/irom.sv index f136e64f3..af262ba8b 100644 --- a/pipelined/src/ifu/irom.sv +++ b/pipelined/src/ifu/irom.sv @@ -36,8 +36,17 @@ module irom( ); localparam ADDR_WDITH = $clog2(`IROM_RANGE/8); - localparam OFFSET = $clog2(`LLEN/8); + localparam OFFSET = $clog2(`XLEN/8); - rom1p1r #(ADDR_WDITH, 32) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadData)); + logic [`XLEN-1:0] ReadDataFull; + + rom1p1r #(ADDR_WDITH, `XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataFull)); + if (`XLEN == 32) assign ReadData = ReadDataFull; + // have to delay Adr[OFFSET-1] by 1 cycle + else begin + logic AdrD; + flopen #(1) AdrReg(clk, ce, Adr[OFFSET-1], AdrD); + assign ReadData = AdrD ? 
ReadDataFull[63:32] : ReadDataFull[31:0]; + end endmodule diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 0d08e3fae..6e5430cbb 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -117,6 +117,7 @@ module lsu ( logic [`LLEN-1:0] ReadDataM; logic [(`LLEN-1)/8:0] ByteMaskM; logic SelReplay; + logic SelDTIM; flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); assign IEUAdrExtM = {2'b00, IEUAdrM}; @@ -153,7 +154,6 @@ module lsu ( assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; // MMU and Misalignment fault logic required if privileged unit exists - // *** DH: This is too strong a requirement. Separate MMU in `VIRTMEM_SUPPORTED from simpler faults in `ZICSR_SUPPORTED if(`ZICSR_SUPPORTED == 1) begin : dmmu logic DisableTranslation; assign DisableTranslation = SelHPTW | FlushDCacheM; @@ -168,7 +168,7 @@ module lsu ( .TLBFlush(sfencevmaM), .PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), - .Cacheable(CacheableM), .Idempotent(), .AtomicAllowed(), + .Cacheable(CacheableM), .Idempotent(), .AtomicAllowed(), .SelTIM(SelDTIM), .InstrAccessFaultF(), .LoadAccessFaultM, .StoreAmoAccessFaultM, .InstrPageFaultF(),.LoadPageFaultM, .StoreAmoPageFaultM, .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, // *** these faults need to be supressed during hptw. @@ -190,8 +190,10 @@ module lsu ( assign {DTLBMissM, LoadAccessFaultM, StoreAmoAccessFaultM, LoadMisalignedFaultM, StoreAmoMisalignedFaultM} = '0; assign {LoadPageFaultM, StoreAmoPageFaultM} = '0; - assign PAdrM = IHAdrM; + assign PAdrM = IHAdrM[`PA_BITS-1:0]; assign CacheableM = '1; + assign SelDTIM = `DTIM_SUPPORTED & ~`BUS; // if no pma then select dtim if there is a DTIM. If there is + // a bus then this is always 0. Cannot have both without PMA. 
end ///////////////////////////////////////////////////////////////////////////////////////////// @@ -200,78 +202,101 @@ module lsu ( ///////////////////////////////////////////////////////////////////////////////////////////// logic [`LLEN-1:0] LSUWriteDataM, LittleEndianWriteDataM; logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM; - logic [`LLEN-1:0] ReadDataWordMuxM; + logic [`LLEN-1:0] ReadDataWordMuxM, DTIMReadDataWordM, DCacheReadDataWordM; logic IgnoreRequest; assign IgnoreRequest = IgnoreRequestTLB | TrapM; if (`DTIM_SUPPORTED) begin : dtim logic [`PA_BITS-1:0] DTIMAdr; - + logic [1:0] DTIMMemRWM; + // The DTIM uses untranslated addresses, so it is not compatible with virtual memory. - assign DTIMAdr = MemRWM[0] ? IEUAdrExtM : IEUAdrExtE; // zero extend or contract to PA_BITS - dtim dtim(.clk, .reset, .ce(~CPUBusy), .MemRWM, + assign DTIMAdr = MemRWM[0] ? IEUAdrExtM[`PA_BITS-1:0] : IEUAdrExtE[`PA_BITS-1:0]; // zero extend or contract to PA_BITS + assign DTIMMemRWM = SelDTIM & ~IgnoreRequest ? LSURWM : '0; + dtim dtim(.clk, .reset, .ce(~CPUBusy), .MemRWM(DTIMMemRWM), .Adr(DTIMAdr), .TrapM, .WriteDataM(LSUWriteDataM), - .ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .ByteMaskM(ByteMaskM[`XLEN/8-1:0])); + .ReadDataWordM(DTIMReadDataWordM[`XLEN-1:0]), .ByteMaskM(ByteMaskM[`XLEN/8-1:0])); end else begin end if (`BUS) begin : bus - localparam integer WORDSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`XLEN : 1; - localparam integer LOGBWPL = `DCACHE ? $clog2(WORDSPERLINE) : 1; + localparam integer LLENWORDSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`LLEN : 1; + localparam integer LLENLOGBWPL = `DCACHE ? $clog2(LLENWORDSPERLINE) : 1; + localparam integer AHBWWORDSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`AHBW : 1; + localparam integer AHBWLOGBWPL = `DCACHE ? $clog2(AHBWWORDSPERLINE) : 1; if(`DCACHE) begin : dcache localparam integer LINELEN = `DCACHE ? 
`DCACHE_LINELENINBITS : `XLEN; logic [LINELEN-1:0] FetchBuffer; logic [`PA_BITS-1:0] DCacheBusAdr; logic DCacheWriteLine; logic DCacheFetchLine; - logic [LOGBWPL-1:0] WordCount; - logic SelUncachedAdr, DCacheBusAck; + logic [AHBWLOGBWPL-1:0] WordCount; + logic DCacheBusAck; logic SelBusWord; logic [`XLEN-1:0] PreHWDATA; //*** change name logic [`XLEN/8-1:0] ByteMaskMDelay; logic [1:0] CacheBusRW, BusRW; + localparam integer LLENPOVERAHBW = `LLEN / `AHBW; + logic CacheableOrFlushCacheM; - assign BusRW = LSURWM & ~{IgnoreRequest, IgnoreRequest} & ~{CacheableM, CacheableM}; + assign BusRW = ~CacheableM & ~IgnoreRequest & ~SelDTIM ? LSURWM : '0; + assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), - .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( + .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .SelBusWord, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(IEUAdrE[11:0]), .PAdr(PAdrM), - .ByteMask(ByteMaskM), .WordCount, - .FinalWriteData(LSUWriteDataM), .Cacheable(CacheableM), .SelReplay, + .ByteMask(ByteMaskM), .WordCount(WordCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), + .FinalWriteData(LSUWriteDataM), .Cacheable(CacheableOrFlushCacheM), .SelReplay, .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequestTLB, .TrapM, .CacheCommitted(DCacheCommittedM), - .CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM), + .CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM), .FetchBuffer, .CacheBusRW, .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0)); - ahbcacheinterface #(WORDSPERLINE, LINELEN, LOGBWPL, `DCACHE) ahbcacheinterface( + ahbcacheinterface #(.WORDSPERLINE(AHBWWORDSPERLINE), .LINELEN(LINELEN), .LOGWPL(AHBWLOGBWPL), .CACHE_ENABLED(`DCACHE)) ahbcacheinterface( 
.HCLK(clk), .HRESETn(~reset), .HRDATA, .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), .WordCount, .SelBusWord, .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), - .SelUncachedAdr, .BusRW, .CPUBusy, + .Cacheable(CacheableOrFlushCacheM), .BusRW, .CPUBusy, .BusStall, .BusCommitted(BusCommittedM)); - mux2 #(`LLEN) UnCachedDataMux(.d0(ReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, FetchBuffer[`XLEN-1:0] }), - .s(SelUncachedAdr), .y(ReadDataWordMuxM)); - mux2 #(`XLEN) LSUHWDATAMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(LSUWriteDataM[`XLEN-1:0]), - .s(SelUncachedAdr), .y(PreHWDATA)); + // FetchBuffer[`AHBW-1:0] needs to be duplicated LLENPOVERAHBW times. + // DTIMReadDataWordM should be increased to LLEN. + mux3 #(`LLEN) UnCachedDataMux(.d0(DCacheReadDataWordM), .d1({LLENPOVERAHBW{FetchBuffer[`XLEN-1:0]}}), + .d2({{`LLEN-`XLEN{1'b0}}, DTIMReadDataWordM[`XLEN-1:0]}), + .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); - flopen #(`XLEN) wdreg(clk, LSUHREADY, PreHWDATA, LSUHWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN + // When AHBW is less than LLEN need extra muxes to select the subword from cache's read data. + logic [`AHBW-1:0] DCacheReadDataWordAHB; + if(LLENPOVERAHBW > 1) begin + logic [`AHBW-1:0] AHBWordSets [(LLENPOVERAHBW)-1:0]; + genvar index; + for (index = 0; index < LLENPOVERAHBW; index++) begin:readdatalinesetsmux + assign AHBWordSets[index] = DCacheReadDataWordM[(index*`AHBW)+`AHBW-1: (index*`AHBW)]; + end + assign DCacheReadDataWordAHB = AHBWordSets[WordCount[$clog2(LLENPOVERAHBW)-1:0]]; + end else assign DCacheReadDataWordAHB = DCacheReadDataWordM[`AHBW-1:0]; + mux2 #(`XLEN) LSUHWDATAMux(.d0(DCacheReadDataWordAHB), .d1(LSUWriteDataM[`AHBW-1:0]), + .s(~(CacheableOrFlushCacheM)), .y(PreHWDATA)); - // *** bummer need a second byte mask for bus as it is XLEN rather than LLEN. 
+ flopen #(`AHBW) wdreg(clk, LSUHREADY, PreHWDATA, LSUHWDATA); // delay HWDATA by 1 cycle per spec + + // *** bummer need a second byte mask for bus as it is AHBW rather than LLEN. // probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0. - logic [`XLEN/8-1:0] BusByteMaskM; - swbytemask #(`XLEN) busswbytemask(.Size(LSUHSIZE), .Adr(PAdrM[$clog2(`XLEN/8)-1:0]), .ByteMask(BusByteMaskM)); + logic [`AHBW/8-1:0] BusByteMaskM; + swbytemask #(`AHBW) busswbytemask(.Size(LSUHSIZE), .Adr(PAdrM[$clog2(`AHBW/8)-1:0]), .ByteMask(BusByteMaskM)); - flop #(`XLEN/8) HWSTRBReg(clk, BusByteMaskM[`XLEN/8-1:0], LSUHWSTRB); + flop #(`AHBW/8) HWSTRBReg(clk, BusByteMaskM[`AHBW/8-1:0], LSUHWSTRB); end else begin : passthrough // just needs a register to hold the value from the bus logic CaptureEn; logic [1:0] BusRW; - assign BusRW = LSURWM & ~{IgnoreRequest, IgnoreRequest}; + logic [`XLEN-1:0] FetchBuffer; + assign BusRW = ~IgnoreRequest & ~SelDTIM ? LSURWM : '0; +// assign BusRW = LSURWM & ~{IgnoreRequest, IgnoreRequest} & ~{SelDTIM, SelDTIM}; assign LSUHADDR = PAdrM; assign LSUHSIZE = LSUFunct3M; @@ -279,15 +304,16 @@ module lsu ( ahbinterface #(1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .HREADY(LSUHREADY), .HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM), - .CPUBusy, .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(ReadDataWordM)); - - assign ReadDataWordMuxM = ReadDataWordM; // from byte swapping + .CPUBusy, .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer)); + + if(`DTIM_SUPPORTED) mux2 #(`XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM, SelDTIM, ReadDataWordMuxM); + else assign ReadDataWordMuxM = FetchBuffer[`XLEN-1:0]; assign LSUHBURST = 3'b0; assign {DCacheStallM, DCacheCommittedM, DCacheMiss, DCacheAccess} = '0; end end else begin: nobus // block: bus assign LSUHWDATA = '0; - assign ReadDataWordMuxM = ReadDataWordM; + assign 
ReadDataWordMuxM = DTIMReadDataWordM; assign {BusStall, BusCommittedM} = '0; assign {DCacheMiss, DCacheAccess} = '0; assign {DCacheStallM, DCacheCommittedM} = '0; @@ -311,9 +337,6 @@ module lsu ( ///////////////////////////////////////////////////////////////////////////////////////////// // Subword Accesses ///////////////////////////////////////////////////////////////////////////////////////////// - // *** Ross Thompson: I think swr needs to be modified to support bigendian. Both the subword - // selected and the sign extension are probably wrong. I think it should be an invertion of - // the address bits and a different bit selected for extension. subwordread subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwrite subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); @@ -332,6 +355,7 @@ module lsu ( // hart works little-endian internally // swap the bytes when read from big-endian memory ///////////////////////////////////////////////////////////////////////////////////////////// + if (`BIGENDIAN_SUPPORTED) begin:endian endianswap #(`LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); endianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); diff --git a/pipelined/src/lsu/subwordread.sv b/pipelined/src/lsu/subwordread.sv index 8d5ed4de5..610345745 100644 --- a/pipelined/src/lsu/subwordread.sv +++ b/pipelined/src/lsu/subwordread.sv @@ -47,7 +47,7 @@ module subwordread // Funct3M[1:0] is the size of the memory access. 
assign PAdrSwap = PAdrM ^ {3{BigEndianM}}; - if (`XLEN == 64) begin:swrmux + if (`LLEN == 64) begin:swrmux // ByteMe mux always_comb case(PAdrSwap[2:0]) @@ -85,19 +85,10 @@ module subwordread always_comb case(Funct3M) 3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: if(`ZFH_SUPPORTED) - ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh - 3'b010: if(`F_SUPPORTED) - ReadDataM = {{`LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - else ReadDataM = {{`LLEN-32{WordM[31]}}, WordM[31:0]}; // lw - 3'b011: if(`D_SUPPORTED) - ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - else ReadDataM = {{`LLEN-64{DblWordM[63]}}, DblWordM[63:0]}; // ld/fld - 3'b100: if(`Q_SUPPORTED) - ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - else - ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b001: ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{`LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq 3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu 3'b110: ReadDataM = {{`LLEN-32{1'b0}}, WordM[31:0]}; // lwu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen @@ -124,12 +115,8 @@ module subwordread always_comb case(Funct3M) 3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: if(`ZFH_SUPPORTED) - ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh - 3'b010: if(`F_SUPPORTED) - ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw - else ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]}; // lw + 3'b001: ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw 3'b011: ReadDataM = ReadDataWordMuxM; // fld 3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu diff --git a/pipelined/src/mmu/mmu.sv b/pipelined/src/mmu/mmu.sv index dbf23e98e..da90ee2ca 100644 --- a/pipelined/src/mmu/mmu.sv +++ b/pipelined/src/mmu/mmu.sv @@ -66,7 +66,7 @@ module mmu #(parameter TLB_ENTRIES = 8, // number of TLB Entries // Physical address outputs output logic [`PA_BITS-1:0] PhysicalAddress, output logic TLBMiss, - output logic Cacheable, Idempotent, AtomicAllowed, + output logic Cacheable, Idempotent, AtomicAllowed, SelTIM, // Faults output logic InstrAccessFaultF, LoadAccessFaultM, StoreAmoAccessFaultM, @@ -126,7 +126,7 @@ module mmu #(parameter TLB_ENTRIES = 8, // number of TLB Entries pmachecker pmachecker(.PhysicalAddress, .Size, .AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, - .Cacheable, .Idempotent, .AtomicAllowed, + .Cacheable, .Idempotent, .AtomicAllowed, .SelTIM, .PMAInstrAccessFaultF, .PMALoadAccessFaultM, 
.PMAStoreAmoAccessFaultM); pmpchecker pmpchecker(.PhysicalAddress, .PrivilegeModeW, diff --git a/pipelined/src/mmu/pmachecker.sv b/pipelined/src/mmu/pmachecker.sv index 455f510dc..df6eb271c 100644 --- a/pipelined/src/mmu/pmachecker.sv +++ b/pipelined/src/mmu/pmachecker.sv @@ -38,7 +38,7 @@ module pmachecker ( input logic [`PA_BITS-1:0] PhysicalAddress, input logic [1:0] Size, input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // *** atomicaccessM is unused but might want to stay in for future use. - output logic Cacheable, Idempotent, AtomicAllowed, + output logic Cacheable, Idempotent, AtomicAllowed, SelTIM, output logic PMAInstrAccessFaultF, output logic PMALoadAccessFaultM, output logic PMAStoreAmoAccessFaultM @@ -60,6 +60,7 @@ module pmachecker ( assign Cacheable = SelRegions[8] | SelRegions[7] | SelRegions[6]; assign Idempotent = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[6]; assign AtomicAllowed = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[6]; + assign SelTIM = SelRegions[10] | SelRegions[9]; // Detect access faults assign PMAAccessFault = (SelRegions[0]) & AccessRWX; diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 0f61f452d..277ca4266 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -414,5 +414,6 @@ module wallypipelinedcore ( assign FDivBusyE = 0; assign IllegalFPUInstrM = 1; assign SetFflagsM = 0; + assign FpLoadStoreM = 0; end endmodule diff --git a/setup.sh b/setup.sh index b3f9fb11e..2ae92f1bb 100755 --- a/setup.sh +++ b/setup.sh @@ -31,7 +31,7 @@ export PATH=/cad/mentor/questa_sim-2022.1_1/questasim/bin:$PATH # Change this export PATH=/cad/mentor/questa_sim-2021.2_1/questasim/bin:$PATH # Change this for your path to Modelsim, or delete export MGLS_LICENSE_FILE=1717@solidworks.eng.hmc.edu # Change this to your Siemens license server export PATH=/cad/synopsys/SYN/bin:$PATH # Change this for 
your path to Design Compiler -export SNPSLMD_LICENSE_FILE=27020@134.173.38.214 +export SNPSLMD_LICENSE_FILE=27020@134.173.38.184 # Change this to your license manager file # Imperas; put this in if you are using it #export PATH=$RISCV/imperas-riscv-tests/riscv-ovpsim-plus/bin/Linux64:$PATH