From c5aeb08e5c88e47471500a833fee04f0421d9c9a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 24 May 2023 12:44:42 -0500 Subject: [PATCH 01/20] Trying to figure out why the parameterization slowed down modelsim so much. --- config/buildroot/config.vh | 155 +++++++++++++ config/rv32e/config.vh | 155 +++++++++++++ config/rv32e/rv32e-config.vh | 178 +++++++++++++++ config/rv32gc/config.vh | 158 +++++++++++++ config/rv32i/config.vh | 154 +++++++++++++ config/rv32imc/config.vh | 153 +++++++++++++ config/rv64fpquad/config.vh | 156 +++++++++++++ config/rv64gc/config.vh | 159 +++++++++++++ config/rv64i/config.vh | 156 +++++++++++++ config/shared/parameter-defs.vh | 164 ++++++++++++++ config/shared/test-shared.vh | 119 ++++++++++ sim/lint-wally | 2 +- src/wally/cvw.sv | 387 ++++++++++++++++++-------------- src/wally/wallypipelinedcore.sv | 2 +- src/wally/wallypipelinedsoc.sv | 19 +- testbench/testbench.sv | 4 +- 16 files changed, 1940 insertions(+), 181 deletions(-) create mode 100644 config/buildroot/config.vh create mode 100644 config/rv32e/config.vh create mode 100644 config/rv32e/rv32e-config.vh create mode 100644 config/rv32gc/config.vh create mode 100644 config/rv32i/config.vh create mode 100644 config/rv32imc/config.vh create mode 100644 config/rv64fpquad/config.vh create mode 100644 config/rv64gc/config.vh create mode 100644 config/rv64i/config.vh create mode 100644 config/shared/parameter-defs.vh create mode 100644 config/shared/test-shared.vh diff --git a/config/buildroot/config.vh b/config/buildroot/config.vh new file mode 100644 index 000000000..10e20a362 --- /dev/null +++ b/config/buildroot/config.vh @@ -0,0 +1,155 @@ +////////////////////////////////////////// +// wally-config.vh +// +// Written: David_Harris@hmc.edu 4 January 2021 +// Modified: +// +// Purpose: Specify which features are configured +// Macros to determine which modes are supported based on MISA +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +// include shared configuration +`include "wally-shared.vh" + +localparam FPGA = 1; +localparam QEMU = 0; +// RV32 or RV64: XLEN = 32 or 64 +localparam XLEN = 32'd64; + +// IEEE 754 compliance +localparam IEEE754 = 0; + +localparam MISA = (32'h0014112D); +localparam ZICSR_SUPPORTED = 1; +localparam ZIFENCEI_SUPPORTED = 1; +localparam ZICOUNTERS_SUPPORTED = 1; +localparam COUNTERS = 12'd32; +localparam ZFH_SUPPORTED = 0; +localparam SSTC_SUPPORTED = 0; + +// LSU microarchitectural Features +localparam BUS_SUPPORTED = 1; +localparam DCACHE_SUPPORTED = 1; +localparam ICACHE_SUPPORTED = 1; +localparam VIRTMEM_SUPPORTED = 1; +localparam VECTORED_INTERRUPTS_SUPPORTED = 1 ; +localparam BIGENDIAN_SUPPORTED = 1; + +// TLB configuration. Entries should be a power of 2 +localparam ITLB_ENTRIES = 32'd32; +localparam DTLB_ENTRIES = 32'd32; + +// Cache configuration. 
Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines +localparam DCACHE_NUMWAYS = 32'd4; +localparam DCACHE_WAYSIZEINBYTES = 32'd4096; +localparam DCACHE_LINELENINBITS = 32'd512; +localparam ICACHE_NUMWAYS = 32'd4; +localparam ICACHE_WAYSIZEINBYTES = 32'd4096; +localparam ICACHE_LINELENINBITS = 32'd512; + +// Integer Divider Configuration +// IDIV_BITSPERCYCLE must be 1, 2, or 4 +localparam IDIV_BITSPERCYCLE = 32'd4; +localparam IDIV_ON_FPU = 1; + +// Legal number of PMP entries are 0, 16, or 64 +localparam PMP_ENTRIES = 32'd16; + +// Address space +localparam RESET_VECTOR = 64'h0000000000001000; + +// WFI Timeout Wait +localparam WFI_TIMEOUT_BIT = 32'd16; + +// Peripheral Addresses +// Peripheral memory space extends from BASE to BASE+RANGE +// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits +localparam DTIM_SUPPORTED = 1'b0; +localparam DTIM_BASE = 64'h80000000; +localparam DTIM_RANGE = 64'h00001FFF; +localparam IROM_SUPPORTED = 1'b0; +localparam IROM_BASE = 64'h80000000; +localparam IROM_RANGE = 64'h00001FFF; +localparam BOOTROM_SUPPORTED = 1'b1; +localparam BOOTROM_BASE = 64'h00001000 ; +localparam BOOTROM_RANGE = 64'h00000FFF; +localparam UNCORE_RAM_SUPPORTED = 1'b1; +localparam UNCORE_RAM_BASE = 64'h80000000; +localparam UNCORE_RAM_RANGE = 64'h07FFFFFF; +localparam EXT_MEM_SUPPORTED = 1'b0; +localparam EXT_MEM_BASE = 64'h80000000; +localparam EXT_MEM_RANGE = 64'h07FFFFFF; +localparam CLINT_SUPPORTED = 1'b1; +localparam CLINT_BASE = 64'h02000000; +localparam CLINT_RANGE = 64'h0000FFFF; +localparam GPIO_SUPPORTED = 1'b1; +localparam GPIO_BASE = 64'h10060000; +localparam GPIO_RANGE = 64'h000000FF; +localparam UART_SUPPORTED = 1'b1; +localparam UART_BASE = 64'h10000000; +localparam UART_RANGE = 64'h00000007; +localparam PLIC_SUPPORTED = 1'b1; +localparam PLIC_BASE = 64'h0C000000; +localparam PLIC_RANGE = 64'h03FFFFFF; +localparam SDC_SUPPORTED = 1'b0; +localparam SDC_BASE 
= 64'h00012100; +localparam SDC_RANGE = 64'h0000001F; + +// Bus Interface width +localparam AHBW = 32'd64; + +// Test modes + +// Tie GPIO outputs back to inputs +localparam GPIO_LOOPBACK_TEST = 0; + +// Hardware configuration +localparam UART_PRESCALE = 32'd0; + +// Interrupt configuration +localparam PLIC_NUM_SRC = 32'd53; +localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32); +localparam PLIC_UART_ID = 32'd10; +localparam PLIC_GPIO_ID = 32'd3; + +localparam BPRED_SUPPORTED = 1; +localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N +localparam BPRED_SIZE = 32'd10; +localparam BTB_SIZE = 32'd10; + + +localparam SVADU_SUPPORTED = 1; +localparam ZMMUL_SUPPORTED = 0; + +// FPU division architecture +localparam RADIX = 32'h4; +localparam DIVCOPIES = 32'h4; + +// bit manipulation +localparam ZBA_SUPPORTED = 0; +localparam ZBB_SUPPORTED = 0; +localparam ZBC_SUPPORTED = 0; +localparam ZBS_SUPPORTED = 0; + +// Memory synthesis configuration +localparam USE_SRAM = 0; + +`include "test-shared.vh" diff --git a/config/rv32e/config.vh b/config/rv32e/config.vh new file mode 100644 index 000000000..242cc4edc --- /dev/null +++ b/config/rv32e/config.vh @@ -0,0 +1,155 @@ +////////////////////////////////////////// +// wally-config.vh +// +// Written: David_Harris@hmc.edu 4 January 2021 +// Modified: +// +// Purpose: Specify which features are configured +// Macros to determine which modes are supported based on MISA +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0.
You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +localparam FPGA = 0; +localparam QEMU = 0; + +// RV32 or RV64: XLEN = 32 or 64 +localparam XLEN = 32'd32; + +// IEEE 754 compliance +localparam IEEE754 = 0; + +// E +localparam MISA = (32'h00000010); +localparam ZICSR_SUPPORTED = 0; +localparam ZIFENCEI_SUPPORTED = 0; +localparam COUNTERS = 12'd0; +localparam ZICOUNTERS_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 0; +localparam SSTC_SUPPORTED = 0; + +// LSU microarchitectural Features +localparam BUS_SUPPORTED = 1; +localparam DCACHE_SUPPORTED = 0; +localparam ICACHE_SUPPORTED = 0; +localparam VIRTMEM_SUPPORTED = 0; +localparam VECTORED_INTERRUPTS_SUPPORTED = 0; +localparam BIGENDIAN_SUPPORTED = 0; + +// TLB configuration. Entries should be a power of 2 +localparam ITLB_ENTRIES = 32'd0; +localparam DTLB_ENTRIES = 32'd0; + +// Cache configuration. 
Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines +localparam DCACHE_NUMWAYS = 32'd4; +localparam DCACHE_WAYSIZEINBYTES = 32'd4096; +localparam DCACHE_LINELENINBITS = 32'd512; +localparam ICACHE_NUMWAYS = 32'd4; +localparam ICACHE_WAYSIZEINBYTES = 32'd4096; +localparam ICACHE_LINELENINBITS = 32'd512; + +// Integer Divider Configuration +// IDIV_BITSPERCYCLE must be 1, 2, or 4 +localparam IDIV_BITSPERCYCLE = 32'd1; +localparam IDIV_ON_FPU = 0; + +// Legal number of PMP entries are 0, 16, or 64 +localparam PMP_ENTRIES = 32'd0; + +// Address space +localparam RESET_VECTOR = 64'h80000000; + +// WFI Timeout Wait +localparam WFI_TIMEOUT_BIT = 32'd16; + +// Peripheral Addresses +// Peripheral memory space extends from BASE to BASE+RANGE +// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits +localparam DTIM_SUPPORTED = 1'b0; +localparam DTIM_BASE = 64'h80000000; +localparam DTIM_RANGE = 64'h007FFFFF; +localparam IROM_SUPPORTED = 1'b0; +localparam IROM_BASE = 64'h80000000; +localparam IROM_RANGE = 64'h007FFFFF; +localparam BOOTROM_SUPPORTED = 1'b1; +localparam BOOTROM_BASE = 64'h00001000; +localparam BOOTROM_RANGE = 64'h00000FFF; +localparam UNCORE_RAM_SUPPORTED = 1'b1; +localparam UNCORE_RAM_BASE = 64'h80000000; +localparam UNCORE_RAM_RANGE = 64'h07FFFFFF; +localparam EXT_MEM_SUPPORTED = 1'b0; +localparam EXT_MEM_BASE = 64'h80000000; +localparam EXT_MEM_RANGE = 64'h07FFFFFF; +localparam CLINT_SUPPORTED = 1'b0; +localparam CLINT_BASE = 64'h02000000; +localparam CLINT_RANGE = 64'h0000FFFF; +localparam GPIO_SUPPORTED = 1'b0; +localparam GPIO_BASE = 64'h10060000; +localparam GPIO_RANGE = 64'h000000FF; +localparam UART_SUPPORTED = 1'b0; +localparam UART_BASE = 64'h10000000; +localparam UART_RANGE = 64'h00000007; +localparam PLIC_SUPPORTED = 1'b0; +localparam PLIC_BASE = 64'h0C000000; +localparam PLIC_RANGE = 64'h03FFFFFF; +localparam SDC_SUPPORTED = 1'b0; +localparam SDC_BASE = 
64'h00012100; +localparam SDC_RANGE = 64'h0000001F; + +// Bus Interface width +localparam AHBW = 32'd32; + +// Test modes + +// Tie GPIO outputs back to inputs +localparam GPIO_LOOPBACK_TEST = 1; + +// Hardware configuration +localparam UART_PRESCALE = 32'd1; + +// Interrupt configuration +localparam PLIC_NUM_SRC = 32'd10; +// PLIC_NUM_SRC_LT_32 is derived from PLIC_NUM_SRC: 1 when fewer than 32 sources, otherwise 0 +localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32); +localparam PLIC_GPIO_ID = 32'd3; +localparam PLIC_UART_ID = 32'd10; + +localparam BPRED_SUPPORTED = 0; +localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N +localparam BPRED_SIZE = 32'd10; +localparam BTB_SIZE = 32'd10; + +localparam SVADU_SUPPORTED = 0; +localparam ZMMUL_SUPPORTED = 0; + +// FPU division architecture +localparam RADIX = 32'd4; +localparam DIVCOPIES = 32'd4; + +// bit manipulation +localparam ZBA_SUPPORTED = 0; +localparam ZBB_SUPPORTED = 0; +localparam ZBC_SUPPORTED = 0; +localparam ZBS_SUPPORTED = 0; + +// Memory synthesis configuration +localparam USE_SRAM = 0; + +`include "test-shared.vh" + \ No newline at end of file diff --git a/config/rv32e/rv32e-config.vh b/config/rv32e/rv32e-config.vh new file mode 100644 index 000000000..4701b69f1 --- /dev/null +++ b/config/rv32e/rv32e-config.vh @@ -0,0 +1,178 @@ +////////////////////////////////////////// +// wally-config.vh +// +// Written: David_Harris@hmc.edu 4 January 2021 +// Modified: +// +// Purpose: Specify which features are configured +// Macros to determine which modes are supported based on MISA +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0.
You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +localparam PA_BITS = 34; +//localparam AHBW = 32; +//localparam XLEN = 32; +//localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 ); +////localparam BUS_SUPPORTED = 1'b1; +//localparam ZICSR_SUPPORTED = 1'b0; +localparam M_SUPPORTED = 1'b0; +localparam F_SUPPORTED = 1'b0; +//localparam ZMMUL_SUPPORTED = 1'b0; +//localparam F_SUPPORTED = 1'b0; +//localparam PMP_ENTRIES = 0; +localparam LLEN = 32; +//localparam FPGA = 1'b0; +//localparam QEMU = 1'b0; + // //VPN_SEGMENT_BITS: (LLEN == 32 ? 10 : 9), + // `include "test-shared.vh" +localparam FLEN = 32; + +`include "test-shared.vh" + + + +// include shared configuration +//`include "wally-shared.vh" + +localparam FPGA = 0; +localparam QEMU = 0; + +// RV32 or RV64: XLEN = 32 or 64 +localparam XLEN = 32; + +// IEEE 754 compliance +localparam IEEE754 = 0; + +// E +localparam MISA = (32'h00000010); +localparam ZICSR_SUPPORTED = 0; +localparam ZIFENCEI_SUPPORTED = 0; +localparam COUNTERS = 0; +localparam ZICOUNTERS_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 0; +localparam SSTC_SUPPORTED = 0; + +// LSU microarchitectural Features +localparam BUS_SUPPORTED = 1; +localparam DCACHE_SUPPORTED = 0; +localparam ICACHE_SUPPORTED = 0; +localparam VIRTMEM_SUPPORTED = 0; +localparam VECTORED_INTERRUPTS_SUPPORTED = 0; +localparam BIGENDIAN_SUPPORTED = 0; + +// TLB configuration. 
Entries should be a power of 2 +localparam ITLB_ENTRIES = 0; +localparam DTLB_ENTRIES = 0; + +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines +localparam DCACHE_NUMWAYS = 4; +localparam DCACHE_WAYSIZEINBYTES = 4096; +localparam DCACHE_LINELENINBITS = 512; +localparam ICACHE_NUMWAYS = 4; +localparam ICACHE_WAYSIZEINBYTES = 4096; +localparam ICACHE_LINELENINBITS = 512; + +// Integer Divider Configuration +// IDIV_BITSPERCYCLE must be 1, 2, or 4 +localparam IDIV_BITSPERCYCLE = 1; +localparam IDIV_ON_FPU = 0; + +// Legal number of PMP entries are 0, 16, or 64 +localparam PMP_ENTRIES = 0; + +// Address space +localparam RESET_VECTOR = 32'h80000000; + +// WFI Timeout Wait +localparam WFI_TIMEOUT_BIT = 16; + +// Peripheral Addresses +// Peripheral memory space extends from BASE to BASE+RANGE +// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits +localparam DTIM_SUPPORTED = 1'b0; +localparam DTIM_BASE = 34'h80000000; +localparam DTIM_RANGE = 34'h007FFFFF; +localparam IROM_SUPPORTED = 1'b0; +localparam IROM_BASE = 34'h80000000; +localparam IROM_RANGE = 34'h007FFFFF; +localparam BOOTROM_SUPPORTED = 1'b1; +localparam BOOTROM_BASE = 34'h00001000; +localparam BOOTROM_RANGE = 34'h00000FFF; +localparam UNCORE_RAM_SUPPORTED = 1'b1; +localparam UNCORE_RAM_BASE = 34'h80000000; +localparam UNCORE_RAM_RANGE = 34'h07FFFFFF; +localparam EXT_MEM_SUPPORTED = 1'b0; +localparam EXT_MEM_BASE = 34'h80000000; +localparam EXT_MEM_RANGE = 34'h07FFFFFF; +localparam CLINT_SUPPORTED = 1'b0; +localparam CLINT_BASE = 34'h02000000; +localparam CLINT_RANGE = 34'h0000FFFF; +localparam GPIO_SUPPORTED = 1'b0; +localparam GPIO_BASE = 34'h10060000; +localparam GPIO_RANGE = 34'h000000FF; +localparam UART_SUPPORTED = 1'b0; +localparam UART_BASE = 34'h10000000; +localparam UART_RANGE = 34'h00000007; +localparam PLIC_SUPPORTED = 1'b0; +localparam PLIC_BASE = 34'h0C000000; +localparam 
PLIC_RANGE = 34'h03FFFFFF; +localparam SDC_SUPPORTED = 1'b0; +localparam SDC_BASE = 34'h00012100; +localparam SDC_RANGE = 34'h0000001F; + +// Bus Interface width +localparam AHBW = 32; + +// Test modes + +// Tie GPIO outputs back to inputs +localparam GPIO_LOOPBACK_TEST = 1; + +// Hardware configuration +localparam UART_PRESCALE = 1; + +// Interrupt configuration +localparam PLIC_NUM_SRC = 10; +// PLIC_NUM_SRC_LT_32 is derived from PLIC_NUM_SRC: 1 when fewer than 32 sources, otherwise 0 +localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32); +localparam PLIC_GPIO_ID = 3; +localparam PLIC_UART_ID = 10; + +localparam BPRED_SUPPORTED = 0; +localparam BPRED_TYPE = "BP_GSHARE"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT +localparam BPRED_SIZE = 10; +localparam BTB_SIZE = 10; + +localparam SVADU_SUPPORTED = 0; +localparam ZMMUL_SUPPORTED = 0; + +// FPU division architecture +localparam RADIX = 4; +localparam DIVCOPIES = 4; + +// bit manipulation +localparam ZBA_SUPPORTED = 0; +localparam ZBB_SUPPORTED = 0; +localparam ZBC_SUPPORTED = 0; +localparam ZBS_SUPPORTED = 0; + +// Memory synthesis configuration +localparam USE_SRAM = 0; + \ No newline at end of file diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh new file mode 100644 index 000000000..1e0a0fb23 --- /dev/null +++ b/config/rv32gc/config.vh @@ -0,0 +1,158 @@ +////////////////////////////////////////// +// wally-config.vh +// +// Written: David_Harris@hmc.edu 4 January 2021 +// Modified: +// +// Purpose: Specify which features are configured +// Macros to determine which modes are supported based on MISA +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0.
You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +// include shared configuration +// `include "wally-shared.vh" + +localparam FPGA = 0; +localparam QEMU = 0; + +// RV32 or RV64: XLEN = 32 or 64 +localparam XLEN = 32'd32; + +// IEEE 754 compliance +localparam IEEE754 = 0; + +localparam MISA = (32'h00000104 | 1 << 20 | 1 << 18 | 1 << 12 | 1 << 0 | 1 <<3 | 1 << 5); +localparam ZICSR_SUPPORTED = 1; +localparam ZIFENCEI_SUPPORTED = 1; +localparam COUNTERS = 12'd32; +localparam ZICOUNTERS_SUPPORTED = 1; +localparam ZFH_SUPPORTED = 0; +localparam SSTC_SUPPORTED = 1; + +// LSU microarchitectural Features +localparam BUS_SUPPORTED = 1; +localparam DCACHE_SUPPORTED = 1; +localparam ICACHE_SUPPORTED = 1; +localparam VIRTMEM_SUPPORTED = 1; +localparam VECTORED_INTERRUPTS_SUPPORTED = 1; +localparam BIGENDIAN_SUPPORTED = 1; + +// TLB configuration. Entries should be a power of 2 +localparam ITLB_ENTRIES = 32'd32; +localparam DTLB_ENTRIES = 32'd32; + +// Cache configuration. 
Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines +localparam DCACHE_NUMWAYS = 32'd4; +localparam DCACHE_WAYSIZEINBYTES = 32'd4096; +localparam DCACHE_LINELENINBITS = 32'd512; +localparam ICACHE_NUMWAYS = 32'd4; +localparam ICACHE_WAYSIZEINBYTES = 32'd4096; +localparam ICACHE_LINELENINBITS = 32'd512; + +// Integer Divider Configuration +// IDIV_BITSPERCYCLE must be 1, 2, or 4 +localparam IDIV_BITSPERCYCLE = 32'd4; +localparam IDIV_ON_FPU = 1; + +// Legal number of PMP entries are 0, 16, or 64 +localparam PMP_ENTRIES = 32'd16; + +// Address space +localparam RESET_VECTOR = 64'h80000000; + +// WFI Timeout Wait +localparam WFI_TIMEOUT_BIT = 32'd16; + +// Peripheral Addresses +// Peripheral memory space extends from BASE to BASE+RANGE +// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits +localparam DTIM_SUPPORTED = 1'b0; +localparam DTIM_BASE = 64'h80000000; +localparam DTIM_RANGE = 64'h007FFFFF; +localparam IROM_SUPPORTED = 1'b0; +localparam IROM_BASE = 64'h80000000; +localparam IROM_RANGE = 64'h007FFFFF; +localparam BOOTROM_SUPPORTED = 1'b1; +localparam BOOTROM_BASE = 64'h00001000; +localparam BOOTROM_RANGE = 64'h00000FFF; +localparam UNCORE_RAM_SUPPORTED = 1'b1; +localparam UNCORE_RAM_BASE = 64'h80000000; +localparam UNCORE_RAM_RANGE = 64'h07FFFFFF; +localparam EXT_MEM_SUPPORTED = 1'b0; +localparam EXT_MEM_BASE = 64'h80000000; +localparam EXT_MEM_RANGE = 64'h07FFFFFF; +localparam CLINT_SUPPORTED = 1'b1; +localparam CLINT_BASE = 64'h02000000; +localparam CLINT_RANGE = 64'h0000FFFF; +localparam GPIO_SUPPORTED = 1'b1; +localparam GPIO_BASE = 64'h10060000; +localparam GPIO_RANGE = 64'h000000FF; +localparam UART_SUPPORTED = 1'b1; +localparam UART_BASE = 64'h10000000; +localparam UART_RANGE = 64'h00000007; +localparam PLIC_SUPPORTED = 1'b1; +localparam PLIC_BASE = 64'h0C000000; +localparam PLIC_RANGE = 64'h03FFFFFF; +localparam SDC_SUPPORTED = 1'b0; +localparam SDC_BASE = 
64'h00012100; +localparam SDC_RANGE = 64'h0000001F; + +// Bus Interface width +localparam AHBW = 32'd32; + +// Test modes + +// Tie GPIO outputs back to inputs +localparam GPIO_LOOPBACK_TEST = 1; + +// Hardware configuration +localparam UART_PRESCALE = 32'd1; + +// Interrupt configuration +localparam PLIC_NUM_SRC = 32'd10; +// PLIC_NUM_SRC_LT_32 is derived from PLIC_NUM_SRC: 1 when fewer than 32 sources, otherwise 0 +localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32); +localparam PLIC_GPIO_ID = 32'd3; +localparam PLIC_UART_ID = 32'd10; + +localparam BPRED_SUPPORTED = 1; +// BP_GSHARE, BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT +// GSHARE_N, GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N +localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N +localparam BPRED_SIZE = 32'd16; +localparam BTB_SIZE = 32'd10; + +localparam SVADU_SUPPORTED = 0; +localparam ZMMUL_SUPPORTED = 0; + +// FPU division architecture +localparam RADIX = 32'd4; +localparam DIVCOPIES = 32'd4; + +// bit manipulation +localparam ZBA_SUPPORTED = 1; +localparam ZBB_SUPPORTED = 1; +localparam ZBC_SUPPORTED = 1; +localparam ZBS_SUPPORTED = 1; + +// Memory synthesis configuration +localparam USE_SRAM = 0; + +`include "test-shared.vh" diff --git a/config/rv32i/config.vh b/config/rv32i/config.vh new file mode 100644 index 000000000..affee30c2 --- /dev/null +++ b/config/rv32i/config.vh @@ -0,0 +1,154 @@ +////////////////////////////////////////// +// wally-config.vh +// +// Written: David_Harris@hmc.edu 4 January 2021 +// Modified: +// +// Purpose: Specify which features are configured +// Macros to determine which modes are supported based on MISA +// +// A component of the Wally configurable RISC-V project.
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +localparam FPGA = 0; +localparam QEMU = 0; + +// RV32 or RV64: XLEN = 32 or 64 +localparam XLEN = 32'd32; + +// IEEE 754 compliance +localparam IEEE754 = 0; + +// I +localparam MISA = (32'h00000104); +localparam ZICSR_SUPPORTED = 0; +localparam ZIFENCEI_SUPPORTED = 0; +localparam COUNTERS = 12'd32; +localparam ZICOUNTERS_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 0; +localparam SSTC_SUPPORTED = 0; + +// LSU microarchitectural Features +localparam BUS_SUPPORTED = 0; +localparam DCACHE_SUPPORTED = 0; +localparam ICACHE_SUPPORTED = 0; +localparam VIRTMEM_SUPPORTED = 0; +localparam VECTORED_INTERRUPTS_SUPPORTED = 1; +localparam BIGENDIAN_SUPPORTED = 0; + +// TLB configuration. Entries should be a power of 2 +localparam ITLB_ENTRIES = 32'd32; +localparam DTLB_ENTRIES = 32'd32; + +// Cache configuration. 
Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines +localparam DCACHE_NUMWAYS = 32'd4; +localparam DCACHE_WAYSIZEINBYTES = 32'd4096; +localparam DCACHE_LINELENINBITS = 32'd512; +localparam ICACHE_NUMWAYS = 32'd4; +localparam ICACHE_WAYSIZEINBYTES = 32'd4096; +localparam ICACHE_LINELENINBITS = 32'd512; + +// Integer Divider Configuration +// IDIV_BITSPERCYCLE must be 1, 2, or 4 +localparam IDIV_BITSPERCYCLE = 32'd4; +localparam IDIV_ON_FPU = 0; + +// Legal number of PMP entries are 0, 16, or 64 +localparam PMP_ENTRIES = 32'd0; + +// Address space +localparam RESET_VECTOR = 64'h80000000; + +// WFI Timeout Wait +localparam WFI_TIMEOUT_BIT = 32'd16; + +// Peripheral Addresses +// Peripheral memory space extends from BASE to BASE+RANGE +// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits +localparam DTIM_SUPPORTED = 1'b1; +localparam DTIM_BASE = 64'h80000000; +localparam DTIM_RANGE = 64'h007FFFFF; +localparam IROM_SUPPORTED = 1'b1; +localparam IROM_BASE = 64'h80000000; +localparam IROM_RANGE = 64'h007FFFFF; +localparam BOOTROM_SUPPORTED = 1'b0; +localparam BOOTROM_BASE = 64'h00001000; +localparam BOOTROM_RANGE = 64'h00000FFF; +localparam UNCORE_RAM_SUPPORTED = 1'b0; +localparam UNCORE_RAM_BASE = 64'h80000000; +localparam UNCORE_RAM_RANGE = 64'h07FFFFFF; +localparam EXT_MEM_SUPPORTED = 1'b0; +localparam EXT_MEM_BASE = 64'h80000000; +localparam EXT_MEM_RANGE = 64'h07FFFFFF; +localparam CLINT_SUPPORTED = 1'b0; +localparam CLINT_BASE = 64'h02000000; +localparam CLINT_RANGE = 64'h0000FFFF; +localparam GPIO_SUPPORTED = 1'b0; +localparam GPIO_BASE = 64'h10060000; +localparam GPIO_RANGE = 64'h000000FF; +localparam UART_SUPPORTED = 1'b0; +localparam UART_BASE = 64'h10000000; +localparam UART_RANGE = 64'h00000007; +localparam PLIC_SUPPORTED = 1'b0; +localparam PLIC_BASE = 64'h0C000000; +localparam PLIC_RANGE = 64'h03FFFFFF; +localparam SDC_SUPPORTED = 1'b0; +localparam SDC_BASE = 
64'h00012100; +localparam SDC_RANGE = 64'h0000001F; + +// Bus Interface width +localparam AHBW = 32'd32; + +// Test modes + +// Tie GPIO outputs back to inputs +localparam GPIO_LOOPBACK_TEST = 1; + +// Hardware configuration +localparam UART_PRESCALE = 32'd1; + +// Interrupt configuration +localparam PLIC_NUM_SRC = 32'd10; +// PLIC_NUM_SRC_LT_32 is derived from PLIC_NUM_SRC: 1 when fewer than 32 sources, otherwise 0 +localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32); +localparam PLIC_GPIO_ID = 32'd3; +localparam PLIC_UART_ID = 32'd10; + +localparam BPRED_SUPPORTED = 0; +localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N +localparam BPRED_SIZE = 32'd10; +localparam BTB_SIZE = 32'd10; + +localparam SVADU_SUPPORTED = 0; +localparam ZMMUL_SUPPORTED = 0; + +// FPU division architecture +localparam RADIX = 32'h4; +localparam DIVCOPIES = 32'h4; + +// bit manipulation +localparam ZBA_SUPPORTED = 0; +localparam ZBB_SUPPORTED = 0; +localparam ZBC_SUPPORTED = 0; +localparam ZBS_SUPPORTED = 0; + +// Memory synthesis configuration +localparam USE_SRAM = 0; + +`include "test-shared.vh" \ No newline at end of file diff --git a/config/rv32imc/config.vh b/config/rv32imc/config.vh new file mode 100644 index 000000000..cd029635f --- /dev/null +++ b/config/rv32imc/config.vh @@ -0,0 +1,153 @@ +////////////////////////////////////////// +// wally-config.vh +// +// Written: David_Harris@hmc.edu 4 January 2021 +// Modified: +// +// Purpose: Specify which features are configured +// Macros to determine which modes are supported based on MISA +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0.
You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +localparam FPGA = 0; +localparam QEMU = 0; + +// RV32 or RV64: XLEN = 32 or 64 +localparam XLEN = 32'd32; + +// IEEE 754 compliance +localparam IEEE754 = 0; + +localparam MISA = (32'h00000104 | 1 << 20 | 1 << 18 | 1 << 12); +localparam ZICSR_SUPPORTED = 1; +localparam ZIFENCEI_SUPPORTED = 1; +localparam COUNTERS = 12'd32; +localparam ZICOUNTERS_SUPPORTED = 1; +localparam ZFH_SUPPORTED = 0; +localparam SSTC_SUPPORTED = 0; + +// LSU microarchitectural Features +localparam BUS_SUPPORTED = 1; +localparam DCACHE_SUPPORTED = 0; +localparam ICACHE_SUPPORTED = 0; +localparam VIRTMEM_SUPPORTED = 0; +localparam VECTORED_INTERRUPTS_SUPPORTED = 1; +localparam BIGENDIAN_SUPPORTED = 0; + +// TLB configuration. Entries should be a power of 2 +localparam ITLB_ENTRIES = 32'd0; +localparam DTLB_ENTRIES = 32'd0; + +// Cache configuration. 
Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines +localparam DCACHE_NUMWAYS = 32'd4; +localparam DCACHE_WAYSIZEINBYTES = 32'd4096; +localparam DCACHE_LINELENINBITS = 32'd512; +localparam ICACHE_NUMWAYS = 32'd4; +localparam ICACHE_WAYSIZEINBYTES = 32'd4096; +localparam ICACHE_LINELENINBITS = 32'd512; + +// Integer Divider Configuration +// IDIV_BITSPERCYCLE must be 1, 2, or 4 +localparam IDIV_BITSPERCYCLE = 32'd2; +localparam IDIV_ON_FPU = 0; + +// Legal number of PMP entries are 0, 16, or 64 +localparam PMP_ENTRIES = 32'd0; + +// Address space +localparam RESET_VECTOR = 64'h80000000; + +// WFI Timeout Wait +localparam WFI_TIMEOUT_BIT = 32'd16; + +// Peripheral Addresses +// Peripheral memory space extends from BASE to BASE+RANGE +// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits +localparam DTIM_SUPPORTED = 1'b1; +localparam DTIM_BASE = 64'h80000000; +localparam DTIM_RANGE = 64'h007FFFFF; +localparam IROM_SUPPORTED = 1'b1; +localparam IROM_BASE = 64'h80000000; +localparam IROM_RANGE = 64'h007FFFFF; +localparam BOOTROM_SUPPORTED = 1'b0; +localparam BOOTROM_BASE = 64'h00001000; +localparam BOOTROM_RANGE = 64'h00000FFF; +localparam UNCORE_RAM_SUPPORTED = 1'b0; +localparam UNCORE_RAM_BASE = 64'h80000000; +localparam UNCORE_RAM_RANGE = 64'h07FFFFFF; +localparam EXT_MEM_SUPPORTED = 1'b0; +localparam EXT_MEM_BASE = 64'h80000000; +localparam EXT_MEM_RANGE = 64'h07FFFFFF; +localparam CLINT_SUPPORTED = 1'b1; +localparam CLINT_BASE = 64'h02000000; +localparam CLINT_RANGE = 64'h0000FFFF; +localparam GPIO_SUPPORTED = 1'b1; +localparam GPIO_BASE = 64'h10060000; +localparam GPIO_RANGE = 64'h000000FF; +localparam UART_SUPPORTED = 1'b1; +localparam UART_BASE = 64'h10000000; +localparam UART_RANGE = 64'h00000007; +localparam PLIC_SUPPORTED = 1'b1; +localparam PLIC_BASE = 64'h0C000000; +localparam PLIC_RANGE = 64'h03FFFFFF; +localparam SDC_SUPPORTED = 1'b0; +localparam SDC_BASE = 
64'h00012100; +localparam SDC_RANGE = 64'h0000001F; + +// Bus Interface width +localparam AHBW = 32'd32; + +// Test modes + +// Tie GPIO outputs back to inputs +localparam GPIO_LOOPBACK_TEST = 1; + +// Hardware configuration +localparam UART_PRESCALE = 32'd1; + +// Interrupt configuration +localparam PLIC_NUM_SRC = 32'd10; +// comment out the following if >=32 sources +localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32); +localparam PLIC_GPIO_ID = 32'd3; +localparam PLIC_UART_ID = 32'd10; + +localparam BPRED_SUPPORTED = 0; +localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N +localparam BPRED_SIZE = 32'd10; +localparam BTB_SIZE = 32'd10; + +localparam SVADU_SUPPORTED = 0; +localparam ZMMUL_SUPPORTED = 0; + +// FPU division architecture +localparam RADIX = 32'h4; +localparam DIVCOPIES = 32'h4; + +// bit manipulation +localparam ZBA_SUPPORTED = 0; +localparam ZBB_SUPPORTED = 0; +localparam ZBC_SUPPORTED = 0; +localparam ZBS_SUPPORTED = 0; + +// Memory synthesis configuration +localparam USE_SRAM = 0; + +`include "test-shared.vh" diff --git a/config/rv64fpquad/config.vh b/config/rv64fpquad/config.vh new file mode 100644 index 000000000..63b919e9c --- /dev/null +++ b/config/rv64fpquad/config.vh @@ -0,0 +1,156 @@ +////////////////////////////////////////// +// wally-config.vh +// +// Written: David_Harris@hmc.edu 4 January 2021 +// Modified: +// +// Purpose: Specify which features are configured +// Macros to determine which modes are supported based on MISA +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +localparam FPGA = 0; +localparam QEMU = 0; + +// RV32 or RV64: XLEN = 32 or 64 +localparam XLEN = 32'd64; + +// IEEE 754 compliance +localparam IEEE754 = 0; + +// MISA RISC-V configuration per specification +localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 ); +localparam ZICSR_SUPPORTED = 1; +localparam ZIFENCEI_SUPPORTED = 1; +localparam COUNTERS = 12'd32; +localparam ZICOUNTERS_SUPPORTED = 1; +localparam ZFH_SUPPORTED = 1; +localparam SSTC_SUPPORTED = 0; + +// LSU microarchitectural Features +localparam BUS_SUPPORTED = 1; +localparam DCACHE_SUPPORTED = 1; +localparam ICACHE_SUPPORTED = 1; +localparam VIRTMEM_SUPPORTED = 1; +localparam VECTORED_INTERRUPTS_SUPPORTED = 1 ; +localparam BIGENDIAN_SUPPORTED = 1; + +// TLB configuration. Entries should be a power of 2 +localparam ITLB_ENTRIES = 32'd32; +localparam DTLB_ENTRIES = 32'd32; + +// Cache configuration. 
Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines +localparam DCACHE_NUMWAYS = 32'd4; +localparam DCACHE_WAYSIZEINBYTES = 32'd4096; +localparam DCACHE_LINELENINBITS = 32'd512; +localparam ICACHE_NUMWAYS = 32'd4; +localparam ICACHE_WAYSIZEINBYTES = 32'd4096; +localparam ICACHE_LINELENINBITS = 32'd512; + +// Integer Divider Configuration +// IDIV_BITSPERCYCLE must be 1, 2, or 4 +localparam IDIV_BITSPERCYCLE = 32'd4; +localparam IDIV_ON_FPU = 1; + +// Legal number of PMP entries are 0, 16, or 64 +localparam PMP_ENTRIES = 32'd16; + +// Address space +localparam RESET_VECTOR = 64'h0000000080000000; + +// Bus Interface width +localparam AHBW = 32'd64; + +// WFI Timeout Wait +localparam WFI_TIMEOUT_BIT = 32'd16; + +// Peripheral Physical Addresses +// Peripheral memory space extends from BASE to BASE+RANGE +// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits + +// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file? 
+localparam DTIM_SUPPORTED = 1'b0; +localparam DTIM_BASE = 64'h80000000; +localparam DTIM_RANGE = 64'h007FFFFF; +localparam IROM_SUPPORTED = 1'b0; +localparam IROM_BASE = 64'h80000000; +localparam IROM_RANGE = 64'h007FFFFF; +localparam BOOTROM_SUPPORTED = 1'b1; +localparam BOOTROM_BASE = 64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +localparam BOOTROM_RANGE = 64'h00000FFF; +localparam UNCORE_RAM_SUPPORTED = 1'b1; +localparam UNCORE_RAM_BASE = 64'h80000000; +localparam UNCORE_RAM_RANGE = 64'h7FFFFFFF; +localparam EXT_MEM_SUPPORTED = 1'b0; +localparam EXT_MEM_BASE = 64'h80000000; +localparam EXT_MEM_RANGE = 64'h07FFFFFF; +localparam CLINT_SUPPORTED = 1'b1; +localparam CLINT_BASE = 64'h02000000; +localparam CLINT_RANGE = 64'h0000FFFF; +localparam GPIO_SUPPORTED = 1'b1; +localparam GPIO_BASE = 64'h10060000; +localparam GPIO_RANGE = 64'h000000FF; +localparam UART_SUPPORTED = 1'b1; +localparam UART_BASE = 64'h10000000; +localparam UART_RANGE = 64'h00000007; +localparam PLIC_SUPPORTED = 1'b1; +localparam PLIC_BASE = 64'h0C000000; +localparam PLIC_RANGE = 64'h03FFFFFF; +localparam SDC_SUPPORTED = 1'b0; +localparam SDC_BASE = 64'h00012100; +localparam SDC_RANGE = 64'h0000001F; + +// Test modes + +// Tie GPIO outputs back to inputs +localparam GPIO_LOOPBACK_TEST = 1; + +// Hardware configuration +localparam UART_PRESCALE = 32'd1; + +// Interrupt configuration +localparam PLIC_NUM_SRC = 32'd10; +// comment out the following if >=32 sources +localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32); +localparam PLIC_GPIO_ID = 32'd3; +localparam PLIC_UART_ID = 32'd10; + +localparam BPRED_SUPPORTED = 1; +localparam BPRED_TYPE = "GSHARE_N"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT +localparam BPRED_SIZE = 32'd10; +localparam BTB_SIZE = 32'd10; + +localparam SVADU_SUPPORTED = 0; +localparam ZMMUL_SUPPORTED = 0; + +// FPU division architecture +localparam 
RADIX = 32'h4; +localparam DIVCOPIES = 32'h4; + +// bit manipulation +localparam ZBA_SUPPORTED = 0; +localparam ZBB_SUPPORTED = 0; +localparam ZBC_SUPPORTED = 0; +localparam ZBS_SUPPORTED = 0; + +// Memory synthesis configuration +localparam USE_SRAM = 0; + +`include "test-shared.vh" diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh new file mode 100644 index 000000000..1ef0bc08c --- /dev/null +++ b/config/rv64gc/config.vh @@ -0,0 +1,159 @@ +////////////////////////////////////////// +// wally-config.vh +// +// Written: David_Harris@hmc.edu 4 January 2021 +// Modified: +// +// Purpose: Specify which features are configured +// Macros to determine which modes are supported based on MISA +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +// include shared configuration +// `include "wally-shared.vh" + +localparam FPGA = 0; +localparam QEMU = 0; + +// RV32 or RV64: XLEN = 32 or 64 +localparam XLEN = 32'd64; + +// IEEE 754 compliance +localparam IEEE754 = 0; + +// MISA RISC-V configuration per specification +localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0); +localparam ZICSR_SUPPORTED = 1; +localparam ZIFENCEI_SUPPORTED = 1; +localparam COUNTERS = 12'd32; +localparam ZICOUNTERS_SUPPORTED = 1; +localparam ZFH_SUPPORTED = 0; +localparam SSTC_SUPPORTED = 1; + +// LSU microarchitectural Features +localparam BUS_SUPPORTED = 1; +localparam DCACHE_SUPPORTED = 1; +localparam ICACHE_SUPPORTED = 1; +localparam VIRTMEM_SUPPORTED = 1; +localparam VECTORED_INTERRUPTS_SUPPORTED = 1; +localparam BIGENDIAN_SUPPORTED = 1; + +// TLB configuration. Entries should be a power of 2 +localparam ITLB_ENTRIES = 32'd32; +localparam DTLB_ENTRIES = 32'd32; + +// Cache configuration. 
Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines +localparam DCACHE_NUMWAYS = 32'd4; +localparam DCACHE_WAYSIZEINBYTES = 32'd4096; +localparam DCACHE_LINELENINBITS = 32'd512; +localparam ICACHE_NUMWAYS = 32'd4; +localparam ICACHE_WAYSIZEINBYTES = 32'd4096; +localparam ICACHE_LINELENINBITS = 32'd512; + +// Integer Divider Configuration +// IDIV_BITSPERCYCLE must be 1, 2, or 4 +localparam IDIV_BITSPERCYCLE = 32'd4; +localparam IDIV_ON_FPU = 1; + +// Legal number of PMP entries are 0, 16, or 64 +localparam PMP_ENTRIES = 32'd16; + +// Address space +localparam RESET_VECTOR = 64'h0000000080000000; + +// Bus Interface width +localparam AHBW = 32'd64; + +// WFI Timeout Wait +localparam WFI_TIMEOUT_BIT = 32'd16; + +// Peripheral Physical Addresses +// Peripheral memory space extends from BASE to BASE+RANGE +// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits + +// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file? 
+localparam DTIM_SUPPORTED = 1'b0; +localparam DTIM_BASE = 64'h80000000; +localparam DTIM_RANGE = 64'h007FFFFF; +localparam IROM_SUPPORTED = 1'b0; +localparam IROM_BASE = 64'h80000000; +localparam IROM_RANGE = 64'h007FFFFF; +localparam BOOTROM_SUPPORTED = 1'b1; +localparam BOOTROM_BASE = 64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder; +localparam BOOTROM_RANGE = 64'h00000FFF; +localparam UNCORE_RAM_SUPPORTED = 1'b1; +localparam UNCORE_RAM_BASE = 64'h80000000; +localparam UNCORE_RAM_RANGE = 64'h7FFFFFFF; +localparam EXT_MEM_SUPPORTED = 1'b0; +localparam EXT_MEM_BASE = 64'h80000000; +localparam EXT_MEM_RANGE = 64'h07FFFFFF; +localparam CLINT_SUPPORTED = 1'b1; +localparam CLINT_BASE = 64'h02000000; +localparam CLINT_RANGE = 64'h0000FFFF; +localparam GPIO_SUPPORTED = 1'b1; +localparam GPIO_BASE = 64'h10060000; +localparam GPIO_RANGE = 64'h000000FF; +localparam UART_SUPPORTED = 1'b1; +localparam UART_BASE = 64'h10000000; +localparam UART_RANGE = 64'h00000007; +localparam PLIC_SUPPORTED = 1'b1; +localparam PLIC_BASE = 64'h0C000000; +localparam PLIC_RANGE = 64'h03FFFFFF; +localparam SDC_SUPPORTED = 1'b0; +localparam SDC_BASE = 64'h00012100; +localparam SDC_RANGE = 64'h0000001F; + +// Test modes + +// Tie GPIO outputs back to inputs +localparam GPIO_LOOPBACK_TEST = 1; + +// Hardware configuration +localparam UART_PRESCALE = 32'd1; + +// Interrupt configuration +localparam PLIC_NUM_SRC = 32'd10; +// comment out the following if >=32 sources +localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32); +localparam PLIC_GPIO_ID = 32'd3; +localparam PLIC_UART_ID = 32'd10; + +localparam BPRED_SUPPORTED = 1; +localparam BPRED_TYPE = "GSHARE_N"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT +localparam BPRED_SIZE = 32'd10; +localparam BTB_SIZE = 32'd10; + +localparam SVADU_SUPPORTED = 0; +localparam ZMMUL_SUPPORTED = 0; + +// FPU division architecture +localparam 
RADIX = 32'h4; +localparam DIVCOPIES = 32'h4; + +// bit manipulation +localparam ZBA_SUPPORTED = 1; +localparam ZBB_SUPPORTED = 1; +localparam ZBC_SUPPORTED = 1; +localparam ZBS_SUPPORTED = 1; + +// Memory synthesis configuration +localparam USE_SRAM = 0; + +`include "test-shared.vh" diff --git a/config/rv64i/config.vh b/config/rv64i/config.vh new file mode 100644 index 000000000..9afb890cf --- /dev/null +++ b/config/rv64i/config.vh @@ -0,0 +1,156 @@ +////////////////////////////////////////// +// wally-config.vh +// +// Written: David_Harris@hmc.edu 4 January 2021 +// Modified: +// +// Purpose: Specify which features are configured +// Macros to determine which modes are supported based on MISA +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +localparam FPGA = 0; +localparam QEMU = 0; + +// RV32 or RV64: XLEN = 32 or 64 +localparam XLEN = 32'd64; + +// IEEE 754 compliance +localparam IEEE754 = 0; + +// MISA RISC-V configuration per specification +localparam MISA = (32'h00000104); +localparam ZICSR_SUPPORTED = 0; +localparam ZIFENCEI_SUPPORTED = 0; +localparam COUNTERS = 12'd32; +localparam ZICOUNTERS_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 0; +localparam SSTC_SUPPORTED = 0; + +// LSU microarchitectural Features +localparam BUS_SUPPORTED = 0; +localparam DCACHE_SUPPORTED = 0; +localparam ICACHE_SUPPORTED = 0; +localparam VIRTMEM_SUPPORTED = 0; +localparam VECTORED_INTERRUPTS_SUPPORTED = 1; +localparam BIGENDIAN_SUPPORTED = 0; + +// TLB configuration. Entries should be a power of 2 +localparam ITLB_ENTRIES = 32'd0; +localparam DTLB_ENTRIES = 32'd0; + +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines +localparam DCACHE_NUMWAYS = 32'd4; +localparam DCACHE_WAYSIZEINBYTES = 32'd4096; +localparam DCACHE_LINELENINBITS = 32'd512; +localparam ICACHE_NUMWAYS = 32'd4; +localparam ICACHE_WAYSIZEINBYTES = 32'd4096; +localparam ICACHE_LINELENINBITS = 32'd512; + +// Integer Divider Configuration +// IDIV_BITSPERCYCLE must be 1, 2, or 4 +localparam IDIV_BITSPERCYCLE = 32'd4; +localparam IDIV_ON_FPU = 0; + +// Legal number of PMP entries are 0, 16, or 64 +localparam PMP_ENTRIES = 32'd0; + +// Address space +localparam RESET_VECTOR = 64'h0000000080000000; + +// Bus Interface width +localparam AHBW = (XLEN); + +// WFI Timeout Wait +localparam WFI_TIMEOUT_BIT = 32'd16; + +// Peripheral Physical Addresses +// Peripheral memory space extends from BASE to BASE+RANGE +// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits + +// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file? 
+localparam DTIM_SUPPORTED = 1'b1; +localparam DTIM_BASE = 64'h80000000; +localparam DTIM_RANGE = 64'h007FFFFF; +localparam IROM_SUPPORTED = 1'b1; +localparam IROM_BASE = 64'h80000000; +localparam IROM_RANGE = 64'h007FFFFF; +localparam BOOTROM_SUPPORTED = 1'b0; +localparam BOOTROM_BASE = 64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +localparam BOOTROM_RANGE = 64'h00000FFF; +localparam UNCORE_RAM_SUPPORTED = 1'b0; +localparam UNCORE_RAM_BASE = 64'h80000000; +localparam UNCORE_RAM_RANGE = 64'h7FFFFFFF; +localparam EXT_MEM_SUPPORTED = 1'b0; +localparam EXT_MEM_BASE = 64'h80000000; +localparam EXT_MEM_RANGE = 64'h07FFFFFF; +localparam CLINT_SUPPORTED = 1'b0; +localparam CLINT_BASE = 64'h02000000; +localparam CLINT_RANGE = 64'h0000FFFF; +localparam GPIO_SUPPORTED = 1'b0; +localparam GPIO_BASE = 64'h10060000; +localparam GPIO_RANGE = 64'h000000FF; +localparam UART_SUPPORTED = 1'b0; +localparam UART_BASE = 64'h10000000; +localparam UART_RANGE = 64'h00000007; +localparam PLIC_SUPPORTED = 1'b0; +localparam PLIC_BASE = 64'h0C000000; +localparam PLIC_RANGE = 64'h03FFFFFF; +localparam SDC_SUPPORTED = 1'b0; +localparam SDC_BASE = 64'h00012100; +localparam SDC_RANGE = 64'h0000001F; + +// Test modes + +// Tie GPIO outputs back to inputs +localparam GPIO_LOOPBACK_TEST = 1; + +// Hardware configuration +localparam UART_PRESCALE = 32'd1; + +// Interrupt configuration +localparam PLIC_NUM_SRC = 32'd10; +// comment out the following if >=32 sources +localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32); +localparam PLIC_GPIO_ID = 32'd3; +localparam PLIC_UART_ID = 32'd10; + +localparam BPRED_SUPPORTED = 0; +localparam BPRED_TYPE = "GSHARE_N"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT +localparam BPRED_SIZE = 32'd10; +localparam BTB_SIZE = 32'd10; + +localparam SVADU_SUPPORTED = 0; +localparam ZMMUL_SUPPORTED = 0; + +// FPU division architecture +localparam 
RADIX = 32'h4; +localparam DIVCOPIES = 32'h4; + +// bit manipulation +localparam ZBA_SUPPORTED = 0; +localparam ZBB_SUPPORTED = 0; +localparam ZBC_SUPPORTED = 0; +localparam ZBS_SUPPORTED = 0; + +// Memory synthesis configuration +localparam USE_SRAM = 0; + +`include "test-shared.vh" diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh new file mode 100644 index 000000000..58e061fbc --- /dev/null +++ b/config/shared/parameter-defs.vh @@ -0,0 +1,164 @@ + +// Populate parameter structure with values specific to the current configuration + +parameter cvw_t P = '{ + FPGA : FPGA, + QEMU : QEMU, + XLEN : XLEN, + IEEE754 : IEEE754, + MISA : MISA, + AHBW : AHBW, + ZICSR_SUPPORTED : ZICSR_SUPPORTED, + ZIFENCEI_SUPPORTED : ZIFENCEI_SUPPORTED, + COUNTERS : COUNTERS, + ZICOUNTERS_SUPPORTED : ZICOUNTERS_SUPPORTED, + ZFH_SUPPORTED : ZFH_SUPPORTED, + SSTC_SUPPORTED : SSTC_SUPPORTED, + VIRTMEM_SUPPORTED : VIRTMEM_SUPPORTED, + VECTORED_INTERRUPTS_SUPPORTED : VECTORED_INTERRUPTS_SUPPORTED, + BIGENDIAN_SUPPORTED : BIGENDIAN_SUPPORTED, + SVADU_SUPPORTED : SVADU_SUPPORTED, + ZMMUL_SUPPORTED : ZMMUL_SUPPORTED, + BUS_SUPPORTED : BUS_SUPPORTED, + DCACHE_SUPPORTED : DCACHE_SUPPORTED, + ICACHE_SUPPORTED : ICACHE_SUPPORTED, + ITLB_ENTRIES : ITLB_ENTRIES, + DTLB_ENTRIES : DTLB_ENTRIES, + DCACHE_NUMWAYS : DCACHE_NUMWAYS, + DCACHE_WAYSIZEINBYTES : DCACHE_WAYSIZEINBYTES, + DCACHE_LINELENINBITS : DCACHE_LINELENINBITS, + ICACHE_NUMWAYS : ICACHE_NUMWAYS, + ICACHE_WAYSIZEINBYTES : ICACHE_WAYSIZEINBYTES, + ICACHE_LINELENINBITS : ICACHE_LINELENINBITS, + IDIV_BITSPERCYCLE : IDIV_BITSPERCYCLE, + IDIV_ON_FPU : IDIV_ON_FPU, + PMP_ENTRIES : PMP_ENTRIES, + RESET_VECTOR : RESET_VECTOR, + WFI_TIMEOUT_BIT : WFI_TIMEOUT_BIT, + DTIM_SUPPORTED : DTIM_SUPPORTED, + DTIM_BASE : DTIM_BASE, + DTIM_RANGE : DTIM_RANGE, + IROM_SUPPORTED : IROM_SUPPORTED, + IROM_BASE : IROM_BASE, + IROM_RANGE : IROM_RANGE, + BOOTROM_SUPPORTED : BOOTROM_SUPPORTED, + BOOTROM_BASE : BOOTROM_BASE, + BOOTROM_RANGE : 
BOOTROM_RANGE, + UNCORE_RAM_SUPPORTED : UNCORE_RAM_SUPPORTED, + UNCORE_RAM_BASE : UNCORE_RAM_BASE, + UNCORE_RAM_RANGE : UNCORE_RAM_RANGE, + EXT_MEM_SUPPORTED : EXT_MEM_SUPPORTED, + EXT_MEM_BASE : EXT_MEM_BASE, + EXT_MEM_RANGE : EXT_MEM_RANGE, + CLINT_SUPPORTED : CLINT_SUPPORTED, + CLINT_BASE : CLINT_BASE, + CLINT_RANGE : CLINT_RANGE, + GPIO_SUPPORTED : GPIO_SUPPORTED, + GPIO_BASE : GPIO_BASE, + GPIO_RANGE : GPIO_RANGE, + UART_SUPPORTED : UART_SUPPORTED, + UART_BASE : UART_BASE, + UART_RANGE : UART_RANGE, + PLIC_SUPPORTED : PLIC_SUPPORTED, + PLIC_BASE : PLIC_BASE, + PLIC_RANGE : PLIC_RANGE, + SDC_SUPPORTED : SDC_SUPPORTED, + SDC_BASE : SDC_BASE, + SDC_RANGE : SDC_RANGE, + GPIO_LOOPBACK_TEST : GPIO_LOOPBACK_TEST, + UART_PRESCALE : UART_PRESCALE , + PLIC_NUM_SRC : PLIC_NUM_SRC, + PLIC_NUM_SRC_LT_32 : PLIC_NUM_SRC_LT_32, + PLIC_GPIO_ID : PLIC_GPIO_ID, + PLIC_UART_ID : PLIC_UART_ID, + BPRED_SUPPORTED : BPRED_SUPPORTED, + BPRED_TYPE : BPRED_TYPE, + BPRED_SIZE : BPRED_SIZE, + BTB_SIZE : BTB_SIZE, + RADIX : RADIX, + DIVCOPIES : DIVCOPIES, + ZBA_SUPPORTED : ZBA_SUPPORTED, + ZBB_SUPPORTED : ZBB_SUPPORTED, + ZBC_SUPPORTED : ZBC_SUPPORTED, + ZBS_SUPPORTED : ZBS_SUPPORTED, + USE_SRAM : USE_SRAM, + M_MODE : M_MODE, + S_MODE : S_MODE, + U_MODE : U_MODE, + VPN_SEGMENT_BITS : VPN_SEGMENT_BITS, + VPN_BITS : VPN_BITS, + PPN_BITS : PPN_BITS, + PA_BITS : PA_BITS, + SVMODE_BITS : SVMODE_BITS, + ASID_BASE : ASID_BASE, + ASID_BITS : ASID_BITS, + NO_TRANSLATE : NO_TRANSLATE, + SV32 : SV32, + SV39 : SV39, + SV48 : SV48, + A_SUPPORTED : A_SUPPORTED, + B_SUPPORTED : B_SUPPORTED, + C_SUPPORTED : C_SUPPORTED, + D_SUPPORTED : D_SUPPORTED, + E_SUPPORTED : E_SUPPORTED, + F_SUPPORTED : F_SUPPORTED, + I_SUPPORTED : I_SUPPORTED, + M_SUPPORTED : M_SUPPORTED, + Q_SUPPORTED : Q_SUPPORTED, + S_SUPPORTED : S_SUPPORTED, + U_SUPPORTED : U_SUPPORTED, + LOG_XLEN : LOG_XLEN, + PMPCFG_ENTRIES : PMPCFG_ENTRIES, + Q_LEN : Q_LEN, + Q_NE : Q_NE, + Q_NF : Q_NF, + Q_BIAS : Q_BIAS, + Q_FMT : Q_FMT, + D_LEN : D_LEN, + 
D_NE : D_NE, + D_NF : D_NF, + D_BIAS : D_BIAS, + D_FMT : D_FMT, + S_LEN : S_LEN, + S_NE : S_NE, + S_NF : S_NF, + S_BIAS : S_BIAS, + S_FMT : S_FMT, + H_LEN : H_LEN, + H_NE : H_NE, + H_NF : H_NF, + H_BIAS : H_BIAS, + H_FMT : H_FMT, + FLEN : FLEN, + NE : NE , + NF : NF , + FMT : FMT , + BIAS : BIAS, + FPSIZES : FPSIZES, + FMTBITS : FMTBITS, + LEN1 : LEN1 , + NE1 : NE1 , + NF1 : NF1 , + FMT1 : FMT1 , + BIAS1 : BIAS1, + LEN2 : LEN2 , + NE2 : NE2 , + NF2 : NF2 , + FMT2 : FMT2 , + BIAS2 : BIAS2, + CVTLEN : CVTLEN, + LLEN : LLEN, + LOGCVTLEN : LOGCVTLEN, + NORMSHIFTSZ : NORMSHIFTSZ, + LOGNORMSHIFTSZ : LOGNORMSHIFTSZ, + CORRSHIFTSZ : CORRSHIFTSZ, + DIVN : DIVN, + LOGR : LOGR, + RK : RK, + LOGRK : LOGRK, + FPDUR : FPDUR, + DURLEN : DURLEN, + DIVb : DIVb, + DIVBLEN : DIVBLEN, + DIVa : DIVa +}; diff --git a/config/shared/test-shared.vh b/config/shared/test-shared.vh new file mode 100644 index 000000000..04961071f --- /dev/null +++ b/config/shared/test-shared.vh @@ -0,0 +1,119 @@ +// constants defining different privilege modes +// defined in Table 1.1 of the privileged spec +localparam M_MODE = (2'b11); +localparam S_MODE = (2'b01); +localparam U_MODE = (2'b00); + +// Virtual Memory Constants +localparam VPN_SEGMENT_BITS = (XLEN == 32 ? 32'd10 : 32'd9); +localparam VPN_BITS = (XLEN==32 ? (2*VPN_SEGMENT_BITS) : (4*VPN_SEGMENT_BITS)); +localparam PPN_BITS = (XLEN==32 ? 32'd22 : 32'd44); +localparam PA_BITS = (XLEN==32 ? 32'd34 : 32'd56); +localparam SVMODE_BITS = (XLEN==32 ? 32'd1 : 32'd4); +localparam ASID_BASE = (XLEN==32 ? 32'd22 : 32'd44); +localparam ASID_BITS = (XLEN==32 ? 
32'd9 : 32'd16); + +// constants to check SATP_MODE against +// defined in Table 4.3 of the privileged spec +localparam NO_TRANSLATE = 4'd0; +localparam SV32 = 4'd1; +localparam SV39 = 4'd8; +localparam SV48 = 4'd9; + +// localparams to define supported modes +localparam A_SUPPORTED = ((MISA >> 0) % 2 == 1); +localparam B_SUPPORTED = ((ZBA_SUPPORTED | ZBB_SUPPORTED | ZBC_SUPPORTED | ZBS_SUPPORTED));// not based on MISA +localparam C_SUPPORTED = ((MISA >> 2) % 2 == 1); +localparam D_SUPPORTED = ((MISA >> 3) % 2 == 1); +localparam E_SUPPORTED = ((MISA >> 4) % 2 == 1); +localparam F_SUPPORTED = ((MISA >> 5) % 2 == 1); +localparam I_SUPPORTED = ((MISA >> 8) % 2 == 1); +localparam M_SUPPORTED = ((MISA >> 12) % 2 == 1); +localparam Q_SUPPORTED = ((MISA >> 16) % 2 == 1); +localparam S_SUPPORTED = ((MISA >> 18) % 2 == 1); +localparam U_SUPPORTED = ((MISA >> 20) % 2 == 1); +// N-mode user-level interrupts are deprecated per Andrew Waterman 1/13/21 + +// logarithm of XLEN, used for number of index bits to select +localparam LOG_XLEN = (XLEN == 32 ? 32'd5 : 32'd6); + +// Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries) +localparam PMPCFG_ENTRIES = (PMP_ENTRIES/32'd8); + +// Floating point constants for Quad, Double, Single, and Half precisions +// Lim: I've made some of these 64 bit to avoid width warnings. +// If errors crop up, try downsizing back to 32. 
+localparam Q_LEN = 32'd128; +localparam Q_NE = 32'd15; +localparam Q_NF = 32'd112; +localparam Q_BIAS = 32'd16383; +localparam Q_FMT = 2'd3; +localparam D_LEN = 32'd64; +localparam D_NE = 32'd11; +localparam D_NF = 32'd52; +localparam D_BIAS = 32'd1023; +localparam D_FMT = 2'd1; +localparam S_LEN = 32'd32; +localparam S_NE = 32'd8; +localparam S_NF = 32'd23; +localparam S_BIAS = 32'd127; +localparam S_FMT = 2'd0; +localparam H_LEN = 32'd16; +localparam H_NE = 32'd5; +localparam H_NF = 32'd10; +localparam H_BIAS = 32'd15; +localparam H_FMT = 2'd2; + +// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits +localparam FLEN = (Q_SUPPORTED ? Q_LEN : D_SUPPORTED ? D_LEN : S_LEN); +localparam NE = (Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : S_NE); +localparam NF = (Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : S_NF); +localparam FMT = (Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : 2'd0); +localparam BIAS = (Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : S_BIAS); +/* Delete once tested dh 10/10/22 + +localparam FLEN = (Q_SUPPORTED ? Q_LEN : D_SUPPORTED ? D_LEN : F_SUPPORTED ? S_LEN : H_LEN); +localparam NE = (Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : F_SUPPORTED ? S_NE : H_NE); +localparam NF = (Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : F_SUPPORTED ? S_NF : H_NF); +localparam FMT = (Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : F_SUPPORTED ? 2'd0 : 2'd2); +localparam BIAS = (Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : F_SUPPORTED ? S_BIAS : H_BIAS);*/ + +// Floating point constants needed for FPU parameterization +localparam FPSIZES = ((32)'(Q_SUPPORTED)+(32)'(D_SUPPORTED)+(32)'(F_SUPPORTED)+(32)'(ZFH_SUPPORTED)); +localparam FMTBITS = ((32)'(FPSIZES>=3)+1); +localparam LEN1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_LEN : (F_SUPPORTED & (FLEN != S_LEN)) ? S_LEN : H_LEN); +localparam NE1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_NE : (F_SUPPORTED & (FLEN != S_LEN)) ? S_NE : H_NE); +localparam NF1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? 
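D_NF : (F_SUPPORTED & (FLEN != S_LEN)) ? S_NF : H_NF); +localparam FMT1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? 2'd1 : (F_SUPPORTED & (FLEN != S_LEN)) ? 2'd0 : 2'd2); +localparam BIAS1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_BIAS : (F_SUPPORTED & (FLEN != S_LEN)) ? S_BIAS : H_BIAS); +localparam LEN2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_LEN : H_LEN); +localparam NE2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NE : H_NE); +localparam NF2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF : H_NF); +localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2); +localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS); + +// division constants +localparam DIVN = (((NF<XLEN) & IDIV_ON_FPU) ? XLEN : NF+2); // standard length of input +localparam LOGR = ($clog2(RADIX)); // r = log(R) +localparam RK = (LOGR*DIVCOPIES); // r*k used for intdiv preproc +localparam LOGRK = ($clog2(RK)); // log2(r*k) +localparam FPDUR = ((DIVN+1+(LOGR*DIVCOPIES))/(LOGR*DIVCOPIES)+(RADIX/4)); +localparam DURLEN = ($clog2(FPDUR+1)); +localparam DIVb = (FPDUR*LOGR*DIVCOPIES-1); // canonical fdiv size (b) +localparam DIVBLEN = ($clog2(DIVb+1)-1); +localparam DIVa = (DIVb+1-XLEN); // used for idiv on fpu + +// largest length in IEU/FPU +localparam CVTLEN = ((NF<XLEN) ? (XLEN) : (NF)); +localparam LLEN = ((FLEN<XLEN) ? (XLEN) : (FLEN)); +localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1)); +localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6))); +localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); +localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVN+1+NF) > (3*NF+4) ? (DIVN+1+NF) : (3*NF+4))); + + +// Disable spurious Verilator warnings + +/* verilator lint_off STMTDLY */ +/* verilator lint_off ASSIGNDLY */ +/* verilator lint_off PINCONNECTEMPTY */ \ No newline at end of file diff --git a/sim/lint-wally b/sim/lint-wally index 4ff93af60..8de8010df 100755 --- a/sim/lint-wally +++ b/sim/lint-wally @@ -8,7 +8,7 @@ basepath=$(dirname $0)/.. for config in rv32e rv64gc rv32gc rv32imc rv32i rv64i rv64fpquad; do #for config in rv64gc; do echo "$config linting..." 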
- if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes ); then + if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/wally/cvw.sv $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes ); then echo "Exiting after $config lint due to errors or warnings" exit 1 fi diff --git a/src/wally/cvw.sv b/src/wally/cvw.sv index eb6146283..5e6764190 100644 --- a/src/wally/cvw.sv +++ b/src/wally/cvw.sv @@ -28,188 +28,237 @@ // Instead, CORE-V-Wally loads the appropriate configuration one time and places it in a package // that is referenced by all Wally modules but not by other subsystems. -// Load configuration-specific information -`include "wally-config.vh" +`ifndef CVW_T + +`define CVW_T 1 -// Place configuration in a package package cvw; - parameter XLEN = `XLEN; - parameter FPGA = `FPGA; - parameter QEMU = `QEMU; - parameter IEEE754 = `IEEE754; - parameter MISA = `MISA; - parameter ZICSR_SUPPORTED = `ZICSR_SUPPORTED; - parameter ZIFENCEI_SUPPORTED = `ZIFENCEI_SUPPORTED; - parameter COUNTERS = `COUNTERS; - parameter ZICOUNTERS_SUPPORTED = `ZICOUNTERS_SUPPORTED; - parameter ZFH_SUPPORTED = `ZFH_SUPPORTED; - parameter BUS_SUPPORTED = `BUS_SUPPORTED; - parameter DCACHE_SUPPORTED = `DCACHE_SUPPORTED; - parameter ICACHE_SUPPORTED = `ICACHE_SUPPORTED; - parameter VIRTMEM_SUPPORTED = `VIRTMEM_SUPPORTED; - parameter VECTORED_INTERRUPTS_SUPPORTED = `VECTORED_INTERRUPTS_SUPPORTED; - parameter BIGENDIAN_SUPPORTED = `BIGENDIAN_SUPPORTED; - parameter ITLB_ENTRIES = `ITLB_ENTRIES; - parameter DTLB_ENTRIES = `DTLB_ENTRIES; - parameter DCACHE_NUMWAYS = `DCACHE_NUMWAYS; - parameter DCACHE_WAYSIZEINBYTES = `DCACHE_WAYSIZEINBYTES; - parameter DCACHE_LINELENINBITS = `DCACHE_LINELENINBITS; - parameter ICACHE_NUMWAYS = `ICACHE_NUMWAYS; - parameter ICACHE_WAYSIZEINBYTES = 
`ICACHE_WAYSIZEINBYTES; - parameter ICACHE_LINELENINBITS = `ICACHE_LINELENINBITS; - parameter IDIV_BITSPERCYCLE = `IDIV_BITSPERCYCLE; - parameter IDIV_ON_FPU = `IDIV_ON_FPU; - parameter PMP_ENTRIES = `PMP_ENTRIES; - parameter RESET_VECTOR = `RESET_VECTOR; - parameter WFI_TIMEOUT_BIT = `WFI_TIMEOUT_BIT; - parameter DTIM_SUPPORTED = `DTIM_SUPPORTED; - parameter DTIM_BASE = `DTIM_BASE; - parameter DTIM_RANGE = `DTIM_RANGE; - parameter IROM_SUPPORTED = `IROM_SUPPORTED; - parameter IROM_BASE = `IROM_BASE; - parameter IROM_RANGE = `IROM_RANGE; - parameter BOOTROM_SUPPORTED = `BOOTROM_SUPPORTED; - parameter BOOTROM_BASE = `BOOTROM_BASE; - parameter BOOTROM_RANGE = `BOOTROM_RANGE; - parameter UNCORE_RAM_SUPPORTED = `UNCORE_RAM_SUPPORTED; - parameter UNCORE_RAM_BASE = `UNCORE_RAM_BASE; - parameter UNCORE_RAM_RANGE = `UNCORE_RAM_RANGE; - parameter EXT_MEM_SUPPORTED = `EXT_MEM_SUPPORTED; - parameter EXT_MEM_BASE = `EXT_MEM_BASE; - parameter EXT_MEM_RANGE = `EXT_MEM_RANGE; - parameter CLINT_SUPPORTED = `CLINT_SUPPORTED; - parameter CLINT_BASE = `CLINT_BASE; - parameter CLINT_RANGE = `CLINT_RANGE; - parameter GPIO_SUPPORTED = `GPIO_SUPPORTED; - parameter GPIO_BASE = `GPIO_BASE; - parameter GPIO_RANGE = `GPIO_RANGE; - parameter UART_SUPPORTED = `UART_SUPPORTED; - parameter UART_BASE = `UART_BASE; - parameter UART_RANGE = `UART_RANGE; - parameter PLIC_SUPPORTED = `PLIC_SUPPORTED; - parameter PLIC_BASE = `PLIC_BASE; - parameter PLIC_RANGE = `PLIC_RANGE; - parameter SDC_SUPPORTED = `SDC_SUPPORTED; - parameter SDC_BASE = `SDC_BASE; - parameter SDC_RANGE = `SDC_RANGE; - parameter AHBW = `AHBW; - parameter GPIO_LOOPBACK_TEST = `GPIO_LOOPBACK_TEST; - parameter UART_PRESCALE = `UART_PRESCALE; - parameter PLIC_NUM_SRC = `PLIC_NUM_SRC; - parameter PLIC_GPIO_ID = `PLIC_GPIO_ID; - parameter PLIC_UART_ID = `PLIC_UART_ID; - parameter BPRED_SUPPORTED = `BPRED_SUPPORTED; - parameter BPRED_TYPE = `BPRED_TYPE; - parameter BPRED_SIZE = `BPRED_SIZE; - parameter SVADU_SUPPORTED = `SVADU_SUPPORTED; 
-// parameter = `; + +typedef struct packed { + logic FPGA; // Modifications to tare + logic QEMU; // Hacks to agree with QEMU during Linux boot + int XLEN; // Machine width (32 or 64) + logic IEEE754; // IEEE754 NaN handling (0 = use RISC-V NaN propagation instead) + int MISA; // Machine Instruction Set Architecture + int AHBW; // AHB bus width (usually = XLEN) + + // RISC-V Features + logic ZICSR_SUPPORTED; + logic ZIFENCEI_SUPPORTED; + logic [11:0] COUNTERS; + logic ZICOUNTERS_SUPPORTED; + logic ZFH_SUPPORTED; + logic SSTC_SUPPORTED; + logic VIRTMEM_SUPPORTED; + logic VECTORED_INTERRUPTS_SUPPORTED; + logic BIGENDIAN_SUPPORTED; + logic SVADU_SUPPORTED; + logic ZMMUL_SUPPORTED; + + // Microarchitectural Features + logic BUS_SUPPORTED; + logic DCACHE_SUPPORTED; + logic ICACHE_SUPPORTED; + +// TLB configuration. Entries should be a power of 2 + int ITLB_ENTRIES; + int DTLB_ENTRIES; + +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines + int DCACHE_NUMWAYS; + int DCACHE_WAYSIZEINBYTES; + int DCACHE_LINELENINBITS; + int ICACHE_NUMWAYS; + int ICACHE_WAYSIZEINBYTES; + int ICACHE_LINELENINBITS; + +// Integer Divider Configuration +// IDIV_BITSPERCYCLE must be 1, 2, or 4 + int IDIV_BITSPERCYCLE; + logic IDIV_ON_FPU; + +// Legal number of PMP entries are 0, 16, or 64 + int PMP_ENTRIES; + +// Address space + longint RESET_VECTOR; + +// WFI Timeout Wait + int WFI_TIMEOUT_BIT; + +// Peripheral Addresses +// Peripheral memory space extends from BASE to BASE+RANGE +// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits + logic DTIM_SUPPORTED; + longint DTIM_BASE; + longint DTIM_RANGE; + logic IROM_SUPPORTED; + longint IROM_BASE; + longint IROM_RANGE; + logic BOOTROM_SUPPORTED; + longint BOOTROM_BASE; + longint BOOTROM_RANGE; + logic UNCORE_RAM_SUPPORTED; + longint UNCORE_RAM_BASE; + longint UNCORE_RAM_RANGE; + logic EXT_MEM_SUPPORTED; + longint EXT_MEM_BASE; + 
longint EXT_MEM_RANGE; + logic CLINT_SUPPORTED; + longint CLINT_BASE; + longint CLINT_RANGE; + logic GPIO_SUPPORTED; + longint GPIO_BASE; + longint GPIO_RANGE; + logic UART_SUPPORTED; + longint UART_BASE; + longint UART_RANGE; + logic PLIC_SUPPORTED; + longint PLIC_BASE; + longint PLIC_RANGE; + logic SDC_SUPPORTED; + longint SDC_BASE; + longint SDC_RANGE; + +// Test modes + +// Tie GPIO outputs back to inputs + logic GPIO_LOOPBACK_TEST; + +// Hardware configuration + int UART_PRESCALE ; + +// Interrupt configuration + int PLIC_NUM_SRC; + logic PLIC_NUM_SRC_LT_32; + int PLIC_GPIO_ID; + int PLIC_UART_ID; + + logic BPRED_SUPPORTED; + longint BPRED_TYPE; + int BPRED_SIZE; + int BTB_SIZE; - // Shared parameters +// FPU division architecture + int RADIX; + int DIVCOPIES; - // constants defining different privilege modes - // defined in Table 1.1 of the privileged spec - parameter M_MODE = (2'b11); - parameter S_MODE = (2'b01); - parameter U_MODE = (2'b00); +// bit manipulation + logic ZBA_SUPPORTED; + logic ZBB_SUPPORTED; + logic ZBC_SUPPORTED; + logic ZBS_SUPPORTED; - // Virtual Memory Constants - parameter VPN_SEGMENT_BITS = (`XLEN == 32 ? 10 : 9); - parameter VPN_BITS = (`XLEN==32 ? (2*`VPN_SEGMENT_BITS) : (4*`VPN_SEGMENT_BITS)); - parameter PPN_BITS = (`XLEN==32 ? 22 : 44); - parameter PA_BITS = (`XLEN==32 ? 34 : 56); - parameter SVMODE_BITS = (`XLEN==32 ? 1 : 4); - parameter ASID_BASE = (`XLEN==32 ? 22 : 44); - parameter ASID_BITS = (`XLEN==32 ? 
9 : 16); +// Memory synthesis configuration + logic USE_SRAM; - // constants to check SATP_MODE against - // defined in Table 4.3 of the privileged spec - parameter NO_TRANSLATE = 0; - parameter SV32 = 1; - parameter SV39 = 8; - parameter SV48 = 9; +// constants defining different privilege modes +// defined in Table 1.1 of the privileged spec + logic [1:0] M_MODE ; + logic [1:0] S_MODE ; + logic [1:0] U_MODE ; - // macros to define supported modes - parameter A_SUPPORTED = ((`MISA >> 0) % 2 == 1); - parameter B_SUPPORTED = ((`ZBA_SUPPORTED | `ZBB_SUPPORTED | `ZBC_SUPPORTED | `ZBS_SUPPORTED)); // not based on MISA - parameter C_SUPPORTED = ((`MISA >> 2) % 2 == 1); - parameter D_SUPPORTED = ((`MISA >> 3) % 2 == 1); - parameter E_SUPPORTED = ((`MISA >> 4) % 2 == 1); - parameter F_SUPPORTED = ((`MISA >> 5) % 2 == 1); - parameter I_SUPPORTED = ((`MISA >> 8) % 2 == 1); - parameter M_SUPPORTED = ((`MISA >> 12) % 2 == 1); - parameter Q_SUPPORTED = ((`MISA >> 16) % 2 == 1); - parameter S_SUPPORTED = ((`MISA >> 18) % 2 == 1); - parameter U_SUPPORTED = ((`MISA >> 20) % 2 == 1); - // N-mode user-level interrupts are depricated per Andrew Waterman 1/13/21 +// Virtual Memory Constants + int VPN_SEGMENT_BITS; + int VPN_BITS; + int PPN_BITS; + int PA_BITS; + int SVMODE_BITS; + int ASID_BASE; + int ASID_BITS; - // logarithm of XLEN, used for number of index bits to select - parameter LOG_XLEN = (`XLEN == 32 ? 
5 : 6); +// constants to check SATP_MODE against +// defined in Table 4.3 of the privileged spec + logic [3:0] NO_TRANSLATE; + logic [3:0] SV32; + logic [3:0] SV39; + logic [3:0] SV48; - // Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries) - parameter PMPCFG_ENTRIES = (`PMP_ENTRIES/8); - - // Floating point constants for Quad, Double, Single, and Half precisions - parameter Q_LEN = 32'd128; - parameter Q_NE = 32'd15; - parameter Q_NF = 32'd112; - parameter Q_BIAS = 32'd16383; - parameter Q_FMT = 2'd3; - parameter D_LEN = 32'd64; - parameter D_NE = 32'd11; - parameter D_NF = 32'd52; - parameter D_BIAS = 32'd1023; - parameter D_FMT = 2'd1; - parameter S_LEN = 32'd32; - parameter S_NE = 32'd8; - parameter S_NF = 32'd23; - parameter S_BIAS = 32'd127; - parameter S_FMT = 2'd0; - parameter H_LEN = 32'd16; - parameter H_NE = 32'd5; - parameter H_NF = 32'd10; - parameter H_BIAS = 32'd15; - parameter H_FMT = 2'd2; - - // Floating point length FLEN and number of exponent (NE) and fraction (NF) bits - parameter FLEN = (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `S_LEN); - parameter NE = (`Q_SUPPORTED ? `Q_NE : `D_SUPPORTED ? `D_NE : `S_NE); - parameter NF = (`Q_SUPPORTED ? `Q_NF : `D_SUPPORTED ? `D_NF : `S_NF); - parameter FMT = (`Q_SUPPORTED ? 2'd3 : `D_SUPPORTED ? 2'd1 : 2'd0); - parameter BIAS = (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `S_BIAS); +// macros to define supported modes + logic A_SUPPORTED; + logic B_SUPPORTED; + logic C_SUPPORTED; + logic D_SUPPORTED; + logic E_SUPPORTED; + logic F_SUPPORTED; + logic I_SUPPORTED; + logic M_SUPPORTED; + logic Q_SUPPORTED; + logic S_SUPPORTED; + logic U_SUPPORTED; - // Floating point constants needed for FPU paramerterization - parameter FPSIZES = ((32)'(`Q_SUPPORTED)+(32)'(`D_SUPPORTED)+(32)'(`F_SUPPORTED)+(32)'(`ZFH_SUPPORTED)); - parameter FMTBITS = ((32)'(`FPSIZES>=3)+1); - parameter LEN1 = ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_LEN : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? 
`S_LEN : `H_LEN); - parameter NE1 = ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NE : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NE : `H_NE); - parameter NF1 = ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NF : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NF : `H_NF); - parameter FMT1 = ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? 2'd1 : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? 2'd0 : 2'd2); - parameter BIAS1 = ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_BIAS : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_BIAS : `H_BIAS); - parameter LEN2 = ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_LEN : `H_LEN); - parameter NE2 = ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NE : `H_NE); - parameter NF2 = ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NF : `H_NF); - parameter FMT2 = ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 2'd0 : 2'd2); - parameter BIAS2 = ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS); +// logarithm of XLEN, used for number of index bits to select + int LOG_XLEN; - // largest length in IEU/FPU - parameter CVTLEN = ((`NF<`XLEN) ? (`XLEN) : (`NF)); - parameter LLEN = ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)); - parameter LOGCVTLEN = $unsigned($clog2(`CVTLEN+1)); - parameter NORMSHIFTSZ = (((`CVTLEN+`NF+1)>(`DIVb + 1 +`NF+1) & (`CVTLEN+`NF+1)>(3*`NF+6)) ? (`CVTLEN+`NF+1) : ((`DIVb + 1 +`NF+1) > (3*`NF+6) ? (`DIVb + 1 +`NF+1) : (3*`NF+6))); - parameter LOGNORMSHIFTSZ = ($clog2(`NORMSHIFTSZ)); - parameter CORRSHIFTSZ = (((`CVTLEN+`NF+1)>(`DIVb + 1 +`NF+1) & (`CVTLEN+`NF+1)>(3*`NF+6)) ? (`CVTLEN+`NF+1) : ((`DIVN+1+`NF) > (3*`NF+4) ? 
(`DIVN+1+`NF) : (3*`NF+4))); +// Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries) + int PMPCFG_ENTRIES; - // division constants +// Floating point constants for Quad, Double, Single, and Half precisions + int Q_LEN; + int Q_NE; + int Q_NF; + int Q_BIAS; + logic [1:0] Q_FMT; + int D_LEN; + int D_NE; + int D_NF; + int D_BIAS; + logic [1:0] D_FMT; + int S_LEN; + int S_NE; + int S_NF; + int S_BIAS; + logic [1:0] S_FMT; + int H_LEN; + int H_NE; + int H_NF; + int H_BIAS; + logic [1:0] H_FMT; - parameter DIVN = (((`NF<`XLEN) & `IDIV_ON_FPU) ? `XLEN : `NF+2); // standard length of input - parameter LOGR = ($clog2(`RADIX)); // r = log(R) - parameter RK = (`LOGR*`DIVCOPIES); // r*k used for intdiv preproc - parameter LOGRK = ($clog2(`RK)); // log2(r*k) - parameter FPDUR = ((`DIVN+1+(`LOGR*`DIVCOPIES))/(`LOGR*`DIVCOPIES)+(`RADIX/4)); - parameter DURLEN = ($clog2(`FPDUR+1)); - parameter DIVb = (`FPDUR*`LOGR*`DIVCOPIES-1); // canonical fdiv size (b) - parameter DIVBLEN = ($clog2(`DIVb+1)-1); - parameter DIVa = (`DIVb+1-`XLEN); // used for idiv on fpu +// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits + int FLEN; + int NE ; + int NF ; + logic [1:0] FMT ; + int BIAS; + +// Floating point constants needed for FPU paramerterization + int FPSIZES; + int FMTBITS; + int LEN1 ; + int NE1 ; + int NF1 ; + logic [1:0] FMT1 ; + int BIAS1; + int LEN2 ; + int NE2 ; + int NF2 ; + logic [1:0] FMT2 ; + int BIAS2; + +// largest length in IEU/FPU + int CVTLEN; + int LLEN; + int LOGCVTLEN; + int NORMSHIFTSZ; + int LOGNORMSHIFTSZ; + int CORRSHIFTSZ; + +// division constants + int DIVN ; + int LOGR; + int RK ; + int LOGRK ; + int FPDUR ; + int DURLEN ; + int DIVb ; + int DIVBLEN ; + int DIVa ; + +} cvw_t; endpackage + +`endif diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index ee5df5956..f6946ec60 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -29,7 +29,7 @@ //import 
cvw::*; // global CORE-V-Wally parameters `include "wally-config.vh" -module wallypipelinedcore ( +module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, // Privileged input logic MTimerInt, MExtInt, SExtInt, MSwInt, diff --git a/src/wally/wallypipelinedsoc.sv b/src/wally/wallypipelinedsoc.sv index 0e3632451..bcb4c9523 100644 --- a/src/wally/wallypipelinedsoc.sv +++ b/src/wally/wallypipelinedsoc.sv @@ -26,22 +26,23 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// +`include "config.vh" //import cvw::*; // global CORE-V-Wally parameters `include "wally-config.vh" -module wallypipelinedsoc ( +module wallypipelinedsoc import cvw::*; ( input logic clk, input logic reset_ext, // external asynchronous reset pin output logic reset, // reset synchronized to clk to prevent races on release // AHB Interface - input logic [`AHBW-1:0] HRDATAEXT, + input logic [AHBW-1:0] HRDATAEXT, input logic HREADYEXT, HRESPEXT, output logic HSELEXT, // outputs to external memory, shared with uncore memory output logic HCLK, HRESETn, - output logic [`PA_BITS-1:0] HADDR, - output logic [`AHBW-1:0] HWDATA, - output logic [`XLEN/8-1:0] HWSTRB, + output logic [PA_BITS-1:0] HADDR, + output logic [AHBW-1:0] HWDATA, + output logic [XLEN/8-1:0] HWSTRB, output logic HWRITE, output logic [2:0] HSIZE, output logic [2:0] HBURST, @@ -64,24 +65,26 @@ module wallypipelinedsoc ( ); // Uncore signals - logic [`AHBW-1:0] HRDATA; // from AHB mux in uncore + logic [AHBW-1:0] HRDATA; // from AHB mux in uncore logic HRESP; // response from AHB logic MTimerInt, MSwInt;// timer and software interrupts from CLINT logic [63:0] MTIME_CLINT; // from CLINT to CSRs logic MExtInt,SExtInt; // from PLIC + `include "parameter-defs.vh" + // synchronize reset to SOC clock domain synchronizer resetsync(.clk, .d(reset_ext), .q(reset)); // instantiate processor and internal memories - wallypipelinedcore 
core(.clk, .reset, + wallypipelinedcore #(P) core(.clk, .reset, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK ); // instantiate uncore if a bus interface exists - if (`BUS_SUPPORTED) begin : uncore + if (BUS_SUPPORTED) begin : uncore uncore uncore(.HCLK, .HRESETn, .TIMECLK, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK, .HRDATAEXT, .HREADYEXT, .HRESPEXT, .HRDATA, .HREADY, .HRESP, .HSELEXT, diff --git a/testbench/testbench.sv b/testbench/testbench.sv index 826663409..ae14fc9ae 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -28,7 +28,7 @@ `include "wally-config.vh" `include "tests.vh" -`define PrintHPMCounters 1 +`define PrintHPMCounters 0 `define BPRED_LOGGER 0 `define I_CACHE_ADDR_LOGGER 0 `define D_CACHE_ADDR_LOGGER 0 @@ -204,7 +204,7 @@ module testbench; assign SDCDat = '0; end - wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT, + wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT, .HCLK, .HRESETn, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOIN, .GPIOOUT, .GPIOEN, .UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK); From d3123fc00a7d594ddc202962d4ddc25c364ee6fd Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 24 May 2023 14:05:44 -0500 Subject: [PATCH 02/20] Updated a large number of the source files to use parameters rather than `defines. Based on Lim's work. So far there is no simulation slow down. 
--- config/rv32e/config.vh | 2 +- sim/wally-batch.do | 8 +-- sim/wave.do | 26 +++---- src/ieu/bmu/bmuctrl.sv | 30 ++++---- src/ieu/controller.sv | 43 ++++++------ src/ieu/datapath.sv | 108 ++++++++++++++-------------- src/ieu/extend.sv | 20 +++--- src/ieu/ieu.sv | 31 ++++----- src/ieu/regfile.sv | 12 ++-- src/ifu/ifu.sv | 120 ++++++++++++++++---------------- src/ifu/spill.sv | 27 ++++--- src/wally/wallypipelinedcore.sv | 71 +++++++++---------- 12 files changed, 236 insertions(+), 262 deletions(-) diff --git a/config/rv32e/config.vh b/config/rv32e/config.vh index 242cc4edc..be5862f58 100644 --- a/config/rv32e/config.vh +++ b/config/rv32e/config.vh @@ -152,4 +152,4 @@ localparam ZBS_SUPPORTED = 0; localparam USE_SRAM = 0; `include "test-shared.vh" - \ No newline at end of file + diff --git a/sim/wally-batch.do b/sim/wally-batch.do index df49518c1..6b5acbb92 100644 --- a/sim/wally-batch.do +++ b/sim/wally-batch.do @@ -59,7 +59,7 @@ if {$argc >= 3} { # default to config/rv64ic, but allow this to be overridden at the command line. 
For example: # do wally-pipelined-batch.do ../config/rv32imc rv32imc if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { - vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 + vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 # start and run simulation if { $coverage } { echo "wally-batch buildroot coverage" @@ -88,7 +88,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { exec ./slack-notifier/slack-notifier.py } elseif {$2 eq "ahb"} { - vlog -lint -work wkdir/work_${1}_${2}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 +define+RAM_LATENCY=$3 +define+BURST_EN=$4 + vlog -lint -work wkdir/work_${1}_${2}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 +define+RAM_LATENCY=$3 +define+BURST_EN=$4 # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals vopt wkdir/work_${1}_${2}_${3}_${4}.testbench -work wkdir/work_${1}_${2}_${3}_${4} -G TEST=$2 -o testbenchopt @@ -112,7 +112,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { # **** fix this so we can pass any number of +defines. 
# only allows 3 right now - vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7 + vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7 # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals vopt wkdir/work_${1}_${3}_${4}.testbench -work wkdir/work_${1}_${3}_${4} -G TEST=$4 -o testbenchopt @@ -126,7 +126,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { # power off -r /dut/core/* } else { - vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 + vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals if {$coverage} { diff --git a/sim/wave.do b/sim/wave.do index 53d6eab21..b0cb91e3d 100644 --- a/sim/wave.do +++ b/sim/wave.do @@ -95,12 +95,12 @@ add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/ add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/csr/csru/csru/FFLAGS_REGW add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/csr/csru/csru/STATUS_FS add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} -divider {class check} -add wave -noupdate -expand -group Bpred -group prediction 
/testbench/dut/core/ifu/bpred/bpred/RASPCF -add wave -noupdate -expand -group Bpred -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE -add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF -add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/NextValidPCE -add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCF -add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F +add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF +add wave -noupdate -expand -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/NextValidPCE +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/PCF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F add wave -noupdate -group RegFile -expand /testbench/dut/core/ieu/dp/regf/rf add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a1 add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a2 @@ -593,23 +593,14 @@ add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/StallM add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushM add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCNextF -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/LHRNextF add wave -noupdate /testbench/dut/core/ifu/PCF -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/LHRF -add wave -noupdate 
/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/LHRD -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/LHRE -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/LHRM -add wave -noupdate -label BHT /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BHT/mem add wave -noupdate /testbench/reset add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPDirPredD -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCW add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BranchM -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/LHRNextW add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/NewBPDirPredM -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/LHRF -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/LHRCommittedF +add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/mem TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 4} {12208 ns} 1} {{Cursor 4} {30 ns} 0} +WaveRestoreCursors {{Cursor 4} {12208 ns} 1} {{Cursor 4} {435726 ns} 0} quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 194 @@ -625,3 +616,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update +WaveRestoreZoom {435627 ns} {435795 ns} diff --git a/src/ieu/bmu/bmuctrl.sv b/src/ieu/bmu/bmuctrl.sv index d1fa909d2..ad46ab728 100644 --- a/src/ieu/bmu/bmuctrl.sv +++ b/src/ieu/bmu/bmuctrl.sv @@ -27,9 +27,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module bmuctrl( +module bmuctrl import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, // Decode stage control signals input logic StallD, FlushD, // Stall, flush Decode stage @@ -76,13 +74,13 @@ module bmuctrl( always_comb begin // BALUSelect_BSelect_ZBBSelect_BRegWrite_BALUSrcB_BW64_BALUOp_BSubArithD_RotateD_MaskD_PreShiftD_IllegalBitmanipInstrD BMUControlsD = `BMUCTRLW'b000_00_000_0_0_0_0_0_0_0_0_1; // default: Illegal bmu instruction; - if (`ZBA_SUPPORTED) begin + if (P.ZBA_SUPPORTED) begin casez({OpD, Funct7D, Funct3D}) 17'b0110011_0010000_010: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh1add 17'b0110011_0010000_100: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh2add 17'b0110011_0010000_110: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh3add endcase - if (`XLEN==64) + if (P.XLEN==64) casez({OpD, Funct7D, Funct3D}) 17'b0111011_0010000_010: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0; // sh1add.uw 17'b0111011_0010000_100: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0; // sh2add.uw @@ -91,7 +89,7 @@ module bmuctrl( 17'b0011011_000010?_001: BMUControlsD = `BMUCTRLW'b001_01_000_1_1_1_1_0_0_0_0_0; // slli.uw endcase end - if (`ZBB_SUPPORTED) begin + if (P.ZBB_SUPPORTED) begin casez({OpD, Funct7D, Funct3D}) 17'b0110011_0110000_001: BMUControlsD = `BMUCTRLW'b001_01_111_1_0_0_1_0_1_0_0_0; // rol 17'b0110011_0110000_101: BMUControlsD = `BMUCTRLW'b001_01_111_1_0_0_1_0_1_0_0_0; // ror @@ -100,13 +98,13 @@ module bmuctrl( else if ((Rs2D[4:2]==3'b000) & ~(Rs2D[1] & Rs2D[0])) BMUControlsD = `BMUCTRLW'b000_10_000_1_1_0_1_0_0_0_0_0; // count instruction // // coverage off: This case can't occur in RV64 -// 17'b0110011_0000100_100: if (`XLEN == 32) +// 17'b0110011_0000100_100: if (P.XLEN == 32) // BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0; // zexth (rv32) // // 
coverage on 17'b0110011_0100000_111: BMUControlsD = `BMUCTRLW'b111_01_111_1_0_0_1_1_0_0_0_0; // andn 17'b0110011_0100000_110: BMUControlsD = `BMUCTRLW'b110_01_111_1_0_0_1_1_0_0_0_0; // orn 17'b0110011_0100000_100: BMUControlsD = `BMUCTRLW'b100_01_111_1_0_0_1_1_0_0_0_0; // xnor - 17'b0010011_011010?_101: if ((`XLEN == 32 ^ Funct7D[0]) & (Rs2D == 5'b11000)) + 17'b0010011_011010?_101: if ((P.XLEN == 32 ^ Funct7D[0]) & (Rs2D == 5'b11000)) BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // rev8 17'b0010011_0010100_101: if (Rs2D[4:0] == 5'b00111) BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // orc.b @@ -115,12 +113,12 @@ module bmuctrl( 17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // min 17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // minu endcase - if (`XLEN==32) + if (P.XLEN==32) casez({OpD, Funct7D, Funct3D}) 17'b0110011_0000100_100: BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0; // zexth (rv32) 17'b0010011_0110000_101: BMUControlsD = `BMUCTRLW'b001_00_111_1_1_0_1_0_1_0_0_0; // rori (rv32) endcase - else if (`XLEN==64) + else if (P.XLEN==64) casez({OpD, Funct7D, Funct3D}) 17'b0111011_0000100_100: BMUControlsD = `BMUCTRLW'b000_10_001_1_0_0_1_0_0_0_0_0; // zexth (rv64) 17'b0111011_0110000_001: BMUControlsD = `BMUCTRLW'b001_00_111_1_0_1_1_0_1_0_0_0; // rolw @@ -131,25 +129,25 @@ module bmuctrl( BMUControlsD = `BMUCTRLW'b000_10_000_1_1_1_1_0_0_0_0_0; // count word instruction endcase end - if (`ZBC_SUPPORTED) + if (P.ZBC_SUPPORTED) casez({OpD, Funct7D, Funct3D}) 17'b0110011_0000101_0??: BMUControlsD = `BMUCTRLW'b000_11_000_1_0_0_1_0_0_0_0_0; // ZBC instruction endcase - if (`ZBS_SUPPORTED) begin // ZBS + if (P.ZBS_SUPPORTED) begin // ZBS casez({OpD, Funct7D, Funct3D}) 17'b0110011_0100100_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_0_0_1_1_0_1_0_0; // bclr 17'b0110011_0100100_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_0_0_1_1_0_1_0_0; // bext 17'b0110011_0110100_001: 
BMUControlsD = `BMUCTRLW'b100_01_000_1_0_0_1_0_0_1_0_0; // binv 17'b0110011_0010100_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_0_0_1_0_0_1_0_0; // bset endcase - if (`XLEN==32) // ZBS 64-bit + if (P.XLEN==32) // ZBS 64-bit casez({OpD, Funct7D, Funct3D}) 17'b0010011_0100100_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_1_0_1_1_0_1_0_0; // bclri 17'b0010011_0100100_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_1_0_1_1_0_1_0_0; // bexti 17'b0010011_0110100_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_1_0_1_0_0_1_0_0; // binvi 17'b0010011_0010100_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_1_0_1_0_0_1_0_0; // bseti endcase - else if (`XLEN==64) // ZBS 64-bit + else if (P.XLEN==64) // ZBS 64-bit casez({OpD, Funct7D, Funct3D}) 17'b0010011_010010?_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_1_0_1_1_0_1_0_0; // bclri (rv64) 17'b0010011_010010?_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_1_0_1_1_0_1_0_0; // bexti (rv64) @@ -157,7 +155,7 @@ module bmuctrl( 17'b0010011_001010?_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_1_0_1_0_0_1_0_0; // bseti (rv64) endcase end - if (`ZBB_SUPPORTED | `ZBS_SUPPORTED) // rv32i/64i shift instructions need BMU ALUSelect when BMU shifter is used + if (P.ZBB_SUPPORTED | P.ZBS_SUPPORTED) // rv32i/64i shift instructions need BMU ALUSelect when BMU shifter is used casez({OpD, Funct7D, Funct3D}) 17'b0110011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_0_0_1_0_0_0_0_0; // sra, srl, sll 17'b0010011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_1_0_1_0_0_0_0_0; // srai, srli, slli @@ -176,5 +174,5 @@ module bmuctrl( assign ALUSelectD = BALUOpD ? BALUSelectD : (ALUOpD ? 
Funct3D : 3'b000); // BMU Execute stage pipieline control register - flopenrc#(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE}); + flopenrc #(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE}); endmodule diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 25395825b..8839b9cad 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -27,10 +27,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - - -module controller( +module controller import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, // Decode stage control signals input logic StallD, FlushD, // Stall, flush Decode stage @@ -142,30 +139,30 @@ module controller( // Be rigorous about detecting illegal instructions if CSRs or bit manipulation is supported // otherwise be cheap - if (`ZICSR_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED | `ZBC_SUPPORTED | `ZBS_SUPPORTED) begin:legalcheck // Exact integer decoding + if (P.ZICSR_SUPPORTED | P.ZBA_SUPPORTED | P.ZBB_SUPPORTED | P.ZBC_SUPPORTED | P.ZBS_SUPPORTED) begin:legalcheck // Exact integer decoding logic Funct7ZeroD, Funct7b5D, IShiftD, INoShiftD; logic Funct7ShiftZeroD, Funct7Shiftb5D; assign Funct7ZeroD = (Funct7D == 7'b0000000); // most R-type instructions assign Funct7b5D = (Funct7D == 7'b0100000); // srai, sub - assign Funct7ShiftZeroD = (`XLEN==64) ? (Funct7D[6:1] == 6'b000000) : Funct7ZeroD; - assign Funct7Shiftb5D = (`XLEN==64) ? (Funct7D[6:1] == 6'b010000) : Funct7b5D; + assign Funct7ShiftZeroD = (P.XLEN==64) ? (Funct7D[6:1] == 6'b000000) : Funct7ZeroD; + assign Funct7Shiftb5D = (P.XLEN==64) ? 
(Funct7D[6:1] == 6'b010000) : Funct7b5D; assign IShiftD = (Funct3D == 3'b001 & Funct7ShiftZeroD) | (Funct3D == 3'b101 & (Funct7ShiftZeroD | Funct7Shiftb5D)); // slli, srli, srai, or w forms assign INoShiftD = ((Funct3D != 3'b001) & (Funct3D != 3'b101)); assign IFunctD = IShiftD | INoShiftD; assign RFunctD = ((Funct3D == 3'b000 | Funct3D == 3'b101) & Funct7b5D) | Funct7ZeroD; - assign MFunctD = (Funct7D == 7'b0000001) & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv + assign MFunctD = (Funct7D == 7'b0000001) & (P.M_SUPPORTED | (P.ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv assign LFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 | Funct3D == 3'b100 | Funct3D == 3'b101 | - ((`XLEN == 64) & (Funct3D == 3'b011 | Funct3D == 3'b110)); + ((P.XLEN == 64) & (Funct3D == 3'b011 | Funct3D == 3'b110)); assign SFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 | - ((`XLEN == 64) & (Funct3D == 3'b011)); + ((P.XLEN == 64) & (Funct3D == 3'b011)); assign BFunctD = (Funct3D[2:1] != 2'b01); // legal branches assign JFunctD = (Funct3D == 3'b000); assign IWValidFunct3D = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b101; end else begin:legalcheck2 assign IFunctD = 1; // Don't bother to separate out shift decoding assign RFunctD = ~Funct7D[0]; // Not a multiply - assign MFunctD = Funct7D[0] & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv + assign MFunctD = Funct7D[0] & (P.M_SUPPORTED | (P.ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv assign LFunctD = 1; // don't bother to check Funct3 for loads assign SFunctD = 1; // don't bother to check Funct3 for stores assign BFunctD = 1; // don't bother to check Funct3 for branches @@ -182,19 +179,19 @@ module controller( 7'b0000011: if (LFunctD) ControlsD = `CTRLW'b1_000_01_10_001_0_0_0_0_0_0_0_0_0_00_0; // loads 7'b0000111: ControlsD = `CTRLW'b0_000_01_10_001_0_0_0_0_0_0_0_0_0_00_1; // flw - only legal if FP supported - 7'b0001111: if (`ZIFENCEI_SUPPORTED) + 7'b0001111: if 
(P.ZIFENCEI_SUPPORTED) ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_1_0_00_0; // fence else ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_0; // fence treated as nop 7'b0010011: if (IFunctD) ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_0_0_0_0_0_00_0; // I-type ALU 7'b0010111: ControlsD = `CTRLW'b1_100_11_00_000_0_0_0_0_0_0_0_0_0_00_0; // auipc - 7'b0011011: if (IFunctD & IWValidFunct3D & `XLEN == 64) + 7'b0011011: if (IFunctD & IWValidFunct3D & P.XLEN == 64) ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_1_0_0_0_0_00_0; // IW-type ALU for RV64i 7'b0100011: if (SFunctD) ControlsD = `CTRLW'b0_001_01_01_000_0_0_0_0_0_0_0_0_0_00_0; // stores 7'b0100111: ControlsD = `CTRLW'b0_001_01_01_000_0_0_0_0_0_0_0_0_0_00_1; // fsw - only legal if FP supported - 7'b0101111: if (`A_SUPPORTED) begin + 7'b0101111: if (P.A_SUPPORTED) begin if (InstrD[31:27] == 5'b00010) ControlsD = `CTRLW'b1_000_00_10_001_0_0_0_0_0_0_0_0_0_01_0; // lr else if (InstrD[31:27] == 5'b00011) @@ -207,16 +204,16 @@ module controller( else if (MFunctD) ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_0_0_0_0_1_00_0; // Multiply/divide 7'b0110111: ControlsD = `CTRLW'b1_100_01_00_000_0_0_0_1_0_0_0_0_0_00_0; // lui - 7'b0111011: if (RFunctD & (`XLEN == 64)) + 7'b0111011: if (RFunctD & (P.XLEN == 64)) ControlsD = `CTRLW'b1_000_00_00_000_0_1_0_0_1_0_0_0_0_00_0; // R-type W instructions for RV64i - else if (MFunctD & (`XLEN == 64)) + else if (MFunctD & (P.XLEN == 64)) ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_1_0_0_0_1_00_0; // W-type Multiply/Divide 7'b1100011: if (BFunctD) ControlsD = `CTRLW'b0_010_11_00_000_1_0_0_0_0_0_0_0_0_00_0; // branches 7'b1100111: if (JFunctD) ControlsD = `CTRLW'b1_000_01_00_000_0_0_1_1_0_0_0_0_0_00_0; // jalr 7'b1101111: ControlsD = `CTRLW'b1_011_11_00_000_0_0_1_1_0_0_0_0_0_00_0; // jal - 7'b1110011: if (`ZICSR_SUPPORTED) begin + 7'b1110011: if (P.ZICSR_SUPPORTED) begin if (Funct3D == 3'b000) ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_1_0_0_00_0; // privileged; decoded 
further in priveleged modules else @@ -229,7 +226,7 @@ module controller( // Unswizzle control bits // Squash control signals if coming from an illegal compressed instruction // On RV32E, can't write to upper 16 registers. Checking reads to upper 16 is more costly so disregard them. - assign IllegalERegAdrD = `E_SUPPORTED & `ZICSR_SUPPORTED & ControlsD[`CTRLW-1] & InstrD[11]; + assign IllegalERegAdrD = P.E_SUPPORTED & P.ZICSR_SUPPORTED & ControlsD[`CTRLW-1] & InstrD[11]; //assign IllegalBaseInstrD = 1'b0; assign {BaseRegWriteD, ImmSrcD, ALUSrcAD, BaseALUSrcBD, MemRWD, ResultSrcD, BranchD, ALUOpD, JumpD, ALUResultSrcD, BaseW64D, CSRReadD, @@ -247,17 +244,17 @@ module controller( assign BaseSubArithD = ALUOpD & (subD | sraD | sltD | sltuD); // bit manipulation Configuration Block - if (`ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED | `ZBC_SUPPORTED) begin: bitmanipi //change the conditional expression to OR any Z supported flags + if (P.ZBS_SUPPORTED | P.ZBA_SUPPORTED | P.ZBB_SUPPORTED | P.ZBC_SUPPORTED) begin: bitmanipi //change the conditional expression to OR any Z supported flags logic IllegalBitmanipInstrD; // Unrecognized B instruction logic BRegWriteD; // Indicates if it is a R type BMU instruction in decode stage logic BW64D; // Indicates if it is a W type BMU instruction in decode stage logic BSubArithD; // TRUE for BMU ext, clr, andn, orn, xnor logic BALUSrcBD; // BMU alu src select signal - bmuctrl bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD, + bmuctrl #(P) bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD, .BRegWriteD, .BALUSrcBD, .BW64D, .BSubArithD, .IllegalBitmanipInstrD, .StallE, .FlushE, .ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BALUControlE); - if (`ZBA_SUPPORTED) begin + if (P.ZBA_SUPPORTED) begin // ALU Decoding is more comprehensive when ZBA is supported. 
slt and slti conflicts with sh1add, sh1add.uw assign sltD = (Funct3D == 3'b010 & (~(Funct7D[4]) | ~OpD[5])) ; end else assign sltD = (Funct3D == 3'b010); @@ -290,7 +287,7 @@ module controller( // Fences // Ordinary fence is presently a nop // fence.i flushes the D$ and invalidates the I$ if Zifencei is supported and I$ is implemented - if (`ZIFENCEI_SUPPORTED & `ICACHE_SUPPORTED) begin:fencei + if (P.ZIFENCEI_SUPPORTED & P.ICACHE_SUPPORTED) begin:fencei logic FenceID; assign FenceID = FenceXD & (Funct3D == 3'b001); // is it a FENCE.I instruction? assign InvalidateICacheD = FenceID; @@ -338,5 +335,5 @@ module controller( // the synchronous DTIM cannot read immediately after write // a cache cannot read or write immediately after a write - assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & `DCACHE_SUPPORTED)) | (|AtomicD)); + assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED)) | (|AtomicD)); endmodule diff --git a/src/ieu/datapath.sv b/src/ieu/datapath.sv index df9216761..cb013ee9d 100644 --- a/src/ieu/datapath.sv +++ b/src/ieu/datapath.sv @@ -27,16 +27,14 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module datapath ( +module datapath import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, // Decode stage signals input logic [2:0] ImmSrcD, // Selects type of immediate extension input logic [31:0] InstrD, // Instruction in Decode stage // Execute stage signals - input logic [`XLEN-1:0] PCE, // PC in Execute stage - input logic [`XLEN-1:0] PCLinkE, // PC + 4 (of instruction in Execute stage) + input logic [P.XLEN-1:0] PCE, // PC in Execute stage + input logic [P.XLEN-1:0] PCLinkE, // PC + 4 (of instruction in Execute stage) input logic [2:0] Funct3E, // Funct3 field of instruction in Execute stage input logic StallE, FlushE, // Stall, flush Execute stage input logic [1:0] ForwardAE, ForwardBE, // Forward ALU operands from later stages @@ -51,24 +49,24 @@ module datapath ( input logic [2:0] ZBBSelectE, // ZBB mux select signal input logic [2:0] BALUControlE, // ALU Control signals for B instructions in Execute Stage output logic [1:0] FlagsE, // Comparison flags ({eq, lt}) - output logic [`XLEN-1:0] IEUAdrE, // Address computed by ALU - output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B + output logic [P.XLEN-1:0] IEUAdrE, // Address computed by ALU + output logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B // Memory stage signals input logic StallM, FlushM, // Stall, flush Memory stage input logic FWriteIntM, FCvtIntW, // FPU writes integer register file, FPU converts float to int - input logic [`XLEN-1:0] FIntResM, // FPU integer result - output logic [`XLEN-1:0] SrcAM, // ALU's Source A in Memory stage to privilege unit for CSR writes - output logic [`XLEN-1:0] WriteDataM, // Write data in Memory stage + input logic [P.XLEN-1:0] FIntResM, // FPU integer result + output 
logic [P.XLEN-1:0] SrcAM, // ALU's Source A in Memory stage to privilege unit for CSR writes + output logic [P.XLEN-1:0] WriteDataM, // Write data in Memory stage // Writeback stage signals input logic StallW, FlushW, // Stall, flush Writeback stage input logic RegWriteW, IntDivW, // Write register file, integer divide instruction input logic SquashSCW, // Squash a store conditional when a conflict arose input logic [2:0] ResultSrcW, // Select source of result to write back to register file - input logic [`XLEN-1:0] FCvtIntResW, // FPU convert fp to integer result - input logic [`XLEN-1:0] ReadDataW, // Read data from LSU - input logic [`XLEN-1:0] CSRReadValW, // CSR read result - input logic [`XLEN-1:0] MDUResultW, // MDU (Multiply/divide unit) result - input logic [`XLEN-1:0] FIntDivResultW, // FPU's integer divide result + input logic [P.XLEN-1:0] FCvtIntResW, // FPU convert fp to integer result + input logic [P.XLEN-1:0] ReadDataW, // Read data from LSU + input logic [P.XLEN-1:0] CSRReadValW, // CSR read result + input logic [P.XLEN-1:0] MDUResultW, // MDU (Multiply/divide unit) result + input logic [P.XLEN-1:0] FIntDivResultW, // FPU's integer divide result // Hazard Unit signals output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, // Register sources to read in Decode or Execute stage output logic [4:0] RdE, RdM, RdW // Register destinations in Execute, Memory, or Writeback stage @@ -76,64 +74,64 @@ module datapath ( // Fetch stage signals // Decode stage signals - logic [`XLEN-1:0] R1D, R2D; // Read data from Rs1 (RD1), Rs2 (RD2) - logic [`XLEN-1:0] ImmExtD; // Extended immediate in Decode stage + logic [P.XLEN-1:0] R1D, R2D; // Read data from Rs1 (RD1), Rs2 (RD2) + logic [P.XLEN-1:0] ImmExtD; // Extended immediate in Decode stage logic [4:0] RdD; // Destination register in Decode stage // Execute stage signals - logic [`XLEN-1:0] R1E, R2E; // Source operands read from register file - logic [`XLEN-1:0] ImmExtE; // Extended immediate in Execute stage - logic 
[`XLEN-1:0] SrcAE, SrcBE; // ALU operands - logic [`XLEN-1:0] ALUResultE, AltResultE, IEUResultE; // ALU result, Alternative result (ImmExtE or PC+4), result of execution stage + logic [P.XLEN-1:0] R1E, R2E; // Source operands read from register file + logic [P.XLEN-1:0] ImmExtE; // Extended immediate in Execute stage + logic [P.XLEN-1:0] SrcAE, SrcBE; // ALU operands + logic [P.XLEN-1:0] ALUResultE, AltResultE, IEUResultE; // ALU result, Alternative result (ImmExtE or PC+4), result of execution stage // Memory stage signals - logic [`XLEN-1:0] IEUResultM; // Result from execution stage - logic [`XLEN-1:0] IFResultM; // Result from either IEU or single-cycle FPU op writing an integer register + logic [P.XLEN-1:0] IEUResultM; // Result from execution stage + logic [P.XLEN-1:0] IFResultM; // Result from either IEU or single-cycle FPU op writing an integer register // Writeback stage signals - logic [`XLEN-1:0] SCResultW; // Store Conditional result - logic [`XLEN-1:0] ResultW; // Result to write to register file - logic [`XLEN-1:0] IFResultW; // Result from either IEU or single-cycle FPU op writing an integer register - logic [`XLEN-1:0] IFCvtResultW; // Result from IEU, signle-cycle FPU op, or 2-cycle FCVT float to int - logic [`XLEN-1:0] MulDivResultW; // Multiply always comes from MDU. Divide could come from MDU or FPU (when using fdivsqrt for integer division) + logic [P.XLEN-1:0] SCResultW; // Store Conditional result + logic [P.XLEN-1:0] ResultW; // Result to write to register file + logic [P.XLEN-1:0] IFResultW; // Result from either IEU or single-cycle FPU op writing an integer register + logic [P.XLEN-1:0] IFCvtResultW; // Result from IEU, signle-cycle FPU op, or 2-cycle FCVT float to int + logic [P.XLEN-1:0] MulDivResultW; // Multiply always comes from MDU. 
Divide could come from MDU or FPU (when using fdivsqrt for integer division) // Decode stage assign Rs1D = InstrD[19:15]; assign Rs2D = InstrD[24:20]; assign RdD = InstrD[11:7]; - regfile regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, R1D, R2D); - extend ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ImmExtD); + regfile #(P.XLEN, P.E_SUPPORTED) regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, R1D, R2D); + extend #(P.XLEN, P.A_SUPPORTED) ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ImmExtD); // Execute stage pipeline register and logic - flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E); - flopenrc #(`XLEN) RD2EReg(clk, reset, FlushE, ~StallE, R2D, R2E); - flopenrc #(`XLEN) ImmExtEReg(clk, reset, FlushE, ~StallE, ImmExtD, ImmExtE); + flopenrc #(P.XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E); + flopenrc #(P.XLEN) RD2EReg(clk, reset, FlushE, ~StallE, R2D, R2E); + flopenrc #(P.XLEN) ImmExtEReg(clk, reset, FlushE, ~StallE, ImmExtD, ImmExtE); flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E); flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); - mux3 #(`XLEN) faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE); - mux3 #(`XLEN) fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE); - comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE); - mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE); - mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE); - alu #(`XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE); - mux2 #(`XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE); - mux2 #(`XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE); + mux3 #(P.XLEN) faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE); + mux3 #(P.XLEN) fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE); + comparator #(P.XLEN) comp(ForwardedSrcAE, 
ForwardedSrcBE, BranchSignedE, FlagsE); + mux2 #(P.XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE); + mux2 #(P.XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE); + alu #(P.XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE); + mux2 #(P.XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE); + mux2 #(P.XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE); // Memory stage pipeline register - flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM); - flopenrc #(`XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM); - flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM); - flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM); + flopenrc #(P.XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM); + flopenrc #(P.XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM); + flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM); + flopenrc #(P.XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM); // Writeback stage pipeline register and logic - flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW); - flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW); + flopenrc #(P.XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW); + flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW); // floating point inputs: FIntResM comes from fclass, fcmp, fmv; FCvtIntResW comes from fcvt - if (`F_SUPPORTED) begin:fpmux - mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); - mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW); - if (`IDIV_ON_FPU) begin - mux2 #(`XLEN) divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW); + if (P.F_SUPPORTED) begin:fpmux + mux2 #(P.XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); + mux2 #(P.XLEN) cvtresultmuxW(IFResultW, 
FCvtIntResW, FCvtIntW, IFCvtResultW); + if (P.IDIV_ON_FPU) begin + mux2 #(P.XLEN) divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW); end else begin assign MulDivResultW = MDUResultW; end @@ -142,9 +140,9 @@ module datapath ( assign IFCvtResultW = IFResultW; assign MulDivResultW = MDUResultW; end - mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); + mux5 #(P.XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); // handle Store Conditional result if atomic extension supported - if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW}; + if (P.A_SUPPORTED) assign SCResultW = {{(P.XLEN-1){1'b0}}, SquashSCW}; else assign SCResultW = 0; -endmodule \ No newline at end of file +endmodule diff --git a/src/ieu/extend.sv b/src/ieu/extend.sv index 51a10a46c..70a429b16 100644 --- a/src/ieu/extend.sv +++ b/src/ieu/extend.sv @@ -27,29 +27,27 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module extend ( +module extend #(parameter XLEN, A_SUPPORTED) ( input logic [31:7] InstrD, // All instruction bits except opcode (lower 7 bits) input logic [2:0] ImmSrcD, // Select what kind of extension to perform - output logic [`XLEN-1:0 ] ImmExtD); // Extended immediate + output logic [XLEN-1:0 ] ImmExtD); // Extended immediate - localparam [`XLEN-1:0] undefined = {(`XLEN){1'bx}}; // could change to 0 after debug + localparam [XLEN-1:0] undefined = {(XLEN){1'bx}}; // could change to 0 after debug always_comb case(ImmSrcD) // I-type - 3'b000: ImmExtD = {{(`XLEN-12){InstrD[31]}}, InstrD[31:20]}; + 3'b000: ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[31:20]}; // S-type (stores) - 3'b001: ImmExtD = {{(`XLEN-12){InstrD[31]}}, InstrD[31:25], InstrD[11:7]}; + 3'b001: ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[31:25], InstrD[11:7]}; // B-type (branches) - 3'b010: ImmExtD = {{(`XLEN-12){InstrD[31]}}, InstrD[7], InstrD[30:25], InstrD[11:8], 1'b0}; + 3'b010: ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[7], InstrD[30:25], InstrD[11:8], 1'b0}; // J-type (jal) - 3'b011: ImmExtD = {{(`XLEN-20){InstrD[31]}}, InstrD[19:12], InstrD[20], InstrD[30:21], 1'b0}; + 3'b011: ImmExtD = {{(XLEN-20){InstrD[31]}}, InstrD[19:12], InstrD[20], InstrD[30:21], 1'b0}; // U-type (lui, auipc) - 3'b100: ImmExtD = {{(`XLEN-31){InstrD[31]}}, InstrD[30:12], 12'b0}; + 3'b100: ImmExtD = {{(XLEN-31){InstrD[31]}}, InstrD[30:12], 12'b0}; // Store Conditional: zero offset - 3'b101: if (`A_SUPPORTED) ImmExtD = 0; + 3'b101: if (A_SUPPORTED) ImmExtD = 0; else ImmExtD = undefined; default: ImmExtD = undefined; // undefined endcase diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index 02fa1dd7c..daebc98f6 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -26,45 +26,44 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" -module ieu ( +module ieu import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, // Decode stage signals input logic [31:0] InstrD, // Instruction input logic IllegalIEUFPUInstrD, // Illegal instruction output logic IllegalBaseInstrD, // Illegal I-type instruction, or illegal RV32 access to upper 16 registers // Execute stage signals - input logic [`XLEN-1:0] PCE, // PC - input logic [`XLEN-1:0] PCLinkE, // PC + 4 + input logic [P.XLEN-1:0] PCE, // PC + input logic [P.XLEN-1:0] PCLinkE, // PC + 4 output logic PCSrcE, // Select next PC (between PC+4 and IEUAdrE) input logic FWriteIntE, FCvtIntE, // FPU writes to integer register file, FPU converts float to int - output logic [`XLEN-1:0] IEUAdrE, // Memory address + output logic [P.XLEN-1:0] IEUAdrE, // Memory address output logic IntDivE, W64E, // Integer divide, RV64 W-type instruction output logic [2:0] Funct3E, // Funct3 instruction field - output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU src inputs before the mux choosing between them and PCE to put in srcA/B + output logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU src inputs before the mux choosing between them and PCE to put in srcA/B output logic [4:0] RdE, // Destination register // Memory stage signals input logic SquashSCW, // Squash store conditional, from LSU output logic [1:0] MemRWM, // Read/write control goes to LSU output logic [1:0] AtomicM, // Atomic control goes to LSU - output logic [`XLEN-1:0] WriteDataM, // Write data to LSU + output logic [P.XLEN-1:0] WriteDataM, // Write data to LSU output logic [2:0] Funct3M, // Funct3 (size and signedness) to LSU - output logic [`XLEN-1:0] SrcAM, // ALU SrcA to Privileged unit and FPU + output logic [P.XLEN-1:0] SrcAM, // ALU SrcA to Privileged unit and FPU output logic [4:0] RdM, // Destination register - input logic [`XLEN-1:0] FIntResM, // Integer 
result from FPU (fmv, fclass, fcmp) + input logic [P.XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp) output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$ output logic InstrValidD, InstrValidE, InstrValidM,// Instruction is valid output logic BranchD, BranchE, output logic JumpD, JumpE, // Writeback stage signals - input logic [`XLEN-1:0] FIntDivResultW, // Integer divide result from FPU fdivsqrt) - input logic [`XLEN-1:0] CSRReadValW, // CSR read value, - input logic [`XLEN-1:0] MDUResultW, // multiply/divide unit result - input logic [`XLEN-1:0] FCvtIntResW, // FPU's float to int conversion result + input logic [P.XLEN-1:0] FIntDivResultW, // Integer divide result from FPU fdivsqrt) + input logic [P.XLEN-1:0] CSRReadValW, // CSR read value, + input logic [P.XLEN-1:0] MDUResultW, // multiply/divide unit result + input logic [P.XLEN-1:0] FCvtIntResW, // FPU's float to int conversion result input logic FCvtIntW, // FPU converts float to int output logic [4:0] RdW, // Destination register - input logic [`XLEN-1:0] ReadDataW, // LSU's read data + input logic [P.XLEN-1:0] ReadDataW, // LSU's read data // Hazard unit signals input logic StallD, StallE, StallM, StallW, // Stall signals from hazard unit input logic FlushD, FlushE, FlushM, FlushW, // Flush signals @@ -96,7 +95,7 @@ module ieu ( logic BranchSignedE; // Branch does signed comparison on operands logic MDUE; // Multiply/divide instruction -controller c( + controller #(P) c( .clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD, .IllegalIEUFPUInstrD, .IllegalBaseInstrD, .StallE, .FlushE, .FlagsE, .FWriteIntE, .PCSrcE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .MemReadE, .CSRReadE, @@ -105,7 +104,7 @@ controller c( .RegWriteM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM, .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD); - datapath dp( + datapath #(P) dp( .clk, .reset, .ImmSrcD, .InstrD, 
.StallE, .FlushE, .ForwardAE, .ForwardBE, .W64E, .SubArithE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .JumpE, .BranchSignedE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, .BSelectE, .ZBBSelectE, .BALUControlE, diff --git a/src/ieu/regfile.sv b/src/ieu/regfile.sv index 967a2101e..5eff24022 100644 --- a/src/ieu/regfile.sv +++ b/src/ieu/regfile.sv @@ -27,18 +27,16 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module regfile ( +module regfile #(parameter XLEN, E_SUPPORTED) ( input logic clk, reset, input logic we3, // Write enable input logic [4:0] a1, a2, a3, // Source registers to read (a1, a2), destination register to write (a3) - input logic [`XLEN-1:0] wd3, // Write data for port 3 - output logic [`XLEN-1:0] rd1, rd2); // Read data for ports 1, 2 + input logic [XLEN-1:0] wd3, // Write data for port 3 + output logic [XLEN-1:0] rd1, rd2); // Read data for ports 1, 2 - localparam NUMREGS = `E_SUPPORTED ? 16 : 32; // only 16 registers in E mode + localparam NUMREGS = E_SUPPORTED ? 16 : 32; // only 16 registers in E mode - logic [`XLEN-1:0] rf[NUMREGS-1:1]; + logic [XLEN-1:0] rf[NUMREGS-1:1]; integer i; // Three ported register file diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 82e8a33b9..1b0c66ba5 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -25,9 +25,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module ifu ( +module ifu import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, input logic StallF, StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, @@ -39,31 +37,31 @@ module ifu ( input logic BranchD, BranchE, input logic JumpD, JumpE, // Bus interface - output logic [`PA_BITS-1:0] IFUHADDR, // Bus address from IFU to EBU - input logic [`XLEN-1:0] HRDATA, // Bus read data from IFU to EBU + output logic [P.PA_BITS-1:0] IFUHADDR, // Bus address from IFU to EBU + input logic [P.XLEN-1:0] HRDATA, // Bus read data from IFU to EBU input logic IFUHREADY, // Bus ready from IFU to EBU output logic IFUHWRITE, // Bus write operation from IFU to EBU output logic [2:0] IFUHSIZE, // Bus operation size from IFU to EBU output logic [2:0] IFUHBURST, // Bus burst from IFU to EBU output logic [1:0] IFUHTRANS, // Bus transaction type from IFU to EBU - output logic [`XLEN-1:0] PCSpillF, // PCF with possible + 2 to handle spill to HPTW + output logic [P.XLEN-1:0] PCSpillF, // PCF with possible + 2 to handle spill to HPTW // Execute - output logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) + output logic [P.XLEN-1:0] PCLinkE, // The address following the branch instruction. 
(AKA Fall through address) input logic PCSrcE, // Executation stage branch is taken - input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address - input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address - output logic [`XLEN-1:0] PCE, // Execution stage instruction address + input logic [P.XLEN-1:0] IEUAdrE, // The branch/jump target address + input logic [P.XLEN-1:0] IEUAdrM, // The branch/jump target address + output logic [P.XLEN-1:0] PCE, // Execution stage instruction address output logic BPWrongE, // Prediction is wrong output logic BPWrongM, // Prediction is wrong // Mem output logic CommittedF, // I$ or bus memory operation started, delay interrupts - input logic [`XLEN-1:0] UnalignedPCNextF, // The next PCF, but not aligned to 2 bytes. - output logic [`XLEN-1:0] PC2NextF, // Selected PC between branch prediction and next valid PC if CSRWriteFence + input logic [P.XLEN-1:0] UnalignedPCNextF, // The next PCF, but not aligned to 2 bytes. + output logic [P.XLEN-1:0] PC2NextF, // Selected PC between branch prediction and next valid PC if CSRWriteFence output logic [31:0] InstrD, // The decoded instruction in Decode stage output logic [31:0] InstrM, // The decoded instruction in Memory stage output logic [31:0] InstrOrigM, // Original compressed or uncompressed instruction in Memory stage for Illegal Instruction MTVAL - output logic [`XLEN-1:0] PCM, // Memory stage instruction address + output logic [P.XLEN-1:0] PCM, // Memory stage instruction address // branch predictor output logic [3:0] InstrClassM, // The valid instruction class. 
1-hot encoded as jalr, ret, jr (not ret), j, br output logic BPDirPredWrongM, // Prediction direction is wrong @@ -79,10 +77,10 @@ module ifu ( output logic InstrMisalignedFaultM, // Branch target not aligned to 4 bytes if no compressed allowed (2 bytes if allowed) // mmu management input logic [1:0] PrivilegeModeW, // Priviledge mode in Writeback stage - input logic [`XLEN-1:0] PTE, // Hardware page table walker (HPTW) writes Page table entry (PTE) to ITLB + input logic [P.XLEN-1:0] PTE, // Hardware page table walker (HPTW) writes Page table entry (PTE) to ITLB input logic [1:0] PageType, // Hardware page table walker (HPTW) writes PageType to ITLB input logic ITLBWriteF, // Writes PTE and PageType to ITLB - input logic [`XLEN-1:0] SATP_REGW, // Location of the root page table and page table configuration + input logic [P.XLEN-1:0] SATP_REGW, // Location of the root page table and page table configuration input logic STATUS_MXR, // Status CSR: make executable page readable input logic STATUS_SUM, // Status CSR: Supervisor access to user memory input logic STATUS_MPRV, // Status CSR: modify machine privilege @@ -90,8 +88,8 @@ module ifu ( input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries output logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk output logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits - input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit - input var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP address from privileged unit + input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration from privileged unit + input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP address from privileged unit output logic InstrAccessFaultF, // Instruction access fault output logic ICacheAccess, // Report I$ read to performance counters output logic ICacheMiss // Report I$ miss to 
performance counters @@ -99,17 +97,17 @@ module ifu ( localparam [31:0] nop = 32'h00000013; // instruction for NOP - logic [`XLEN-1:0] PCNextF; // Next PCF, selected from Branch predictor, Privilege, or PC+2/4 + logic [P.XLEN-1:0] PCNextF; // Next PCF, selected from Branch predictor, Privilege, or PC+2/4 logic BranchMisalignedFaultE; // Branch target not aligned to 4 bytes if no compressed allowed (2 bytes if allowed) - logic [`XLEN-1:0] PCPlus2or4F; // PCF + 2 (CompressedF) or PCF + 4 (Non-compressed) - logic [`XLEN-1:0] PCSpillNextF; // Next PCF after possible + 2 to handle spill - logic [`XLEN-1:0] PCLinkD; // PCF2or4F delayed 1 cycle. This is next PC after a control flow instruction (br or j) - logic [`XLEN-1:2] PCPlus4F; // PCPlus4F is always PCF + 4. Fancy way to compute PCPlus2or4F - logic [`XLEN-1:0] PCD; // Decode stage instruction address - logic [`XLEN-1:0] NextValidPCE; // The PC of the next valid instruction in the pipeline after csr write or fence - logic [`XLEN-1:0] PCF; // Fetch stage instruction address - logic [`PA_BITS-1:0] PCPF; // Physical address after address translation - logic [`XLEN+1:0] PCFExt; // + logic [P.XLEN-1:0] PCPlus2or4F; // PCF + 2 (CompressedF) or PCF + 4 (Non-compressed) + logic [P.XLEN-1:0] PCSpillNextF; // Next PCF after possible + 2 to handle spill + logic [P.XLEN-1:0] PCLinkD; // PCF2or4F delayed 1 cycle. This is next PC after a control flow instruction (br or j) + logic [P.XLEN-1:2] PCPlus4F; // PCPlus4F is always PCF + 4. 
Fancy way to compute PCPlus2or4F + logic [P.XLEN-1:0] PCD; // Decode stage instruction address + logic [P.XLEN-1:0] NextValidPCE; // The PC of the next valid instruction in the pipeline after csr write or fence + logic [P.XLEN-1:0] PCF; // Fetch stage instruction address + logic [P.PA_BITS-1:0] PCPF; // Physical address after address translation + logic [P.XLEN+1:0] PCFExt; // logic [31:0] IROMInstrF; // Instruction from the IROM logic [31:0] ICacheInstrF; // Instruction from the I$ @@ -133,7 +131,7 @@ module ifu ( logic IFUCacheBusStallF; // EIther I$ or bus busy with multicycle operation logic GatedStallD; // StallD gated by selected next spill // branch predictor signal - logic [`XLEN-1:0] PC1NextF; // Branch predictor next PCF + logic [P.XLEN-1:0] PC1NextF; // Branch predictor next PCF logic BusCommittedF; // Bus memory operation in flight, delay interrupts logic CacheCommittedF; // I$ memory operation started, delay interrupts logic SelIROM; // PMA indicates instruction address is in the IROM @@ -145,8 +143,8 @@ module ifu ( // Spill Support ///////////////////////////////////////////////////////////////////////////////////////////// - if(`C_SUPPORTED) begin : Spill - spill #(`ICACHE_SUPPORTED) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF, + if(P.C_SUPPORTED) begin : Spill + spill #(P) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF, .InstrUpdateDAF, .IFUCacheBusStallF, .ITLBMissF, .PCSpillNextF, .PCSpillF, .SelSpillNextF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpill assign PCSpillNextF = PCNextF; @@ -159,7 +157,7 @@ module ifu ( // Memory management //////////////////////////////////////////////////////////////////////////////////////////////// - if(`ZICSR_SUPPORTED == 1) begin : immu + if(P.ZICSR_SUPPORTED == 1) begin : immu /////////////////////////////////////////// // sfence.vma causes TLB flushes /////////////////////////////////////////// @@ -172,7 +170,7 @@ module ifu ( flopr 
#(1) StallMReg(.clk, .reset, .d(StallM), .q(StallMQ)); assign TLBFlush = sfencevmaM & ~StallMQ; - mmu #(.TLB_ENTRIES(`ITLB_ENTRIES), .IMMU(1)) + mmu #(.TLB_ENTRIES(P.ITLB_ENTRIES), .IMMU(1)) immu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .DisableTranslation(1'b0), .VAdr(PCFExt), @@ -193,7 +191,7 @@ module ifu ( end else begin assign {ITLBMissF, InstrAccessFaultF, InstrPageFaultF, InstrUpdateDAF} = '0; - assign PCPF = PCFExt[`PA_BITS-1:0]; + assign PCPF = PCFExt[P.PA_BITS-1:0]; assign CacheableF = '1; assign SelIROM = '0; end @@ -212,31 +210,31 @@ module ifu ( assign IgnoreRequest = ITLBMissF | FlushD; // The IROM uses untranslated addresses, so it is not compatible with virtual memory. - if (`IROM_SUPPORTED) begin : irom + if (P.IROM_SUPPORTED) begin : irom logic IROMce; assign IROMce = ~GatedStallD | reset; assign IFURWF = 2'b10; - irom irom(.clk, .ce(IROMce), .Adr(PCSpillNextF[`XLEN-1:0]), .IROMInstrF); + irom irom(.clk, .ce(IROMce), .Adr(PCSpillNextF[P.XLEN-1:0]), .IROMInstrF); end else begin assign IFURWF = 2'b10; end - if (`BUS_SUPPORTED) begin : bus + if (P.BUS_SUPPORTED) begin : bus // **** must fix words per line vs beats per line as in lsu. - localparam WORDSPERLINE = `ICACHE_SUPPORTED ? `ICACHE_LINELENINBITS/`XLEN : 1; - localparam LOGBWPL = `ICACHE_SUPPORTED ? $clog2(WORDSPERLINE) : 1; - if(`ICACHE_SUPPORTED) begin : icache - localparam LINELEN = `ICACHE_SUPPORTED ? `ICACHE_LINELENINBITS : `XLEN; - localparam LLENPOVERAHBW = `LLEN / `AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) + localparam WORDSPERLINE = P.ICACHE_SUPPORTED ? P.ICACHE_LINELENINBITS/P.XLEN : 1; + localparam LOGBWPL = P.ICACHE_SUPPORTED ? $clog2(WORDSPERLINE) : 1; + if(P.ICACHE_SUPPORTED) begin : icache + localparam LINELEN = P.ICACHE_SUPPORTED ? P.ICACHE_LINELENINBITS : P.XLEN; + localparam LLENPOVERAHBW = P.LLEN / P.AHBW; // Number of AHB beats in a LLEN word. 
AHBW cannot be larger than LLEN. (implementation limitation) logic [LINELEN-1:0] FetchBuffer; - logic [`PA_BITS-1:0] ICacheBusAdr; + logic [P.PA_BITS-1:0] ICacheBusAdr; logic ICacheBusAck; logic [1:0] CacheBusRW, BusRW, CacheRWF; assign BusRW = ~ITLBMissF & ~CacheableF & ~SelIROM ? IFURWF : '0; assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? IFURWF : '0; - cache #(.LINELEN(`ICACHE_LINELENINBITS), - .NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS), - .NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .READ_ONLY_CACHE(1)) + cache #(.LINELEN(P.ICACHE_LINELENINBITS), + .NUMLINES(P.ICACHE_WAYSIZEINBYTES*8/P.ICACHE_LINELENINBITS), + .NUMWAYS(P.ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .READ_ONLY_CACHE(1)) icache(.clk, .reset, .FlushStage(FlushD), .Stall(GatedStallD), .FetchBuffer, .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), @@ -277,7 +275,7 @@ module ifu ( .Stall(GatedStallD), .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer)); assign CacheCommittedF = '0; - if(`IROM_SUPPORTED) mux2 #(32) UnCachedDataMux2(FetchBuffer, IROMInstrF, SelIROM, InstrRawF); + if(P.IROM_SUPPORTED) mux2 #(32) UnCachedDataMux2(FetchBuffer, IROMInstrF, SelIROM, InstrRawF); else assign InstrRawF = FetchBuffer; assign IFUHBURST = 3'b0; assign {ICacheMiss, ICacheAccess, ICacheStallF} = '0; @@ -298,17 +296,17 @@ module ifu ( // PCNextF logic //////////////////////////////////////////////////////////////////////////////////////////////// - if(`ZICSR_SUPPORTED | `ZIFENCEI_SUPPORTED) - mux2 #(`XLEN) pcmux2(.d0(PC1NextF), .d1(NextValidPCE), .s(CSRWriteFenceM),.y(PC2NextF)); + if(P.ZICSR_SUPPORTED | P.ZIFENCEI_SUPPORTED) + mux2 #(P.XLEN) pcmux2(.d0(PC1NextF), .d1(NextValidPCE), .s(CSRWriteFenceM),.y(PC2NextF)); else assign PC2NextF = PC1NextF; - assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 
21 about 16-bit alignment - flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); + assign PCNextF = {UnalignedPCNextF[P.XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment + flopenl #(P.XLEN) pcreg(clk, reset, ~StallF, PCNextF, P.RESET_VECTOR[P.XLEN-1:0], PCF); // pcadder // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32 // *** consider using PCPlus2or4F = PCF + CompressedF ? 2 : 4; - assign PCPlus4F = PCF[`XLEN-1:2] + 1; // add 4 to PC + assign PCPlus4F = PCF[P.XLEN-1:2] + 1; // add 4 to PC // choose PC+2 or PC+4 based on CompressedF, which arrives later. // Speeds up critical path as compared to selecting adder input based on CompressedF // *** consider gating PCPlus4F to provide the reset. @@ -320,14 +318,14 @@ module ifu ( if(reset) PCPlus2or4F = '0; else if (CompressedF) // add 2 if (PCF[1]) PCPlus2or4F = {PCPlus4F, 2'b00}; - else PCPlus2or4F = {PCF[`XLEN-1:2], 2'b10}; + else PCPlus2or4F = {PCF[P.XLEN-1:2], 2'b10}; else PCPlus2or4F = {PCPlus4F, PCF[1:0]}; // add 4 //////////////////////////////////////////////////////////////////////////////////////////////// // Branch and Jump Predictor //////////////////////////////////////////////////////////////////////////////////////////////// - if (`BPRED_SUPPORTED) begin : bpred + if (P.BPRED_SUPPORTED) begin : bpred bpred bpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, @@ -337,7 +335,7 @@ module ifu ( .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .IClassWrongM); end else begin : bpred - mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PC1NextF)); + mux2 #(P.XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PC1NextF)); assign BPWrongE = PCSrcE; assign {InstrClassM, BPDirPredWrongM, BTAWrongM, RASPredPCWrongM, IClassWrongM} = '0; assign NextValidPCE = PCE; @@ -348,10 +346,10 @@ module ifu ( // Decode stage pipeline register and compressed instruction 
decoding. //////////////////////////////////////////////////////////////////////////////////////////////// // Decode stage pipeline register and logic - flopenrc #(`XLEN) PCDReg(clk, reset, FlushD, ~StallD, PCF, PCD); + flopenrc #(P.XLEN) PCDReg(clk, reset, FlushD, ~StallD, PCF, PCD); // expand 16-bit compressed instructions to 32 bits - if (`C_SUPPORTED) begin + if (P.C_SUPPORTED) begin logic IllegalCompInstrD; decompress decomp(.InstrRawD, .InstrD, .IllegalCompInstrD); assign IllegalIEUInstrD = IllegalBaseInstrD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr @@ -371,7 +369,7 @@ module ifu ( // only IALIGN=32, the two low bits (mepc[1:0]) are always zero. // Spec 3.1.14 // Traps: Can’t happen. The bottom two bits of MTVEC are ignored so the trap always is to a multiple of 4. See 3.1.7 of the privileged spec. - assign BranchMisalignedFaultE = (IEUAdrE[1] & ~`C_SUPPORTED) & PCSrcE; + assign BranchMisalignedFaultE = (IEUAdrE[1] & ~P.C_SUPPORTED) & PCSrcE; flopenr #(1) InstrMisalignedReg(clk, reset, ~StallM, BranchMisalignedFaultE, InstrMisalignedFaultM); // Instruction and PC/PCLink pipeline registers @@ -380,10 +378,10 @@ module ifu ( mux2 #(32) FlushInstrMMux(InstrE, nop, FlushM, NextInstrE); flopenr #(32) InstrEReg(clk, reset, ~StallE, NextInstrD, InstrE); flopenr #(32) InstrMReg(clk, reset, ~StallM, NextInstrE, InstrM); - flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, PCD, PCE); - flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, PCE, PCM); - //flopenr #(`XLEN) PCPDReg(clk, reset, ~StallD, PCPlus2or4F, PCLinkD); - //flopenr #(`XLEN) PCPEReg(clk, reset, ~StallE, PCLinkD, PCLinkE); + flopenr #(P.XLEN) PCEReg(clk, reset, ~StallE, PCD, PCE); + flopenr #(P.XLEN) PCMReg(clk, reset, ~StallM, PCE, PCM); + //flopenr #(P.XLEN) PCPDReg(clk, reset, ~StallD, PCPlus2or4F, PCLinkD); + //flopenr #(P.XLEN) PCPEReg(clk, reset, ~StallE, PCLinkD, PCLinkE); flopenrc #(1) CompressedDReg(clk, reset, FlushD, ~StallD, CompressedF, CompressedD); flopenrc #(1) CompressedEReg(clk, 
reset, FlushE, ~StallE, CompressedD, CompressedE); diff --git a/src/ifu/spill.sv b/src/ifu/spill.sv index 54c0f2261..27eaa4107 100644 --- a/src/ifu/spill.sv +++ b/src/ifu/spill.sv @@ -31,30 +31,29 @@ `include "wally-config.vh" -module spill #( - parameter CACHE_ENABLED // Changes spill threshold to 1 if there is no cache -)(input logic clk, +module spill import cvw::*; #(parameter cvw_t P) ( + input logic clk, input logic reset, input logic StallD, FlushD, - input logic [`XLEN-1:0] PCF, // 2 byte aligned PC in Fetch stage - input logic [`XLEN-1:2] PCPlus4F, // PCF + 4 - input logic [`XLEN-1:0] PCNextF, // The next PCF + input logic [P.XLEN-1:0] PCF, // 2 byte aligned PC in Fetch stage + input logic [P.XLEN-1:2] PCPlus4F, // PCF + 4 + input logic [P.XLEN-1:0] PCNextF, // The next PCF input logic [31:0] InstrRawF, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic IFUCacheBusStallF, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic ITLBMissF, // ITLB miss, ignore memory request input logic InstrUpdateDAF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active) - output logic [`XLEN-1:0] PCSpillNextF, // The next PCF for one of the two memory addresses of the spill - output logic [`XLEN-1:0] PCSpillF, // PCF for one of the two memory addresses of the spill + output logic [P.XLEN-1:0] PCSpillNextF, // The next PCF for one of the two memory addresses of the spill + output logic [P.XLEN-1:0] PCSpillF, // PCF for one of the two memory addresses of the spill output logic SelSpillNextF, // During the transition between the two spill operations, the IFU should stall the pipeline output logic [31:0] PostSpillInstrRawF,// The final 32 bit instruction after merging the two spilled fetches into 1 instruction output logic CompressedF); // The fetched instruction is compressed // Spill threshold occurs when all the cache offset PC bits 
are 1 (except [0]). Without a cache this is just PCF[1] typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; - localparam SPILLTHRESHOLD = CACHE_ENABLED ? `ICACHE_LINELENINBITS/32 : 1; + localparam SPILLTHRESHOLD = P.ICACHE_SUPPORTED ? P.ICACHE_LINELENINBITS/32 : 1; statetype CurrState, NextState; - logic [`XLEN-1:0] PCPlus2F; + logic [P.XLEN-1:0] PCPlus2F; logic TakeSpillF; logic SpillF; logic SelSpillF; @@ -66,11 +65,11 @@ module spill #( //////////////////////////////////////////////////////////////////////////////////////////////////// // compute PCF+2 from the raw PC+4 - mux2 #(`XLEN) pcplus2mux(.d0({PCF[`XLEN-1:2], 2'b10}), .d1({PCPlus4F, 2'b00}), .s(PCF[1]), .y(PCPlus2F)); + mux2 #(P.XLEN) pcplus2mux(.d0({PCF[P.XLEN-1:2], 2'b10}), .d1({PCPlus4F, 2'b00}), .s(PCF[1]), .y(PCPlus2F)); // select between PCNextF and PCF+2 - mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelSpillNextF & ~FlushD), .y(PCSpillNextF)); + mux2 #(P.XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelSpillNextF & ~FlushD), .y(PCSpillNextF)); // select between PCF and PCF+2 - mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCSpillF)); + mux2 #(P.XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCSpillF)); //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -78,7 +77,7 @@ module spill #( //////////////////////////////////////////////////////////////////////////////////////////////////// assign SpillF = &PCF[$clog2(SPILLTHRESHOLD)+1:1]; - assign TakeSpillF = SpillF & ~IFUCacheBusStallF & ~(ITLBMissF | (`SVADU_SUPPORTED & InstrUpdateDAF)); + assign TakeSpillF = SpillF & ~IFUCacheBusStallF & ~(ITLBMissF | (P.SVADU_SUPPORTED & InstrUpdateDAF)); always_ff @(posedge clk) if (reset | FlushD) CurrState <= #1 STATE_READY; diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index f6946ec60..343262e57 100644 --- a/src/wally/wallypipelinedcore.sv +++ 
b/src/wally/wallypipelinedcore.sv @@ -26,21 +26,18 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -//import cvw::*; // global CORE-V-Wally parameters -`include "wally-config.vh" - module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, // Privileged input logic MTimerInt, MExtInt, SExtInt, MSwInt, input logic [63:0] MTIME_CLINT, // Bus Interface - input logic [`AHBW-1:0] HRDATA, + input logic [P.AHBW-1:0] HRDATA, input logic HREADY, HRESP, output logic HCLK, HRESETn, - output logic [`PA_BITS-1:0] HADDR, - output logic [`AHBW-1:0] HWDATA, - output logic [`XLEN/8-1:0] HWSTRB, + output logic [P.PA_BITS-1:0] HADDR, + output logic [P.AHBW-1:0] HWDATA, + output logic [P.XLEN/8-1:0] HWSTRB, output logic HWRITE, output logic [2:0] HSIZE, output logic [2:0] HBURST, @@ -58,15 +55,15 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( logic IntDivE, W64E; logic CSRReadM, CSRWriteM, PrivilegedM; logic [1:0] AtomicM; - logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE; - logic [`XLEN-1:0] SrcAM; + logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE; + logic [P.XLEN-1:0] SrcAM; logic [2:0] Funct3E; logic [31:0] InstrD; logic [31:0] InstrM, InstrOrigM; - logic [`XLEN-1:0] PCSpillF, PCE, PCLinkE; - logic [`XLEN-1:0] PCM; - logic [`XLEN-1:0] CSRReadValW, MDUResultW; - logic [`XLEN-1:0] UnalignedPCNextF, PC2NextF; + logic [P.XLEN-1:0] PCSpillF, PCE, PCLinkE; + logic [P.XLEN-1:0] PCM; + logic [P.XLEN-1:0] CSRReadValW, MDUResultW; + logic [P.XLEN-1:0] UnalignedPCNextF, PC2NextF; logic [1:0] MemRWM; logic InstrValidD, InstrValidE, InstrValidM; logic InstrMisalignedFaultM; @@ -86,32 +83,32 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( logic [4:0] RdE, RdM, RdW; logic FPUStallD; logic FWriteIntE; - logic [`FLEN-1:0] FWriteDataM; - logic [`XLEN-1:0] FIntResM; - logic [`XLEN-1:0] FCvtIntResW; + logic [P.FLEN-1:0] FWriteDataM; + logic 
[P.XLEN-1:0] FIntResM; + logic [P.XLEN-1:0] FCvtIntResW; logic FCvtIntW; logic FDivBusyE; logic FRegWriteM; logic FCvtIntStallD; logic FpLoadStoreM; logic [4:0] SetFflagsM; - logic [`XLEN-1:0] FIntDivResultW; + logic [P.XLEN-1:0] FIntDivResultW; // memory management unit signals logic ITLBWriteF; logic ITLBMissF; - logic [`XLEN-1:0] SATP_REGW; + logic [P.XLEN-1:0] SATP_REGW; logic STATUS_MXR, STATUS_SUM, STATUS_MPRV; logic [1:0] STATUS_MPP, STATUS_FS; logic [1:0] PrivilegeModeW; - logic [`XLEN-1:0] PTE; + logic [P.XLEN-1:0] PTE; logic [1:0] PageType; logic sfencevmaM; logic SelHPTW; // PMA checker signals - var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0]; - var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0]; + var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0]; + var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0]; // IMem stalls logic IFUStallF; @@ -119,14 +116,14 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( // cpu lsu interface logic [2:0] Funct3M; - logic [`XLEN-1:0] IEUAdrE; - logic [`XLEN-1:0] WriteDataM; - logic [`XLEN-1:0] IEUAdrM; - logic [`LLEN-1:0] ReadDataW; + logic [P.XLEN-1:0] IEUAdrE; + logic [P.XLEN-1:0] WriteDataM; + logic [P.XLEN-1:0] IEUAdrM; + logic [P.LLEN-1:0] ReadDataW; logic CommittedM; // AHB ifu interface - logic [`PA_BITS-1:0] IFUHADDR; + logic [P.PA_BITS-1:0] IFUHADDR; logic [2:0] IFUHBURST; logic [1:0] IFUHTRANS; logic [2:0] IFUHSIZE; @@ -134,9 +131,9 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( logic IFUHREADY; // AHB LSU interface - logic [`PA_BITS-1:0] LSUHADDR; - logic [`XLEN-1:0] LSUHWDATA; - logic [`XLEN/8-1:0] LSUHWSTRB; + logic [P.PA_BITS-1:0] LSUHADDR; + logic [P.XLEN-1:0] LSUHWDATA; + logic [P.XLEN/8-1:0] LSUHWSTRB; logic LSUHWRITE; logic LSUHREADY; @@ -165,7 +162,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( logic wfiM, IntPendingM; // instruction fetch unit: PC, branch prediction, instruction cache - ifu ifu(.clk, .reset, + ifu 
#(P) ifu(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidM, .InstrValidE, .InstrValidD, .BranchD, .BranchE, .JumpD, .JumpE, .ICacheStallF, @@ -188,7 +185,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .InstrAccessFaultF, .InstrUpdateDAF); // integer execution unit: integer register file, datapath and controller - ieu ieu(.clk, .reset, + ieu #(P) ieu(.clk, .reset, // Decode Stage interface .InstrD, .IllegalIEUFPUInstrD, .IllegalBaseInstrD, // Execute Stage interface @@ -204,7 +201,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .RdE, .RdM, .FIntResM, .FlushDCacheM, .BranchD, .BranchE, .JumpD, .JumpE, // Writeback stage - .CSRReadValW, .MDUResultW, .FIntDivResultW, .RdW, .ReadDataW(ReadDataW[`XLEN-1:0]), + .CSRReadValW, .MDUResultW, .FIntDivResultW, .RdW, .ReadDataW(ReadDataW[P.XLEN-1:0]), .InstrValidM, .InstrValidE, .InstrValidD, .FCvtIntResW, .FCvtIntW, // hazards .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, @@ -244,7 +241,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .PCSpillF, .ITLBMissF, .PTE, .PageType, .ITLBWriteF, .SelHPTW, .LSUStallM); - if(`BUS_SUPPORTED) begin : ebu + if(P.BUS_SUPPORTED) begin : ebu ebu ebu(// IFU connections .clk, .reset, // IFU interface @@ -272,7 +269,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .FlushD, .FlushE, .FlushM, .FlushW); // privileged unit - if (`ZICSR_SUPPORTED) begin:priv + if (P.ZICSR_SUPPORTED) begin:priv privileged priv( .clk, .reset, .FlushD, .FlushE, .FlushM, .FlushW, .StallD, .StallE, .StallM, .StallW, @@ -306,7 +303,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( end // multiply/divide unit - if (`M_SUPPORTED | `ZMMUL_SUPPORTED) begin:mdu + if (P.M_SUPPORTED | P.ZMMUL_SUPPORTED) begin:mdu mdu mdu(.clk, .reset, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .ForwardedSrcAE, 
.ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, @@ -317,12 +314,12 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( end // floating point unit - if (`F_SUPPORTED) begin:fpu + if (P.F_SUPPORTED) begin:fpu fpu fpu( .clk, .reset, .FRM_REGW, // Rounding mode from CSR .InstrD, // instruction from IFU - .ReadDataW(ReadDataW[`FLEN-1:0]),// Read data from memory + .ReadDataW(ReadDataW[P.FLEN-1:0]),// Read data from memory .ForwardedSrcAE, // Integer input being processed (from IEU) .StallE, .StallM, .StallW, // stall signals from HZU .FlushE, .FlushM, .FlushW, // flush signals from HZU From e33db7f9a7d25175d1d64aa9ef898b39fccbe6c0 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 24 May 2023 14:49:22 -0500 Subject: [PATCH 03/20] More parameterization. Copied Lim. Still no slow down. --- src/cache/cache.sv | 14 ++++------ src/cache/cacheway.sv | 16 +++++------ src/ebu/ahbcacheinterface.sv | 53 ++++++++++++++++++------------------ src/ebu/ahbinterface.sv | 1 + src/ebu/buscachefsm.sv | 1 - src/ieu/forward.sv | 2 -- src/ifu/decompress.sv | 2 +- src/ifu/ifu.sv | 10 +++---- src/ifu/irom.sv | 16 +++++------ src/lsu/lsu.sv | 6 ++-- 10 files changed, 57 insertions(+), 64 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index c5ad8e1fa..9dbc876fa 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -27,9 +27,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, READ_ONLY_CACHE) ( +module cache #(parameter PA_BITS, XLEN, LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, READ_ONLY_CACHE) ( input logic clk, input logic reset, input logic Stall, // Stall the cache, preventing new accesses. 
In-flight access finished but does not return to READY @@ -40,7 +38,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE input logic FlushCache, // Flush all dirty lines back to memory input logic InvalidateCache, // Clear all valid bits input logic [11:0] NextSet, // Virtual address, but we only use the lower 12 bits. - input logic [`PA_BITS-1:0] PAdr, // Physical address + input logic [PA_BITS-1:0] PAdr, // Physical address input logic [(WORDLEN-1)/8:0] ByteMask, // Which bytes to write (D$ only) input logic [WORDLEN-1:0] CacheWriteData, // Data to write to cache (D$ only) output logic CacheCommitted, // Cache has started bus operation that shouldn't be interrupted @@ -57,7 +55,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE input logic [LOGBWPL-1:0] BeatCount, // Beat in burst input logic [LINELEN-1:0] FetchBuffer, // Buffer long enough to hold entire cache line arriving from bus output logic [1:0] CacheBusRW, // [1] Read (cache line fetch) or [0] write bus (cache line writeback) - output logic [`PA_BITS-1:0] CacheBusAdr // Address for bus access + output logic [PA_BITS-1:0] CacheBusAdr // Address for bus access ); // Cache parameters @@ -65,7 +63,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE localparam OFFSETLEN = $clog2(LINEBYTELEN); // Number of bits in offset field localparam SETLEN = $clog2(NUMLINES); // Number of set bits localparam SETTOP = SETLEN+OFFSETLEN; // Number of set plus offset bits - localparam TAGLEN = `PA_BITS - SETTOP; // Number of tag bits + localparam TAGLEN = PA_BITS - SETTOP; // Number of tag bits localparam CACHEWORDSPERLINE = LINELEN/WORDLEN;// Number of words in cache line localparam LOGCWPL = $clog2(CACHEWORDSPERLINE);// Log2 of ^ localparam FLUSHADRTHRESHOLD = NUMLINES - 1; // Used to determine when flush is complete @@ -114,7 +112,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE AdrSelMuxSel, 
CacheSet); // Array of cache ways, along with victim, hit, dirty, and read merging logic - cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( + cacheway #(PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( .clk, .reset, .CacheEn, .CacheSet, .PAdr, .LineWriteData, .LineByteMask, .SetValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); @@ -152,7 +150,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE .PAdr(WordOffsetAddr), .ReadDataLine, .ReadDataWord); // Bus address for fetch, writeback, or flush writeback - mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), + mux3 #(PA_BITS) CacheBusAdrMux(.d0({PAdr[PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}), .s({SelFlush, SelWriteback}), .y(CacheBusAdr)); diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index f504f40ad..47c3de69c 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -27,16 +27,14 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, +module cacheway #(parameter PA_BITS, XLEN, NUMLINES=512, LINELEN = 256, TAGLEN = 26, OFFSETLEN = 5, INDEXLEN = 9, READ_ONLY_CACHE = 0) ( input logic clk, input logic reset, input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations) input logic CacheEn, // Enable the cache memory arrays. 
Disable hold read data constant input logic [$clog2(NUMLINES)-1:0] CacheSet, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr - input logic [`PA_BITS-1:0] PAdr, // Physical address + input logic [PA_BITS-1:0] PAdr, // Physical address input logic [LINELEN-1:0] LineWriteData, // Final data written to cache (D$ only) input logic SetValid, // Set the valid bit in the selected way and set input logic SetDirty, // Set the dirty bit in the selected way and set @@ -54,11 +52,11 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, output logic DirtyWay, // This way is dirty output logic [TAGLEN-1:0] TagWay); // This way's tag if valid - localparam WORDSPERLINE = LINELEN/`XLEN; + localparam WORDSPERLINE = LINELEN/XLEN; localparam BYTESPERLINE = LINELEN/8; localparam LOGWPL = $clog2(WORDSPERLINE); - localparam LOGXLENBYTES = $clog2(`XLEN/8); - localparam BYTESPERWORD = `XLEN/8; + localparam LOGXLENBYTES = $clog2(XLEN/8); + localparam BYTESPERWORD = XLEN/8; logic [NUMLINES-1:0] ValidBits; logic [NUMLINES-1:0] DirtyBits; @@ -113,12 +111,12 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, ram1p1rwe #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn), .addr(CacheSet), .dout(ReadTag), - .din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN)); + .din(PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN)); // AND portion of distributed tag multiplexer assign TagWay = SelTag ? 
ReadTag : '0; // AND part of AOMux assign DirtyWay = SelTag & Dirty & ValidWay; - assign HitWay = ValidWay & (ReadTag == PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]); + assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]); ///////////////////////////////////////////////////////////////////////////////////////////// // Data Array diff --git a/src/ebu/ahbcacheinterface.sv b/src/ebu/ahbcacheinterface.sv index 38b1e6879..6775faa18 100644 --- a/src/ebu/ahbcacheinterface.sv +++ b/src/ebu/ahbcacheinterface.sv @@ -27,9 +27,10 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module ahbcacheinterface #( + parameter AHBW, + parameter LLEN, + parameter PA_BITS, parameter BEATSPERLINE, // Number of AHBW words (beats) in cacheline parameter AHBWLOGBWPL, // Log2 of ^ parameter LINELEN, // Number of bits in cacheline @@ -44,14 +45,14 @@ module ahbcacheinterface #( output logic [2:0] HSIZE, // AHB transaction width output logic [2:0] HBURST, // AHB burst length // bus interface buses - input logic [`AHBW-1:0] HRDATA, // AHB read data - output logic [`PA_BITS-1:0] HADDR, // AHB address - output logic [`AHBW-1:0] HWDATA, // AHB write data - output logic [`AHBW/8-1:0] HWSTRB, // AHB byte mask + input logic [AHBW-1:0] HRDATA, // AHB read data + output logic [PA_BITS-1:0] HADDR, // AHB address + output logic [AHBW-1:0] HWDATA, // AHB write data + output logic [AHBW/8-1:0] HWSTRB, // AHB byte mask // cache interface - input logic [`PA_BITS-1:0] CacheBusAdr, // Address of cache line - input logic [`LLEN-1:0] CacheReadDataWordM, // One word of cache line during a writeback + input logic [PA_BITS-1:0] CacheBusAdr, // Address of cache line + input logic [LLEN-1:0] CacheReadDataWordM, // One word of cache line during a writeback input logic CacheableOrFlushCacheM, // Memory operation is cacheable or flushing D$ input logic Cacheable, // Memory operation is 
cachable input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch @@ -61,8 +62,8 @@ module ahbcacheinterface #( output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr // uncached interface - input logic [`PA_BITS-1:0] PAdr, // Physical address of uncached memory operation - input logic [`LLEN-1:0] WriteDataM, // IEU write data for uncached store + input logic [PA_BITS-1:0] PAdr, // Physical address of uncached memory operation + input logic [LLEN-1:0] WriteDataM, // IEU write data for uncached store input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write input logic [2:0] Funct3, // Size of uncached memory operation @@ -74,11 +75,11 @@ module ahbcacheinterface #( localparam BeatCountThreshold = BEATSPERLINE - 1; // Largest beat index - logic [`PA_BITS-1:0] LocalHADDR; // Address after selecting between cached and uncached operation + logic [PA_BITS-1:0] LocalHADDR; // Address after selecting between cached and uncached operation logic [AHBWLOGBWPL-1:0] BeatCountDelayed; // Beat within the cache line in the second (Data) cache stage logic CaptureEn; // Enable updating the Fetch buffer with valid data from HRDATA - logic [`AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s - logic [`AHBW-1:0] PreHWDATA; // AHB Address phase write data + logic [AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. 
For cache request all 1s + logic [AHBW-1:0] PreHWDATA; // AHB Address phase write data genvar index; @@ -86,35 +87,35 @@ module ahbcacheinterface #( for (index = 0; index < BEATSPERLINE; index++) begin:fetchbuffer logic [BEATSPERLINE-1:0] CaptureBeat; assign CaptureBeat[index] = CaptureEn & (index == BeatCountDelayed); - flopen #(`AHBW) fb(.clk(HCLK), .en(CaptureBeat[index]), .d(HRDATA), - .q(FetchBuffer[(index+1)*`AHBW-1:index*`AHBW])); + flopen #(AHBW) fb(.clk(HCLK), .en(CaptureBeat[index]), .d(HRDATA), + .q(FetchBuffer[(index+1)*AHBW-1:index*AHBW])); end - mux2 #(`PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR); - assign HADDR = ({{`PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(`AHBW/8)) + LocalHADDR; + mux2 #(PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR); + assign HADDR = ({{PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(AHBW/8)) + LocalHADDR; - mux2 #(3) sizemux(.d0(Funct3), .d1(`AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE)); + mux2 #(3) sizemux(.d0(Funct3), .d1(AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE)); // When AHBW is less than LLEN need extra muxes to select the subword from cache's read data. 
- logic [`AHBW-1:0] CacheReadDataWordAHB; + logic [AHBW-1:0] CacheReadDataWordAHB; if(LLENPOVERAHBW > 1) begin - logic [`AHBW-1:0] AHBWordSets [(LLENPOVERAHBW)-1:0]; + logic [AHBW-1:0] AHBWordSets [(LLENPOVERAHBW)-1:0]; genvar index; for (index = 0; index < LLENPOVERAHBW; index++) begin:readdatalinesetsmux - assign AHBWordSets[index] = CacheReadDataWordM[(index*`AHBW)+`AHBW-1: (index*`AHBW)]; + assign AHBWordSets[index] = CacheReadDataWordM[(index*AHBW)+AHBW-1: (index*AHBW)]; end assign CacheReadDataWordAHB = AHBWordSets[BeatCount[$clog2(LLENPOVERAHBW)-1:0]]; - end else assign CacheReadDataWordAHB = CacheReadDataWordM[`AHBW-1:0]; + end else assign CacheReadDataWordAHB = CacheReadDataWordM[AHBW-1:0]; - mux2 #(`AHBW) HWDATAMux(.d0(CacheReadDataWordAHB), .d1(WriteDataM[`AHBW-1:0]), + mux2 #(AHBW) HWDATAMux(.d0(CacheReadDataWordAHB), .d1(WriteDataM[AHBW-1:0]), .s(~(CacheableOrFlushCacheM)), .y(PreHWDATA)); - flopen #(`AHBW) wdreg(HCLK, HREADY, PreHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec + flopen #(AHBW) wdreg(HCLK, HREADY, PreHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec // *** bummer need a second byte mask for bus as it is AHBW rather than LLEN. // probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0. 
- swbytemask #(`AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(`AHBW/8)-1:0]), .ByteMask(BusByteMaskM)); + swbytemask #(AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(AHBW/8)-1:0]), .ByteMask(BusByteMaskM)); - flopen #(`AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[`AHBW/8-1:0], HWSTRB); + flopen #(AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[AHBW/8-1:0], HWSTRB); buscachefsm #(BeatCountThreshold, AHBWLOGBWPL, READ_ONLY_CACHE) AHBBuscachefsm( .HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat, diff --git a/src/ebu/ahbinterface.sv b/src/ebu/ahbinterface.sv index 1a9308dd4..e6087ecdc 100644 --- a/src/ebu/ahbinterface.sv +++ b/src/ebu/ahbinterface.sv @@ -30,6 +30,7 @@ `include "wally-config.vh" module ahbinterface #( + parameter XLEN, parameter LSU = 0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits )( input logic HCLK, HRESETn, diff --git a/src/ebu/buscachefsm.sv b/src/ebu/buscachefsm.sv index 7456cac8b..c286b1bb9 100644 --- a/src/ebu/buscachefsm.sv +++ b/src/ebu/buscachefsm.sv @@ -27,7 +27,6 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" `define BURST_EN 1 // Enables burst mode. Disable to show the lost performance. // HCLK and clk must be the same clock! diff --git a/src/ieu/forward.sv b/src/ieu/forward.sv index 62cc5ea4f..ef3cd4b4b 100644 --- a/src/ieu/forward.sv +++ b/src/ieu/forward.sv @@ -27,8 +27,6 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module forward( // Detect hazards input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, // Source and destination registers diff --git a/src/ifu/decompress.sv b/src/ifu/decompress.sv index bc9ae0abe..582dbecbe 100644 --- a/src/ifu/decompress.sv +++ b/src/ifu/decompress.sv @@ -31,7 +31,7 @@ `include "wally-config.vh" -module decompress ( +module decompress #(parameter XLEN)( input logic [31:0] InstrRawD, // 32-bit instruction or raw compressed 16-bit instruction in bottom half output logic [31:0] InstrD, // Decompressed instruction output logic IllegalCompInstrD // Invalid decompressed instruction diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 1b0c66ba5..3d6f3e3db 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -214,7 +214,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( logic IROMce; assign IROMce = ~GatedStallD | reset; assign IFURWF = 2'b10; - irom irom(.clk, .ce(IROMce), .Adr(PCSpillNextF[P.XLEN-1:0]), .IROMInstrF); + irom #(P) irom(.clk, .ce(IROMce), .Adr(PCSpillNextF[P.XLEN-1:0]), .IROMInstrF); end else begin assign IFURWF = 2'b10; end @@ -232,7 +232,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( assign BusRW = ~ITLBMissF & ~CacheableF & ~SelIROM ? IFURWF : '0; assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? 
IFURWF : '0; - cache #(.LINELEN(P.ICACHE_LINELENINBITS), + cache #(.PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.ICACHE_LINELENINBITS), .NUMLINES(P.ICACHE_WAYSIZEINBYTES*8/P.ICACHE_LINELENINBITS), .NUMWAYS(P.ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .READ_ONLY_CACHE(1)) icache(.clk, .reset, .FlushStage(FlushD), .Stall(GatedStallD), @@ -249,7 +249,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( .NextSet(PCSpillNextF[11:0]), .PAdr(PCPF), .CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM)); - ahbcacheinterface #(WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW, 1) + ahbcacheinterface #(P.AHBW, P.LLEN, P.PA_BITS, WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW, 1) ahbcacheinterface(.HCLK(clk), .HRESETn(~reset), .HRDATA, .Flush(FlushD), .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .HWSTRB(), @@ -269,7 +269,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( assign BusRW = ~ITLBMissF & ~SelIROM ? IFURWF : '0; assign IFUHSIZE = 3'b010; - ahbinterface #(0) ahbinterface(.HCLK(clk), .Flush(FlushD), .HRESETn(~reset), .HREADY(IFUHREADY), + ahbinterface #(P.XLEN, 0) ahbinterface(.HCLK(clk), .Flush(FlushD), .HRESETn(~reset), .HREADY(IFUHREADY), .HRDATA(HRDATA), .HTRANS(IFUHTRANS), .HWRITE(IFUHWRITE), .HWDATA(), .HWSTRB(), .BusRW, .ByteMask(), .WriteData('0), .Stall(GatedStallD), .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer)); @@ -351,7 +351,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( // expand 16-bit compressed instructions to 32 bits if (P.C_SUPPORTED) begin logic IllegalCompInstrD; - decompress decomp(.InstrRawD, .InstrD, .IllegalCompInstrD); + decompress #(P.XLEN) decomp(.InstrRawD, .InstrD, .IllegalCompInstrD); assign IllegalIEUInstrD = IllegalBaseInstrD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr end else begin assign InstrD = InstrRawD; diff --git a/src/ifu/irom.sv b/src/ifu/irom.sv index 88eb8f1ee..321dd9fa8 100644 --- a/src/ifu/irom.sv +++ 
b/src/ifu/irom.sv @@ -24,27 +24,25 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module irom( +module irom import cvw::*; #(parameter cvw_t P) ( input logic clk, input logic ce, // Chip Enable. 0: Holds IROMInstrF constant - input logic [`XLEN-1:0] Adr, // PCNextFSpill + input logic [P.XLEN-1:0] Adr, // PCNextFSpill output logic [31:0] IROMInstrF // Instruction read data ); - localparam XLENBYTES = `XLEN/8; - localparam ADDR_WDITH = $clog2(`IROM_RANGE/XLENBYTES); + localparam XLENBYTES = {{P.PA_BITS-32{1'b0}}, P.XLEN/8}; // XLEN/8, adjusted for width + localparam ADDR_WDITH = $clog2(P.IROM_RANGE[P.PA_BITS-1:0]/XLENBYTES); localparam OFFSET = $clog2(XLENBYTES); - logic [`XLEN-1:0] IROMInstrFFull; + logic [P.XLEN-1:0] IROMInstrFFull; logic [31:0] RawIROMInstrF; logic [1:0] AdrD; flopen #(2) AdrReg(clk, ce, Adr[2:1], AdrD); - rom1p1r #(ADDR_WDITH, `XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull)); - if (`XLEN == 32) assign RawIROMInstrF = IROMInstrFFull; + rom1p1r #(ADDR_WDITH, P.XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull)); + if (P.XLEN == 32) assign RawIROMInstrF = IROMInstrFFull; else begin // IROM is aligned to XLEN words, but instructions are 32 bits. Select between the two // haves. Adr is the Next PCF not PCF so we delay 1 cycle. diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 51efeccb2..9923a5957 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -263,7 +263,7 @@ module lsu ( assign CacheAtomicM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? 
LSUAtomicM : '0; assign FlushDCache = FlushDCacheM & ~(IgnoreRequestTLB | SelHPTW); - cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), + cache #(.PA_BITS(`PA_BITS), .XLEN(`XLEN), .LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM), @@ -275,7 +275,7 @@ module lsu ( .FetchBuffer, .CacheBusRW, .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0)); - ahbcacheinterface #(.BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW), .READ_ONLY_CACHE(0)) ahbcacheinterface( + ahbcacheinterface #(.AHBW(`AHBW), .LLEN(`LLEN), .PA_BITS(`PA_BITS), .BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW), .READ_ONLY_CACHE(0)) ahbcacheinterface( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), @@ -300,7 +300,7 @@ module lsu ( assign LSUHADDR = PAdrM; assign LSUHSIZE = LSUFunct3M; - ahbinterface #(1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), + ahbinterface #(`XLEN, 1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), .HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM), .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer)); From 8f9151b1251874eaf5825305bf3596d29c14a71c Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 24 May 2023 14:56:02 -0500 Subject: [PATCH 04/20] More parameterization. 
Based on Lim's work. EBU, IFU (except bpred), and IEU done. --- src/ebu/ahbinterface.sv | 20 +++++++++----------- src/ebu/busfsm.sv | 2 -- src/ebu/controllerinput.sv | 13 ++++++------- src/ebu/ebu.sv | 26 ++++++++++++-------------- src/ebu/ebufsmarb.sv | 2 -- src/wally/wallypipelinedcore.sv | 2 +- 6 files changed, 28 insertions(+), 37 deletions(-) diff --git a/src/ebu/ahbinterface.sv b/src/ebu/ahbinterface.sv index e6087ecdc..2988fbb27 100644 --- a/src/ebu/ahbinterface.sv +++ b/src/ebu/ahbinterface.sv @@ -27,8 +27,6 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module ahbinterface #( parameter XLEN, parameter LSU = 0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits @@ -38,30 +36,30 @@ module ahbinterface #( input logic HREADY, // AHB peripheral ready output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ output logic HWRITE, // AHB 0: Read operation 1: Write operation - input logic [`XLEN-1:0] HRDATA, // AHB read data - output logic [`XLEN-1:0] HWDATA, // AHB write data - output logic [`XLEN/8-1:0] HWSTRB, // AHB byte mask + input logic [XLEN-1:0] HRDATA, // AHB read data + output logic [XLEN-1:0] HWDATA, // AHB write data + output logic [XLEN/8-1:0] HWSTRB, // AHB byte mask // lsu/ifu interface input logic Stall, // Core pipeline is stalled input logic Flush, // Pipeline stage flush. 
Prevents bus transaction from starting input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write - input logic [`XLEN/8-1:0] ByteMask, // Bytes enables within a word - input logic [`XLEN-1:0] WriteData, // IEU write data for a store + input logic [XLEN/8-1:0] ByteMask, // Bytes enables within a word + input logic [XLEN-1:0] WriteData, // IEU write data for a store output logic BusStall, // Bus is busy with an in flight memory operation output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt - output logic [(LSU ? `XLEN : 32)-1:0] FetchBuffer // Register to hold HRDATA after arriving from the bus + output logic [(LSU ? XLEN : 32)-1:0] FetchBuffer // Register to hold HRDATA after arriving from the bus ); logic CaptureEn; - localparam LEN = (LSU ? `XLEN : 32); // 32 bits for IFU, XLEN for LSU + localparam LEN = (LSU ? XLEN : 32); // 32 bits for IFU, XLEN for LSU flopen #(LEN) fb(.clk(HCLK), .en(CaptureEn), .d(HRDATA[LEN-1:0]), .q(FetchBuffer)); if(LSU) begin // delay HWDATA by 1 cycle per spec; assumes AHBW = XLEN - flop #(`XLEN) wdreg(HCLK, WriteData, HWDATA); - flop #(`XLEN/8) HWSTRBReg(HCLK, ByteMask, HWSTRB); + flop #(XLEN) wdreg(HCLK, WriteData, HWDATA); + flop #(XLEN/8) HWSTRBReg(HCLK, ByteMask, HWSTRB); end else begin assign HWDATA = '0; assign HWSTRB = '0; diff --git a/src/ebu/busfsm.sv b/src/ebu/busfsm.sv index de1dd7583..108cd546d 100644 --- a/src/ebu/busfsm.sv +++ b/src/ebu/busfsm.sv @@ -27,8 +27,6 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - // HCLK and clk must be the same clock! module busfsm ( input logic HCLK, diff --git a/src/ebu/controllerinput.sv b/src/ebu/controllerinput.sv index 6e3c27d3b..9db367a10 100644 --- a/src/ebu/controllerinput.sv +++ b/src/ebu/controllerinput.sv @@ -31,9 +31,8 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module controllerinput #( + parameter PA_BITS, parameter SAVE_ENABLED = 1 // 1: Save manager inputs if Save = 1, 0: Don't save inputs )( input logic HCLK, @@ -47,14 +46,14 @@ module controllerinput #( input logic HWRITEIn, // Manager input. AHB 0: Read operation 1: Write operation input logic [2:0] HSIZEIn, // Manager input. AHB transaction width input logic [2:0] HBURSTIn, // Manager input. AHB burst length - input logic [`PA_BITS-1:0] HADDRIn, // Manager input. AHB address + input logic [PA_BITS-1:0] HADDRIn, // Manager input. AHB address output logic HREADYOut, // Indicate to manager the peripheral is not busy and another manager does not have priority // controller output output logic [1:0] HTRANSOut, // Arbitrated manager transaction. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ output logic HWRITEOut, // Arbitrated manager transaction. AHB 0: Read operation 1: Write operation output logic [2:0] HSIZEOut, // Arbitrated manager transaction. AHB transaction width output logic [2:0] HBURSTOut, // Arbitrated manager transaction. AHB burst length - output logic [`PA_BITS-1:0] HADDROut, // Arbitrated manager transaction. AHB address + output logic [PA_BITS-1:0] HADDROut, // Arbitrated manager transaction. 
AHB address input logic HREADYIn // Peripheral ready ); @@ -62,13 +61,13 @@ module controllerinput #( logic [2:0] HSIZESave; logic [2:0] HBURSTSave; logic [1:0] HTRANSSave; - logic [`PA_BITS-1:0] HADDRSave; + logic [PA_BITS-1:0] HADDRSave; if (SAVE_ENABLED) begin - flopenr #(1+3+3+2+`PA_BITS) SaveReg(HCLK, ~HRESETn, Save, + flopenr #(1+3+3+2+PA_BITS) SaveReg(HCLK, ~HRESETn, Save, {HWRITEIn, HSIZEIn, HBURSTIn, HTRANSIn, HADDRIn}, {HWRITESave, HSIZESave, HBURSTSave, HTRANSSave, HADDRSave}); - mux2 #(1+3+3+2+`PA_BITS) RestorMux({HWRITEIn, HSIZEIn, HBURSTIn, HTRANSIn, HADDRIn}, + mux2 #(1+3+3+2+PA_BITS) RestorMux({HWRITEIn, HSIZEIn, HBURSTIn, HTRANSIn, HADDRIn}, {HWRITESave, HSIZESave, HBURSTSave, HTRANSSave, HADDRSave}, Restore, {HWRITEOut, HSIZEOut, HBURSTOut, HTRANSOut, HADDROut}); diff --git a/src/ebu/ebu.sv b/src/ebu/ebu.sv index 8dddff35a..5ceea8dc0 100644 --- a/src/ebu/ebu.sv +++ b/src/ebu/ebu.sv @@ -31,33 +31,31 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module ebu ( +module ebu #(parameter XLEN, PA_BITS, AHBW)( input logic clk, reset, // Signals from IFU input logic [1:0] IFUHTRANS, // IFU AHB transaction request input logic [2:0] IFUHSIZE, // IFU AHB transaction size input logic [2:0] IFUHBURST, // IFU AHB burst length - input logic [`PA_BITS-1:0] IFUHADDR, // IFU AHB address + input logic [PA_BITS-1:0] IFUHADDR, // IFU AHB address output logic IFUHREADY, // AHB peripheral ready gated by possible non-grant // Signals from LSU input logic [1:0] LSUHTRANS, // LSU AHB transaction request input logic LSUHWRITE, // LSU AHB transaction direction. 
1: write, 0: read input logic [2:0] LSUHSIZE, // LSU AHB size input logic [2:0] LSUHBURST, // LSU AHB burst length - input logic [`PA_BITS-1:0] LSUHADDR, // LSU AHB address - input logic [`XLEN-1:0] LSUHWDATA, // initially support AHBW = XLEN - input logic [`XLEN/8-1:0] LSUHWSTRB, // AHB byte mask + input logic [PA_BITS-1:0] LSUHADDR, // LSU AHB address + input logic [XLEN-1:0] LSUHWDATA, // initially support AHBW = XLEN + input logic [XLEN/8-1:0] LSUHWSTRB, // AHB byte mask output logic LSUHREADY, // AHB peripheral. Never gated as LSU always has priority // AHB-Lite external signals output logic HCLK, HRESETn, input logic HREADY, // AHB peripheral ready input logic HRESP, // AHB peripheral response. 0: OK 1: Error - output logic [`PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration - output logic [`AHBW-1:0] HWDATA, // AHB Write data after arbitration - output logic [`XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration + output logic [PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration + output logic [AHBW-1:0] HWDATA, // AHB Write data after arbitration + output logic [XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration output logic HWRITE, // AHB transaction direction after arbitration output logic [2:0] HSIZE, // AHB transaction size after arbitration output logic [2:0] HBURST, // AHB burst length after arbitration @@ -73,13 +71,13 @@ module ebu ( logic IFUDisable; logic IFUSelect; - logic [`PA_BITS-1:0] IFUHADDROut; + logic [PA_BITS-1:0] IFUHADDROut; logic [1:0] IFUHTRANSOut; logic [2:0] IFUHBURSTOut; logic [2:0] IFUHSIZEOut; logic IFUHWRITEOut; - logic [`PA_BITS-1:0] LSUHADDROut; + logic [PA_BITS-1:0] LSUHADDROut; logic [1:0] LSUHTRANSOut; logic [2:0] LSUHBURSTOut; logic [2:0] LSUHSIZEOut; @@ -98,14 +96,14 @@ module ebu ( // input stages and muxing for IFU and LSU //////////////////////////////////////////////////////////////////////////////////////////////////// - controllerinput IFUInput(.HCLK, .HRESETn, 
.Save(IFUSave), .Restore(IFURestore), .Disable(IFUDisable), + controllerinput #(PA_BITS) IFUInput(.HCLK, .HRESETn, .Save(IFUSave), .Restore(IFURestore), .Disable(IFUDisable), .Request(IFUReq), .HWRITEIn(1'b0), .HSIZEIn(IFUHSIZE), .HBURSTIn(IFUHBURST), .HTRANSIn(IFUHTRANS), .HADDRIn(IFUHADDR), .HWRITEOut(IFUHWRITEOut), .HSIZEOut(IFUHSIZEOut), .HBURSTOut(IFUHBURSTOut), .HREADYOut(IFUHREADY), .HTRANSOut(IFUHTRANSOut), .HADDROut(IFUHADDROut), .HREADYIn(HREADY)); // LSU always has priority so there should never be a need to save and restore the address phase inputs. - controllerinput #(0) LSUInput(.HCLK, .HRESETn, .Save(1'b0), .Restore(1'b0), .Disable(LSUDisable), + controllerinput #(PA_BITS, 0) LSUInput(.HCLK, .HRESETn, .Save(1'b0), .Restore(1'b0), .Disable(LSUDisable), .Request(LSUReq), .HWRITEIn(LSUHWRITE), .HSIZEIn(LSUHSIZE), .HBURSTIn(LSUHBURST), .HTRANSIn(LSUHTRANS), .HADDRIn(LSUHADDR), .HREADYOut(LSUHREADY), .HWRITEOut(LSUHWRITEOut), .HSIZEOut(LSUHSIZEOut), .HBURSTOut(LSUHBURSTOut), diff --git a/src/ebu/ebufsmarb.sv b/src/ebu/ebufsmarb.sv index a61a3961f..91fa9e491 100644 --- a/src/ebu/ebufsmarb.sv +++ b/src/ebu/ebufsmarb.sv @@ -28,8 +28,6 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module ebufsmarb ( input logic HCLK, input logic HRESETn, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 343262e57..e38f5da0c 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -242,7 +242,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .LSUStallM); if(P.BUS_SUPPORTED) begin : ebu - ebu ebu(// IFU connections + ebu #(P.XLEN, P.PA_BITS, P.AHBW) ebu(// IFU connections .clk, .reset, // IFU interface .IFUHADDR, .IFUHBURST, .IFUHTRANS, .IFUHREADY, .IFUHSIZE, From 7fc53226acbf740c0fc94ef7fbff450cace3922d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 24 May 2023 15:01:35 -0500 Subject: [PATCH 05/20] MDU and hazard unit now also parameterized. Based on Lim's work. Again, I want to clarify that this is their work, not mine. I'm just doing this because the merge had an issue. --- src/hazard/hazard.sv | 2 - src/mdu/div.sv | 70 ++++++++++++++++----------------- src/mdu/mdu.sv | 34 ++++++++-------- src/mdu/mul.sv | 44 ++++++++++----------- src/wally/wallypipelinedcore.sv | 2 +- 5 files changed, 72 insertions(+), 80 deletions(-) diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index 51f2ccf40..8efa454d9 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -26,8 +26,6 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module hazard ( // Detect hazards input logic BPWrongE, CSRWriteFenceM, RetM, TrapM, diff --git a/src/mdu/div.sv b/src/mdu/div.sv index 848760032..a05e88f6d 100644 --- a/src/mdu/div.sv +++ b/src/mdu/div.sv @@ -26,9 +26,7 @@ // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module div( +module div import cvw::*; #(parameter cvw_t P) ( input logic clk, input logic reset, input logic StallM, @@ -36,26 +34,26 @@ module div( input logic IntDivE, // integer division/remainder instruction of any type input logic DivSignedE, // signed division input logic W64E, // W-type instructions (divw, divuw, remw, remuw) - input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE,// Forwarding mux outputs for Source A and B + input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE,// Forwarding mux outputs for Source A and B output logic DivBusyE, // Divide is busy - stall pipeline - output logic [`XLEN-1:0] QuotM, RemM // Quotient and remainder outputs + output logic [P.XLEN-1:0] QuotM, RemM // Quotient and remainder outputs ); - localparam STEPBITS = $clog2(`XLEN/`IDIV_BITSPERCYCLE); // Number of steps + localparam STEPBITS = $clog2(P.XLEN/P.IDIV_BITSPERCYCLE); // Number of steps typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; // division FSM state statetype state; - logic [`XLEN-1:0] W[`IDIV_BITSPERCYCLE:0]; // Residual for each of k steps - logic [`XLEN-1:0] XQ[`IDIV_BITSPERCYCLE:0]; // dividend/quotient for each of k steps - logic [`XLEN-1:0] WNext, XQNext; // initialized W and XQ going into registers - logic [`XLEN-1:0] DinE, XinE; // divisor & dividend, possibly truncated to 32 bits - logic [`XLEN-1:0] DnE; // DnE = ~DinE - logic [`XLEN-1:0] DAbsBE; // absolute value of D - logic [`XLEN-1:0] DAbsB; // registered absolute value of D, constant during division - logic [`XLEN-1:0] XnE; // DXnE = ~XinE - logic [`XLEN-1:0] XInitE; // |X|, or original X for divide by 0 - logic [`XLEN-1:0] WnM, XQnM; // negated residual W and quotient XQ for postprocessing sign correction + logic [P.XLEN-1:0] W[P.IDIV_BITSPERCYCLE:0]; // Residual for each of k steps + logic [P.XLEN-1:0] XQ[P.IDIV_BITSPERCYCLE:0]; // dividend/quotient for 
each of k steps + logic [P.XLEN-1:0] WNext, XQNext; // initialized W and XQ going into registers + logic [P.XLEN-1:0] DinE, XinE; // divisor & dividend, possibly truncated to 32 bits + logic [P.XLEN-1:0] DnE; // DnE = ~DinE + logic [P.XLEN-1:0] DAbsBE; // absolute value of D + logic [P.XLEN-1:0] DAbsB; // registered absolute value of D, constant during division + logic [P.XLEN-1:0] XnE; // DXnE = ~XinE + logic [P.XLEN-1:0] XInitE; // |X|, or original X for divide by 0 + logic [P.XLEN-1:0] WnM, XQnM; // negated residual W and quotient XQ for postprocessing sign correction logic [STEPBITS:0] step; // division step logic Div0E, Div0M; // divide by 0 logic DivStartE; // start integer division @@ -71,42 +69,42 @@ module div( assign DivBusyE = (state == BUSY) | DivStartE; // Handle sign extension for W-type instructions - if (`XLEN == 64) begin:rv64 // RV64 has W-type instructions - mux2 #(`XLEN) xinmux(ForwardedSrcAE, {ForwardedSrcAE[31:0], 32'b0}, W64E, XinE); - mux2 #(`XLEN) dinmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31]&DivSignedE}}, ForwardedSrcBE[31:0]}, W64E, DinE); + if (P.XLEN == 64) begin:rv64 // RV64 has W-type instructions + mux2 #(P.XLEN) xinmux(ForwardedSrcAE, {ForwardedSrcAE[31:0], 32'b0}, W64E, XinE); + mux2 #(P.XLEN) dinmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31]&DivSignedE}}, ForwardedSrcBE[31:0]}, W64E, DinE); end else begin // RV32 has no W-type instructions assign XinE = ForwardedSrcAE; assign DinE = ForwardedSrcBE; end // Extract sign bits and check fo division by zero - assign SignDE = DivSignedE & DinE[`XLEN-1]; - assign SignXE = DivSignedE & XinE[`XLEN-1]; + assign SignDE = DivSignedE & DinE[P.XLEN-1]; + assign SignXE = DivSignedE & XinE[P.XLEN-1]; assign NegQE = SignDE ^ SignXE; assign Div0E = (DinE == 0); // Take absolute value for signed operations, and negate D to handle subtraction in divider stages - neg #(`XLEN) negd(DinE, DnE); - mux2 #(`XLEN) dabsmux(DnE, DinE, SignDE, DAbsBE); // take absolute value for signed operations, and negate for 
subtraction setp - neg #(`XLEN) negx(XinE, XnE); - mux3 #(`XLEN) xabsmux(XinE, XnE, ForwardedSrcAE, {Div0E, SignXE}, XInitE); // take absolute value for signed operations, or keep original value for divide by 0 + neg #(P.XLEN) negd(DinE, DnE); + mux2 #(P.XLEN) dabsmux(DnE, DinE, SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction setp + neg #(P.XLEN) negx(XinE, XnE); + mux3 #(P.XLEN) xabsmux(XinE, XnE, ForwardedSrcAE, {Div0E, SignXE}, XInitE); // take absolute value for signed operations, or keep original value for divide by 0 ////////////////////////////// // Division Iterations (effectively stalled execute stage, no suffix) ////////////////////////////// // initialization multiplexers on first cycle of operation - mux2 #(`XLEN) wmux(W[`IDIV_BITSPERCYCLE], {`XLEN{1'b0}}, DivStartE, WNext); - mux2 #(`XLEN) xmux(XQ[`IDIV_BITSPERCYCLE], XInitE, DivStartE, XQNext); + mux2 #(P.XLEN) wmux(W[P.IDIV_BITSPERCYCLE], {P.XLEN{1'b0}}, DivStartE, WNext); + mux2 #(P.XLEN) xmux(XQ[P.IDIV_BITSPERCYCLE], XInitE, DivStartE, XQNext); // registers before division steps - flopen #(`XLEN) wreg(clk, DivBusyE, WNext, W[0]); - flopen #(`XLEN) xreg(clk, DivBusyE, XQNext, XQ[0]); - flopen #(`XLEN) dabsreg(clk, DivStartE, DAbsBE, DAbsB); + flopen #(P.XLEN) wreg(clk, DivBusyE, WNext, W[0]); + flopen #(P.XLEN) xreg(clk, DivBusyE, XQNext, XQ[0]); + flopen #(P.XLEN) dabsreg(clk, DivStartE, DAbsBE, DAbsB); // one copy of divstep for each bit produced per cycle genvar i; - for (i=0; i<`IDIV_BITSPERCYCLE; i = i+1) + for (i=0; i Date: Wed, 24 May 2023 16:12:41 -0500 Subject: [PATCH 06/20] Partial parameterization into mmu. 
--- src/ifu/ifu.sv | 2 +- src/lsu/lsu.sv | 138 ++++++++++++++++---------------- src/mmu/hptw.sv | 92 +++++++++++---------- src/mmu/mmu.sv | 31 ++++--- src/wally/wallypipelinedcore.sv | 2 +- 5 files changed, 130 insertions(+), 135 deletions(-) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 3d6f3e3db..73bf18dc4 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -170,7 +170,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( flopr #(1) StallMReg(.clk, .reset, .d(StallM), .q(StallMQ)); assign TLBFlush = sfencevmaM & ~StallMQ; - mmu #(.TLB_ENTRIES(P.ITLB_ENTRIES), .IMMU(1)) + mmu #(.P(P), .TLB_ENTRIES(P.ITLB_ENTRIES), .IMMU(1)) immu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .DisableTranslation(1'b0), .VAdr(PCFExt), diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 9923a5957..607d3571b 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -29,9 +29,7 @@ // and limitations under the License. ///////////////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module lsu ( +module lsu import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, input logic StallM, FlushM, StallW, FlushW, output logic LSUStallM, // LSU stalls pipeline during a multicycle operation @@ -46,17 +44,17 @@ module lsu ( output logic DCacheMiss, // D cache miss for performance counters output logic DCacheAccess, // D cache memory access for performance counters // address and write data - input logic [`XLEN-1:0] IEUAdrE, // Execution stage memory address - output logic [`XLEN-1:0] IEUAdrM, // Memory stage memory address - input logic [`XLEN-1:0] WriteDataM, // Write data from IEU - output logic [`LLEN-1:0] ReadDataW, // Read data to IEU or FPU + input logic [P.XLEN-1:0] IEUAdrE, // Execution stage memory address + output logic [P.XLEN-1:0] IEUAdrM, // Memory stage memory address + input logic [P.XLEN-1:0] WriteDataM, // Write data from IEU + output logic [P.LLEN-1:0] 
ReadDataW, // Read data to IEU or FPU // cpu privilege input logic [1:0] PrivilegeModeW, // Current privilege mode input logic BigEndianM, // Swap byte order to big endian input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries output logic DCacheStallM, // D$ busy with multicycle operation // fpu - input logic [`FLEN-1:0] FWriteDataM, // Write data from FPU + input logic [P.FLEN-1:0] FWriteDataM, // Write data from FPU input logic FpLoadStoreM, // Selects FPU as store for write data // faults output logic LoadPageFaultM, StoreAmoPageFaultM, // Page fault exceptions @@ -67,34 +65,34 @@ module lsu ( output logic StoreAmoMisalignedFaultM, // Store or AMO address misaligned fault output logic StoreAmoAccessFaultM, // Store or AMO access fault // connect to ahb - output logic [`PA_BITS-1:0] LSUHADDR, // Bus address from LSU to EBU - input logic [`XLEN-1:0] HRDATA, // Bus read data from LSU to EBU - output logic [`XLEN-1:0] LSUHWDATA, // Bus write data from LSU to EBU + output logic [P.PA_BITS-1:0] LSUHADDR, // Bus address from LSU to EBU + input logic [P.XLEN-1:0] HRDATA, // Bus read data from LSU to EBU + output logic [P.XLEN-1:0] LSUHWDATA, // Bus write data from LSU to EBU input logic LSUHREADY, // Bus ready from LSU to EBU output logic LSUHWRITE, // Bus write operation from LSU to EBU output logic [2:0] LSUHSIZE, // Bus operation size from LSU to EBU output logic [2:0] LSUHBURST, // Bus burst from LSU to EBU output logic [1:0] LSUHTRANS, // Bus transaction type from LSU to EBU - output logic [`XLEN/8-1:0] LSUHWSTRB, // Bus byte write enables from LSU to EBU + output logic [P.XLEN/8-1:0] LSUHWSTRB, // Bus byte write enables from LSU to EBU // page table walker - input logic [`XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR + input logic [P.XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable 
readable, supervisor user memory, machine privilege input logic [1:0] STATUS_MPP, // Machine previous privilege mode - input logic [`XLEN-1:0] PCSpillF, // Fetch PC + input logic [P.XLEN-1:0] PCSpillF, // Fetch PC input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk input logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits - output logic [`XLEN-1:0] PTE, // Page table entry write to ITLB + output logic [P.XLEN-1:0] PTE, // Page table entry write to ITLB output logic [1:0] PageType, // Type of page table entry to write to ITLB output logic ITLBWriteF, // Write PTE to ITLB output logic SelHPTW, // During a HPTW walk the effective privilege mode becomes S_MODE - input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit - input var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // PMP address from privileged unit + input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration from privileged unit + input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP address from privileged unit ); - logic [`XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer - logic [`XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer - logic [`PA_BITS-1:0] PAdrM; // Physical memory address - logic [`XLEN+1:0] IHAdrM; // Either IEU or HPTW memory address + logic [P.XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer + logic [P.XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer + logic [P.PA_BITS-1:0] PAdrM; // Physical memory address + logic [P.XLEN+1:0] IHAdrM; // Either IEU or HPTW memory address logic [1:0] PreLSURWM; // IEU or HPTW Read/Write signal logic [1:0] LSURWM; // IEU or HPTW Read/Write signal gated by LR/SC @@ -111,19 +109,19 @@ module lsu ( logic 
BusCommittedM; // Bus memory operation in flight, delay interrupts logic DCacheCommittedM; // D$ memory operation started, delay interrupts - logic [`LLEN-1:0] DTIMReadDataWordM; // DTIM read data - logic [`LLEN-1:0] DCacheReadDataWordM; // D$ read data - logic [`LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data - logic [`LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data - logic [`LLEN-1:0] ReadDataWordM; // Read data before subword selection - logic [`LLEN-1:0] ReadDataM; // Final read data + logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data + logic [P.LLEN-1:0] DCacheReadDataWordM; // D$ read data + logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data + logic [P.LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data + logic [P.LLEN-1:0] ReadDataWordM; // Read data before subword selection + logic [P.LLEN-1:0] ReadDataM; // Final read data - logic [`XLEN-1:0] IHWriteDataM; // IEU or HPTW write data - logic [`XLEN-1:0] IMAWriteDataM; // IEU, HPTW, or AMO write data - logic [`LLEN-1:0] IMAFWriteDataM; // IEU, HPTW, AMO, or FPU write data - logic [`LLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data - logic [`LLEN-1:0] LSUWriteDataM; // Final write data - logic [(`LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write + logic [P.XLEN-1:0] IHWriteDataM; // IEU or HPTW write data + logic [P.XLEN-1:0] IMAWriteDataM; // IEU, HPTW, or AMO write data + logic [P.LLEN-1:0] IMAFWriteDataM; // IEU, HPTW, AMO, or FPU write data + logic [P.LLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data + logic [P.LLEN-1:0] LSUWriteDataM; // Final write data + logic [(P.LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write logic DTLBMissM; // DTLB miss causes HPTW walk logic DTLBWriteM; // Writes PTE and PageType to DTLB @@ -140,7 +138,7 @@ module lsu ( // Zero-extend address to 34 bits for XLEN=32 ///////////////////////////////////////////////////////////////////////////////////////////// - 
flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); + flopenrc #(P.XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); assign IEUAdrExtM = {2'b00, IEUAdrM}; assign IEUAdrExtE = {2'b00, IEUAdrE}; @@ -149,12 +147,12 @@ module lsu ( // MMU include PMP and is needed if any privileged supported ///////////////////////////////////////////////////////////////////////////////////////////// - if(`VIRTMEM_SUPPORTED) begin : hptw - hptw hptw(.clk, .reset, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, + if(P.VIRTMEM_SUPPORTED) begin : hptw + hptw #(P) hptw(.clk, .reset, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, .DTLBMissM, .DTLBWriteM, .InstrUpdateDAF, .DataUpdateDAM, .FlushW, .DCacheStallM, .SATP_REGW, .PCSpillF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, - .ReadDataM(ReadDataM[`XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN + .ReadDataM(ReadDataM[P.XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, .IEUAdrExtM, .PTE, .IHWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IHAdrM, .HPTWStall, .SelHPTW, @@ -184,10 +182,10 @@ module lsu ( ///////////////////////////////////////////////////////////////////////////////////////////// // MMU and misalignment fault logic required if privileged unit exists ///////////////////////////////////////////////////////////////////////////////////////////// - if(`ZICSR_SUPPORTED == 1) begin : dmmu + if(P.ZICSR_SUPPORTED == 1) begin : dmmu logic DisableTranslation; // During HPTW walk or D$ flush disable virtual memory address translation assign DisableTranslation = SelHPTW | FlushDCacheM; - mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) + mmu #(.P(P), .TLB_ENTRIES(P.DTLB_ENTRIES), .IMMU(0)) dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .DisableTranslation, .VAdr(IHAdrM), .Size(LSUFunct3M[1:0]), .PTE, .PageTypeWriteVal(PageType), .TLBWrite(DTLBWriteM), 
.TLBFlush(sfencevmaM), @@ -204,9 +202,9 @@ module lsu ( end else begin // No MMU, so no PMA/page faults and no address translation assign {DTLBMissM, LSULoadAccessFaultM, LSUStoreAmoAccessFaultM, LoadMisalignedFaultM, StoreAmoMisalignedFaultM} = '0; assign {LoadPageFaultM, StoreAmoPageFaultM} = '0; - assign PAdrM = IHAdrM[`PA_BITS-1:0]; + assign PAdrM = IHAdrM[P.PA_BITS-1:0]; assign CacheableM = 1'b1; - assign SelDTIM = `DTIM_SUPPORTED & ~`BUS_SUPPORTED; // if no PMA then select dtim if there is a DTIM. If there is + assign SelDTIM = P.DTIM_SUPPORTED & ~P.BUS_SUPPORTED; // if no PMA then select dtim if there is a DTIM. If there is // a bus then this is always 0. Cannot have both without PMA. end @@ -222,31 +220,31 @@ module lsu ( // Discard memory request on pipeline flush assign IgnoreRequest = IgnoreRequestTLB | FlushW; - if (`DTIM_SUPPORTED) begin : dtim - logic [`PA_BITS-1:0] DTIMAdr; + if (P.DTIM_SUPPORTED) begin : dtim + logic [P.PA_BITS-1:0] DTIMAdr; logic [1:0] DTIMMemRWM; // The DTIM uses untranslated addresses, so it is not compatible with virtual memory. - mux2 #(`PA_BITS) DTIMAdrMux(IEUAdrExtE[`PA_BITS-1:0], IEUAdrExtM[`PA_BITS-1:0], MemRWM[0], DTIMAdr); + mux2 #(P.PA_BITS) DTIMAdrMux(IEUAdrExtE[P.PA_BITS-1:0], IEUAdrExtM[P.PA_BITS-1:0], MemRWM[0], DTIMAdr); assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : '0; // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** create config to support DTIM with floating point. 
dtim dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM), .DTIMAdr, .FlushW, .WriteDataM(LSUWriteDataM), - .ReadDataWordM(DTIMReadDataWordM[`XLEN-1:0]), .ByteMaskM(ByteMaskM[`XLEN/8-1:0])); + .ReadDataWordM(DTIMReadDataWordM[P.XLEN-1:0]), .ByteMaskM(ByteMaskM[P.XLEN/8-1:0])); end else begin end - if (`BUS_SUPPORTED) begin : bus - if(`DCACHE_SUPPORTED) begin : dcache - localparam LLENWORDSPERLINE = `DCACHE_LINELENINBITS/`LLEN; // Number of LLEN words in cacheline + if (P.BUS_SUPPORTED) begin : bus + if(P.DCACHE_SUPPORTED) begin : dcache + localparam LLENWORDSPERLINE = P.DCACHE_LINELENINBITS/P.LLEN; // Number of LLEN words in cacheline localparam LLENLOGBWPL = $clog2(LLENWORDSPERLINE); // Log2 of ^ - localparam BEATSPERLINE = `DCACHE_LINELENINBITS/`AHBW; // Number of AHBW words (beats) in cacheline + localparam BEATSPERLINE = P.DCACHE_LINELENINBITS/P.AHBW; // Number of AHBW words (beats) in cacheline localparam AHBWLOGBWPL = $clog2(BEATSPERLINE); // Log2 of ^ - localparam LINELEN = `DCACHE_LINELENINBITS; // Number of bits in cacheline - localparam LLENPOVERAHBW = `LLEN / `AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) + localparam LINELEN = P.DCACHE_LINELENINBITS; // Number of bits in cacheline + localparam LLENPOVERAHBW = P.LLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) logic [LINELEN-1:0] FetchBuffer; // Temporary buffer to hold partially fetched cacheline - logic [`PA_BITS-1:0] DCacheBusAdr; // Cacheline address to fetch or writeback. + logic [P.PA_BITS-1:0] DCacheBusAdr; // Cacheline address to fetch or writeback. logic [AHBWLOGBWPL-1:0] BeatCount; // Position within a cacheline. 
ahbcacheinterface to cache logic DCacheBusAck; // ahbcacheinterface completed fetch or writeback logic SelBusBeat; // ahbcacheinterface selects postion in cacheline with BeatCount @@ -263,8 +261,8 @@ module lsu ( assign CacheAtomicM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSUAtomicM : '0; assign FlushDCache = FlushDCacheM & ~(IgnoreRequestTLB | SelHPTW); - cache #(.PA_BITS(`PA_BITS), .XLEN(`XLEN), .LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), - .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .READ_ONLY_CACHE(0)) dcache( + cache #(.PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), + .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(P.LLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), @@ -275,7 +273,7 @@ module lsu ( .FetchBuffer, .CacheBusRW, .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0)); - ahbcacheinterface #(.AHBW(`AHBW), .LLEN(`LLEN), .PA_BITS(`PA_BITS), .BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW), .READ_ONLY_CACHE(0)) ahbcacheinterface( + ahbcacheinterface #(.AHBW(P.AHBW), .LLEN(P.LLEN), .PA_BITS(P.PA_BITS), .BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW), .READ_ONLY_CACHE(0)) ahbcacheinterface( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), @@ -289,25 +287,25 @@ module lsu ( // Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times. 
// *** DTIMReadDataWordM should be increased to LLEN. // pma should generate exception for LLEN read to periph. - mux3 #(`LLEN) UnCachedDataMux(.d0(DCacheReadDataWordM), .d1({LLENPOVERAHBW{FetchBuffer[`XLEN-1:0]}}), - .d2({{`LLEN-`XLEN{1'b0}}, DTIMReadDataWordM[`XLEN-1:0]}), + mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), + .d2({{P.LLEN-P.XLEN{1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}), .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); end else begin : passthrough // No Cache, use simple ahbinterface instad of ahbcacheinterface logic [1:0] BusRW; // Non-DTIM memory access, ignore cacheableM - logic [`XLEN-1:0] FetchBuffer; + logic [P.XLEN-1:0] FetchBuffer; assign BusRW = ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0; assign LSUHADDR = PAdrM; assign LSUHSIZE = LSUFunct3M; - ahbinterface #(`XLEN, 1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), + ahbinterface #(P.XLEN, 1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), .HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM), .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer)); // Mux between the 2 sources of read data, 0: Bus, 1: DTIM - if(`DTIM_SUPPORTED) mux2 #(`XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM, SelDTIM, ReadDataWordMuxM); - else assign ReadDataWordMuxM = FetchBuffer[`XLEN-1:0]; + if(P.DTIM_SUPPORTED) mux2 #(P.XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM, SelDTIM, ReadDataWordMuxM); + else assign ReadDataWordMuxM = FetchBuffer[P.XLEN-1:0]; assign LSUHBURST = 3'b0; assign {DCacheStallM, DCacheCommittedM, DCacheMiss, DCacheAccess} = '0; end @@ -322,16 +320,16 @@ module lsu ( ///////////////////////////////////////////////////////////////////////////////////////////// // Atomic operations 
///////////////////////////////////////////////////////////////////////////////////////////// - if (`A_SUPPORTED) begin:atomic - atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[`XLEN-1:0]), .IHWriteDataM, .PAdrM, + if (P.A_SUPPORTED) begin:atomic + atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[P.XLEN-1:0]), .IHWriteDataM, .PAdrM, .LSUFunct7M, .LSUFunct3M, .LSUAtomicM, .PreLSURWM, .IgnoreRequest, .IMAWriteDataM, .SquashSCW, .LSURWM); end else begin:lrsc assign SquashSCW = 0; assign LSURWM = PreLSURWM; assign IMAWriteDataM = IHWriteDataM; end - if (`F_SUPPORTED) - mux2 #(`LLEN) datamux({{{`LLEN-`XLEN}{1'b0}}, IMAWriteDataM}, FWriteDataM, FpLoadStoreM, IMAFWriteDataM); + if (P.F_SUPPORTED) + mux2 #(P.LLEN) datamux({{{P.LLEN-P.XLEN}{1'b0}}, IMAWriteDataM}, FWriteDataM, FpLoadStoreM, IMAFWriteDataM); else assign IMAFWriteDataM = IMAWriteDataM; ///////////////////////////////////////////////////////////////////////////////////////////// @@ -342,13 +340,13 @@ module lsu ( subwordwrite subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte masks - swbytemask #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(PAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM)); + swbytemask #(P.LLEN) swbytemask(.Size(LSUFunct3M), .Adr(PAdrM[$clog2(P.LLEN/8)-1:0]), .ByteMask(ByteMaskM)); ///////////////////////////////////////////////////////////////////////////////////////////// // MW Pipeline Register ///////////////////////////////////////////////////////////////////////////////////////////// - flopen #(`LLEN) ReadDataMWReg(clk, ~StallW, ReadDataM, ReadDataW); + flopen #(P.LLEN) ReadDataMWReg(clk, ~StallW, ReadDataM, ReadDataW); ///////////////////////////////////////////////////////////////////////////////////////////// // Big Endian Byte Swapper @@ -356,9 +354,9 @@ module lsu ( // swap the bytes when read from big-endian memory ///////////////////////////////////////////////////////////////////////////////////////////// - if 
(`BIGENDIAN_SUPPORTED) begin:endian - endianswap #(`LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); - endianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); + if (P.BIGENDIAN_SUPPORTED) begin:endian + endianswap #(P.LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); + endianswap #(P.LLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordMuxM; diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index 2cc76e8f0..e627f598d 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -29,20 +29,18 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /////////////////////////////////////////// -`include "wally-config.vh" - -module hptw ( +module hptw import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, - input logic [`XLEN-1:0] SATP_REGW, // includes SATP.MODE to determine number of levels in page table - input logic [`XLEN-1:0] PCSpillF, // addresses to translate - input logic [`XLEN+1:0] IEUAdrExtM, // addresses to translate + input logic [P.XLEN-1:0] SATP_REGW, // includes SATP.MODE to determine number of levels in page table + input logic [P.XLEN-1:0] PCSpillF, // addresses to translate + input logic [P.XLEN+1:0] IEUAdrExtM, // addresses to translate input logic [1:0] MemRWM, AtomicM, // system status input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, input logic [1:0] STATUS_MPP, input logic [1:0] PrivilegeModeW, - input logic [`XLEN-1:0] ReadDataM, // page table entry from LSU - input logic [`XLEN-1:0] WriteDataM, + input logic [P.XLEN-1:0] ReadDataM, // page table entry from LSU + input logic [P.XLEN-1:0] WriteDataM, input logic DCacheStallM, // stall from LSU input logic [2:0] Funct3M, input logic [6:0] Funct7M, @@ -51,12 +49,12 @@ module hptw ( input logic FlushW, input logic InstrUpdateDAF, input 
logic DataUpdateDAM, - output logic [`XLEN-1:0] PTE, // page table entry to TLBs + output logic [P.XLEN-1:0] PTE, // page table entry to TLBs output logic [1:0] PageType, // page type to TLBs output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry output logic [1:0] PreLSURWM, - output logic [`XLEN+1:0] IHAdrM, - output logic [`XLEN-1:0] IHWriteDataM, + output logic [P.XLEN+1:0] IHAdrM, + output logic [P.XLEN-1:0] IHWriteDataM, output logic [1:0] LSUAtomicM, output logic [2:0] LSUFunct3M, output logic [6:0] LSUFunct7M, @@ -74,8 +72,8 @@ module hptw ( LEAF, IDLE, UPDATE_PTE} statetype; logic DTLBWalk; // register TLBs translation miss requests - logic [`PPN_BITS-1:0] BasePageTablePPN; - logic [`PPN_BITS-1:0] CurrentPPN; + logic [P.PPN_BITS-1:0] BasePageTablePPN; + logic [P.PPN_BITS-1:0] CurrentPPN; logic Executable, Writable, Readable, Valid, PTE_U; logic Misaligned, MegapageMisaligned; logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE; @@ -83,18 +81,18 @@ module hptw ( logic TLBMiss; logic PRegEn; logic [1:0] NextPageType; - logic [`SVMODE_BITS-1:0] SvMode; - logic [`XLEN-1:0] TranslationVAdr; - logic [`XLEN-1:0] NextPTE; + logic [P.SVMODE_BITS-1:0] SvMode; + logic [P.XLEN-1:0] TranslationVAdr; + logic [P.XLEN-1:0] NextPTE; logic UpdatePTE; logic HPTWUpdateDA; - logic [`PA_BITS-1:0] HPTWReadAdr; + logic [P.PA_BITS-1:0] HPTWReadAdr; logic SelHPTWAdr; - logic [`XLEN+1:0] HPTWAdrExt; + logic [P.XLEN+1:0] HPTWAdrExt; logic ITLBMissOrUpdateDAF; logic DTLBMissOrUpdateDAM; logic LSUAccessFaultM; - logic [`PA_BITS-1:0] HPTWAdr; + logic [P.PA_BITS-1:0] HPTWAdr; logic [1:0] HPTWRW; logic [2:0] HPTWSize; // 32 or 64 bit access statetype WalkerState, NextWalkerState, InitialWalkerState; @@ -119,18 +117,18 @@ module hptw ( assign HPTWInstrAccessFaultF = TakeHPTWFaultDelay ? 
HPTWInstrAccessFaultDelay : 1'b0; // Extract bits from CSRs and inputs - assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; - assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0]; + assign SvMode = SATP_REGW[P.XLEN-1:P.XLEN-P.SVMODE_BITS]; + assign BasePageTablePPN = SATP_REGW[P.PPN_BITS-1:0]; assign TLBMiss = (DTLBMissOrUpdateDAM | ITLBMissOrUpdateDAF); // Determine which address to translate - mux2 #(`XLEN) vadrmux(PCSpillF, IEUAdrExtM[`XLEN-1:0], DTLBWalk, TranslationVAdr); - assign CurrentPPN = PTE[`PPN_BITS+9:10]; + mux2 #(P.XLEN) vadrmux(PCSpillF, IEUAdrExtM[P.XLEN-1:0], DTLBWalk, TranslationVAdr); + assign CurrentPPN = PTE[P.PPN_BITS+9:10]; // State flops flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissOrUpdateDAM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB) assign PRegEn = HPTWRW[1] & ~DCacheStallM | UpdatePTE; - flopenr #(`XLEN) PTEReg(clk, reset, PRegEn, NextPTE, PTE); // Capture page table entry from data cache + flopenr #(P.XLEN) PTEReg(clk, reset, PRegEn, NextPTE, PTE); // Capture page table entry from data cache // Assign PTE descriptors common across all XLEN values // For non-leaf PTEs, D, A, U bits are reserved and ignored. 
They do not cause faults while walking the page table @@ -140,7 +138,7 @@ module hptw ( assign ValidLeafPTE = ValidPTE & LeafPTE; assign ValidNonLeafPTE = ValidPTE & ~LeafPTE; - if(`SVADU_SUPPORTED) begin : hptwwrites + if(P.SVADU_SUPPORTED) begin : hptwwrites logic ReadAccess, WriteAccess; logic InvalidRead, InvalidWrite, InvalidOp; logic UpperBitsUnequal; @@ -148,18 +146,18 @@ module hptw ( logic [1:0] EffectivePrivilegeMode; logic ImproperPrivilege; logic SaveHPTWAdr, SelHPTWWriteAdr; - logic [`PA_BITS-1:0] HPTWWriteAdr; + logic [P.PA_BITS-1:0] HPTWWriteAdr; logic SetDirty; logic Dirty, Accessed; - logic [`XLEN-1:0] AccessedPTE; + logic [P.XLEN-1:0] AccessedPTE; - assign AccessedPTE = {PTE[`XLEN-1:8], (SetDirty | PTE[7]), 1'b1, PTE[5:0]}; // set accessed bit, conditionally set dirty bit - mux2 #(`XLEN) NextPTEMux(ReadDataM, AccessedPTE, UpdatePTE, NextPTE); - flopenr #(`PA_BITS) HPTWAdrWriteReg(clk, reset, SaveHPTWAdr, HPTWReadAdr, HPTWWriteAdr); + assign AccessedPTE = {PTE[P.XLEN-1:8], (SetDirty | PTE[7]), 1'b1, PTE[5:0]}; // set accessed bit, conditionally set dirty bit + mux2 #(P.XLEN) NextPTEMux(ReadDataM, AccessedPTE, UpdatePTE, NextPTE); + flopenr #(P.PA_BITS) HPTWAdrWriteReg(clk, reset, SaveHPTWAdr, HPTWReadAdr, HPTWWriteAdr); assign SaveHPTWAdr = WalkerState == L0_ADR; assign SelHPTWWriteAdr = UpdatePTE | HPTWRW[0]; - mux2 #(`PA_BITS) HPTWWriteAdrMux(HPTWReadAdr, HPTWWriteAdr, SelHPTWWriteAdr, HPTWAdr); + mux2 #(P.PA_BITS) HPTWWriteAdrMux(HPTWReadAdr, HPTWWriteAdr, SelHPTWWriteAdr, HPTWAdr); assign {Dirty, Accessed} = PTE[7:6]; assign WriteAccess = MemRWM[0]; // implies | (|AtomicM); @@ -167,11 +165,11 @@ module hptw ( assign ReadAccess = MemRWM[1]; assign EffectivePrivilegeMode = DTLBWalk ? (STATUS_MPRV ? 
STATUS_MPP : PrivilegeModeW) : PrivilegeModeW; // DTLB uses MPP mode when MPRV is 1 - assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | - ((EffectivePrivilegeMode == `S_MODE) & PTE_U & (~STATUS_SUM & DTLBWalk)); + assign ImproperPrivilege = ((EffectivePrivilegeMode == P.U_MODE) & ~PTE_U) | + ((EffectivePrivilegeMode == P.S_MODE) & PTE_U & (~STATUS_SUM & DTLBWalk)); // Check for page faults - vm64check vm64check(.SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), .VAdr(TranslationVAdr), + vm64check vm64check(.SATP_MODE(SATP_REGW[P.XLEN-1:P.XLEN-P.SVMODE_BITS]), .VAdr(TranslationVAdr), .SV39Mode(), .UpperBitsUnequal); assign InvalidRead = ReadAccess & ~Readable & (~STATUS_MXR | ~Executable); assign InvalidWrite = WriteAccess & ~Writable; @@ -212,16 +210,16 @@ module hptw ( endcase // HPTWAdr muxing - if (`XLEN==32) begin // RV32 + if (P.XLEN==32) begin // RV32 logic [9:0] VPN; - logic [`PPN_BITS-1:0] PPN; + logic [P.PPN_BITS-1:0] PPN; assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN; assign HPTWReadAdr = {PPN, VPN, 2'b00}; assign HPTWSize = 3'b010; end else begin // RV64 logic [8:0] VPN; - logic [`PPN_BITS-1:0] PPN; + logic [P.PPN_BITS-1:0] PPN; always_comb case (WalkerState) // select VPN field based on HPTW state L3_ADR, L3_RD: VPN = TranslationVAdr[47:39]; @@ -230,19 +228,19 @@ module hptw ( default: VPN = TranslationVAdr[20:12]; endcase assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) | - (SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN; + (SvMode != P.SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? 
BasePageTablePPN : CurrentPPN; assign HPTWReadAdr = {PPN, VPN, 3'b000}; assign HPTWSize = 3'b011; end // Initial state and misalignment for RV32/64 - if (`XLEN == 32) begin + if (P.XLEN == 32) begin assign InitialWalkerState = L1_ADR; assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0 assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned); end else begin logic GigapageMisaligned, TerapageMisaligned; - assign InitialWalkerState = (SvMode == `SV48) ? L3_ADR : L2_ADR; + assign InitialWalkerState = (SvMode == P.SV48) ? L3_ADR : L2_ADR; assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0 assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0 assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0 @@ -281,7 +279,7 @@ module hptw ( L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; else if(LSUAccessFaultM) NextWalkerState = IDLE; else NextWalkerState = LEAF; - LEAF: if (`SVADU_SUPPORTED & HPTWUpdateDA) NextWalkerState = UPDATE_PTE; + LEAF: if (P.SVADU_SUPPORTED & HPTWUpdateDA) NextWalkerState = UPDATE_PTE; else NextWalkerState = IDLE; UPDATE_PTE: if(DCacheStallM) NextWalkerState = UPDATE_PTE; else NextWalkerState = LEAF; @@ -293,8 +291,8 @@ module hptw ( assign HPTWAccessFaultDelay = HPTWLoadAccessFaultDelay | HPTWStoreAmoAccessFaultDelay | HPTWInstrAccessFaultDelay; assign HPTWStall = (WalkerState != IDLE) | (WalkerState == IDLE & TLBMiss & ~(HPTWAccessFaultDelay)); - assign ITLBMissOrUpdateDAF = ITLBMissF | (`SVADU_SUPPORTED & InstrUpdateDAF); - assign DTLBMissOrUpdateDAM = DTLBMissM | (`SVADU_SUPPORTED & DataUpdateDAM); + assign ITLBMissOrUpdateDAF = ITLBMissF | (P.SVADU_SUPPORTED & InstrUpdateDAF); + assign DTLBMissOrUpdateDAM = DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM); // HTPW address/data/control muxing @@ -304,15 +302,15 @@ module hptw ( // always block interrupts when using the hardware page table walker. 
// multiplex the outputs to LSU - if(`XLEN == 64) assign HPTWAdrExt = {{(`XLEN+2-`PA_BITS){1'b0}}, HPTWAdr}; // extend to 66 bits + if(P.XLEN == 64) assign HPTWAdrExt = {{(P.XLEN+2-P.PA_BITS){1'b0}}, HPTWAdr}; // extend to 66 bits else assign HPTWAdrExt = HPTWAdr; mux2 #(2) rwmux(MemRWM, HPTWRW, SelHPTW, PreLSURWM); mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LSUFunct3M); mux2 #(7) funct7mux(Funct7M, 7'b0, SelHPTW, LSUFunct7M); mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LSUAtomicM); - mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, IHAdrM); - if(`SVADU_SUPPORTED) - mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IHWriteDataM); + mux2 #(P.XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, IHAdrM); + if(P.SVADU_SUPPORTED) + mux2 #(P.XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IHWriteDataM); else assign IHWriteDataM = WriteDataM; endmodule diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 380787e6b..517b16860 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -26,24 +26,23 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( +module mmu import cvw::*; #(parameter cvw_t P, + parameter TLB_ENTRIES = 8, IMMU = 0) ( input logic clk, reset, - input logic [`XLEN-1:0] SATP_REGW, // Current value of satp CSR (from privileged unit) + input logic [P.XLEN-1:0] SATP_REGW, // Current value of satp CSR (from privileged unit) input logic STATUS_MXR, // Status CSR: make executable page readable input logic STATUS_SUM, // Status CSR: Supervisor access to user memory input logic STATUS_MPRV, // Status CSR: modify machine privilege input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor input logic DisableTranslation, // virtual address translation disabled during D$ flush and HPTW walk that use physical addresses - input logic [`XLEN+1:0] VAdr, // virtual/physical address from IEU or physical address from HPTW + input logic [P.XLEN+1:0] VAdr, // virtual/physical address from IEU or physical address from HPTW input logic [1:0] Size, // access size: 00 = 8 bits, 01 = 16 bits, 10 = 32 bits , 11 = 64 bits - input logic [`XLEN-1:0] PTE, // page table entry + input logic [P.XLEN-1:0] PTE, // page table entry input logic [1:0] PageTypeWriteVal, // page type input logic TLBWrite, // write TLB entry input logic TLBFlush, // Invalidate all TLB entries - output logic [`PA_BITS-1:0] PhysicalAddress, // PAdr when no translation, or translated VAdr (TLBPAdr) when there is translation + output logic [P.PA_BITS-1:0] PhysicalAddress, // PAdr when no translation, or translated VAdr (TLBPAdr) when there is translation output logic TLBMiss, // Miss TLB output logic Cacheable, // PMA indicates memory address is cachable output logic Idempotent, // PMA indicates memory address is idempotent @@ -55,11 +54,11 @@ module mmu #(parameter TLB_ENTRIES = 8, 
IMMU = 0) ( output logic LoadMisalignedFaultM, StoreAmoMisalignedFaultM, // misaligned fault sources // PMA checker signals input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // access type - input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration - input var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // PMP addresses + input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration + input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP addresses ); - logic [`PA_BITS-1:0] TLBPAdr; // physical address for TLB + logic [P.PA_BITS-1:0] TLBPAdr; // physical address for TLB logic PMAInstrAccessFaultF; // Instruction access fault from PMA logic PMPInstrAccessFaultF; // Instruction access fault from PMP logic PMALoadAccessFaultM; // Load access fault from PMA @@ -73,15 +72,15 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( logic ReadNoAmoAccessM; // Read that is not part of atomic operation causes Load faults. Otherwise StoreAmo faults // only instantiate TLB if Virtual Memory is supported - if (`VIRTMEM_SUPPORTED) begin:tlb + if (P.VIRTMEM_SUPPORTED) begin:tlb logic ReadAccess, WriteAccess; assign ReadAccess = ExecuteAccessF | ReadAccessM; // execute also acts as a TLB read. 
Execute and Read are never active for the same MMU, so safe to mix pipestages assign WriteAccess = WriteAccessM; tlb #(.TLB_ENTRIES(TLB_ENTRIES), .ITLB(IMMU)) tlb( .clk, .reset, - .SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), - .SATP_ASID(SATP_REGW[`ASID_BASE+`ASID_BITS-1:`ASID_BASE]), - .VAdr(VAdr[`XLEN-1:0]), .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + .SATP_MODE(SATP_REGW[P.XLEN-1:P.XLEN-P.SVMODE_BITS]), + .SATP_ASID(SATP_REGW[P.ASID_BASE+P.ASID_BITS-1:P.ASID_BASE]), + .VAdr(VAdr[P.XLEN-1:0]), .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .PTE, .PageTypeWriteVal, .TLBWrite, .TLBFlush, .TLBPAdr, .TLBMiss, .TLBHit, @@ -96,7 +95,7 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( // If translation is occuring, select translated physical address from TLB // the lower 12 bits are the page offset. These are never changed from the orginal // non translated address. - mux2 #(`PA_BITS-12) addressmux(VAdr[`PA_BITS-1:12], TLBPAdr[`PA_BITS-1:12], Translate, PhysicalAddress[`PA_BITS-1:12]); + mux2 #(P.PA_BITS-12) addressmux(VAdr[P.PA_BITS-1:12], TLBPAdr[P.PA_BITS-1:12], Translate, PhysicalAddress[P.PA_BITS-1:12]); assign PhysicalAddress[11:0] = VAdr[11:0]; /////////////////////////////////////////// @@ -108,7 +107,7 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( .Cacheable, .Idempotent, .SelTIM, .PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM); - if (`PMP_ENTRIES > 0) begin : pmp + if (P.PMP_ENTRIES > 0) begin : pmp pmpchecker pmpchecker(.PhysicalAddress, .PrivilegeModeW, .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 05be8cfa1..95b57c848 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -208,7 +208,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .FCvtIntStallD, .LoadStallD, 
.MDUStallD, .CSRRdStallD, .PCSrcE, .CSRReadM, .CSRWriteM, .PrivilegedM, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD); - lsu lsu( + lsu #(P) lsu( .clk, .reset, .StallM, .FlushM, .StallW, .FlushW, // CPU interface .MemRWM, .Funct3M, .Funct7M(InstrM[31:25]), .AtomicM, From 438c955d1c7c4d8a44286bfb41c9194a3cc2b4a1 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 24 May 2023 17:20:55 -0500 Subject: [PATCH 07/20] PM(P/A) checkers parameterized based on Lim's work. --- src/mmu/mmu.sv | 4 ++-- src/mmu/pmachecker.sv | 6 ++---- src/mmu/pmpadrdec.sv | 16 +++++++--------- src/mmu/pmpchecker.sv | 28 +++++++++++++--------------- 4 files changed, 24 insertions(+), 30 deletions(-) diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 517b16860..636680f3c 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -102,13 +102,13 @@ module mmu import cvw::*; #(parameter cvw_t P, // Check physical memory accesses /////////////////////////////////////////// - pmachecker pmachecker(.PhysicalAddress, .Size, + pmachecker #(P.PA_BITS) pmachecker(.PhysicalAddress, .Size, .AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .Cacheable, .Idempotent, .SelTIM, .PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM); if (P.PMP_ENTRIES > 0) begin : pmp - pmpchecker pmpchecker(.PhysicalAddress, .PrivilegeModeW, + pmpchecker #(P) pmpchecker(.PhysicalAddress, .PrivilegeModeW, .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .PMPInstrAccessFaultF, .PMPLoadAccessFaultM, .PMPStoreAmoAccessFaultM); diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index ace481f74..bba1f7b6f 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -28,10 +28,8 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module pmachecker ( - input logic [`PA_BITS-1:0] PhysicalAddress, +module pmachecker #(parameter PA_BITS) ( + input logic [PA_BITS-1:0] PhysicalAddress, input logic [1:0] Size, input logic AtomicAccessM, // Atomic access input logic ExecuteAccessF, // Execute access diff --git a/src/mmu/pmpadrdec.sv b/src/mmu/pmpadrdec.sv index a4fb8068b..4c8e1d002 100644 --- a/src/mmu/pmpadrdec.sv +++ b/src/mmu/pmpadrdec.sv @@ -30,12 +30,10 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module pmpadrdec ( - input logic [`PA_BITS-1:0] PhysicalAddress, +module pmpadrdec import cvw::*; #(parameter cvw_t P) ( + input logic [P.PA_BITS-1:0] PhysicalAddress, input logic [7:0] PMPCfg, - input logic [`PA_BITS-3:0] PMPAdr, + input logic [P.PA_BITS-3:0] PMPAdr, input logic PAgePMPAdrIn, output logic PAgePMPAdrOut, output logic Match, @@ -49,7 +47,7 @@ module pmpadrdec ( logic TORMatch, NAMatch; logic PAltPMPAdr; - logic [`PA_BITS-1:0] CurrentAdrFull; + logic [P.PA_BITS-1:0] CurrentAdrFull; logic [1:0] AdrMode; assign AdrMode = PMPCfg[4:3]; @@ -66,13 +64,13 @@ module pmpadrdec ( assign TORMatch = PAgePMPAdrIn & PAltPMPAdr; // exclusion-tag: PAgePMPAdrIn // Naturally aligned regions - logic [`PA_BITS-1:0] NAMask, NABase; + logic [P.PA_BITS-1:0] NAMask, NABase; assign NAMask[1:0] = {2'b11}; - assign NAMask[`PA_BITS-1:2] = (PMPAdr + {{(`PA_BITS-3){1'b0}}, (AdrMode == NAPOT)}) ^ PMPAdr; + assign NAMask[P.PA_BITS-1:2] = (PMPAdr + {{(P.PA_BITS-3){1'b0}}, (AdrMode == NAPOT)}) ^ PMPAdr; // form a mask where the bottom k bits are 1, corresponding to a size of 2^k bytes for this memory region. // This assumes we're using at least an NA4 region, but works for any size NAPOT region. 
- assign NABase = {(PMPAdr & ~NAMask[`PA_BITS-1:2]), 2'b00}; // base physical address of the pmp. + assign NABase = {(PMPAdr & ~NAMask[P.PA_BITS-1:2]), 2'b00}; // base physical address of the pmp. assign NAMatch = &((NABase ~^ PhysicalAddress) | NAMask); // check if upper bits of base address match, ignore lower bits correspoonding to inside the memory range diff --git a/src/mmu/pmpchecker.sv b/src/mmu/pmpchecker.sv index e7c660ca0..0b4cec4a1 100644 --- a/src/mmu/pmpchecker.sv +++ b/src/mmu/pmpchecker.sv @@ -29,10 +29,8 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module pmpchecker ( - input logic [`PA_BITS-1:0] PhysicalAddress, +module pmpchecker import cvw::*; #(parameter cvw_t P) ( + input logic [P.PA_BITS-1:0] PhysicalAddress, input logic [1:0] PrivilegeModeW, // ModelSim has a switch -svinputport which controls whether input ports // are nets (wires) or vars by default. The default setting of this switch is @@ -41,8 +39,8 @@ module pmpchecker ( // this will be understood as a var. However, if we don't supply the `var` // keyword, the compiler warns us that it's interpreting the signal as a var, // which we might not intend. 
- input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], - input var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], + input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], + input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW [P.PMP_ENTRIES-1:0], input logic ExecuteAccessF, WriteAccessM, ReadAccessM, output logic PMPInstrAccessFaultF, output logic PMPLoadAccessFaultM, @@ -51,25 +49,25 @@ module pmpchecker ( // Bit i is high when the address falls in PMP region i logic EnforcePMP; // should PMP be checked in this privilege level - logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges - logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address. - logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set - logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] + logic [P.PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges + logic [P.PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address. + logic [P.PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set + logic [P.PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] - if (`PMP_ENTRIES > 0) begin: pmp // prevent complaints about array of no elements when PMP_ENTRIES = 0 - pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0]( + if (P.PMP_ENTRIES > 0) begin: pmp // prevent complaints about array of no elements when PMP_ENTRIES = 0 + pmpadrdec #(P) pmpadrdecs[P.PMP_ENTRIES-1:0]( .PhysicalAddress, .PMPCfg(PMPCFG_ARRAY_REGW), .PMPAdr(PMPADDR_ARRAY_REGW), - .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}), + .PAgePMPAdrIn({PAgePMPAdr[P.PMP_ENTRIES-2:0], 1'b1}), .PAgePMPAdrOut(PAgePMPAdr), .Match, .L, .X, .W, .R); end - priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches. 
+ priorityonehot #(P.PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches. // Only enforce PMP checking for S and U modes or in Machine mode when L bit is set in selected region - assign EnforcePMP = (PrivilegeModeW != `M_MODE) | |(L & FirstMatch); // *** switch to this logic when PMP is initialized for non-machine mode + assign EnforcePMP = (PrivilegeModeW != P.M_MODE) | |(L & FirstMatch); // *** switch to this logic when PMP is initialized for non-machine mode assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|(X & FirstMatch) ; assign PMPStoreAmoAccessFaultM = EnforcePMP & WriteAccessM & ~|(W & FirstMatch) ; From 7c364d5a77a643e98cafe59767f606a6d46f949a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 24 May 2023 18:02:22 -0500 Subject: [PATCH 08/20] Updated mmu's tlb and hptw to use Lim's parameterization. --- config/buildroot/config.vh | 1 + config/rv32e/config.vh | 1 + config/rv32gc/config.vh | 2 +- config/rv32i/config.vh | 3 +- config/rv32imc/config.vh | 1 + config/rv64fpquad/config.vh | 1 + config/rv64gc/config.vh | 2 +- config/rv64i/config.vh | 1 + src/mmu/hptw.sv | 2 +- src/mmu/mmu.sv | 10 +++--- src/mmu/tlb/tlb.sv | 61 ++++++++++++++++++------------------- src/mmu/tlb/tlbcam.sv | 10 +++--- src/mmu/tlb/tlbcamline.sv | 12 +++----- src/mmu/tlb/tlbcontrol.sv | 24 +++++++-------- src/mmu/tlb/tlbmixer.sv | 24 +++++++-------- src/mmu/tlb/tlbram.sv | 19 ++++++------ src/mmu/tlb/tlbramline.sv | 2 -- src/mmu/tlb/vm64check.sv | 10 +++--- 18 files changed, 90 insertions(+), 96 deletions(-) diff --git a/config/buildroot/config.vh b/config/buildroot/config.vh index 10e20a362..59848a39a 100644 --- a/config/buildroot/config.vh +++ b/config/buildroot/config.vh @@ -133,6 +133,7 @@ localparam PLIC_GPIO_ID = 32'd3; localparam BPRED_SUPPORTED = 1; localparam BPRED_TYPE = "GSHARE_N"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT; localparam BPRED_SIZE = 
32'd10; +localparam BPRED_NUM_LHR = 32'd6; localparam BTB_SIZE = 32'd10; diff --git a/config/rv32e/config.vh b/config/rv32e/config.vh index be5862f58..e41db6068 100644 --- a/config/rv32e/config.vh +++ b/config/rv32e/config.vh @@ -133,6 +133,7 @@ localparam PLIC_UART_ID = 32'd10; localparam BPRED_SUPPORTED = 0; localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N localparam BPRED_SIZE = 32'd10; +localparam BPRED_NUM_LHR = 32'd6; localparam BTB_SIZE = 32'd10; localparam SVADU_SUPPORTED = 0; diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index 1e0a0fb23..855a374ae 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -139,7 +139,7 @@ localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N localparam BPRED_SIZE = 32'd16; localparam BTB_SIZE = 32'd10; -localparam SVADU_SUPPORTED = 0; +localparam SVADU_SUPPORTED = 1; localparam ZMMUL_SUPPORTED = 0; // FPU division architecture diff --git a/config/rv32i/config.vh b/config/rv32i/config.vh index affee30c2..54f9791a5 100644 --- a/config/rv32i/config.vh +++ b/config/rv32i/config.vh @@ -133,6 +133,7 @@ localparam PLIC_UART_ID = 32'd10; localparam BPRED_SUPPORTED = 0; localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N localparam BPRED_SIZE = 32'd10; +localparam BPRED_NUM_LHR = 32'd6; localparam BTB_SIZE = 32'd10; localparam SVADU_SUPPORTED = 0; @@ -151,4 +152,4 @@ localparam ZBS_SUPPORTED = 0; // Memory synthesis configuration localparam USE_SRAM = 0; -`include "test-shared.vh" \ No newline at end of file +`include "test-shared.vh" diff --git a/config/rv32imc/config.vh b/config/rv32imc/config.vh index cd029635f..5b0535ad9 100644 --- a/config/rv32imc/config.vh +++ b/config/rv32imc/config.vh @@ -132,6 +132,7 @@ localparam PLIC_UART_ID = 32'd10; localparam BPRED_SUPPORTED = 0; localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N localparam BPRED_SIZE = 32'd10; +localparam BPRED_NUM_LHR = 32'd6; 
localparam BTB_SIZE = 32'd10; localparam SVADU_SUPPORTED = 0; diff --git a/config/rv64fpquad/config.vh b/config/rv64fpquad/config.vh index 63b919e9c..656e5c90c 100644 --- a/config/rv64fpquad/config.vh +++ b/config/rv64fpquad/config.vh @@ -135,6 +135,7 @@ localparam PLIC_UART_ID = 32'd10; localparam BPRED_SUPPORTED = 1; localparam BPRED_TYPE = "GSHARE_N"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT localparam BPRED_SIZE = 32'd10; +localparam BPRED_NUM_LHR = 32'd6; localparam BTB_SIZE = 32'd10; localparam SVADU_SUPPORTED = 0; diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index 1ef0bc08c..3e6ba806e 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -140,7 +140,7 @@ localparam BPRED_TYPE = "GSHARE_N"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BAS localparam BPRED_SIZE = 32'd10; localparam BTB_SIZE = 32'd10; -localparam SVADU_SUPPORTED = 0; +localparam SVADU_SUPPORTED = 1; localparam ZMMUL_SUPPORTED = 0; // FPU division architecture diff --git a/config/rv64i/config.vh b/config/rv64i/config.vh index 9afb890cf..15c0e7994 100644 --- a/config/rv64i/config.vh +++ b/config/rv64i/config.vh @@ -135,6 +135,7 @@ localparam PLIC_UART_ID = 32'd10; localparam BPRED_SUPPORTED = 0; localparam BPRED_TYPE = "GSHARE_N"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT localparam BPRED_SIZE = 32'd10; +localparam BPRED_NUM_LHR = 32'd6; localparam BTB_SIZE = 32'd10; localparam SVADU_SUPPORTED = 0; diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index e627f598d..17a98acd3 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -169,7 +169,7 @@ module hptw import cvw::*; #(parameter cvw_t P) ( ((EffectivePrivilegeMode == P.S_MODE) & PTE_U & (~STATUS_SUM & DTLBWalk)); // Check for page faults - vm64check vm64check(.SATP_MODE(SATP_REGW[P.XLEN-1:P.XLEN-P.SVMODE_BITS]), .VAdr(TranslationVAdr), + vm64check #(P) vm64check(.SATP_MODE(SATP_REGW[P.XLEN-1:P.XLEN-P.SVMODE_BITS]), .VAdr(TranslationVAdr), .SV39Mode(), .UpperBitsUnequal); assign 
InvalidRead = ReadAccess & ~Readable & (~STATUS_MXR | ~Executable); assign InvalidWrite = WriteAccess & ~Writable; diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 636680f3c..636ca9584 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -29,16 +29,16 @@ module mmu import cvw::*; #(parameter cvw_t P, parameter TLB_ENTRIES = 8, IMMU = 0) ( input logic clk, reset, - input logic [P.XLEN-1:0] SATP_REGW, // Current value of satp CSR (from privileged unit) + input logic [P.XLEN-1:0] SATP_REGW, // Current value of satp CSR (from privileged unit) input logic STATUS_MXR, // Status CSR: make executable page readable input logic STATUS_SUM, // Status CSR: Supervisor access to user memory input logic STATUS_MPRV, // Status CSR: modify machine privilege input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor input logic DisableTranslation, // virtual address translation disabled during D$ flush and HPTW walk that use physical addresses - input logic [P.XLEN+1:0] VAdr, // virtual/physical address from IEU or physical address from HPTW + input logic [P.XLEN+1:0] VAdr, // virtual/physical address from IEU or physical address from HPTW input logic [1:0] Size, // access size: 00 = 8 bits, 01 = 16 bits, 10 = 32 bits , 11 = 64 bits - input logic [P.XLEN-1:0] PTE, // page table entry + input logic [P.XLEN-1:0] PTE, // page table entry input logic [1:0] PageTypeWriteVal, // page type input logic TLBWrite, // write TLB entry input logic TLBFlush, // Invalidate all TLB entries @@ -58,7 +58,7 @@ module mmu import cvw::*; #(parameter cvw_t P, input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP addresses ); - logic [P.PA_BITS-1:0] TLBPAdr; // physical address for TLB + logic [P.PA_BITS-1:0] TLBPAdr; // physical address for TLB logic PMAInstrAccessFaultF; // Instruction access fault from PMA logic PMPInstrAccessFaultF; // Instruction access fault from PMP logic 
PMALoadAccessFaultM; // Load access fault from PMA @@ -76,7 +76,7 @@ module mmu import cvw::*; #(parameter cvw_t P, logic ReadAccess, WriteAccess; assign ReadAccess = ExecuteAccessF | ReadAccessM; // execute also acts as a TLB read. Execute and Read are never active for the same MMU, so safe to mix pipestages assign WriteAccess = WriteAccessM; - tlb #(.TLB_ENTRIES(TLB_ENTRIES), .ITLB(IMMU)) tlb( + tlb #(.P(P), .TLB_ENTRIES(TLB_ENTRIES), .ITLB(IMMU)) tlb( .clk, .reset, .SATP_MODE(SATP_REGW[P.XLEN-1:P.XLEN-P.SVMODE_BITS]), .SATP_ASID(SATP_REGW[P.ASID_BASE+P.ASID_BITS-1:P.ASID_BASE]), diff --git a/src/mmu/tlb/tlb.sv b/src/mmu/tlb/tlb.sv index 64fd3c95d..c081b0925 100644 --- a/src/mmu/tlb/tlb.sv +++ b/src/mmu/tlb/tlb.sv @@ -49,36 +49,35 @@ * RSW(2) -- for OS */ -`include "wally-config.vh" - // The TLB will have 2**ENTRY_BITS total entries -module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) ( - input logic clk, reset, - input logic [`SVMODE_BITS-1:0] SATP_MODE, // Current address translation mode - input logic [`ASID_BITS-1:0] SATP_ASID, - input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, - input logic [1:0] STATUS_MPP, - input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor - input logic ReadAccess, - input logic WriteAccess, - input logic DisableTranslation, - input logic [`XLEN-1:0] VAdr, // address input before translation (could be physical or virtual) - input logic [`XLEN-1:0] PTE, - input logic [1:0] PageTypeWriteVal, - input logic TLBWrite, - input logic TLBFlush, - output logic [`PA_BITS-1:0] TLBPAdr, - output logic TLBMiss, - output logic TLBHit, - output logic Translate, - output logic TLBPageFault, - output logic UpdateDA +module tlb import cvw::*; #(parameter cvw_t P, + parameter TLB_ENTRIES = 8, ITLB = 0) ( + input logic clk, reset, + input logic [P.SVMODE_BITS-1:0] SATP_MODE, // Current address translation mode + input logic [P.ASID_BITS-1:0] SATP_ASID, + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] 
STATUS_MPP, + input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor + input logic ReadAccess, + input logic WriteAccess, + input logic DisableTranslation, + input logic [P.XLEN-1:0] VAdr, // address input before translation (could be physical or virtual) + input logic [P.XLEN-1:0] PTE, + input logic [1:0] PageTypeWriteVal, + input logic TLBWrite, + input logic TLBFlush, + output logic [P.PA_BITS-1:0] TLBPAdr, + output logic TLBMiss, + output logic TLBHit, + output logic Translate, + output logic TLBPageFault, + output logic UpdateDA ); logic [TLB_ENTRIES-1:0] Matches, WriteEnables, PTE_Gs; // used as the one-hot encoding of WriteIndex // Sections of the virtual and physical addresses - logic [`VPN_BITS-1:0] VPN; - logic [`PPN_BITS-1:0] PPN; + logic [P.VPN_BITS-1:0] VPN; + logic [P.PPN_BITS-1:0] PPN; // Sections of the page table entry logic [7:0] PTEAccessBits; logic [1:0] HitPageType; @@ -87,7 +86,7 @@ module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) ( logic Misaligned; logic MegapageMisaligned; - if(`XLEN == 32) begin + if(P.XLEN == 32) begin assign MegapageMisaligned = |(PPN[9:0]); // must have zero PPN0 assign Misaligned = (HitPageType == 2'b01) & MegapageMisaligned; end else begin // 64-bit @@ -100,22 +99,22 @@ module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) ( ((HitPageType == 2'b01) & MegapageMisaligned); end - assign VPN = VAdr[`VPN_BITS+11:12]; + assign VPN = VAdr[P.VPN_BITS+11:12]; - tlbcontrol #(ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + tlbcontrol #(P, ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .TLBFlush, .PTEAccessBits, .CAMHit, .Misaligned, .TLBMiss, .TLBHit, .TLBPageFault, .UpdateDA, .SV39Mode, .Translate); tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables); - tlbcam #(TLB_ENTRIES, `VPN_BITS + `ASID_BITS, `VPN_SEGMENT_BITS) 
+ tlbcam #(P, TLB_ENTRIES, P.VPN_BITS + P.ASID_BITS, P.VPN_SEGMENT_BITS) tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs, .SATP_ASID, .Matches, .HitPageType, .CAMHit); - tlbram #(TLB_ENTRIES) tlbram(.clk, .reset, .PTE, .Matches, .WriteEnables, .PPN, .PTEAccessBits, .PTE_Gs); + tlbram #(P, TLB_ENTRIES) tlbram(.clk, .reset, .PTE, .Matches, .WriteEnables, .PPN, .PTEAccessBits, .PTE_Gs); // Replace segments of the virtual page number with segments of the physical // page number. For 4 KB pages, the entire virtual page number is replaced. // For superpages, some segments are considered offsets into a larger page. - tlbmixer Mixer(.VPN, .PPN, .HitPageType, .Offset(VAdr[11:0]), .TLBHit, .TLBPAdr); + tlbmixer #(P) Mixer(.VPN, .PPN, .HitPageType, .Offset(VAdr[11:0]), .TLBHit, .TLBPAdr); endmodule diff --git a/src/mmu/tlb/tlbcam.sv b/src/mmu/tlb/tlbcam.sv index 449411e20..38bc8c4c9 100644 --- a/src/mmu/tlb/tlbcam.sv +++ b/src/mmu/tlb/tlbcam.sv @@ -29,17 +29,15 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module tlbcam #(parameter TLB_ENTRIES = 8, KEY_BITS = 20, SEGMENT_BITS = 10) ( +module tlbcam import cvw::*; #(parameter cvw_t P, TLB_ENTRIES = 8, KEY_BITS = 20, SEGMENT_BITS = 10) ( input logic clk, reset, - input logic [`VPN_BITS-1:0] VPN, + input logic [P.VPN_BITS-1:0] VPN, input logic [1:0] PageTypeWriteVal, input logic SV39Mode, input logic TLBFlush, input logic [TLB_ENTRIES-1:0] WriteEnables, input logic [TLB_ENTRIES-1:0] PTE_Gs, - input logic [`ASID_BITS-1:0] SATP_ASID, + input logic [P.ASID_BITS-1:0] SATP_ASID, output logic [TLB_ENTRIES-1:0] Matches, output logic [1:0] HitPageType, output logic CAMHit @@ -53,7 +51,7 @@ module tlbcam #(parameter TLB_ENTRIES = 8, KEY_BITS = 20, SEGMENT_BITS = 10) ( // of page type. However, matches are determined based on a subset of the // page number segments. 
- tlbcamline #(KEY_BITS, SEGMENT_BITS) camlines[TLB_ENTRIES-1:0]( + tlbcamline #(P, KEY_BITS, SEGMENT_BITS) camlines[TLB_ENTRIES-1:0]( .clk, .reset, .VPN, .SATP_ASID, .SV39Mode, .PTE_G(PTE_Gs), .PageTypeWriteVal, .TLBFlush, .WriteEnable(WriteEnables), .PageTypeRead, .Match(Matches)); assign CAMHit = |Matches & ~TLBFlush; diff --git a/src/mmu/tlb/tlbcamline.sv b/src/mmu/tlb/tlbcamline.sv index 9f7a68e81..127326c20 100644 --- a/src/mmu/tlb/tlbcamline.sv +++ b/src/mmu/tlb/tlbcamline.sv @@ -29,12 +29,10 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module tlbcamline #(parameter KEY_BITS = 20, SEGMENT_BITS = 10) ( +module tlbcamline import cvw::*; #(parameter cvw_t P, KEY_BITS = 20, SEGMENT_BITS = 10) ( input logic clk, reset, - input logic [`VPN_BITS-1:0] VPN, // The requested page number to compare against the key - input logic [`ASID_BITS-1:0] SATP_ASID, + input logic [P.VPN_BITS-1:0] VPN, // The requested page number to compare against the key + input logic [P.ASID_BITS-1:0] SATP_ASID, input logic SV39Mode, input logic WriteEnable, // Write a new entry to this line input logic PTE_G, @@ -56,13 +54,13 @@ module tlbcamline #(parameter KEY_BITS = 20, SEGMENT_BITS = 10) ( logic [1:0] PageType; // Split up key and query into sections for each page table level. - logic [`ASID_BITS-1:0] Key_ASID; + logic [P.ASID_BITS-1:0] Key_ASID; logic [SEGMENT_BITS-1:0] Key0, Key1, Query0, Query1; logic MatchASID, Match0, Match1; assign MatchASID = (SATP_ASID == Key_ASID) | PTE_G; - if (`XLEN == 32) begin: match + if (P.XLEN == 32) begin: match assign {Key_ASID, Key1, Key0} = Key; assign {Query1, Query0} = VPN; diff --git a/src/mmu/tlb/tlbcontrol.sv b/src/mmu/tlb/tlbcontrol.sv index 9754124da..67d598038 100644 --- a/src/mmu/tlb/tlbcontrol.sv +++ b/src/mmu/tlb/tlbcontrol.sv @@ -26,11 +26,9 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module tlbcontrol #(parameter ITLB = 0) ( - input logic [`SVMODE_BITS-1:0] SATP_MODE, - input logic [`XLEN-1:0] VAdr, +module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( + input logic [P.SVMODE_BITS-1:0] SATP_MODE, + input logic [P.XLEN-1:0] VAdr, input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, input logic [1:0] STATUS_MPP, input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor @@ -58,13 +56,13 @@ module tlbcontrol #(parameter ITLB = 0) ( // Grab the sv mode from SATP and determine whether translation should occur assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1 - assign Translate = (SATP_MODE != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~DisableTranslation; + assign Translate = (SATP_MODE != P.NO_TRANSLATE[P.SVMODE_BITS-1:0]) & (EffectivePrivilegeMode != P.M_MODE) & ~DisableTranslation; // Determine whether TLB is being used assign TLBAccess = ReadAccess | WriteAccess; // Check that upper bits are legal (all 0s or all 1s) - vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequal); + vm64check #(P) vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequal); // unswizzle useful PTE bits assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; @@ -74,9 +72,9 @@ module tlbcontrol #(parameter ITLB = 0) ( if (ITLB == 1) begin:itlb // Instruction TLB fault checking // User mode may only execute user mode pages, and supervisor mode may // only execute non-user mode pages. 
- assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | - ((EffectivePrivilegeMode == `S_MODE) & PTE_U); - if(`SVADU_SUPPORTED) begin : hptwwrites + assign ImproperPrivilege = ((EffectivePrivilegeMode == P.U_MODE) & ~PTE_U) | + ((EffectivePrivilegeMode == P.S_MODE) & PTE_U); + if(P.SVADU_SUPPORTED) begin : hptwwrites assign UpdateDA = Translate & TLBHit & ~PTE_A & ~TLBPageFault; assign TLBPageFault = Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequal | Misaligned | ~PTE_V); end else begin @@ -89,8 +87,8 @@ module tlbcontrol #(parameter ITLB = 0) ( // User mode may only load/store from user mode pages, and supervisor mode // may only access user mode pages when STATUS_SUM is low. - assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | - ((EffectivePrivilegeMode == `S_MODE) & PTE_U & ~STATUS_SUM); + assign ImproperPrivilege = ((EffectivePrivilegeMode == P.U_MODE) & ~PTE_U) | + ((EffectivePrivilegeMode == P.S_MODE) & PTE_U & ~STATUS_SUM); // Check for read error. Reads are invalid when the page is not readable // (and executable pages are not readable) or when the page is neither // readable nor executable (and executable pages are readable). @@ -98,7 +96,7 @@ module tlbcontrol #(parameter ITLB = 0) ( // Check for write error. Writes are invalid when the page's write bit is // low. assign InvalidWrite = WriteAccess & ~PTE_W; - if(`SVADU_SUPPORTED) begin : hptwwrites + if(P.SVADU_SUPPORTED) begin : hptwwrites assign UpdateDA = Translate & TLBHit & (~PTE_A | WriteAccess & ~PTE_D) & ~TLBPageFault; assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequal | Misaligned | ~PTE_V)); end else begin diff --git a/src/mmu/tlb/tlbmixer.sv b/src/mmu/tlb/tlbmixer.sv index f5555c2a1..07d6eb985 100644 --- a/src/mmu/tlb/tlbmixer.sv +++ b/src/mmu/tlb/tlbmixer.sv @@ -29,24 +29,22 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module tlbmixer ( - input logic [`VPN_BITS-1:0] VPN, - input logic [`PPN_BITS-1:0] PPN, +module tlbmixer import cvw::*; #(parameter cvw_t P) ( + input logic [P.VPN_BITS-1:0] VPN, + input logic [P.PPN_BITS-1:0] PPN, input logic [1:0] HitPageType, input logic [11:0] Offset, input logic TLBHit, - output logic [`PA_BITS-1:0] TLBPAdr + output logic [P.PA_BITS-1:0] TLBPAdr ); - localparam EXTRA_BITS = `PPN_BITS - `VPN_BITS; - logic [`PPN_BITS-1:0] ZeroExtendedVPN; - logic [`PPN_BITS-1:0] PageNumberMask; - logic [`PPN_BITS-1:0] PPNMixed; + localparam EXTRA_BITS = P.PPN_BITS - P.VPN_BITS; + logic [P.PPN_BITS-1:0] ZeroExtendedVPN; + logic [P.PPN_BITS-1:0] PageNumberMask; + logic [P.PPN_BITS-1:0] PPNMixed; // produce PageNumberMask with 1s where virtual page number bits should be untranslaetd for superpages - if (`XLEN == 32) + if (P.XLEN == 32) // kilopage: 22 bits of PPN, 0 bits of VPN // megapage: 12 bits of PPN, 10 bits of VPN mux2 #(22) pnm(22'h000000, 22'h0003FF, HitPageType[0], PageNumberMask); @@ -60,10 +58,10 @@ module tlbmixer ( // merge low segments of VPN with high segments of PPN decided by the pagetype. assign ZeroExtendedVPN = {{EXTRA_BITS{1'b0}}, VPN}; // forces the VPN to be the same width as PPN. assign PPNMixed = PPN | ZeroExtendedVPN & PageNumberMask; // - //mux2 #(1) mixmux[`PPN_BITS-1:0](ZeroExtendedVPN, PPN, PageNumberMask, PPNMixed); + //mux2 #(1) mixmux[P.PPN_BITS-1:0](ZeroExtendedVPN, PPN, PageNumberMask, PPNMixed); //assign PPNMixed = (ZeroExtendedVPN & ~PageNumberMask) | (PPN & PageNumberMask); // Output the hit physical address if translation is currently on. 
// Provide physical address of zero if not TLBHits, to cause segmentation error if miss somehow percolated through signal - mux2 #(`PA_BITS) hitmux('0, {PPNMixed, Offset}, TLBHit, TLBPAdr); // set PA to 0 if TLB misses, to cause segementation error if this miss somehow passes through system + mux2 #(P.PA_BITS) hitmux('0, {PPNMixed, Offset}, TLBHit, TLBPAdr); // set PA to 0 if TLB misses, to cause segementation error if this miss somehow passes through system endmodule diff --git a/src/mmu/tlb/tlbram.sv b/src/mmu/tlb/tlbram.sv index febb8b6f6..693d841a6 100644 --- a/src/mmu/tlb/tlbram.sv +++ b/src/mmu/tlb/tlbram.sv @@ -28,27 +28,26 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" -module tlbram #(parameter TLB_ENTRIES = 8) ( +module tlbram import cvw::*; #(parameter cvw_t P, TLB_ENTRIES = 8) ( input logic clk, reset, - input logic [`XLEN-1:0] PTE, + input logic [P.XLEN-1:0] PTE, input logic [TLB_ENTRIES-1:0] Matches, WriteEnables, - output logic [`PPN_BITS-1:0] PPN, + output logic [P.PPN_BITS-1:0] PPN, output logic [7:0] PTEAccessBits, output logic [TLB_ENTRIES-1:0] PTE_Gs ); - logic [`PPN_BITS+9:0] RamRead[TLB_ENTRIES-1:0]; - logic [`PPN_BITS+9:0] PageTableEntry; + logic [P.PPN_BITS+9:0] RamRead[TLB_ENTRIES-1:0]; + logic [P.PPN_BITS+9:0] PageTableEntry; // RAM implemented with array of flops and AND/OR read logic - tlbramline #(`PPN_BITS+10) tlbramline[TLB_ENTRIES-1:0] + tlbramline #(P.PPN_BITS+10) tlbramline[TLB_ENTRIES-1:0] (.clk, .reset, .re(Matches), .we(WriteEnables), - .d(PTE[`PPN_BITS+9:0]), .q(RamRead), .PTE_G(PTE_Gs)); - or_rows #(TLB_ENTRIES, `PPN_BITS+10) PTEOr(RamRead, PageTableEntry); + .d(PTE[P.PPN_BITS+9:0]), .q(RamRead), .PTE_G(PTE_Gs)); + or_rows #(TLB_ENTRIES, P.PPN_BITS+10) PTEOr(RamRead, PageTableEntry); // Rename the bits read from the TLB RAM assign PTEAccessBits = PageTableEntry[7:0]; - assign PPN = 
PageTableEntry[`PPN_BITS+9:10]; + assign PPN = PageTableEntry[P.PPN_BITS+9:10]; endmodule diff --git a/src/mmu/tlb/tlbramline.sv b/src/mmu/tlb/tlbramline.sv index 035c58d58..cc393f72a 100644 --- a/src/mmu/tlb/tlbramline.sv +++ b/src/mmu/tlb/tlbramline.sv @@ -26,8 +26,6 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module tlbramline #(parameter WIDTH = 22) (input logic clk, reset, input logic re, we, diff --git a/src/mmu/tlb/vm64check.sv b/src/mmu/tlb/vm64check.sv index 5f12eef7a..10ca759a0 100644 --- a/src/mmu/tlb/vm64check.sv +++ b/src/mmu/tlb/vm64check.sv @@ -28,15 +28,15 @@ `include "wally-config.vh" -module vm64check ( - input logic [`SVMODE_BITS-1:0] SATP_MODE, - input logic [`XLEN-1:0] VAdr, +module vm64check import cvw::*; #(parameter cvw_t P) ( + input logic [P.SVMODE_BITS-1:0] SATP_MODE, + input logic [P.XLEN-1:0] VAdr, output logic SV39Mode, output logic UpperBitsUnequal ); - if (`XLEN == 64) begin - assign SV39Mode = (SATP_MODE == `SV39); + if (P.XLEN == 64) begin + assign SV39Mode = (SATP_MODE == P.SV39); // page fault if upper bits aren't all the same logic eq_63_47, eq_46_38; From 60bcd3d21a9ee89bd6dfee057f0cb51d90463327 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 26 May 2023 10:47:09 -0500 Subject: [PATCH 09/20] Progress on LSU. --- src/lsu/amoalu.sv | 18 ++++++++---------- src/lsu/atomic.sv | 18 ++++++++---------- src/lsu/dtim.sv | 16 ++++++++-------- src/lsu/endianswap.sv | 2 +- src/lsu/lsu.sv | 4 ++-- 5 files changed, 27 insertions(+), 31 deletions(-) diff --git a/src/lsu/amoalu.sv b/src/lsu/amoalu.sv index d2670449a..524a4cf75 100644 --- a/src/lsu/amoalu.sv +++ b/src/lsu/amoalu.sv @@ -27,17 +27,15 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module amoalu ( - input logic [`XLEN-1:0] ReadDataM, // LSU's ReadData - input logic [`XLEN-1:0] IHWriteDataM, // LSU's WriteData +module amoalu import cvw::*; #(parameter cvw_t P) ( + input logic [P.XLEN-1:0] ReadDataM, // LSU's ReadData + input logic [P.XLEN-1:0] IHWriteDataM, // LSU's WriteData input logic [6:0] LSUFunct7M, // ALU Operation input logic [2:0] LSUFunct3M, // Memoy access width - output logic [`XLEN-1:0] AMOResultM // ALU output + output logic [P.XLEN-1:0] AMOResultM // ALU output ); - logic [`XLEN-1:0] a, b, y; + logic [P.XLEN-1:0] a, b, y; // *** see how synthesis generates this and optimize more structurally if necessary to share hardware // a single carry chain should be shared for + and the four min/max @@ -53,15 +51,15 @@ module amoalu ( 5'b10100: y = ($signed(a) >= $signed(b)) ? a : b; // amomax 5'b11000: y = ($unsigned(a) < $unsigned(b)) ? a : b; // amominu 5'b11100: y = ($unsigned(a) >= $unsigned(b)) ? a : b; // amomaxu - default: y = `XLEN'bx; // undefined; *** could change to b for efficiency + default: y = 'x; // undefined; *** could change to b for efficiency endcase // sign extend if necessary - if (`XLEN == 32) begin:sext + if (P.XLEN == 32) begin:sext assign a = ReadDataM; assign b = IHWriteDataM; assign AMOResultM = y; - end else begin:sext // `XLEN = 64 + end else begin:sext // P.XLEN = 64 always_comb if (LSUFunct3M[1:0] == 2'b10) begin // sign-extend word-length operations a = {{32{ReadDataM[31]}}, ReadDataM[31:0]}; diff --git a/src/lsu/atomic.sv b/src/lsu/atomic.sv index d33e85fe5..869cc2bb3 100644 --- a/src/lsu/atomic.sv +++ b/src/lsu/atomic.sv @@ -27,31 +27,29 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module atomic ( +module atomic import cvw::*; #(parameter cvw_t P) ( input logic clk, input logic reset, input logic StallW, - input logic [`XLEN-1:0] ReadDataM, // LSU ReadData XLEN because FPU does not issue atomic memory operation from FPU registers - input logic [`XLEN-1:0] IHWriteDataM, // LSU WriteData XLEN because FPU does not issue atomic memory operation from FPU registers - input logic [`PA_BITS-1:0] PAdrM, // Physical memory address + input logic [P.XLEN-1:0] ReadDataM, // LSU ReadData XLEN because FPU does not issue atomic memory operation from FPU registers + input logic [P.XLEN-1:0] IHWriteDataM, // LSU WriteData XLEN because FPU does not issue atomic memory operation from FPU registers + input logic [P.PA_BITS-1:0] PAdrM, // Physical memory address input logic [6:0] LSUFunct7M, // AMO alu operation gated by HPTW input logic [2:0] LSUFunct3M, // IEU or HPTW memory operation size input logic [1:0] LSUAtomicM, // 10: AMO operation, select AMOResultM as the writedata output, 01: LR/SC operation input logic [1:0] PreLSURWM, // IEU or HPTW Read/Write signal input logic IgnoreRequest, // On FlushM or TLB miss ignore memory operation - output logic [`XLEN-1:0] IMAWriteDataM, // IEU, HPTW, or AMO write data + output logic [P.XLEN-1:0] IMAWriteDataM, // IEU, HPTW, or AMO write data output logic SquashSCW, // Store conditional failed disable write to GPR output logic [1:0] LSURWM // IEU or HPTW Read/Write signal gated by LR/SC ); - logic [`XLEN-1:0] AMOResultM; + logic [P.XLEN-1:0] AMOResultM; logic MemReadM; - amoalu amoalu(.ReadDataM, .IHWriteDataM, .LSUFunct7M, .LSUFunct3M, .AMOResultM); + amoalu #(P) amoalu(.ReadDataM, .IHWriteDataM, .LSUFunct7M, .LSUFunct3M, .AMOResultM); - mux2 #(`XLEN) wdmux(IHWriteDataM, AMOResultM, LSUAtomicM[1], IMAWriteDataM); + mux2 #(P.XLEN) wdmux(IHWriteDataM, AMOResultM, LSUAtomicM[1], IMAWriteDataM); 
assign MemReadM = PreLSURWM[1] & ~IgnoreRequest; lrsc lrsc(.clk, .reset, .StallW, .MemReadM, .PreLSURWM, .LSUAtomicM, .PAdrM, .SquashSCW, .LSURWM); diff --git a/src/lsu/dtim.sv b/src/lsu/dtim.sv index 0107236d1..9df70fc5b 100644 --- a/src/lsu/dtim.sv +++ b/src/lsu/dtim.sv @@ -29,29 +29,29 @@ `include "wally-config.vh" -module dtim( +module dtim import cvw::*; #(parameter cvw_t P) ( input logic clk, input logic FlushW, input logic ce, // Chip Enable. 0: Holds ReadDataWordM input logic [1:0] MemRWM, // Read/Write control - input logic [`PA_BITS-1:0] DTIMAdr, // No stall: Execution stage memory address. Stall: Memory stage memory address - input logic [`LLEN-1:0] WriteDataM, // Write data from IEU - input logic [`LLEN/8-1:0] ByteMaskM, // Selects which bytes within a word to write - output logic [`LLEN-1:0] ReadDataWordM // Read data before subword selection + input logic [P.PA_BITS-1:0] DTIMAdr, // No stall: Execution stage memory address. Stall: Memory stage memory address + input logic [P.LLEN-1:0] WriteDataM, // Write data from IEU + input logic [P.LLEN/8-1:0] ByteMaskM, // Selects which bytes within a word to write + output logic [P.LLEN-1:0] ReadDataWordM // Read data before subword selection ); logic we; - localparam LLENBYTES = `LLEN/8; + localparam LLENBYTES = P.LLEN/8; // verilator lint_off WIDTH - localparam DEPTH = `DTIM_RANGE/LLENBYTES; + localparam DEPTH = P.DTIM_RANGE/LLENBYTES; // verilator lint_on WIDTH localparam ADDR_WDITH = $clog2(DEPTH); localparam OFFSET = $clog2(LLENBYTES); assign we = MemRWM[0] & ~FlushW; // have to ignore write if Trap. 
- ram1p1rwbe #(.DEPTH(DEPTH), .WIDTH(`LLEN)) + ram1p1rwbe #(.DEPTH(DEPTH), .WIDTH(P.LLEN)) ram(.clk, .ce, .we, .bwe(ByteMaskM), .addr(DTIMAdr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataWordM), .din(WriteDataM)); endmodule diff --git a/src/lsu/endianswap.sv b/src/lsu/endianswap.sv index 97846d97e..bdd56ed5b 100644 --- a/src/lsu/endianswap.sv +++ b/src/lsu/endianswap.sv @@ -29,7 +29,7 @@ `include "wally-config.vh" -module endianswap #(parameter LEN=`XLEN) ( +module endianswap #(parameter LEN) ( input logic BigEndianM, input logic [LEN-1:0] a, output logic [LEN-1:0] y diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 607d3571b..a2dcc1ac2 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -229,7 +229,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : '0; // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** create config to support DTIM with floating point. - dtim dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM), + dtim #(P) dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM), .DTIMAdr, .FlushW, .WriteDataM(LSUWriteDataM), .ReadDataWordM(DTIMReadDataWordM[P.XLEN-1:0]), .ByteMaskM(ByteMaskM[P.XLEN/8-1:0])); end else begin @@ -321,7 +321,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // Atomic operations ///////////////////////////////////////////////////////////////////////////////////////////// if (P.A_SUPPORTED) begin:atomic - atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[P.XLEN-1:0]), .IHWriteDataM, .PAdrM, + atomic #(P) atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[P.XLEN-1:0]), .IHWriteDataM, .PAdrM, .LSUFunct7M, .LSUFunct3M, .LSUAtomicM, .PreLSURWM, .IgnoreRequest, .IMAWriteDataM, .SquashSCW, .LSURWM); end else begin:lrsc From 3765ebfb9f7d47d63ac1abe7d5208ad84d575dee Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 26 May 2023 11:06:48 -0500 Subject: [PATCH 10/20] PMA checker's address decoder is now parameterized. 
I did not see bit slicing in Lim's code. I'm not sure how they got around this issue. --- src/mmu/adrdec.sv | 8 +++----- src/mmu/adrdecs.sv | 27 +++++++++++++-------------- src/mmu/mmu.sv | 2 +- src/mmu/pmachecker.sv | 6 +++--- src/uncore/uncore.sv | 4 ++-- src/wally/wallypipelinedsoc.sv | 2 +- 6 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/mmu/adrdec.sv b/src/mmu/adrdec.sv index ddb8423b6..d2d2a1740 100644 --- a/src/mmu/adrdec.sv +++ b/src/mmu/adrdec.sv @@ -26,11 +26,9 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module adrdec ( - input logic [`PA_BITS-1:0] PhysicalAddress, // Physical address to decode - input logic [`PA_BITS-1:0] Base, Range, // Base and range of peripheral addresses +module adrdec #(parameter PA_BITS) ( + input logic [PA_BITS-1:0] PhysicalAddress, // Physical address to decode + input logic [PA_BITS-1:0] Base, Range, // Base and range of peripheral addresses input logic Supported, // Is this peripheral supported? input logic AccessValid, // Is the access type valid? input logic [1:0] Size, // Size of access diff --git a/src/mmu/adrdecs.sv b/src/mmu/adrdecs.sv index b1832d8c9..feda0d40b 100644 --- a/src/mmu/adrdecs.sv +++ b/src/mmu/adrdecs.sv @@ -26,28 +26,27 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" // verilator lint_off UNOPTFLAT -module adrdecs ( - input logic [`PA_BITS-1:0] PhysicalAddress, +module adrdecs import cvw::*; #(parameter cvw_t P) ( + input logic [P.PA_BITS-1:0] PhysicalAddress, input logic AccessRW, AccessRX, AccessRWX, input logic [1:0] Size, output logic [10:0] SelRegions ); - localparam logic [3:0] SUPPORTED_SIZE = (`LLEN == 32 ? 4'b0111 : 4'b1111); + localparam logic [3:0] SUPPORTED_SIZE = (P.LLEN == 32 ? 
4'b0111 : 4'b1111); // Determine which region of physical memory (if any) is being accessed - adrdec dtimdec(PhysicalAddress, `DTIM_BASE, `DTIM_RANGE, `DTIM_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[10]); - adrdec iromdec(PhysicalAddress, `IROM_BASE, `IROM_RANGE, `IROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[9]); - adrdec ddr4dec(PhysicalAddress, `EXT_MEM_BASE, `EXT_MEM_RANGE, `EXT_MEM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[8]); - adrdec bootromdec(PhysicalAddress, `BOOTROM_BASE, `BOOTROM_RANGE, `BOOTROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[7]); - adrdec uncoreramdec(PhysicalAddress, `UNCORE_RAM_BASE, `UNCORE_RAM_RANGE, `UNCORE_RAM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[6]); - adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[5]); - adrdec gpiodec(PhysicalAddress, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[4]); - adrdec uartdec(PhysicalAddress, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[3]); - adrdec plicdec(PhysicalAddress, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[2]); - adrdec sdcdec(PhysicalAddress, `SDC_BASE, `SDC_RANGE, `SDC_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE & 4'b1100, SelRegions[1]); + adrdec #(P.PA_BITS) dtimdec(PhysicalAddress, P.DTIM_BASE[P.PA_BITS-1:0], P.DTIM_RANGE[P.PA_BITS-1:0], P.DTIM_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[10]); + adrdec #(P.PA_BITS) iromdec(PhysicalAddress, P.IROM_BASE[P.PA_BITS-1:0], P.IROM_RANGE[P.PA_BITS-1:0], P.IROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[9]); + adrdec #(P.PA_BITS) ddr4dec(PhysicalAddress, P.EXT_MEM_BASE[P.PA_BITS-1:0], P.EXT_MEM_RANGE[P.PA_BITS-1:0], P.EXT_MEM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[8]); + adrdec #(P.PA_BITS) bootromdec(PhysicalAddress, P.BOOTROM_BASE[P.PA_BITS-1:0], 
P.BOOTROM_RANGE[P.PA_BITS-1:0], P.BOOTROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[7]); + adrdec #(P.PA_BITS) uncoreramdec(PhysicalAddress, P.UNCORE_RAM_BASE[P.PA_BITS-1:0], P.UNCORE_RAM_RANGE[P.PA_BITS-1:0], P.UNCORE_RAM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[6]); + adrdec #(P.PA_BITS) clintdec(PhysicalAddress, P.CLINT_BASE[P.PA_BITS-1:0], P.CLINT_RANGE[P.PA_BITS-1:0], P.CLINT_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[5]); + adrdec #(P.PA_BITS) gpiodec(PhysicalAddress, P.GPIO_BASE[P.PA_BITS-1:0], P.GPIO_RANGE[P.PA_BITS-1:0], P.GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[4]); + adrdec #(P.PA_BITS) uartdec(PhysicalAddress, P.UART_BASE[P.PA_BITS-1:0], P.UART_RANGE[P.PA_BITS-1:0], P.UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[3]); + adrdec #(P.PA_BITS) plicdec(PhysicalAddress, P.PLIC_BASE[P.PA_BITS-1:0], P.PLIC_RANGE[P.PA_BITS-1:0], P.PLIC_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[2]); + adrdec #(P.PA_BITS) sdcdec(PhysicalAddress, P.SDC_BASE[P.PA_BITS-1:0], P.SDC_RANGE[P.PA_BITS-1:0], P.SDC_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE & 4'b1100, SelRegions[1]); assign SelRegions[0] = ~|(SelRegions[10:1]); // none of the regions are selected diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 636ca9584..f095d9986 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -102,7 +102,7 @@ module mmu import cvw::*; #(parameter cvw_t P, // Check physical memory accesses /////////////////////////////////////////// - pmachecker #(P.PA_BITS) pmachecker(.PhysicalAddress, .Size, + pmachecker #(P) pmachecker(.PhysicalAddress, .Size, .AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .Cacheable, .Idempotent, .SelTIM, .PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM); diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index bba1f7b6f..e39915a32 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -28,8 +28,8 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module pmachecker #(parameter PA_BITS) ( - input logic [PA_BITS-1:0] PhysicalAddress, +module pmachecker import cvw::*; #(parameter cvw_t P) ( + input logic [P.PA_BITS-1:0] PhysicalAddress, input logic [1:0] Size, input logic AtomicAccessM, // Atomic access input logic ExecuteAccessF, // Execute access @@ -52,7 +52,7 @@ module pmachecker #(parameter PA_BITS) ( assign AccessRX = ReadAccessM | ExecuteAccessF; // Determine which region of physical memory (if any) is being accessed - adrdecs adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWX, Size, SelRegions); + adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWX, Size, SelRegions); // Only non-core RAM/ROM memory regions are cacheable assign Cacheable = SelRegions[8] | SelRegions[7] | SelRegions[6]; // exclusion-tag: unused-cachable diff --git a/src/uncore/uncore.sv b/src/uncore/uncore.sv index 4f00a3da4..b39061a16 100644 --- a/src/uncore/uncore.sv +++ b/src/uncore/uncore.sv @@ -29,7 +29,7 @@ `include "wally-config.vh" -module uncore ( +module uncore import cvw::*; #(parameter cvw_t P)( // AHB Bus Interface input logic HCLK, HRESETn, input logic TIMECLK, @@ -87,7 +87,7 @@ module uncore ( // Determine which region of physical memory (if any) is being accessed // Use a trimmed down portion of the PMA checker - only the address decoders // Set access types to all 1 as don't cares because the MMU has already done access checking - adrdecs adrdecs(HADDR, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions); + adrdecs #(P) adrdecs(HADDR, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions); // unswizzle HSEL signals assign {HSELDTIM, HSELIROM, HSELEXT, HSELBootRom, HSELRam, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC, HSELSDC} = HSELRegions[10:1]; diff --git a/src/wally/wallypipelinedsoc.sv b/src/wally/wallypipelinedsoc.sv index bcb4c9523..519c715e8 100644 --- a/src/wally/wallypipelinedsoc.sv +++ b/src/wally/wallypipelinedsoc.sv 
@@ -85,7 +85,7 @@ module wallypipelinedsoc import cvw::*; ( // instantiate uncore if a bus interface exists if (BUS_SUPPORTED) begin : uncore - uncore uncore(.HCLK, .HRESETn, .TIMECLK, + uncore #(P) uncore(.HCLK, .HRESETn, .TIMECLK, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK, .HRDATAEXT, .HREADYEXT, .HRESPEXT, .HRDATA, .HREADY, .HRESP, .HSELEXT, .MTimerInt, .MSwInt, .MExtInt, .SExtInt, .GPIOIN, .GPIOOUT, .GPIOEN, .UARTSin, From 0c2a54540b775c661d82f5a47174b8a6410d6562 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 26 May 2023 11:22:44 -0500 Subject: [PATCH 11/20] Subwordread now parameterized. --- src/lsu/dtim.sv | 2 -- src/lsu/endianswap.sv | 2 -- src/lsu/lsu.sv | 2 +- src/lsu/subwordread.sv | 34 +++++++++++++++++----------------- 4 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/lsu/dtim.sv b/src/lsu/dtim.sv index 9df70fc5b..36b3af1a3 100644 --- a/src/lsu/dtim.sv +++ b/src/lsu/dtim.sv @@ -27,8 +27,6 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module dtim import cvw::*; #(parameter cvw_t P) ( input logic clk, input logic FlushW, diff --git a/src/lsu/endianswap.sv b/src/lsu/endianswap.sv index bdd56ed5b..551f08de4 100644 --- a/src/lsu/endianswap.sv +++ b/src/lsu/endianswap.sv @@ -27,8 +27,6 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module endianswap #(parameter LEN) ( input logic BigEndianM, input logic [LEN-1:0] a, diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index a2dcc1ac2..94b52d1af 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -335,7 +335,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// // Subword Accesses ///////////////////////////////////////////////////////////////////////////////////////////// - subwordread subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwrite subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); diff --git a/src/lsu/subwordread.sv b/src/lsu/subwordread.sv index 49cf2c099..063df6a27 100644 --- a/src/lsu/subwordread.sv +++ b/src/lsu/subwordread.sv @@ -29,14 +29,14 @@ `include "wally-config.vh" -module subwordread +module subwordread #(parameter LLEN) ( - input logic [`LLEN-1:0] ReadDataWordMuxM, + input logic [LLEN-1:0] ReadDataWordMuxM, input logic [2:0] PAdrM, input logic [2:0] Funct3M, input logic FpLoadStoreM, input logic BigEndianM, - output logic [`LLEN-1:0] ReadDataM + output logic [LLEN-1:0] ReadDataM ); logic [7:0] ByteM; @@ -46,7 +46,7 @@ module subwordread // Funct3M[1:0] is the size of the memory access. 
assign PAdrSwap = PAdrM ^ {3{BigEndianM}}; - if (`LLEN == 64) begin:swrmux + if (LLEN == 64) begin:swrmux // ByteMe mux always_comb case(PAdrSwap[2:0]) @@ -83,14 +83,14 @@ module subwordread // sign extension/ NaN boxing always_comb case(Funct3M) - 3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{`LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{`LLEN-32{1'b0}}, WordM[31:0]}; // lwu + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase @@ -114,12 +114,12 @@ module subwordread // sign extension always_comb case(Funct3M) - 3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw 3'b011: ReadDataM = ReadDataWordMuxM; // fld - 3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase end From dd7c7f0a39a7636897f7a5283b4b521737774936 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 26 May 2023 11:26:09 -0500 Subject: [PATCH 12/20] Completed LSU parameterization based on Lim's changes. 
--- src/lsu/atomic.sv | 2 +- src/lsu/lrsc.sv | 12 +++++------- src/lsu/lsu.sv | 2 +- src/lsu/subwordread.sv | 2 -- src/lsu/subwordwrite.sv | 12 +++++------- src/lsu/swbytemask.sv | 6 ++---- 6 files changed, 14 insertions(+), 22 deletions(-) diff --git a/src/lsu/atomic.sv b/src/lsu/atomic.sv index 869cc2bb3..5c2035699 100644 --- a/src/lsu/atomic.sv +++ b/src/lsu/atomic.sv @@ -52,6 +52,6 @@ module atomic import cvw::*; #(parameter cvw_t P) ( mux2 #(P.XLEN) wdmux(IHWriteDataM, AMOResultM, LSUAtomicM[1], IMAWriteDataM); assign MemReadM = PreLSURWM[1] & ~IgnoreRequest; - lrsc lrsc(.clk, .reset, .StallW, .MemReadM, .PreLSURWM, .LSUAtomicM, .PAdrM, .SquashSCW, .LSURWM); + lrsc #(P) lrsc(.clk, .reset, .StallW, .MemReadM, .PreLSURWM, .LSUAtomicM, .PAdrM, .SquashSCW, .LSURWM); endmodule diff --git a/src/lsu/lrsc.sv b/src/lsu/lrsc.sv index 5b4b37665..a1fbe6fdb 100644 --- a/src/lsu/lrsc.sv +++ b/src/lsu/lrsc.sv @@ -28,9 +28,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module lrsc( +module lrsc import cvw::*; #(parameter cvw_t P) ( input logic clk, input logic reset, input logic StallW, @@ -38,20 +36,20 @@ module lrsc( input logic [1:0] PreLSURWM, // Memory operation from the HPTW or IEU [1]: read, [0]: write output logic [1:0] LSURWM, // Memory operation after potential squash of SC input logic [1:0] LSUAtomicM, // Atomic memory operaiton - input logic [`PA_BITS-1:0] PAdrM, // Physical memory address + input logic [P.PA_BITS-1:0] PAdrM, // Physical memory address output logic SquashSCW // Squash the store conditional by not allowing rf write ); // possible bug: *** double check if PreLSURWM needs to be flushed by ignorerequest. 
// Handle atomic load reserved / store conditional - logic [`PA_BITS-1:2] ReservationPAdrW; + logic [P.PA_BITS-1:2] ReservationPAdrW; logic ReservationValidM, ReservationValidW; logic lrM, scM, WriteAdrMatchM; logic SquashSCM; assign lrM = MemReadM & LSUAtomicM[0]; assign scM = PreLSURWM[0] & LSUAtomicM[0]; - assign WriteAdrMatchM = PreLSURWM[0] & (PAdrM[`PA_BITS-1:2] == ReservationPAdrW) & ReservationValidW; + assign WriteAdrMatchM = PreLSURWM[0] & (PAdrM[P.PA_BITS-1:2] == ReservationPAdrW) & ReservationValidW; assign SquashSCM = scM & ~WriteAdrMatchM; assign LSURWM = SquashSCM ? 2'b00 : PreLSURWM; always_comb begin // ReservationValidM (next value of valid reservation) @@ -61,7 +59,7 @@ module lrsc( else ReservationValidM = ReservationValidW; // otherwise don't change valid end - flopenr #(`PA_BITS-2) resadrreg(clk, reset, lrM & ~StallW, PAdrM[`PA_BITS-1:2], ReservationPAdrW); // could drop clear on this one but not valid + flopenr #(P.PA_BITS-2) resadrreg(clk, reset, lrM & ~StallW, PAdrM[P.PA_BITS-1:2], ReservationPAdrW); // could drop clear on this one but not valid flopenr #(1) resvldreg(clk, reset, ~StallW, ReservationValidM, ReservationValidW); flopenr #(1) squashreg(clk, reset, ~StallW, SquashSCM, SquashSCW); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 94b52d1af..595f1eec4 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -337,7 +337,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwrite subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte masks swbytemask #(P.LLEN) swbytemask(.Size(LSUFunct3M), .Adr(PAdrM[$clog2(P.LLEN/8)-1:0]), 
.ByteMask(ByteMaskM)); diff --git a/src/lsu/subwordread.sv b/src/lsu/subwordread.sv index 063df6a27..4c529ec07 100644 --- a/src/lsu/subwordread.sv +++ b/src/lsu/subwordread.sv @@ -27,8 +27,6 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module subwordread #(parameter LLEN) ( input logic [LLEN-1:0] ReadDataWordMuxM, diff --git a/src/lsu/subwordwrite.sv b/src/lsu/subwordwrite.sv index ee26b78fd..f53f121e7 100644 --- a/src/lsu/subwordwrite.sv +++ b/src/lsu/subwordwrite.sv @@ -27,16 +27,14 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module subwordwrite ( +module subwordwrite #(parameter LLEN) ( input logic [2:0] LSUFunct3M, - input logic [`LLEN-1:0] IMAFWriteDataM, - output logic [`LLEN-1:0] LittleEndianWriteDataM + input logic [LLEN-1:0] IMAFWriteDataM, + output logic [LLEN-1:0] LittleEndianWriteDataM ); // Replicate data for subword writes - if (`LLEN == 128) begin:sww + if (LLEN == 128) begin:sww always_comb case(LSUFunct3M[2:0]) 3'b000: LittleEndianWriteDataM = {16{IMAFWriteDataM[7:0]}}; // sb @@ -45,7 +43,7 @@ module subwordwrite ( 3'b011: LittleEndianWriteDataM = {2{IMAFWriteDataM[63:0]}}; // sd default: LittleEndianWriteDataM = IMAFWriteDataM; // sq endcase - end else if (`LLEN == 64) begin:sww + end else if (LLEN == 64) begin:sww always_comb case(LSUFunct3M[1:0]) 2'b00: LittleEndianWriteDataM = {8{IMAFWriteDataM[7:0]}}; // sb diff --git a/src/lsu/swbytemask.sv b/src/lsu/swbytemask.sv index 17eedd4c4..51076fc7d 100644 --- a/src/lsu/swbytemask.sv +++ b/src/lsu/swbytemask.sv @@ -27,9 +27,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module swbytemask #(parameter WORDLEN = `XLEN)( +module swbytemask #(parameter WORDLEN)( input logic [2:0] Size, input logic [$clog2(WORDLEN/8)-1:0] Adr, output logic [WORDLEN/8-1:0] ByteMask @@ -39,7 +37,7 @@ module swbytemask #(parameter WORDLEN = `XLEN)( /* Equivalent to the following - if(`XLEN == 64) begin + if(WORDLEN == 64) begin always_comb begin case(Size[1:0]) 2'b00: begin ByteMask = 8'b00000000; ByteMask[Adr[2:0]] = 1; end // sb From d47951fb51920e9e5c0b3dd5e1ec404b366d567c Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 26 May 2023 12:03:46 -0500 Subject: [PATCH 13/20] The privileged unit is parameterized using Lim's method. --- src/privileged/csr.sv | 104 +++++++++++++++----------------- src/privileged/csrc.sv | 61 +++++++++---------- src/privileged/csri.sv | 10 ++- src/privileged/csrm.sv | 84 +++++++++++++------------- src/privileged/csrs.sv | 64 ++++++++++---------- src/privileged/csrsr.sv | 82 ++++++++++++------------- src/privileged/csru.sv | 14 ++--- src/privileged/privdec.sv | 20 +++--- src/privileged/privileged.sv | 28 ++++----- src/privileged/privmode.sv | 18 +++--- src/privileged/privpiperegs.sv | 2 - src/privileged/trap.sv | 12 ++-- src/wally/wallypipelinedcore.sv | 2 +- 13 files changed, 238 insertions(+), 263 deletions(-) diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 9f3ae89b9..6affbb5d3 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -28,18 +28,14 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module csr #(parameter - MIP = 12'h344, - SIP = 12'h144) ( +module csr import cvw::*; #(parameter cvw_t P, MIP = 12'h344, SIP = 12'h144) ( input logic clk, reset, input logic FlushM, FlushW, input logic StallE, StallM, StallW, input logic [31:0] InstrM, // current instruction input logic [31:0] InstrOrigM, // Original compressed or uncompressed instruction in Memory stage for Illegal Instruction MTVAL - input logic [`XLEN-1:0] PCM, PC2NextF, // program counter, next PC going to trap/return logic - input logic [`XLEN-1:0] SrcAM, IEUAdrM, // SrcA and memory address from IEU + input logic [P.XLEN-1:0] PCM, PC2NextF, // program counter, next PC going to trap/return logic + input logic [P.XLEN-1:0] SrcAM, IEUAdrM, // SrcA and memory address from IEU input logic CSRReadM, CSRWriteM, // read or write CSR input logic TrapM, // trap is occurring input logic mretM, sretM, wfiM, // return or WFI instruction @@ -80,48 +76,48 @@ module csr #(parameter output logic [1:0] STATUS_MPP, output logic STATUS_SPP, STATUS_TSR, STATUS_TVM, output logic [15:0] MEDELEG_REGW, - output logic [`XLEN-1:0] SATP_REGW, + output logic [P.XLEN-1:0] SATP_REGW, output logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW, output logic STATUS_MIE, STATUS_SIE, output logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, STATUS_TW, output logic [1:0] STATUS_FS, - output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], - output var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], + output var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], + output var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0], output logic [2:0] FRM_REGW, // - output logic [`XLEN-1:0] CSRReadValW, // value read from CSR - output logic [`XLEN-1:0] UnalignedPCNextF, // Next PC, accounting for traps and returns + output logic [P.XLEN-1:0] CSRReadValW, // value read from CSR + output logic 
[P.XLEN-1:0] UnalignedPCNextF, // Next PC, accounting for traps and returns output logic IllegalCSRAccessM, // Illegal CSR access: CSR doesn't exist or is inaccessible at this privilege level output logic BigEndianM // memory access is big-endian based on privilege mode and STATUS register endian fields ); - logic [`XLEN-1:0] CSRMReadValM, CSRSReadValM, CSRUReadValM, CSRCReadValM; - logic [`XLEN-1:0] CSRReadValM; - logic [`XLEN-1:0] CSRSrcM; - logic [`XLEN-1:0] CSRRWM, CSRRSM, CSRRCM; - logic [`XLEN-1:0] CSRWriteValM; - logic [`XLEN-1:0] MSTATUS_REGW, SSTATUS_REGW, MSTATUSH_REGW; - logic [`XLEN-1:0] STVEC_REGW, MTVEC_REGW; - logic [`XLEN-1:0] MEPC_REGW, SEPC_REGW; + logic [P.XLEN-1:0] CSRMReadValM, CSRSReadValM, CSRUReadValM, CSRCReadValM; + logic [P.XLEN-1:0] CSRReadValM; + logic [P.XLEN-1:0] CSRSrcM; + logic [P.XLEN-1:0] CSRRWM, CSRRSM, CSRRCM; + logic [P.XLEN-1:0] CSRWriteValM; + logic [P.XLEN-1:0] MSTATUS_REGW, SSTATUS_REGW, MSTATUSH_REGW; + logic [P.XLEN-1:0] STVEC_REGW, MTVEC_REGW; + logic [P.XLEN-1:0] MEPC_REGW, SEPC_REGW; logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW; logic WriteMSTATUSM, WriteMSTATUSHM, WriteSSTATUSM; logic CSRMWriteM, CSRSWriteM, CSRUWriteM; logic UngatedCSRMWriteM; logic WriteFRMM, WriteFFLAGSM; - logic [`XLEN-1:0] UnalignedNextEPCM, NextEPCM, NextMtvalM; + logic [P.XLEN-1:0] UnalignedNextEPCM, NextEPCM, NextMtvalM; logic [4:0] NextCauseM; logic [11:0] CSRAdrM; logic IllegalCSRCAccessM, IllegalCSRMAccessM, IllegalCSRSAccessM, IllegalCSRUAccessM; logic InsufficientCSRPrivilegeM; logic IllegalCSRMWriteReadonlyM; - logic [`XLEN-1:0] CSRReadVal2M; + logic [P.XLEN-1:0] CSRReadVal2M; logic [11:0] MIP_REGW_writeable; - logic [`XLEN-1:0] TVecM, TrapVectorM, NextFaultMtvalM; + logic [P.XLEN-1:0] TVecM, TrapVectorM, NextFaultMtvalM; logic MTrapM, STrapM; - logic [`XLEN-1:0] EPC; + logic [P.XLEN-1:0] EPC; logic RetM; logic SelMtvecM; - logic [`XLEN-1:0] TVecAlignedM; + logic [P.XLEN-1:0] TVecAlignedM; logic 
InstrValidNotFlushedM; logic STimerInt; @@ -136,7 +132,7 @@ module csr #(parameter if (InterruptM) NextFaultMtvalM = 0; else case (CauseM) 12, 1, 3: NextFaultMtvalM = PCM; // Instruction page/access faults, breakpoint - 2: NextFaultMtvalM = {{(`XLEN-32){1'b0}}, InstrOrigM}; // Illegal instruction fault + 2: NextFaultMtvalM = {{(P.XLEN-32){1'b0}}, InstrOrigM}; // Illegal instruction fault 0, 4, 6, 13, 15, 5, 7: NextFaultMtvalM = IEUAdrM; // Instruction misaligned, Load/Store Misaligned/page/access faults default: NextFaultMtvalM = 0; // Ecall, interrupts endcase @@ -146,17 +142,17 @@ module csr #(parameter /////////////////////////////////////////// // Select trap vector from STVEC or MTVEC and word-align - assign SelMtvecM = (NextPrivilegeModeM == `M_MODE); - mux2 #(`XLEN) tvecmux(STVEC_REGW, MTVEC_REGW, SelMtvecM, TVecM); - assign TVecAlignedM = {TVecM[`XLEN-1:2], 2'b00}; + assign SelMtvecM = (NextPrivilegeModeM == P.M_MODE); + mux2 #(P.XLEN) tvecmux(STVEC_REGW, MTVEC_REGW, SelMtvecM, TVecM); + assign TVecAlignedM = {TVecM[P.XLEN-1:2], 2'b00}; // Support vectored interrupts - if(`VECTORED_INTERRUPTS_SUPPORTED) begin:vec + if(P.VECTORED_INTERRUPTS_SUPPORTED) begin:vec logic VectoredM; - logic [`XLEN-1:0] TVecPlusCauseM; + logic [P.XLEN-1:0] TVecPlusCauseM; assign VectoredM = InterruptM & (TVecM[1:0] == 2'b01); - assign TVecPlusCauseM = {TVecAlignedM[`XLEN-1:6], CauseM, 2'b00}; // 64-byte alignment allows concatenation rather than addition - mux2 #(`XLEN) trapvecmux(TVecAlignedM, TVecPlusCauseM, VectoredM, TrapVectorM); + assign TVecPlusCauseM = {TVecAlignedM[P.XLEN-1:6], CauseM, 2'b00}; // 64-byte alignment allows concatenation rather than addition + mux2 #(P.XLEN) trapvecmux(TVecAlignedM, TVecPlusCauseM, VectoredM, TrapVectorM); end else assign TrapVectorM = TVecAlignedM; // unvectored interrupt handler can be at any word-aligned address. 
This is called Sstvecd @@ -164,8 +160,8 @@ module csr #(parameter // A trap sets the PC to TrapVector // A return sets the PC to MEPC or SEPC assign RetM = mretM | sretM; - mux2 #(`XLEN) epcmux(SEPC_REGW, MEPC_REGW, mretM, EPC); - mux3 #(`XLEN) pcmux3(PC2NextF, EPC, TrapVectorM, {TrapM, RetM}, UnalignedPCNextF); + mux2 #(P.XLEN) epcmux(SEPC_REGW, MEPC_REGW, mretM, EPC); + mux3 #(P.XLEN) pcmux3(PC2NextF, EPC, TrapVectorM, {TrapM, RetM}, UnalignedPCNextF); /////////////////////////////////////////// // CSRWriteValM @@ -173,10 +169,10 @@ module csr #(parameter always_comb begin // Choose either rs1 or uimm[4:0] as source - CSRSrcM = InstrM[14] ? {{(`XLEN-5){1'b0}}, InstrM[19:15]} : SrcAM; + CSRSrcM = InstrM[14] ? {{(P.XLEN-5){1'b0}}, InstrM[19:15]} : SrcAM; // CSR set and clear for MIP/SIP should only touch internal state, not interrupt inputs - if (CSRAdrM == MIP | CSRAdrM == SIP) CSRReadVal2M = {{(`XLEN-12){1'b0}}, MIP_REGW_writeable}; + if (CSRAdrM == MIP | CSRAdrM == SIP) CSRReadVal2M = {{(P.XLEN-12){1'b0}}, MIP_REGW_writeable}; else CSRReadVal2M = CSRReadValM; // Compute AND/OR modification @@ -197,26 +193,26 @@ module csr #(parameter assign CSRAdrM = InstrM[31:20]; assign UnalignedNextEPCM = TrapM ? ((wfiM & IntPendingM) ? PCM+4 : PCM) : CSRWriteValM; - assign NextEPCM = `C_SUPPORTED ? {UnalignedNextEPCM[`XLEN-1:1], 1'b0} : {UnalignedNextEPCM[`XLEN-1:2], 2'b00}; // 3.1.15 alignment - assign NextCauseM = TrapM ? {InterruptM, CauseM}: {CSRWriteValM[`XLEN-1], CSRWriteValM[3:0]}; + assign NextEPCM = P.C_SUPPORTED ? {UnalignedNextEPCM[P.XLEN-1:1], 1'b0} : {UnalignedNextEPCM[P.XLEN-1:2], 2'b00}; // 3.1.15 alignment + assign NextCauseM = TrapM ? {InterruptM, CauseM}: {CSRWriteValM[P.XLEN-1], CSRWriteValM[3:0]}; assign NextMtvalM = TrapM ? 
NextFaultMtvalM : CSRWriteValM; - assign UngatedCSRMWriteM = CSRWriteM & (PrivilegeModeW == `M_MODE); + assign UngatedCSRMWriteM = CSRWriteM & (PrivilegeModeW == P.M_MODE); assign CSRMWriteM = UngatedCSRMWriteM & InstrValidNotFlushedM; assign CSRSWriteM = CSRWriteM & (|PrivilegeModeW) & InstrValidNotFlushedM; assign CSRUWriteM = CSRWriteM & InstrValidNotFlushedM; - assign MTrapM = TrapM & (NextPrivilegeModeM == `M_MODE); - assign STrapM = TrapM & (NextPrivilegeModeM == `S_MODE) & `S_SUPPORTED; + assign MTrapM = TrapM & (NextPrivilegeModeM == P.M_MODE); + assign STrapM = TrapM & (NextPrivilegeModeM == P.S_MODE) & P.S_SUPPORTED; /////////////////////////////////////////// // CSRs /////////////////////////////////////////// - csri csri(.clk, .reset, + csri #(P) csri(.clk, .reset, .CSRMWriteM, .CSRSWriteM, .CSRWriteValM, .CSRAdrM, .MExtInt, .SExtInt, .MTimerInt, .STimerInt, .MSwInt, .MIDELEG_REGW, .MIP_REGW, .MIE_REGW, .MIP_REGW_writeable); - csrsr csrsr(.clk, .reset, .StallW, + csrsr #(P) csrsr(.clk, .reset, .StallW, .WriteMSTATUSM, .WriteMSTATUSHM, .WriteSSTATUSM, .TrapM, .FRegWriteM, .NextPrivilegeModeM, .PrivilegeModeW, .mretM, .sretM, .WriteFRMM, .WriteFFLAGSM, .CSRWriteValM, .SelHPTW, @@ -225,7 +221,7 @@ module csr #(parameter .STATUS_MIE, .STATUS_SIE, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TVM, .STATUS_FS, .BigEndianM); - csrm csrm(.clk, .reset, + csrm #(P) csrm(.clk, .reset, .UngatedCSRMWriteM, .CSRMWriteM, .MTrapM, .CSRAdrM, .NextEPCM, .NextCauseM, .NextMtvalM, .MSTATUS_REGW, .MSTATUSH_REGW, .CSRWriteValM, .CSRMReadValM, .MTVEC_REGW, @@ -235,8 +231,8 @@ module csr #(parameter .IllegalCSRMAccessM, .IllegalCSRMWriteReadonlyM); - if (`S_SUPPORTED) begin:csrs - csrs csrs(.clk, .reset, + if (P.S_SUPPORTED) begin:csrs + csrs #(P) csrs(.clk, .reset, .CSRSWriteM, .STrapM, .CSRAdrM, .NextEPCM, .NextCauseM, .NextMtvalM, .SSTATUS_REGW, .STATUS_TVM, .MCOUNTEREN_TM(MCOUNTEREN_REGW[1]), @@ -256,8 +252,8 @@ module csr #(parameter end // Floating Point CSRs in User 
Mode only needed if Floating Point is supported - if (`F_SUPPORTED | `D_SUPPORTED) begin:csru - csru csru(.clk, .reset, .InstrValidNotFlushedM, + if (P.F_SUPPORTED | P.D_SUPPORTED) begin:csru + csru #(P) csru(.clk, .reset, .InstrValidNotFlushedM, .CSRUWriteM, .CSRAdrM, .CSRWriteValM, .STATUS_FS, .CSRUReadValM, .SetFflagsM, .FRM_REGW, .WriteFRMM, .WriteFFLAGSM, .IllegalCSRUAccessM); @@ -267,8 +263,8 @@ module csr #(parameter assign IllegalCSRUAccessM = 1; end - if (`ZICOUNTERS_SUPPORTED) begin:counters - csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, + if (P.ZICOUNTERS_SUPPORTED) begin:counters + csrc #(P) counters(.clk, .reset, .StallE, .StallM, .FlushM, .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM, @@ -283,11 +279,11 @@ module csr #(parameter // merge CSR Reads assign CSRReadValM = CSRUReadValM | CSRSReadValM | CSRMReadValM | CSRCReadValM; - flopenrc #(`XLEN) CSRValWReg(clk, reset, FlushW, ~StallW, CSRReadValM, CSRReadValW); + flopenrc #(P.XLEN) CSRValWReg(clk, reset, FlushW, ~StallW, CSRReadValM, CSRReadValW); // merge illegal accesses: illegal if none of the CSR addresses is legal or privilege is insufficient - assign InsufficientCSRPrivilegeM = (CSRAdrM[9:8] == 2'b11 & PrivilegeModeW != `M_MODE) | - (CSRAdrM[9:8] == 2'b01 & PrivilegeModeW == `U_MODE); + assign InsufficientCSRPrivilegeM = (CSRAdrM[9:8] == 2'b11 & PrivilegeModeW != P.M_MODE) | + (CSRAdrM[9:8] == 2'b01 & PrivilegeModeW == P.U_MODE); assign IllegalCSRAccessM = ((IllegalCSRCAccessM & IllegalCSRMAccessM & IllegalCSRSAccessM & IllegalCSRUAccessM | InsufficientCSRPrivilegeM) & CSRReadM) | IllegalCSRMWriteReadonlyM; diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 8c8842c6c..7dddd9d35 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -30,8 +30,7 @@ // and limitations under the 
License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" -module csrc #(parameter +module csrc import cvw::*; #(parameter cvw_t P, MHPMCOUNTERBASE = 12'hB00, MTIME = 12'hB01, // this is a memory-mapped register; no such CSR exists, and access should fault MHPMCOUNTERHBASE = 12'hB80, @@ -67,22 +66,22 @@ module csrc #(parameter input logic FDivBusyE, // floating point divide busy input logic [11:0] CSRAdrM, input logic [1:0] PrivilegeModeW, - input logic [`XLEN-1:0] CSRWriteValM, + input logic [P.XLEN-1:0] CSRWriteValM, input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, input logic [63:0] MTIME_CLINT, - output logic [`XLEN-1:0] CSRCReadValM, + output logic [P.XLEN-1:0] CSRCReadValM, output logic IllegalCSRCAccessM ); logic [4:0] CounterNumM; - logic [`XLEN-1:0] HPMCOUNTER_REGW[`COUNTERS-1:0]; - logic [`XLEN-1:0] HPMCOUNTERH_REGW[`COUNTERS-1:0]; + logic [P.XLEN-1:0] HPMCOUNTER_REGW[P.COUNTERS-1:0]; + logic [P.XLEN-1:0] HPMCOUNTERH_REGW[P.COUNTERS-1:0]; logic LoadStallE, LoadStallM; logic StoreStallE, StoreStallM; - logic [`COUNTERS-1:0] WriteHPMCOUNTERM; - logic [`COUNTERS-1:0] CounterEvent; - logic [63:0] HPMCOUNTERPlusM[`COUNTERS-1:0]; - logic [`XLEN-1:0] NextHPMCOUNTERM[`COUNTERS-1:0]; + logic [P.COUNTERS-1:0] WriteHPMCOUNTERM; + logic [P.COUNTERS-1:0] CounterEvent; + logic [63:0] HPMCOUNTERPlusM[P.COUNTERS-1:0]; + logic [P.XLEN-1:0] NextHPMCOUNTERM[P.COUNTERS-1:0]; genvar i; // Interface signals @@ -93,8 +92,8 @@ module csrc #(parameter assign CounterEvent[0] = 1'b1; // MCYCLE always increments assign CounterEvent[1] = 1'b0; // Counter 1 doesn't exist assign CounterEvent[2] = InstrValidNotFlushedM; // MINSTRET instructions retired - if(`QEMU) begin: cevent // No other performance counters in QEMU - assign CounterEvent[`COUNTERS-1:3] = 0; + if(P.QEMU) begin: cevent // No other performance counters in QEMU + assign CounterEvent[P.COUNTERS-1:3] = 0; end else begin: 
cevent // User-defined counters assign CounterEvent[3] = InstrClassM[0] & InstrValidNotFlushedM; // branch instruction assign CounterEvent[4] = InstrClassM[1] & ~InstrClassM[2] & InstrValidNotFlushedM; // jump and not return instructions @@ -121,26 +120,26 @@ module csrc #(parameter // DivBusyE will never be assert high since this configuration uses the FPU to do integer division assign CounterEvent[24] = DivBusyE | FDivBusyE; // division cycles *** RT: might need to be delay until the next cycle // coverage on - assign CounterEvent[`COUNTERS-1:25] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions + assign CounterEvent[P.COUNTERS-1:25] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end // Counter update and write logic - for (i = 0; i < `COUNTERS; i = i+1) begin:cntr + for (i = 0; i < P.COUNTERS; i = i+1) begin:cntr assign WriteHPMCOUNTERM[i] = CSRMWriteM & (CSRAdrM == MHPMCOUNTERBASE + i); - assign NextHPMCOUNTERM[i][`XLEN-1:0] = WriteHPMCOUNTERM[i] ? CSRWriteValM : HPMCOUNTERPlusM[i][`XLEN-1:0]; + assign NextHPMCOUNTERM[i][P.XLEN-1:0] = WriteHPMCOUNTERM[i] ? 
CSRWriteValM : HPMCOUNTERPlusM[i][P.XLEN-1:0]; always_ff @(posedge clk) //, posedge reset) // ModelSim doesn't like syntax of passing array element to flop - if (reset) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 0; - else HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERM[i]; + if (reset) HPMCOUNTER_REGW[i][P.XLEN-1:0] <= #1 0; + else HPMCOUNTER_REGW[i][P.XLEN-1:0] <= #1 NextHPMCOUNTERM[i]; - if (`XLEN==32) begin // write high and low separately - logic [`COUNTERS-1:0] WriteHPMCOUNTERHM; - logic [`XLEN-1:0] NextHPMCOUNTERHM[`COUNTERS-1:0]; + if (P.XLEN==32) begin // write high and low separately + logic [P.COUNTERS-1:0] WriteHPMCOUNTERHM; + logic [P.XLEN-1:0] NextHPMCOUNTERHM[P.COUNTERS-1:0]; assign HPMCOUNTERPlusM[i] = {HPMCOUNTERH_REGW[i], HPMCOUNTER_REGW[i]} + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; assign WriteHPMCOUNTERHM[i] = CSRMWriteM & (CSRAdrM == MHPMCOUNTERHBASE + i); assign NextHPMCOUNTERHM[i] = WriteHPMCOUNTERHM[i] ? CSRWriteValM : HPMCOUNTERPlusM[i][63:32]; always_ff @(posedge clk) //, posedge reset) // ModelSim doesn't like syntax of passing array element to flop - if (reset) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 0; - else HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERHM[i]; + if (reset) HPMCOUNTERH_REGW[i][P.XLEN-1:0] <= #1 0; + else HPMCOUNTERH_REGW[i][P.XLEN-1:0] <= #1 NextHPMCOUNTERHM[i]; end else begin // XLEN=64; write entire register assign HPMCOUNTERPlusM[i] = HPMCOUNTER_REGW[i] + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; end @@ -149,17 +148,17 @@ module csrc #(parameter // Read Counters, or cause excepiton if insufficient privilege in light of COUNTEREN flags assign CounterNumM = CSRAdrM[4:0]; // which counter to read? 
always_comb - if (PrivilegeModeW == `M_MODE | - MCOUNTEREN_REGW[CounterNumM] & (!`S_SUPPORTED | PrivilegeModeW == `S_MODE | SCOUNTEREN_REGW[CounterNumM])) begin + if (PrivilegeModeW == P.M_MODE | + MCOUNTEREN_REGW[CounterNumM] & (!P.S_SUPPORTED | PrivilegeModeW == P.S_MODE | SCOUNTEREN_REGW[CounterNumM])) begin IllegalCSRCAccessM = 0; - if (`XLEN==64) begin // 64-bit counter reads + if (P.XLEN==64) begin // 64-bit counter reads // Veri lator doesn't realize this only occurs for XLEN=64 /* verilator lint_off WIDTH */ if (CSRAdrM == TIME) CSRCReadValM = MTIME_CLINT; // TIME register is a shadow of the memory-mapped MTIME from the CLINT /* verilator lint_on WIDTH */ - else if (CSRAdrM >= MHPMCOUNTERBASE & CSRAdrM < MHPMCOUNTERBASE+`COUNTERS & CSRAdrM != MTIME) + else if (CSRAdrM >= MHPMCOUNTERBASE & CSRAdrM < MHPMCOUNTERBASE+P.COUNTERS & CSRAdrM != MTIME) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; - else if (CSRAdrM >= HPMCOUNTERBASE & CSRAdrM < HPMCOUNTERBASE+`COUNTERS) + else if (CSRAdrM >= HPMCOUNTERBASE & CSRAdrM < HPMCOUNTERBASE+P.COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; else begin CSRCReadValM = 0; @@ -171,13 +170,13 @@ module csrc #(parameter if (CSRAdrM == TIME) CSRCReadValM = MTIME_CLINT[31:0];// TIME register is a shadow of the memory-mapped MTIME from the CLINT else if (CSRAdrM == TIMEH) CSRCReadValM = MTIME_CLINT[63:32]; /* verilator lint_on WIDTH */ - else if (CSRAdrM >= MHPMCOUNTERBASE & CSRAdrM < MHPMCOUNTERBASE+`COUNTERS & CSRAdrM != MTIME) + else if (CSRAdrM >= MHPMCOUNTERBASE & CSRAdrM < MHPMCOUNTERBASE+P.COUNTERS & CSRAdrM != MTIME) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; - else if (CSRAdrM >= HPMCOUNTERBASE & CSRAdrM < HPMCOUNTERBASE+`COUNTERS) + else if (CSRAdrM >= HPMCOUNTERBASE & CSRAdrM < HPMCOUNTERBASE+P.COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; - else if (CSRAdrM >= MHPMCOUNTERHBASE & CSRAdrM < MHPMCOUNTERHBASE+`COUNTERS & CSRAdrM != MTIMEH) + else if (CSRAdrM >= MHPMCOUNTERHBASE & CSRAdrM < 
MHPMCOUNTERHBASE+P.COUNTERS & CSRAdrM != MTIMEH) CSRCReadValM = HPMCOUNTERH_REGW[CounterNumM]; - else if (CSRAdrM >= HPMCOUNTERHBASE & CSRAdrM < HPMCOUNTERHBASE+`COUNTERS) + else if (CSRAdrM >= HPMCOUNTERHBASE & CSRAdrM < HPMCOUNTERHBASE+P.COUNTERS) CSRCReadValM = HPMCOUNTERH_REGW[CounterNumM]; else begin CSRCReadValM = 0; diff --git a/src/privileged/csri.sv b/src/privileged/csri.sv index 0a62b2174..d50c65eee 100644 --- a/src/privileged/csri.sv +++ b/src/privileged/csri.sv @@ -27,16 +27,14 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module csri #(parameter +module csri import cvw::*; #(parameter cvw_t P, MIE = 12'h304, MIP = 12'h344, SIE = 12'h104, SIP = 12'h144) ( input logic clk, reset, input logic CSRMWriteM, CSRSWriteM, - input logic [`XLEN-1:0] CSRWriteValM, + input logic [P.XLEN-1:0] CSRWriteValM, input logic [11:0] CSRAdrM, input logic MExtInt, SExtInt, MTimerInt, STimerInt, MSwInt, input logic [11:0] MIDELEG_REGW, @@ -58,8 +56,8 @@ module csri #(parameter // MEIP, MTIP, MSIP are read-only // SEIP, STIP, SSIP is writable in MIP if S mode exists // SSIP is writable in SIP if S mode exists - if (`S_SUPPORTED) begin:mask - if (`SSTC_SUPPORTED) begin + if (P.S_SUPPORTED) begin:mask + if (P.SSTC_SUPPORTED) begin assign MIP_WRITE_MASK = 12'h202; // SEIP and SSIP are writable, but STIP is not writable when STIMECMP is implemented (see SSTC spec) assign STIP = STimerInt; end else begin diff --git a/src/privileged/csrm.sv b/src/privileged/csrm.sv index fb519be37..ab9d41683 100644 --- a/src/privileged/csrm.sv +++ b/src/privileged/csrm.sv @@ -31,9 +31,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module csrm #(parameter +module csrm import cvw::*; #(parameter cvw_t P, // Machine CSRs MVENDORID = 12'hF11, MARCHID = 12'hF12, @@ -68,52 +66,52 @@ module csrm #(parameter DSCRATCH0 = 12'h7B2, DSCRATCH1 = 12'h7B3, // Constants - ZERO = {(`XLEN){1'b0}}, + ZERO = {(P.XLEN){1'b0}}, MEDELEG_MASK = 16'hB3FF, MIDELEG_MASK = 12'h222 // we choose to not make machine interrupts delegable ) ( input logic clk, reset, input logic UngatedCSRMWriteM, CSRMWriteM, MTrapM, input logic [11:0] CSRAdrM, - input logic [`XLEN-1:0] NextEPCM, NextMtvalM, MSTATUS_REGW, MSTATUSH_REGW, + input logic [P.XLEN-1:0] NextEPCM, NextMtvalM, MSTATUS_REGW, MSTATUSH_REGW, input logic [4:0] NextCauseM, - input logic [`XLEN-1:0] CSRWriteValM, + input logic [P.XLEN-1:0] CSRWriteValM, input logic [11:0] MIP_REGW, MIE_REGW, - output logic [`XLEN-1:0] CSRMReadValM, MTVEC_REGW, - output logic [`XLEN-1:0] MEPC_REGW, + output logic [P.XLEN-1:0] CSRMReadValM, MTVEC_REGW, + output logic [P.XLEN-1:0] MEPC_REGW, output logic [31:0] MCOUNTEREN_REGW, MCOUNTINHIBIT_REGW, output logic [15:0] MEDELEG_REGW, output logic [11:0] MIDELEG_REGW, - output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], - output var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], + output var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], + output var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW [P.PMP_ENTRIES-1:0], output logic WriteMSTATUSM, WriteMSTATUSHM, output logic IllegalCSRMAccessM, IllegalCSRMWriteReadonlyM ); - logic [`XLEN-1:0] MISA_REGW, MHARTID_REGW; - logic [`XLEN-1:0] MSCRATCH_REGW, MTVAL_REGW, MCAUSE_REGW; + logic [P.XLEN-1:0] MISA_REGW, MHARTID_REGW; + logic [P.XLEN-1:0] MSCRATCH_REGW, MTVAL_REGW, MCAUSE_REGW; logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM; logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM; logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM; // There are 
PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop genvar i; - if (`PMP_ENTRIES > 0) begin:pmp - logic [`PMP_ENTRIES-1:0] WritePMPCFGM; - logic [`PMP_ENTRIES-1:0] WritePMPADDRM ; - logic [`PMP_ENTRIES-1:0] ADDRLocked, CFGLocked; - for(i=0; i<`PMP_ENTRIES; i++) begin + if (P.PMP_ENTRIES > 0) begin:pmp + logic [P.PMP_ENTRIES-1:0] WritePMPCFGM; + logic [P.PMP_ENTRIES-1:0] WritePMPADDRM ; + logic [P.PMP_ENTRIES-1:0] ADDRLocked, CFGLocked; + for(i=0; i= PMPCFG0 & CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4 & (`XLEN==32 | CSRAdrM[0] == 0)) begin + IllegalCSRMAccessM = !(P.S_SUPPORTED) & (CSRAdrM == MEDELEG | CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode + if (CSRAdrM >= PMPADDR0 & CSRAdrM < PMPADDR0 + P.PMP_ENTRIES) // reading a PMP entry + CSRMReadValM = {{(P.XLEN-(P.PA_BITS-2)){1'b0}}, PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0]}; + else if (CSRAdrM >= PMPCFG0 & CSRAdrM < PMPCFG0 + P.PMP_ENTRIES/4 & (P.XLEN==32 | CSRAdrM[0] == 0)) begin // only odd-numbered PMPCFG entries exist in RV64 - if (`XLEN==64) begin + if (P.XLEN==64) begin entry = ({CSRAdrM[11:1], 1'b0} - PMPCFG0)*4; // disregard odd entries in RV64 CSRMReadValM = {PMPCFG_ARRAY_REGW[entry+7],PMPCFG_ARRAY_REGW[entry+6],PMPCFG_ARRAY_REGW[entry+5],PMPCFG_ARRAY_REGW[entry+4], PMPCFG_ARRAY_REGW[entry+3],PMPCFG_ARRAY_REGW[entry+2],PMPCFG_ARRAY_REGW[entry+1],PMPCFG_ARRAY_REGW[entry]}; @@ -186,23 +184,23 @@ module csrm #(parameter MISA_ADR: CSRMReadValM = MISA_REGW; MVENDORID: CSRMReadValM = 0; MARCHID: CSRMReadValM = 0; - MIMPID: CSRMReadValM = `XLEN'h100; // pipelined implementation + MIMPID: CSRMReadValM = {{P.XLEN-12{1'b0}}, 12'h100}; // pipelined implementation MHARTID: CSRMReadValM = MHARTID_REGW; // hardwired to 0 MCONFIGPTR: CSRMReadValM = 0; // hardwired to 0 MSTATUS: CSRMReadValM = MSTATUS_REGW; MSTATUSH: CSRMReadValM = MSTATUSH_REGW; MTVEC: CSRMReadValM = MTVEC_REGW; - MEDELEG: CSRMReadValM = {{(`XLEN-16){1'b0}}, MEDELEG_REGW}; - MIDELEG: CSRMReadValM = 
{{(`XLEN-12){1'b0}}, MIDELEG_REGW}; - MIP: CSRMReadValM = {{(`XLEN-12){1'b0}}, MIP_REGW}; - MIE: CSRMReadValM = {{(`XLEN-12){1'b0}}, MIE_REGW}; + MEDELEG: CSRMReadValM = {{(P.XLEN-16){1'b0}}, MEDELEG_REGW}; + MIDELEG: CSRMReadValM = {{(P.XLEN-12){1'b0}}, MIDELEG_REGW}; + MIP: CSRMReadValM = {{(P.XLEN-12){1'b0}}, MIP_REGW}; + MIE: CSRMReadValM = {{(P.XLEN-12){1'b0}}, MIE_REGW}; MSCRATCH: CSRMReadValM = MSCRATCH_REGW; MEPC: CSRMReadValM = MEPC_REGW; MCAUSE: CSRMReadValM = MCAUSE_REGW; MTVAL: CSRMReadValM = MTVAL_REGW; MTINST: CSRMReadValM = 0; // implemented as trivial zero - MCOUNTEREN:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTEREN_REGW}; - MCOUNTINHIBIT:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTINHIBIT_REGW}; + MCOUNTEREN:CSRMReadValM = {{(P.XLEN-32){1'b0}}, MCOUNTEREN_REGW}; + MCOUNTINHIBIT:CSRMReadValM = {{(P.XLEN-32){1'b0}}, MCOUNTINHIBIT_REGW}; default: begin CSRMReadValM = 0; diff --git a/src/privileged/csrs.sv b/src/privileged/csrs.sv index e085232a6..945a13c3b 100644 --- a/src/privileged/csrs.sv +++ b/src/privileged/csrs.sv @@ -28,9 +28,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module csrs #(parameter +module csrs import cvw::*; #(parameter cvw_t P, // Supervisor CSRs SSTATUS = 12'h100, SIE = 12'h104, @@ -47,16 +45,16 @@ module csrs #(parameter input logic clk, reset, input logic CSRSWriteM, STrapM, input logic [11:0] CSRAdrM, - input logic [`XLEN-1:0] NextEPCM, NextMtvalM, SSTATUS_REGW, + input logic [P.XLEN-1:0] NextEPCM, NextMtvalM, SSTATUS_REGW, input logic [4:0] NextCauseM, input logic STATUS_TVM, input logic MCOUNTEREN_TM, // TM bit (1) of MCOUNTEREN; cause illegal instruction when trying to access STIMECMP if clear - input logic [`XLEN-1:0] CSRWriteValM, + input logic [P.XLEN-1:0] CSRWriteValM, input logic [1:0] PrivilegeModeW, - output logic [`XLEN-1:0] CSRSReadValM, STVEC_REGW, - output logic [`XLEN-1:0] SEPC_REGW, + output logic [P.XLEN-1:0] CSRSReadValM, STVEC_REGW, + output logic [P.XLEN-1:0] SEPC_REGW, output logic [31:0] SCOUNTEREN_REGW, - output logic [`XLEN-1:0] SATP_REGW, + output logic [P.XLEN-1:0] SATP_REGW, input logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW, input logic [63:0] MTIME_CLINT, output logic WriteSSTATUSM, @@ -65,14 +63,14 @@ module csrs #(parameter ); // Constants - localparam ZERO = {(`XLEN){1'b0}}; - localparam SEDELEG_MASK = ~(ZERO | `XLEN'b111 << 9); + localparam ZERO = {(P.XLEN){1'b0}}; + localparam SEDELEG_MASK = ~(ZERO | {{P.XLEN-3{1'b0}}, 3'b111} << 9); logic WriteSTVECM; logic WriteSSCRATCHM, WriteSEPCM; logic WriteSCAUSEM, WriteSTVALM, WriteSATPM, WriteSCOUNTERENM; logic WriteSTIMECMPM, WriteSTIMECMPHM; - logic [`XLEN-1:0] SSCRATCH_REGW, STVAL_REGW, SCAUSE_REGW; + logic [P.XLEN-1:0] SSCRATCH_REGW, STVAL_REGW, SCAUSE_REGW; logic [63:0] STIMECMP_REGW; // write enables @@ -82,34 +80,34 @@ module csrs #(parameter assign WriteSEPCM = STrapM | (CSRSWriteM & (CSRAdrM == SEPC)); assign WriteSCAUSEM = STrapM | (CSRSWriteM & (CSRAdrM == SCAUSE)); assign WriteSTVALM = STrapM | 
(CSRSWriteM & (CSRAdrM == STVAL)); - assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == `M_MODE | ~STATUS_TVM); + assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == P.M_MODE | ~STATUS_TVM); assign WriteSCOUNTERENM = CSRSWriteM & (CSRAdrM == SCOUNTEREN); - assign WriteSTIMECMPM = CSRSWriteM & (CSRAdrM == STIMECMP) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM); - assign WriteSTIMECMPHM = CSRSWriteM & (CSRAdrM == STIMECMPH) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM) & (`XLEN == 32); + assign WriteSTIMECMPM = CSRSWriteM & (CSRAdrM == STIMECMP) & (PrivilegeModeW == P.M_MODE | MCOUNTEREN_TM); + assign WriteSTIMECMPHM = CSRSWriteM & (CSRAdrM == STIMECMPH) & (PrivilegeModeW == P.M_MODE | MCOUNTEREN_TM) & (P.XLEN == 32); // CSRs - flopenr #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, STVEC_REGW); - flopenr #(`XLEN) SSCRATCHreg(clk, reset, WriteSSCRATCHM, CSRWriteValM, SSCRATCH_REGW); - flopenr #(`XLEN) SEPCreg(clk, reset, WriteSEPCM, NextEPCM, SEPC_REGW); - flopenr #(`XLEN) SCAUSEreg(clk, reset, WriteSCAUSEM, {NextCauseM[4], {(`XLEN-5){1'b0}}, NextCauseM[3:0]}, SCAUSE_REGW); - flopenr #(`XLEN) STVALreg(clk, reset, WriteSTVALM, NextMtvalM, STVAL_REGW); - if (`VIRTMEM_SUPPORTED) - flopenr #(`XLEN) SATPreg(clk, reset, WriteSATPM, CSRWriteValM, SATP_REGW); + flopenr #(P.XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[P.XLEN-1:2], 1'b0, CSRWriteValM[0]}, STVEC_REGW); + flopenr #(P.XLEN) SSCRATCHreg(clk, reset, WriteSSCRATCHM, CSRWriteValM, SSCRATCH_REGW); + flopenr #(P.XLEN) SEPCreg(clk, reset, WriteSEPCM, NextEPCM, SEPC_REGW); + flopenr #(P.XLEN) SCAUSEreg(clk, reset, WriteSCAUSEM, {NextCauseM[4], {(P.XLEN-5){1'b0}}, NextCauseM[3:0]}, SCAUSE_REGW); + flopenr #(P.XLEN) STVALreg(clk, reset, WriteSTVALM, NextMtvalM, STVAL_REGW); + if (P.VIRTMEM_SUPPORTED) + flopenr #(P.XLEN) SATPreg(clk, reset, WriteSATPM, CSRWriteValM, SATP_REGW); else assign SATP_REGW = 0; // hardwire to zero if 
virtual memory not supported flopenr #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], SCOUNTEREN_REGW); - if (`SSTC_SUPPORTED) begin : sstc - if (`XLEN == 64) begin : sstc64 - flopenl #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, 64'hFFFFFFFFFFFFFFFF, STIMECMP_REGW); + if (P.SSTC_SUPPORTED) begin : sstc + if (P.XLEN == 64) begin : sstc64 + flopenl #(P.XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, 64'hFFFFFFFFFFFFFFFF, STIMECMP_REGW); end else begin : sstc32 - flopenl #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, 32'hFFFFFFFF, STIMECMP_REGW[31:0]); - flopenl #(`XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, 32'hFFFFFFFF, STIMECMP_REGW[63:32]); + flopenl #(P.XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, 32'hFFFFFFFF, STIMECMP_REGW[31:0]); + flopenl #(P.XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, 32'hFFFFFFFF, STIMECMP_REGW[63:32]); end end else assign STIMECMP_REGW = 0; // Supervisor timer interrupt logic // Spec is a bit peculiar - Machine timer interrupts are produced in CLINT, while Supervisor timer interrupts are in CSRs - if (`SSTC_SUPPORTED) + if (P.SSTC_SUPPORTED) assign STimerInt = ({1'b0, MTIME_CLINT} >= {1'b0, STIMECMP_REGW}); // unsigned comparison else assign STimerInt = 0; @@ -120,24 +118,24 @@ module csrs #(parameter case (CSRAdrM) SSTATUS: CSRSReadValM = SSTATUS_REGW; STVEC: CSRSReadValM = STVEC_REGW; - SIP: CSRSReadValM = {{(`XLEN-12){1'b0}}, MIP_REGW & 12'h222 & MIDELEG_REGW}; // only read supervisor fields - SIE: CSRSReadValM = {{(`XLEN-12){1'b0}}, MIE_REGW & 12'h222 & MIDELEG_REGW}; // only read supervisor fields + SIP: CSRSReadValM = {{(P.XLEN-12){1'b0}}, MIP_REGW & 12'h222 & MIDELEG_REGW}; // only read supervisor fields + SIE: CSRSReadValM = {{(P.XLEN-12){1'b0}}, MIE_REGW & 12'h222 & MIDELEG_REGW}; // only read supervisor fields SSCRATCH: CSRSReadValM = SSCRATCH_REGW; SEPC: CSRSReadValM = SEPC_REGW; SCAUSE: CSRSReadValM = 
SCAUSE_REGW; STVAL: CSRSReadValM = STVAL_REGW; - SATP: if (`VIRTMEM_SUPPORTED & (PrivilegeModeW == `M_MODE | ~STATUS_TVM)) CSRSReadValM = SATP_REGW; + SATP: if (P.VIRTMEM_SUPPORTED & (PrivilegeModeW == P.M_MODE | ~STATUS_TVM)) CSRSReadValM = SATP_REGW; else begin CSRSReadValM = 0; IllegalCSRSAccessM = 1; end - SCOUNTEREN:CSRSReadValM = {{(`XLEN-32){1'b0}}, SCOUNTEREN_REGW}; - STIMECMP: if (`SSTC_SUPPORTED & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM)) CSRSReadValM = STIMECMP_REGW[`XLEN-1:0]; + SCOUNTEREN:CSRSReadValM = {{(P.XLEN-32){1'b0}}, SCOUNTEREN_REGW}; + STIMECMP: if (P.SSTC_SUPPORTED & (PrivilegeModeW == P.M_MODE | MCOUNTEREN_TM)) CSRSReadValM = STIMECMP_REGW[P.XLEN-1:0]; else begin CSRSReadValM = 0; IllegalCSRSAccessM = 1; end - STIMECMPH: if (`SSTC_SUPPORTED & (`XLEN == 32) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM)) CSRSReadValM[31:0] = STIMECMP_REGW[63:32]; + STIMECMPH: if (P.SSTC_SUPPORTED & (P.XLEN == 32) & (PrivilegeModeW == P.M_MODE | MCOUNTEREN_TM)) CSRSReadValM[31:0] = STIMECMP_REGW[63:32]; else begin // not supported for RV64 CSRSReadValM = 0; IllegalCSRSAccessM = 1; diff --git a/src/privileged/csrsr.sv b/src/privileged/csrsr.sv index 831366bb3..fc98dcb56 100644 --- a/src/privileged/csrsr.sv +++ b/src/privileged/csrsr.sv @@ -27,18 +27,16 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module csrsr ( +module csrsr import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, StallW, input logic WriteMSTATUSM, WriteMSTATUSHM, WriteSSTATUSM, input logic TrapM, FRegWriteM, input logic [1:0] NextPrivilegeModeM, PrivilegeModeW, input logic mretM, sretM, input logic WriteFRMM, WriteFFLAGSM, - input logic [`XLEN-1:0] CSRWriteValM, + input logic [P.XLEN-1:0] CSRWriteValM, input logic SelHPTW, - output logic [`XLEN-1:0] MSTATUS_REGW, SSTATUS_REGW, MSTATUSH_REGW, + output logic [P.XLEN-1:0] MSTATUS_REGW, SSTATUS_REGW, MSTATUSH_REGW, output logic [1:0] STATUS_MPP, output logic STATUS_SPP, STATUS_TSR, STATUS_TW, output logic STATUS_MIE, STATUS_SIE, @@ -57,13 +55,13 @@ module csrsr ( // See Privileged Spec Section 3.1.6 // Lower privilege status registers are a subset of the full status register // *** consider adding MBE, SBE, UBE fields, parameterized to be fixed or adjustable - if (`XLEN==64) begin: csrsr64 // RV64 + if (P.XLEN==64) begin: csrsr64 // RV64 assign MSTATUS_REGW = {STATUS_SD, 25'b0, STATUS_MBE, STATUS_SBE, STATUS_SXL, STATUS_UXL, 9'b0, STATUS_TSR, STATUS_TW, STATUS_TVM, STATUS_MXR, STATUS_SUM, STATUS_MPRV, STATUS_XS, STATUS_FS, STATUS_MPP, 2'b0, STATUS_SPP, STATUS_MPIE, STATUS_UBE, STATUS_SPIE, 1'b0, STATUS_MIE, 1'b0, STATUS_SIE, 1'b0}; - assign SSTATUS_REGW = {STATUS_SD, /*27'b0, */ 29'b0, /*STATUS_SXL, */ {`QEMU ? 2'b0 : STATUS_UXL}, /*9'b0, */ 12'b0, + assign SSTATUS_REGW = {STATUS_SD, /*27'b0, */ 29'b0, /*STATUS_SXL, */ {P.QEMU ? 
2'b0 : STATUS_UXL}, /*9'b0, */ 12'b0, /*STATUS_TSR, STATUS_TW, STATUS_TVM, */STATUS_MXR, STATUS_SUM, /* STATUS_MPRV, */ 1'b0, STATUS_XS, STATUS_FS, /*STATUS_MPP, 2'b0*/ 4'b0, STATUS_SPP, /*STATUS_MPIE*/ 1'b0, STATUS_UBE, STATUS_SPIE, @@ -83,54 +81,54 @@ module csrsr ( end // extract values to write to upper status register on 64/32-bit access - if (`XLEN==64) begin:upperstatus - assign nextMBE = CSRWriteValM[37] & `BIGENDIAN_SUPPORTED; - assign nextSBE = CSRWriteValM[36] & `S_SUPPORTED & `BIGENDIAN_SUPPORTED; + if (P.XLEN==64) begin:upperstatus + assign nextMBE = CSRWriteValM[37] & P.BIGENDIAN_SUPPORTED; + assign nextSBE = CSRWriteValM[36] & P.S_SUPPORTED & P.BIGENDIAN_SUPPORTED; end else begin:upperstatus assign nextMBE = STATUS_MBE; assign nextSBE = STATUS_SBE; end // harwired STATUS bits - assign STATUS_TSR = `S_SUPPORTED & STATUS_TSR_INT; // override reigster with 0 if supervisor mode not supported - assign STATUS_TW = (`S_SUPPORTED | `U_SUPPORTED) & STATUS_TW_INT; // override register with 0 if only machine mode supported - assign STATUS_TVM = `S_SUPPORTED & STATUS_TVM_INT; // override reigster with 0 if supervisor mode not supported - assign STATUS_MXR = `S_SUPPORTED & STATUS_MXR_INT; // override reigster with 0 if supervisor mode not supported + assign STATUS_TSR = P.S_SUPPORTED & STATUS_TSR_INT; // override reigster with 0 if supervisor mode not supported + assign STATUS_TW = (P.S_SUPPORTED | P.U_SUPPORTED) & STATUS_TW_INT; // override register with 0 if only machine mode supported + assign STATUS_TVM = P.S_SUPPORTED & STATUS_TVM_INT; // override reigster with 0 if supervisor mode not supported + assign STATUS_MXR = P.S_SUPPORTED & STATUS_MXR_INT; // override reigster with 0 if supervisor mode not supported /* assign STATUS_UBE = 0; // little-endian assign STATUS_SBE = 0; // little-endian assign STATUS_MBE = 0; // little-endian */ // SXL and UXL bits only matter for RV64. 
Set to 10 for RV64 if mode is supported, or 0 if not - assign STATUS_SXL = `S_SUPPORTED ? 2'b10 : 2'b00; // 10 if supervisor mode supported - assign STATUS_UXL = `U_SUPPORTED ? 2'b10 : 2'b00; // 10 if user mode supported - assign STATUS_SUM = `S_SUPPORTED & `VIRTMEM_SUPPORTED & STATUS_SUM_INT; // override reigster with 0 if supervisor mode not supported - assign STATUS_MPRV = `U_SUPPORTED & STATUS_MPRV_INT; // override with 0 if user mode not supported - assign STATUS_FS = (`S_SUPPORTED & (`F_SUPPORTED | `D_SUPPORTED)) ? STATUS_FS_INT : 2'b00; // off if no FP + assign STATUS_SXL = P.S_SUPPORTED ? 2'b10 : 2'b00; // 10 if supervisor mode supported + assign STATUS_UXL = P.U_SUPPORTED ? 2'b10 : 2'b00; // 10 if user mode supported + assign STATUS_SUM = P.S_SUPPORTED & P.VIRTMEM_SUPPORTED & STATUS_SUM_INT; // override reigster with 0 if supervisor mode not supported + assign STATUS_MPRV = P.U_SUPPORTED & STATUS_MPRV_INT; // override with 0 if user mode not supported + assign STATUS_FS = (P.S_SUPPORTED & (P.F_SUPPORTED | P.D_SUPPORTED)) ? 
STATUS_FS_INT : 2'b00; // off if no FP assign STATUS_SD = (STATUS_FS == 2'b11) | (STATUS_XS == 2'b11); // dirty state logic assign STATUS_XS = 2'b00; // No additional user-mode state to be dirty always_comb - if (CSRWriteValM[12:11] == `U_MODE & `U_SUPPORTED) STATUS_MPP_NEXT = `U_MODE; - else if (CSRWriteValM[12:11] == `S_MODE & `S_SUPPORTED) STATUS_MPP_NEXT = `S_MODE; - else STATUS_MPP_NEXT = `M_MODE; + if (CSRWriteValM[12:11] == P.U_MODE & P.U_SUPPORTED) STATUS_MPP_NEXT = P.U_MODE; + else if (CSRWriteValM[12:11] == P.S_MODE & P.S_SUPPORTED) STATUS_MPP_NEXT = P.S_MODE; + else STATUS_MPP_NEXT = P.M_MODE; /////////////////////////////////////////// // Endianness logic Privileged Spec 3.1.6.4 /////////////////////////////////////////// - if (`BIGENDIAN_SUPPORTED) begin: endianmux + if (P.BIGENDIAN_SUPPORTED) begin: endianmux // determine whether big endian accesses should be made logic [1:0] EndiannessPrivMode; always_comb begin - if (SelHPTW) EndiannessPrivMode = `S_MODE; + if (SelHPTW) EndiannessPrivMode = P.S_MODE; //coverage off -item c 1 -feccondrow 1 // status.MPRV always gets reset upon leaving machine mode, so MPRV will never be high when out of machine mode - else if (PrivilegeModeW == `M_MODE & STATUS_MPRV) EndiannessPrivMode = STATUS_MPP; + else if (PrivilegeModeW == P.M_MODE & STATUS_MPRV) EndiannessPrivMode = STATUS_MPP; //coverage on else EndiannessPrivMode = PrivilegeModeW; case (EndiannessPrivMode) - `M_MODE: BigEndianM = STATUS_MBE; - `S_MODE: BigEndianM = STATUS_SBE; + P.M_MODE: BigEndianM = STATUS_MBE; + P.S_MODE: BigEndianM = STATUS_SBE; default: BigEndianM = STATUS_UBE; endcase end @@ -148,7 +146,7 @@ module csrsr ( STATUS_MXR_INT <= #1 0; STATUS_SUM_INT <= #1 0; STATUS_MPRV_INT <= #1 0; // Per Priv 3.3 - STATUS_FS_INT <= #1 `F_SUPPORTED ? 2'b00 : 2'b00; // leave floating-point off until activated, even if F_SUPPORTED + STATUS_FS_INT <= #1 P.F_SUPPORTED ? 
2'b00 : 2'b00; // leave floating-point off until activated, even if F_SUPPORTED STATUS_MPP <= #1 0; STATUS_SPP <= #1 0; STATUS_MPIE <= #1 0; @@ -164,7 +162,7 @@ module csrsr ( // y = PrivilegeModeW // x = NextPrivilegeModeM // Modes: 11 = Machine, 01 = Supervisor, 00 = User - if (NextPrivilegeModeM == `M_MODE) begin + if (NextPrivilegeModeM == P.M_MODE) begin STATUS_MPIE <= #1 STATUS_MIE; STATUS_MIE <= #1 0; STATUS_MPP <= #1 PrivilegeModeW; @@ -176,11 +174,11 @@ module csrsr ( end else if (mretM) begin // Privileged 3.1.6.1 STATUS_MIE <= #1 STATUS_MPIE; // restore global interrupt enable STATUS_MPIE <= #1 1; // - STATUS_MPP <= #1 `U_SUPPORTED ? `U_MODE : `M_MODE; // set MPP to lowest supported privilege level - STATUS_MPRV_INT <= #1 STATUS_MPRV_INT & (STATUS_MPP == `M_MODE); // page 21 of privileged spec. + STATUS_MPP <= #1 P.U_SUPPORTED ? P.U_MODE : P.M_MODE; // set MPP to lowest supported privilege level + STATUS_MPRV_INT <= #1 STATUS_MPRV_INT & (STATUS_MPP == P.M_MODE); // page 21 of privileged spec. 
end else if (sretM) begin STATUS_SIE <= #1 STATUS_SPIE; // restore global interrupt enable - STATUS_SPIE <= #1 `S_SUPPORTED; + STATUS_SPIE <= #1 P.S_SUPPORTED; STATUS_SPP <= #1 0; // set SPP to lowest supported privilege level to catch bugs STATUS_MPRV_INT <= #1 0; // always clear MPRV end else if (WriteMSTATUSM) begin @@ -192,28 +190,28 @@ module csrsr ( STATUS_MPRV_INT <= #1 CSRWriteValM[17]; STATUS_FS_INT <= #1 CSRWriteValM[14:13]; STATUS_MPP <= #1 STATUS_MPP_NEXT; - STATUS_SPP <= #1 `S_SUPPORTED & CSRWriteValM[8]; + STATUS_SPP <= #1 P.S_SUPPORTED & CSRWriteValM[8]; STATUS_MPIE <= #1 CSRWriteValM[7]; - STATUS_SPIE <= #1 `S_SUPPORTED & CSRWriteValM[5]; + STATUS_SPIE <= #1 P.S_SUPPORTED & CSRWriteValM[5]; STATUS_MIE <= #1 CSRWriteValM[3]; - STATUS_SIE <= #1 `S_SUPPORTED & CSRWriteValM[1]; - STATUS_UBE <= #1 CSRWriteValM[6] & `U_SUPPORTED & `BIGENDIAN_SUPPORTED; + STATUS_SIE <= #1 P.S_SUPPORTED & CSRWriteValM[1]; + STATUS_UBE <= #1 CSRWriteValM[6] & P.U_SUPPORTED & P.BIGENDIAN_SUPPORTED; STATUS_MBE <= #1 nextMBE; STATUS_SBE <= #1 nextSBE; // coverage off // MSTATUSH only exists in 32-bit configurations, will not be hit on rv64gc end else if (WriteMSTATUSHM) begin - STATUS_MBE <= #1 CSRWriteValM[5] & `BIGENDIAN_SUPPORTED; - STATUS_SBE <= #1 CSRWriteValM[4] & `S_SUPPORTED & `BIGENDIAN_SUPPORTED; + STATUS_MBE <= #1 CSRWriteValM[5] & P.BIGENDIAN_SUPPORTED; + STATUS_SBE <= #1 CSRWriteValM[4] & P.S_SUPPORTED & P.BIGENDIAN_SUPPORTED; // coverage on end else if (WriteSSTATUSM) begin // write a subset of the STATUS bits STATUS_MXR_INT <= #1 CSRWriteValM[19]; STATUS_SUM_INT <= #1 CSRWriteValM[18]; STATUS_FS_INT <= #1 CSRWriteValM[14:13]; - STATUS_SPP <= #1 `S_SUPPORTED & CSRWriteValM[8]; - STATUS_SPIE <= #1 `S_SUPPORTED & CSRWriteValM[5]; - STATUS_SIE <= #1 `S_SUPPORTED & CSRWriteValM[1]; - STATUS_UBE <= #1 CSRWriteValM[6] & `U_SUPPORTED & `BIGENDIAN_SUPPORTED; + STATUS_SPP <= #1 P.S_SUPPORTED & CSRWriteValM[8]; + STATUS_SPIE <= #1 P.S_SUPPORTED & CSRWriteValM[5]; + 
STATUS_SIE <= #1 P.S_SUPPORTED & CSRWriteValM[1]; + STATUS_UBE <= #1 CSRWriteValM[6] & P.U_SUPPORTED & P.BIGENDIAN_SUPPORTED; end else if (FRegWriteM | WriteFRMM | WriteFFLAGSM) STATUS_FS_INT <= #1 2'b11; end endmodule diff --git a/src/privileged/csru.sv b/src/privileged/csru.sv index e474e5967..941b13f45 100644 --- a/src/privileged/csru.sv +++ b/src/privileged/csru.sv @@ -26,9 +26,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module csru #(parameter +module csru import cvw::*; #(parameter cvw_t P, FFLAGS = 12'h001, FRM = 12'h002, FCSR = 12'h003) ( @@ -36,9 +34,9 @@ module csru #(parameter input logic InstrValidNotFlushedM, input logic CSRUWriteM, input logic [11:0] CSRAdrM, - input logic [`XLEN-1:0] CSRWriteValM, + input logic [P.XLEN-1:0] CSRWriteValM, input logic [1:0] STATUS_FS, - output logic [`XLEN-1:0] CSRUReadValM, + output logic [P.XLEN-1:0] CSRUReadValM, input logic [4:0] SetFflagsM, output logic [2:0] FRM_REGW, output logic WriteFRMM, WriteFFLAGSM, @@ -71,9 +69,9 @@ module csru #(parameter end else begin IllegalCSRUAccessM = 0; case (CSRAdrM) - FFLAGS: CSRUReadValM = {{(`XLEN-5){1'b0}}, FFLAGS_REGW}; - FRM: CSRUReadValM = {{(`XLEN-3){1'b0}}, FRM_REGW}; - FCSR: CSRUReadValM = {{(`XLEN-8){1'b0}}, FRM_REGW, FFLAGS_REGW}; + FFLAGS: CSRUReadValM = {{(P.XLEN-5){1'b0}}, FFLAGS_REGW}; + FRM: CSRUReadValM = {{(P.XLEN-3){1'b0}}, FRM_REGW}; + FCSR: CSRUReadValM = {{(P.XLEN-8){1'b0}}, FRM_REGW, FFLAGS_REGW}; default: begin CSRUReadValM = 0; IllegalCSRUAccessM = 1; diff --git a/src/privileged/privdec.sv b/src/privileged/privdec.sv index 9aaa9979e..59db8c9d4 100644 --- a/src/privileged/privdec.sv +++ b/src/privileged/privdec.sv @@ -27,9 +27,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module privdec ( +module privdec import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, input logic StallM, input logic [31:20] InstrM, // privileged instruction function field @@ -52,26 +50,26 @@ module privdec ( // Decode privileged instructions /////////////////////////////////////////// - assign sretM = PrivilegedM & (InstrM[31:20] == 12'b000100000010) & `S_SUPPORTED & - (PrivilegeModeW == `M_MODE | PrivilegeModeW == `S_MODE & ~STATUS_TSR); - assign mretM = PrivilegedM & (InstrM[31:20] == 12'b001100000010) & (PrivilegeModeW == `M_MODE); + assign sretM = PrivilegedM & (InstrM[31:20] == 12'b000100000010) & P.S_SUPPORTED & + (PrivilegeModeW == P.M_MODE | PrivilegeModeW == P.S_MODE & ~STATUS_TSR); + assign mretM = PrivilegedM & (InstrM[31:20] == 12'b001100000010) & (PrivilegeModeW == P.M_MODE); assign ecallM = PrivilegedM & (InstrM[31:20] == 12'b000000000000); assign ebreakM = PrivilegedM & (InstrM[31:20] == 12'b000000000001); assign wfiM = PrivilegedM & (InstrM[31:20] == 12'b000100000101); assign sfencevmaM = PrivilegedM & (InstrM[31:25] == 7'b0001001) & - (PrivilegeModeW == `M_MODE | (PrivilegeModeW == `S_MODE & ~STATUS_TVM)); + (PrivilegeModeW == P.M_MODE | (PrivilegeModeW == P.S_MODE & ~STATUS_TVM)); /////////////////////////////////////////// // WFI timeout Privileged Spec 3.1.6.5 /////////////////////////////////////////// - if (`U_SUPPORTED) begin:wfi - logic [`WFI_TIMEOUT_BIT:0] WFICount, WFICountPlus1; + if (P.U_SUPPORTED) begin:wfi + logic [P.WFI_TIMEOUT_BIT:0] WFICount, WFICountPlus1; assign WFICountPlus1 = WFICount + 1; - floprc #(`WFI_TIMEOUT_BIT+1) wficountreg(clk, reset, ~wfiM, WFICountPlus1, WFICount); // count while in WFI + floprc #(P.WFI_TIMEOUT_BIT+1) wficountreg(clk, reset, ~wfiM, WFICountPlus1, WFICount); // count while in WFI // coverage off -item e 1 -fecexprrow 1 // WFI Timout trap will not occur when 
STATUS_TW is low while in supervisor mode, so the system gets stuck waiting for an interrupt and triggers a watchdog timeout. - assign WFITimeoutM = ((STATUS_TW & PrivilegeModeW != `M_MODE) | (`S_SUPPORTED & PrivilegeModeW == `U_MODE)) & WFICount[`WFI_TIMEOUT_BIT]; + assign WFITimeoutM = ((STATUS_TW & PrivilegeModeW != P.M_MODE) | (P.S_SUPPORTED & PrivilegeModeW == P.U_MODE)) & WFICount[P.WFI_TIMEOUT_BIT]; // coverage on end else assign WFITimeoutM = 0; diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 7d34e22fc..659a00f24 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -27,19 +27,17 @@ // SOFTWARE. /////////////////////////////////////////// -`include "wally-config.vh" - -module privileged ( +module privileged import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, input logic StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, // CSR Reads and Writes, and values needed for traps input logic CSRReadM, CSRWriteM, // Read or write CSRs - input logic [`XLEN-1:0] SrcAM, // GPR register to write + input logic [P.XLEN-1:0] SrcAM, // GPR register to write input logic [31:0] InstrM, // Instruction input logic [31:0] InstrOrigM, // Original compressed or uncompressed instruction in Memory stage for Illegal Instruction MTVAL - input logic [`XLEN-1:0] IEUAdrM, // address from IEU - input logic [`XLEN-1:0] PCM, PC2NextF, // program counter, next PC going to trap/return PC logic + input logic [P.XLEN-1:0] IEUAdrM, // address from IEU + input logic [P.XLEN-1:0] PCM, PC2NextF, // program counter, next PC going to trap/return PC logic // control signals input logic InstrValidM, // Current instruction is valid (not flushed) input logic CommittedM, CommittedF, // current instruction is using bus; don't interrupt @@ -76,16 +74,16 @@ module privileged ( input logic [4:0] SetFflagsM, // set FCSR flags from FPU input logic SelHPTW, // HPTW in use. 
Causes system to use S-mode endianness for accesses // CSR outputs - output logic [`XLEN-1:0] CSRReadValW, // Value read from CSR + output logic [P.XLEN-1:0] CSRReadValW, // Value read from CSR output logic [1:0] PrivilegeModeW, // current privilege mode - output logic [`XLEN-1:0] SATP_REGW, // supervisor address translation register + output logic [P.XLEN-1:0] SATP_REGW, // supervisor address translation register output logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // status register bits output logic [1:0] STATUS_MPP, STATUS_FS, // status register bits - output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration entries to MMU - output var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // PMP address entries to MMU + output var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration entries to MMU + output var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW [P.PMP_ENTRIES-1:0], // PMP address entries to MMU output logic [2:0] FRM_REGW, // FPU rounding mode // PC logic output in privileged unit - output logic [`XLEN-1:0] UnalignedPCNextF, // Next PC from trap/return PC logic + output logic [P.XLEN-1:0] UnalignedPCNextF, // Next PC from trap/return PC logic // control outputs output logic RetM, TrapM, // return instruction, or trap output logic sfencevmaM, // sfence.vma instruction @@ -116,17 +114,17 @@ module privileged ( // track the current privilege level - privmode privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .DelegateM, + privmode #(P) privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .DelegateM, .STATUS_MPP, .STATUS_SPP, .NextPrivilegeModeM, .PrivilegeModeW); // decode privileged instructions - privdec pmd(.clk, .reset, .StallM, .InstrM(InstrM[31:20]), + privdec #(P) pmd(.clk, .reset, .StallM, .InstrM(InstrM[31:20]), .PrivilegedM, .IllegalIEUFPUInstrM, .IllegalCSRAccessM, .PrivilegeModeW, .STATUS_TSR, .STATUS_TVM, .STATUS_TW, .IllegalInstrFaultM, .EcallFaultM, .BreakpointFaultM, .sretM, .mretM, .wfiM, 
.sfencevmaM); // Control and Status Registers - csr csr(.clk, .reset, .FlushM, .FlushW, .StallE, .StallM, .StallW, + csr #(P) csr(.clk, .reset, .FlushM, .FlushW, .StallE, .StallM, .StallW, .InstrM, .InstrOrigM, .PCM, .SrcAM, .IEUAdrM, .PC2NextF, .CSRReadM, .CSRWriteM, .TrapM, .mretM, .sretM, .wfiM, .IntPendingM, .InterruptM, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, @@ -148,7 +146,7 @@ module privileged ( .InstrPageFaultM, .InstrAccessFaultM, .HPTWInstrAccessFaultM, .IllegalIEUFPUInstrM); // trap logic - trap trap(.reset, + trap #(P) trap(.reset, .InstrMisalignedFaultM, .InstrAccessFaultM, .HPTWInstrAccessFaultM, .IllegalInstrFaultM, .BreakpointFaultM, .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, .LoadAccessFaultM, .StoreAmoAccessFaultM, .EcallFaultM, .InstrPageFaultM, diff --git a/src/privileged/privmode.sv b/src/privileged/privmode.sv index 615f30439..aa111732d 100644 --- a/src/privileged/privmode.sv +++ b/src/privileged/privmode.sv @@ -26,9 +26,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module privmode ( +module privmode import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, input logic StallW, input logic TrapM, // Trap @@ -40,20 +38,20 @@ module privmode ( output logic [1:0] PrivilegeModeW // current privilege mode ); - if (`U_SUPPORTED) begin:privmode + if (P.U_SUPPORTED) begin:privmode // PrivilegeMode FSM always_comb begin if (TrapM) begin // Change privilege based on DELEG registers (see 3.1.8) - if (`S_SUPPORTED & DelegateM) NextPrivilegeModeM = `S_MODE; - else NextPrivilegeModeM = `M_MODE; + if (P.S_SUPPORTED & DelegateM) NextPrivilegeModeM = P.S_MODE; + else NextPrivilegeModeM = P.M_MODE; end else if (mretM) NextPrivilegeModeM = STATUS_MPP; else if (sretM) NextPrivilegeModeM = {1'b0, STATUS_SPP}; else NextPrivilegeModeM = PrivilegeModeW; end - flopenl #(2) privmodereg(clk, reset, ~StallW, NextPrivilegeModeM, `M_MODE, PrivilegeModeW); + flopenl #(2) privmodereg(clk, reset, ~StallW, NextPrivilegeModeM, P.M_MODE, PrivilegeModeW); end else begin // only machine mode supported - assign NextPrivilegeModeM = `M_MODE; - assign PrivilegeModeW = `M_MODE; + assign NextPrivilegeModeM = P.M_MODE; + assign PrivilegeModeW = P.M_MODE; end -endmodule \ No newline at end of file +endmodule diff --git a/src/privileged/privpiperegs.sv b/src/privileged/privpiperegs.sv index 684b0ad73..be1a238ad 100644 --- a/src/privileged/privpiperegs.sv +++ b/src/privileged/privpiperegs.sv @@ -26,8 +26,6 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module privpiperegs ( input logic clk, reset, input logic StallD, StallE, StallM, diff --git a/src/privileged/trap.sv b/src/privileged/trap.sv index 96b404ef9..0d8002d18 100644 --- a/src/privileged/trap.sv +++ b/src/privileged/trap.sv @@ -26,9 +26,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module trap ( +module trap import cvw::*; #(parameter cvw_t P) ( input logic reset, input logic InstrMisalignedFaultM, InstrAccessFaultM, HPTWInstrAccessFaultM, IllegalInstrFaultM, input logic BreakpointFaultM, LoadMisalignedFaultM, StoreAmoMisalignedFaultM, @@ -63,16 +61,16 @@ module trap ( // & with ~CommittedM to make sure MEPC isn't chosen so as to rerun the same instr twice /////////////////////////////////////////// - assign MIntGlobalEnM = (PrivilegeModeW != `M_MODE) | STATUS_MIE; // if M ints enabled or lower priv 3.1.9 - assign SIntGlobalEnM = (PrivilegeModeW == `U_MODE) | ((PrivilegeModeW == `S_MODE) & STATUS_SIE); // if in lower priv mode, or if S ints enabled and not in higher priv mode 3.1.9 + assign MIntGlobalEnM = (PrivilegeModeW != P.M_MODE) | STATUS_MIE; // if M ints enabled or lower priv 3.1.9 + assign SIntGlobalEnM = (PrivilegeModeW == P.U_MODE) | ((PrivilegeModeW == P.S_MODE) & STATUS_SIE); // if in lower priv mode, or if S ints enabled and not in higher priv mode 3.1.9 assign PendingIntsM = MIP_REGW & MIE_REGW; assign IntPendingM = |PendingIntsM; assign Committed = CommittedM | CommittedF; assign EnabledIntsM = ({12{MIntGlobalEnM}} & PendingIntsM & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & PendingIntsM & MIDELEG_REGW); assign ValidIntsM = {12{~Committed}} & EnabledIntsM; assign InterruptM = (|ValidIntsM) & InstrValidM; // suppress interrupt if the memory system has partially processed a request. 
- assign DelegateM = `S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM] : MEDELEG_REGW[CauseM]) & - (PrivilegeModeW == `U_MODE | PrivilegeModeW == `S_MODE); + assign DelegateM = P.S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM] : MEDELEG_REGW[CauseM]) & + (PrivilegeModeW == P.U_MODE | PrivilegeModeW == P.S_MODE); /////////////////////////////////////////// // Trigger Traps and RET diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 95b57c848..1ef96af0c 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -270,7 +270,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( // privileged unit if (P.ZICSR_SUPPORTED) begin:priv - privileged priv( + privileged #(P) priv( .clk, .reset, .FlushD, .FlushE, .FlushM, .FlushW, .StallD, .StallE, .StallM, .StallW, .CSRReadM, .CSRWriteM, .SrcAM, .PCM, .PC2NextF, From 8aba89738632e8a3d39fa5e232e58d4e1855f5a1 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 26 May 2023 12:13:11 -0500 Subject: [PATCH 14/20] Update top level parameterized. Simulation slowed down to 4.5 minutes. --- src/fpu/fctrl.sv | 41 +++---- src/fpu/fpu.sv | 210 ++++++++++++++++---------------- src/wally/wallypipelinedcore.sv | 2 +- 3 files changed, 125 insertions(+), 128 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index 3252e0d9f..5eda917c4 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -25,9 +25,8 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" -module fctrl ( +module fctrl import cvw::*; #(parameter cvw_t P) ( input logic clk, input logic reset, // input control signals @@ -49,7 +48,7 @@ module fctrl ( // opperation mux selections output logic FCvtIntE, FCvtIntW, // convert to integer opperation output logic [2:0] FrmM, // FP rounding mode - output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format + output logic [P.FMTBITS-1:0] FmtE, FmtM, // FP format output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component output logic FpLoadStoreM, // FP load or store instruction output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit @@ -74,7 +73,7 @@ module fctrl ( logic [1:0] PostProcSelD; // select result in the post processing unit logic [1:0] FResSelD; // Select one of the results that finish in the memory stage logic [2:0] FrmD, FrmE; // FP rounding mode - logic [`FMTBITS-1:0] FmtD; // FP format + logic [P.FMTBITS-1:0] FmtD; // FP format logic [1:0] Fmt, Fmt2; // format - before possible reduction logic SupportedFmt; // is the format supported logic SupportedFmt2; // is the source format supported for fp -> fp @@ -84,10 +83,10 @@ module fctrl ( assign Fmt = Funct7D[1:0]; assign Fmt2 = Rs2D[1:0]; // source format for fcvt fp->fp - assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & `D_SUPPORTED) | - (Fmt == 2'b10 & `ZFH_SUPPORTED) | (Fmt == 2'b11 & `Q_SUPPORTED)); - assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & `D_SUPPORTED) | - (Fmt2 == 2'b10 & `ZFH_SUPPORTED) | (Fmt2 == 2'b11 & `Q_SUPPORTED)); + assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & P.D_SUPPORTED) | + (Fmt == 2'b10 & P.ZFH_SUPPORTED) | (Fmt == 2'b11 & P.Q_SUPPORTED)); + assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & P.D_SUPPORTED) | + (Fmt2 == 2'b10 & P.ZFH_SUPPORTED) | (Fmt2 == 2'b11 & P.Q_SUPPORTED)); // decode the instruction // 
FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt @@ -102,15 +101,15 @@ module fctrl ( case(OpD) 7'b0000111: case(Funct3D) 3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flw - 3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld - 3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq - 3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh + 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld + 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq + 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh endcase 7'b0100111: case(Funct3D) 3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsw - 3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd - 3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq - 3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh + 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd + 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq + 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh endcase 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd 7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub @@ -227,14 +226,14 @@ module fctrl ( // 10 - half // 11 - quad - if (`FPSIZES == 1) + if (P.FPSIZES == 1) assign FmtD = 0; - else if (`FPSIZES == 2)begin + else if (P.FPSIZES == 2)begin logic [1:0] FmtTmp; assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0]; - assign FmtD = (`FMT == FmtTmp); + assign FmtD = (P.FMT == FmtTmp); end - else if (`FPSIZES == 3|`FPSIZES == 4) + else if (P.FPSIZES == 3|P.FPSIZES == 4) assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? 
Rs2D[1:0] : Funct7D[1:0]; // Enables indicate that a source register is used and may need stalls. Also indicate special cases for infinity or NaN. @@ -313,7 +312,7 @@ module fctrl ( assign Adr3D = InstrD[31:27]; // D/E pipleine register - flopenrc #(13+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, + flopenrc #(13+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD}, {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE}); flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E}); @@ -321,11 +320,11 @@ module fctrl ( flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE}); // Integer division on FPU divider - if (`M_SUPPORTED & `IDIV_ON_FPU) assign IDivStartE = IntDivE; + if (P.M_SUPPORTED & P.IDIV_ON_FPU) assign IDivStartE = IntDivE; else assign IDivStartE = 0; // E/M pipleine register - flopenrc #(13+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, + flopenrc #(13+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE}, {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM}); diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 32bdfc1ca..91d2d5354 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -26,9 +26,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fpu ( +module fpu import cvw::*; #(parameter cvw_t P) ( input logic clk, input logic reset, // Hazards @@ -44,7 +42,7 @@ module fpu ( // Execute stage input logic [2:0] Funct3E, // Funct fields of instruction specify type of operations input logic IntDivE, W64E, // Integer division on FPU - input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU) + input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU) input logic [4:0] RdE, // which FP register to write to (from IEU) output logic FWriteIntE, // integer register write enable (to IEU) output logic FCvtIntE, // Convert to int (to IEU) @@ -53,16 +51,16 @@ module fpu ( input logic [4:0] RdM, // which FP register to write to (from IEU) output logic FRegWriteM, // FP register write enable (to privileged unit) output logic FpLoadStoreM, // Fp load instruction? 
(to LSU) - output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU) - output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) + output logic [P.FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU) + output logic [P.XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to IFU) output logic [4:0] SetFflagsM, // FPU flags (to privileged unit) // Writeback stage input logic [4:0] RdW, // which FP register to write to (from IEU) - input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU) - output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) + input logic [P.FLEN-1:0] ReadDataW, // Read data (from LSU) + output logic [P.XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) output logic FCvtIntW, // select FCvtIntRes (to IEU) - output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU) + output logic [P.XLEN-1:0] FIntDivResultW // Result from integer division (to IEU) ); // RISC-V FPU specifics: @@ -72,7 +70,7 @@ module fpu ( // control signals logic FRegWriteW; // FP register write enable logic [2:0] FrmM; // FP rounding mode - logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double + logic [P.FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double logic FDivStartE, IDivStartE; // Start division or squareroot logic FWriteIntM; // Write to integer register logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals @@ -86,20 +84,20 @@ module fpu ( logic FRegWriteE; // Write floating-point register // regfile signals - logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding) - logic 
[`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) - logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding) - logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding) + logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [P.FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [P.FLEN-1:0] XE; // Input 1 to the various units (after forwarding) + logic [P.XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) + logic [P.FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding) + logic [P.FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding) // unpacking signals logic XsE, YsE, ZsE; // input's sign - execute stage logic XsM, YsM; // input's sign - memory stage - logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage - logic [`NE-1:0] ZeM; // input's exponent - memory stage - logic [`NF:0] XmE, YmE, ZmE; // input's significand - execute stage - logic [`NF:0] XmM, YmM, ZmM; // input's significand - memory stage + logic [P.NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage + logic [P.NE-1:0] ZeM; // input's exponent - memory stage + logic [P.NF:0] XmE, YmE, ZmE; // input's significand - execute stage + logic [P.NF:0] XmM, YmM, ZmM; // input's significand - memory stage logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage @@ -110,56 +108,56 @@ module fpu ( logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage logic XExpMaxE; // is the exponent all ones (max value) - logic [`FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations + logic [P.FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. 
Needed for 1-input operations // Fma Signals logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying - logic [3*`NF+3:0] SmE, SmM; // Sum significand + logic [3*P.NF+3:0] SmE, SmM; // Sum significand logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output - logic [`NE+1:0] SeE,SeM; // Sum exponent + logic [P.NE+1:0] SeE,SeM; // Sum exponent logic InvAE, InvAM; // Invert addend logic AsE, AsM; // Addend sign logic PsE, PsM; // Product sign logic SsE, SsM; // Sum sign - logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count + logic [$clog2(3*P.NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count // Cvt Signals - logic [`NE:0] CeE, CeM; // convert intermediate expoent - logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by + logic [P.NE:0] CeE, CeM; // convert intermediate expoent + logic [P.LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by logic CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal logic CsE, CsM; // convert result sign logic IntZeroE, IntZeroM; // is the integer zero? 
- logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) - logic [`XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU) + logic [P.CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + logic [P.XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU) // divide signals - logic [`DIVb:0] QmM; // fdivsqrt signifcand - logic [`NE+1:0] QeM; // fdivsqrt exponent + logic [P.DIVb:0] QmM; // fdivsqrt signifcand + logic [P.NE+1:0] QeM; // fdivsqrt exponent logic DivStickyM; // fdivsqrt sticky bit logic FDivDoneE, IFDivStartE; // fdivsqrt control signals - logic [`XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU) + logic [P.XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU) // result and flag signals - logic [`XLEN-1:0] ClassResE; // classify result - logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max) - logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le) + logic [P.XLEN-1:0] ClassResE; // classify result + logic [P.FLEN-1:0] CmpFpResE; // compare result to FPU (min/max) + logic [P.XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le) logic CmpNVE; // compare invalid flag (Not Valid) - logic [`FLEN-1:0] SgnResE; // sign injection result - logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move) - logic [`FLEN-1:0] PostProcResM; // Postprocessor output + logic [P.FLEN-1:0] SgnResE; // sign injection result + logic [P.XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move) + logic [P.FLEN-1:0] PostProcResM; // Postprocessor output logic [4:0] PostProcFlgM; // Postprocessor flags logic PreNVE, PreNVM; // selected flag that is ready in the memory stage - logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result - logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage - logic [`FLEN-1:0] FResultW; // final FP result being written to the FP 
register + logic [P.FLEN-1:0] FpResM, FpResW; // FPU preliminary result + logic [P.FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage + logic [P.FLEN-1:0] FResultW; // final FP result being written to the FP register // other signals - logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv - logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed - logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed + logic [P.FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv + logic [P.FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed + logic [P.FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt - logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer + logic [P.FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer logic mvsgn; // sign bit for extending move ////////////////////////////////////////////////////////////////////////////////////////// @@ -167,7 +165,7 @@ module fpu ( ////////////////////////////////////////////////////////////////////////////////////////// // calculate FP control signals - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), + fctrl #(P) fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .Funct3E, .IntDivE, .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, @@ -183,9 +181,9 @@ module fpu ( .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); // D/E pipeline registers - flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); - flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, 
FRD2D, FRD2E); - flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); + flopenrc #(P.FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(P.FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(P.FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); ////////////////////////////////////////////////////////////////////////////////////////// // Execute Stage: hazards, forwarding, unpacking, execution units @@ -197,34 +195,34 @@ module fpu ( .XEnD, .YEnD, .ZEnD, .FPUStallD, .ForwardXE, .ForwardYE, .ForwardZE); // forwarding muxs - mux3 #(`FLEN) fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE); - mux3 #(`FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE); - mux3 #(`FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE); + mux3 #(P.FLEN) fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE); + mux3 #(P.FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE); + mux3 #(P.FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE); // Select NAN-boxed value of Y = 1.0 in proper format for fma to add/subtract X*Y+Z - if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}; - else if(`FPSIZES == 2) - mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes - else if(`FPSIZES == 3 | `FPSIZES == 4) - mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)}, - {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)}, - {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)}, - {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes + if(P.FPSIZES == 1) assign BoxedOneE = {2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)}; + else if(P.FPSIZES == 2) + mux2 #(P.FLEN) fonemux ({{P.FLEN-P.LEN1{1'b1}}, 2'b0, {P.NE1-1{1'b1}}, (P.NF1)'(0)}, {2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes + else if(P.FPSIZES == 3 | P.FPSIZES == 4) + mux4 #(P.FLEN) fonemux 
({{P.FLEN-P.S_LEN{1'b1}}, 2'b0, {P.S_NE-1{1'b1}}, (P.S_NF)'(0)}, + {{P.FLEN-P.D_LEN{1'b1}}, 2'b0, {P.D_NE-1{1'b1}}, (P.D_NF)'(0)}, + {{P.FLEN-P.H_LEN{1'b1}}, 2'b0, {P.H_NE-1{1'b1}}, (P.H_NF)'(0)}, + {2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes assign FmaAddSubE = OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10); - mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract + mux2 #(P.FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract // Select NAN-boxed value of Z = 0.0 in proper format for FMA for multiply X*Y+Z // For add and subtract, Z comes from second source operand - if(`FPSIZES == 1) assign BoxedZeroE = 0; - else if(`FPSIZES == 2) - mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes - else if(`FPSIZES == 3 | `FPSIZES == 4) - mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}}, - {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}}, - {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}}, - (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes + if(P.FPSIZES == 1) assign BoxedZeroE = 0; + else if(P.FPSIZES == 2) + mux2 #(P.FLEN) fmulzeromux ({{P.FLEN-P.LEN1{1'b1}}, {P.LEN1{1'b0}}}, (P.FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes + else if(P.FPSIZES == 3 | P.FPSIZES == 4) + mux4 #(P.FLEN) fmulzeromux ({{P.FLEN-P.S_LEN{1'b1}}, {P.S_LEN{1'b0}}}, + {{P.FLEN-P.D_LEN{1'b1}}, {P.D_LEN{1'b0}}}, + {{P.FLEN-P.H_LEN{1'b1}}, {P.H_LEN{1'b0}}}, + (P.FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes assign FmaZSelE = {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}; - mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE); + mux3 #(P.FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE); // unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infifnity unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), @@ -266,62 +264,62 @@ 
module fpu ( // NaN Box SrcA to convert integer to requested FP size - if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}; - else if(`FPSIZES == 2) - mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); - else if(`FPSIZES == 3 | `FPSIZES == 4) - mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ForwardedSrcAE[`S_LEN-1:0]}, - {{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]}, - {{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]}, - {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes + if(P.FPSIZES == 1) assign AlignedSrcAE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}; + else if(P.FPSIZES == 2) + mux2 #(P.FLEN) SrcAMux ({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); + else if(P.FPSIZES == 3 | P.FPSIZES == 4) + mux4 #(P.FLEN) SrcAMux ({{P.FLEN-P.S_LEN{1'b1}}, ForwardedSrcAE[P.S_LEN-1:0]}, + {{P.FLEN-P.D_LEN{1'b1}}, ForwardedSrcAE[P.D_LEN-1:0]}, + {{P.FLEN-P.H_LEN{1'b1}}, ForwardedSrcAE[P.H_LEN-1:0]}, + {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes // select a result that may be written to the FP register - mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); + mux3 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); // select the result that may be written to the integer register with fmv - to IEU - if(`FPSIZES == 1) begin - assign mvsgn = XE[`FLEN-1]; + if(P.FPSIZES == 1) begin + assign mvsgn = XE[P.FLEN-1]; assign SgnExtXE = XE; - end else if(`FPSIZES == 2) begin - mux2 #(1) sgnmux (XE[`LEN1-1], XE[`FLEN-1],FmtE, mvsgn); - mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{mvsgn}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE); - end else if(`FPSIZES == 3 | `FPSIZES == 4) begin - mux4 #(1) sgnmux (XE[`H_LEN-1], XE[`S_LEN-1], 
XE[`D_LEN-1], XE[`LLEN-1], FmtE, mvsgn); - mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{mvsgn}}, XE[`H_LEN-1:0]}, - {{`FLEN-`S_LEN{mvsgn}}, XE[`S_LEN-1:0]}, - {{`FLEN-`D_LEN{mvsgn}}, XE[`D_LEN-1:0]}, + end else if(P.FPSIZES == 2) begin + mux2 #(1) sgnmux (XE[P.LEN1-1], XE[P.FLEN-1],FmtE, mvsgn); + mux2 #(P.FLEN) sgnextmux ({{P.FLEN-P.LEN1{mvsgn}}, XE[P.LEN1-1:0]}, XE, FmtE, SgnExtXE); + end else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin + mux4 #(1) sgnmux (XE[P.H_LEN-1], XE[P.S_LEN-1], XE[P.D_LEN-1], XE[P.LLEN-1], FmtE, mvsgn); + mux4 #(P.FLEN) fmulzeromux ({{P.FLEN-P.H_LEN{mvsgn}}, XE[P.H_LEN-1:0]}, + {{P.FLEN-P.S_LEN{mvsgn}}, XE[P.S_LEN-1:0]}, + {{P.FLEN-P.D_LEN{mvsgn}}, XE[P.D_LEN-1:0]}, XE, FmtE, SgnExtXE); end - if (`FLEN>`XLEN) - assign IntSrcXE = SgnExtXE[`XLEN-1:0]; + if (P.FLEN>P.XLEN) + assign IntSrcXE = SgnExtXE[P.XLEN-1:0]; else - assign IntSrcXE = {{`XLEN-`FLEN{mvsgn}}, SgnExtXE}; - mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); + assign IntSrcXE = {{P.XLEN-P.FLEN{mvsgn}}, SgnExtXE}; + mux3 #(P.XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); // E/M pipe registers // Need to stall during divsqrt iterations to avoid capturing bad flags from stale forwarded sources assign StallUnpackedM = StallM | (FDivBusyE & ~IFDivStartE | FDivDoneE); - flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM); - flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM); - flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM}); - flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); - flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); + flopenrc #(P.NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM); + flopenrc #(P.NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM); + flopenrc #(P.FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM}); + flopenrc #(P.XLEN) EMFpReg6 
(clk, reset, FlushM, ~StallM, FIntResE, FIntResM); + flopenrc #(P.FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); flopenr #(13) EMFpReg5 (clk, reset, ~StallUnpackedM, {XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, {XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); - flopenrc #(3*`NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); - flopenrc #($clog2(3*`NF+5)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, + flopenrc #(3*P.NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); + flopenrc #($clog2(3*P.NF+5)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM, {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE}, {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM}); - flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, + flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, {CeE, CvtShiftAmtE, CvtResSubnormUfE, CsE, IntZeroE, CvtLzcInE}, {CeM, CvtShiftAmtM, CvtResSubnormUfM, CsM, IntZeroM, CvtLzcInM}); - flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM); + flopenrc #(P.FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM); ////////////////////////////////////////////////////////////////////////////////////////// // Memory Stage: postprocessor and result muxes @@ -337,18 +335,18 @@ module fpu ( // FPU flag selection - to privileged mux2 #(5) FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM); - mux2 #(`FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM); + mux2 #(P.FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM); // M/W pipe registers - flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); - flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); - flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, 
FlushW, ~StallW, FIntDivResultM, FIntDivResultW); + flopenrc #(P.FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); + flopenrc #(P.XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); + flopenrc #(P.XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW); ////////////////////////////////////////////////////////////////////////////////////////// // Writeback Stage: result mux ////////////////////////////////////////////////////////////////////////////////////////// // select the result to be written to the FP register - mux2 #(`FLEN) FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW); + mux2 #(P.FLEN) FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW); endmodule // fpu diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 1ef96af0c..a3becb9be 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -315,7 +315,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( // floating point unit if (P.F_SUPPORTED) begin:fpu - fpu fpu( + fpu #(P) fpu( .clk, .reset, .FRM_REGW, // Rounding mode from CSR .InstrD, // instruction from IFU From 923c00b92812ee3b9b51eddeee526a7a44e74fd4 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 26 May 2023 13:56:51 -0500 Subject: [PATCH 15/20] I think I've solved the slow down issue. Parameters can't be mixed with cvw_t and other types. --- src/privileged/csrc.sv | 22 ++++++------ src/privileged/csri.sv | 11 +++--- src/privileged/csrm.sv | 78 +++++++++++++++++++++--------------------- src/privileged/csrs.sv | 28 +++++++-------- src/privileged/csru.sv | 9 ++--- 5 files changed, 75 insertions(+), 73 deletions(-) diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 7dddd9d35..e7e50a7d3 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -30,17 +30,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module csrc import cvw::*; #(parameter cvw_t P, - MHPMCOUNTERBASE = 12'hB00, - MTIME = 12'hB01, // this is a memory-mapped register; no such CSR exists, and access should fault - MHPMCOUNTERHBASE = 12'hB80, - MTIMEH = 12'hB81, // this is a memory-mapped register; no such CSR exists, and access should fault - MHPMEVENTBASE = 12'h320, - HPMCOUNTERBASE = 12'hC00, - HPMCOUNTERHBASE = 12'hC80, - TIME = 12'hC01, - TIMEH = 12'hC81 -) ( +module csrc import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, input logic StallE, StallM, input logic FlushM, @@ -73,6 +63,16 @@ module csrc import cvw::*; #(parameter cvw_t P, output logic IllegalCSRCAccessM ); + localparam MHPMCOUNTERBASE = 12'hB00; + localparam MTIME = 12'hB01; // this is a memory-mapped register; no such CSR exists, and access should fault + localparam MHPMCOUNTERHBASE = 12'hB80; + localparam MTIMEH = 12'hB81; // this is a memory-mapped register; no such CSR exists, and access should fault + localparam MHPMEVENTBASE = 12'h320; + localparam HPMCOUNTERBASE = 12'hC00; + localparam HPMCOUNTERHBASE = 12'hC80; + localparam TIME = 12'hC01; + localparam TIMEH = 12'hC81; + logic [4:0] CounterNumM; logic [P.XLEN-1:0] HPMCOUNTER_REGW[P.COUNTERS-1:0]; logic [P.XLEN-1:0] HPMCOUNTERH_REGW[P.COUNTERS-1:0]; diff --git a/src/privileged/csri.sv b/src/privileged/csri.sv index d50c65eee..7da8985d5 100644 --- a/src/privileged/csri.sv +++ b/src/privileged/csri.sv @@ -27,11 +27,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module csri import cvw::*; #(parameter cvw_t P, - MIE = 12'h304, - MIP = 12'h344, - SIE = 12'h104, - SIP = 12'h144) ( +module csri import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, input logic CSRMWriteM, CSRSWriteM, input logic [P.XLEN-1:0] CSRWriteValM, @@ -46,6 +42,11 @@ module csri import cvw::*; #(parameter cvw_t P, logic WriteMIPM, WriteMIEM, WriteSIPM, WriteSIEM; logic STIP; + localparam MIE = 12'h304; + localparam MIP = 12'h344; + localparam SIE = 12'h104; + localparam SIP = 12'h144; + // Interrupt Write Enables assign WriteMIPM = CSRMWriteM & (CSRAdrM == MIP); assign WriteMIEM = CSRMWriteM & (CSRAdrM == MIE); diff --git a/src/privileged/csrm.sv b/src/privileged/csrm.sv index ab9d41683..44cdc2c94 100644 --- a/src/privileged/csrm.sv +++ b/src/privileged/csrm.sv @@ -31,45 +31,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module csrm import cvw::*; #(parameter cvw_t P, - // Machine CSRs - MVENDORID = 12'hF11, - MARCHID = 12'hF12, - MIMPID = 12'hF13, - MHARTID = 12'hF14, - MCONFIGPTR = 12'hF15, - MSTATUS = 12'h300, - MISA_ADR = 12'h301, - MEDELEG = 12'h302, - MIDELEG = 12'h303, - MIE = 12'h304, - MTVEC = 12'h305, - MCOUNTEREN = 12'h306, - MSTATUSH = 12'h310, - MCOUNTINHIBIT = 12'h320, - MSCRATCH = 12'h340, - MEPC = 12'h341, - MCAUSE = 12'h342, - MTVAL = 12'h343, - MIP = 12'h344, - MTINST = 12'h34A, - PMPCFG0 = 12'h3A0, - // .. up to 15 more at consecutive addresses - PMPADDR0 = 12'h3B0, - // ... 
up to 63 more at consecutive addresses - TSELECT = 12'h7A0, - TDATA1 = 12'h7A1, - TDATA2 = 12'h7A2, - TDATA3 = 12'h7A3, - DCSR = 12'h7B0, - DPC = 12'h7B1, - DSCRATCH0 = 12'h7B2, - DSCRATCH1 = 12'h7B3, - // Constants - ZERO = {(P.XLEN){1'b0}}, - MEDELEG_MASK = 16'hB3FF, - MIDELEG_MASK = 12'h222 // we choose to not make machine interrupts delegable -) ( +module csrm import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, input logic UngatedCSRMWriteM, CSRMWriteM, MTrapM, input logic [11:0] CSRAdrM, @@ -94,6 +56,44 @@ module csrm import cvw::*; #(parameter cvw_t P, logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM; logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM; + // Machine CSRs + localparam MVENDORID = 12'hF11; + localparam MARCHID = 12'hF12; + localparam MIMPID = 12'hF13; + localparam MHARTID = 12'hF14; + localparam MCONFIGPTR = 12'hF15; + localparam MSTATUS = 12'h300; + localparam MISA_ADR = 12'h301; + localparam MEDELEG = 12'h302; + localparam MIDELEG = 12'h303; + localparam MIE = 12'h304; + localparam MTVEC = 12'h305; + localparam MCOUNTEREN = 12'h306; + localparam MSTATUSH = 12'h310; + localparam MCOUNTINHIBIT = 12'h320; + localparam MSCRATCH = 12'h340; + localparam MEPC = 12'h341; + localparam MCAUSE = 12'h342; + localparam MTVAL = 12'h343; + localparam MIP = 12'h344; + localparam MTINST = 12'h34A; + localparam PMPCFG0 = 12'h3A0; + // .. up to 15 more at consecutive addresses + localparam PMPADDR0 = 12'h3B0; + // ... 
up to 63 more at consecutive addresses + localparam TSELECT = 12'h7A0; + localparam TDATA1 = 12'h7A1; + localparam TDATA2 = 12'h7A2; + localparam TDATA3 = 12'h7A3; + localparam DCSR = 12'h7B0; + localparam DPC = 12'h7B1; + localparam DSCRATCH0 = 12'h7B2; + localparam DSCRATCH1 = 12'h7B3; + // Constants + localparam ZERO = {(P.XLEN){1'b0}}; + localparam MEDELEG_MASK = 16'hB3FF; + localparam MIDELEG_MASK = 12'h222; // we choose to not make machine interrupts delegable + // There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop genvar i; if (P.PMP_ENTRIES > 0) begin:pmp diff --git a/src/privileged/csrs.sv b/src/privileged/csrs.sv index 945a13c3b..9a8bb9d8e 100644 --- a/src/privileged/csrs.sv +++ b/src/privileged/csrs.sv @@ -28,20 +28,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module csrs import cvw::*; #(parameter cvw_t P, - // Supervisor CSRs - SSTATUS = 12'h100, - SIE = 12'h104, - STVEC = 12'h105, - SCOUNTEREN = 12'h106, - SSCRATCH = 12'h140, - SEPC = 12'h141, - SCAUSE = 12'h142, - STVAL = 12'h143, - SIP= 12'h144, - STIMECMP = 12'h14D, - STIMECMPH = 12'h15D, - SATP = 12'h180) ( +module csrs import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, input logic CSRSWriteM, STrapM, input logic [11:0] CSRAdrM, @@ -62,6 +49,19 @@ module csrs import cvw::*; #(parameter cvw_t P, output logic STimerInt ); + // Supervisor CSRs + localparam SSTATUS = 12'h100; + localparam SIE = 12'h104; + localparam STVEC = 12'h105; + localparam SCOUNTEREN = 12'h106; + localparam SSCRATCH = 12'h140; + localparam SEPC = 12'h141; + localparam SCAUSE = 12'h142; + localparam STVAL = 12'h143; + localparam SIP= 12'h144; + localparam STIMECMP = 12'h14D; + localparam STIMECMPH = 12'h15D; + localparam SATP = 12'h180; // Constants localparam ZERO = {(P.XLEN){1'b0}}; localparam SEDELEG_MASK = ~(ZERO | {{P.XLEN-3{1'b0}}, 3'b111} << 9); diff --git a/src/privileged/csru.sv 
b/src/privileged/csru.sv index 941b13f45..d394594ae 100644 --- a/src/privileged/csru.sv +++ b/src/privileged/csru.sv @@ -26,10 +26,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module csru import cvw::*; #(parameter cvw_t P, - FFLAGS = 12'h001, - FRM = 12'h002, - FCSR = 12'h003) ( +module csru import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, input logic InstrValidNotFlushedM, input logic CSRUWriteM, @@ -43,6 +40,10 @@ module csru import cvw::*; #(parameter cvw_t P, output logic IllegalCSRUAccessM ); + localparam FFLAGS = 12'h001; + localparam FRM = 12'h002; + localparam FCSR = 12'h003; + logic [4:0] FFLAGS_REGW; logic [2:0] NextFRMM; logic [4:0] NextFFLAGSM; From c76eb315bc528ad1cefa6f2504ddf8f1d4e0543c Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 26 May 2023 14:12:25 -0500 Subject: [PATCH 16/20] Parameterized fpu's unpack and fma using Lim's method. --- src/fpu/fhazard.sv | 2 - src/fpu/fma/fma.sv | 34 ++++--- src/fpu/fma/fmaadd.sv | 30 +++--- src/fpu/fma/fmaalign.sv | 30 +++--- src/fpu/fma/fmaexpadd.sv | 10 +- src/fpu/fma/fmalza.sv | 8 +- src/fpu/fma/fmamult.sv | 8 +- src/fpu/fma/fmasign.sv | 2 - src/fpu/fpu.sv | 6 +- src/fpu/fregfile.sv | 10 +- src/fpu/unpack.sv | 21 ++--- src/fpu/unpackinput.sv | 191 +++++++++++++++++++-------------------- 12 files changed, 166 insertions(+), 186 deletions(-) diff --git a/src/fpu/fhazard.sv b/src/fpu/fhazard.sv index dadbf6d4f..0877abd9a 100644 --- a/src/fpu/fhazard.sv +++ b/src/fpu/fhazard.sv @@ -26,8 +26,6 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module fhazard( input logic [4:0] Adr1D, Adr2D, Adr3D, // read data adresses input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses diff --git a/src/fpu/fma/fma.sv b/src/fpu/fma/fma.sv index 437c698ec..80f040171 100644 --- a/src/fpu/fma/fma.sv +++ b/src/fpu/fma/fma.sv @@ -26,22 +26,20 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fma( +module fma import cvw::*; #(parameter cvw_t P) ( input logic Xs, Ys, Zs, // input's signs - input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format - input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format + input logic [P.NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format + input logic [P.NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format input logic XZero, YZero, ZZero, // is the input zero input logic [2:0] OpCtrl, // operation control output logic ASticky, // sticky bit that is calculated during alignment - output logic [3*`NF+3:0] Sm, // the positive sum's significand + output logic [3*P.NF+3:0] Sm, // the positive sum's significand output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) output logic As, // the aligned addend's sign (modified Z sign for other opperations) output logic Ps, // the product's sign output logic Ss, // the sum's sign - output logic [`NE+1:0] Se, // the sum's exponent - output logic [$clog2(3*`NF+5)-1:0] SCnt // normalization shift count + output logic [P.NE+1:0] Se, // the sum's exponent + output logic [$clog2(3*P.NF+5)-1:0] SCnt // normalization shift count ); // OpCtrl: @@ -54,12 +52,12 @@ module fma( // 110 - add // 111 - sub - logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format - logic [3*`NF+3:0] Am; // addend aligned's mantissa for 
addition in U(NF+4.2NF) - logic [3*`NF+3:0] AmInv; // aligned addend's mantissa possibly inverted - logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf) + logic [2*P.NF+1:0] Pm; // the product's significand in U(2.2Nf) format + logic [3*P.NF+3:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF) + logic [3*P.NF+3:0] AmInv; // aligned addend's mantissa possibly inverted + logic [2*P.NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf) logic KillProd; // set the product to zero before addition if the product is too small to matter - logic [`NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign + logic [P.NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign /////////////////////////////////////////////////////////////////////////////// // Calculate the product @@ -71,10 +69,10 @@ module fma( // calculate the product's exponent - fmaexpadd expadd(.Xe, .Ye, .XZero, .YZero, .Pe); + fmaexpadd #(P) expadd(.Xe, .Ye, .XZero, .YZero, .Pe); // multiplication of the mantissa's - fmamult mult(.Xm, .Ym, .Pm); + fmamult #(P) mult(.Xm, .Ym, .Pm); // calculate the signs and take the opperation into account fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA); @@ -82,15 +80,15 @@ module fma( /////////////////////////////////////////////////////////////////////////////// // Alignment shifter /////////////////////////////////////////////////////////////////////////////// - fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd); + fmaalign #(P) align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd); // /////////////////////////////////////////////////////////////////////////////// // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, 
.Sm, .Se, .Ss); + fmaadd #(P) add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss); - fmalza #(3*`NF+4) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt); + fmalza #(3*P.NF+4, P.NF) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/src/fpu/fma/fmaadd.sv b/src/fpu/fma/fmaadd.sv index b8b61bd6e..52a2bf6cf 100644 --- a/src/fpu/fma/fmaadd.sv +++ b/src/fpu/fma/fmaadd.sv @@ -26,25 +26,23 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fmaadd( - input logic [3*`NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) - input logic [`NE-1:0] Ze, // exponent of Z +module fmaadd import cvw::*; #(parameter cvw_t P) ( + input logic [3*P.NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) + input logic [P.NE-1:0] Ze, // exponent of Z input logic Ps, // the product sign and the alligend addeded's sign (Modified Z sign for other opperations) - input logic [`NE+1:0] Pe, // product's exponet - input logic [2*`NF+1:0] Pm, // the product's mantissa + input logic [P.NE+1:0] Pe, // product's exponet + input logic [2*P.NF+1:0] Pm, // the product's mantissa input logic InvA, // invert the aligned addend input logic KillProd, // should the product be set to 0 input logic ASticky, // Alighed addend's sticky bit - output logic [3*`NF+3:0] AmInv, // aligned addend possibly inverted - output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed + output logic [3*P.NF+3:0] AmInv, // aligned addend possibly inverted + output logic [2*P.NF+1:0] PmKilled, // the product's mantissa possibly killed output logic Ss, // sum's sign - output logic [`NE+1:0] Se, // sum's exponent - output logic [3*`NF+3:0] Sm // the positive sum + output logic [P.NE+1:0] Se, // sum's exponent + output logic 
[3*P.NF+3:0] Sm // the positive sum ); - logic [3*`NF+3:0] PreSum, NegPreSum; // possibly negitive sum + logic [3*P.NF+3:0] PreSum, NegPreSum; // possibly negitive sum logic NegSum; // was the sum negitive /////////////////////////////////////////////////////////////////////////////// @@ -52,9 +50,9 @@ module fmaadd( /////////////////////////////////////////////////////////////////////////////// // Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition - assign AmInv = {3*`NF+4{InvA}}^Am; + assign AmInv = {3*P.NF+4{InvA}}^Am; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign PmKilled = {2*`NF+2{~KillProd}}&Pm; + assign PmKilled = {2*P.NF+2{~KillProd}}&Pm; // Do the addition // - calculate a positive and negitive sum in parallel // if there was a small negitive number killed in the alignment stage one needs to be subtracted from the sum @@ -63,8 +61,8 @@ module fmaadd( // addend - prod where product is killed (and not exactly zero) then don't add +1 from negation // ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA // in this case this result is only ever selected when InvA=1 so we can remove &InvA - assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*`NF+4{1'b0}}, (~ASticky|KillProd)&InvA}; - assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*`NF+2)'(0), ~ASticky|~KillProd, 1'b0}; + assign {NegSum, PreSum} = {{P.NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*P.NF+4{1'b0}}, (~ASticky|KillProd)&InvA}; + assign NegPreSum = Am + {{P.NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*P.NF+2)'(0), ~ASticky|~KillProd, 1'b0}; // Choose the positive sum and accompanying LZA result. assign Sm = NegSum ? NegPreSum : PreSum; diff --git a/src/fpu/fma/fmaalign.sv b/src/fpu/fma/fmaalign.sv index b51c2a7f1..06817d446 100644 --- a/src/fpu/fma/fmaalign.sv +++ b/src/fpu/fma/fmaalign.sv @@ -27,20 +27,18 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fmaalign( - input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format - input logic [`NF:0] Zm, // significand in U(0.NF) format] +module fmaalign import cvw::*; #(parameter cvw_t P) ( + input logic [P.NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format + input logic [P.NF:0] Zm, // significand in U(0.NF) format] input logic XZero, YZero, ZZero,// is the input zero - output logic [3*`NF+3:0] Am, // addend aligned for addition in U(NF+5.2NF+1) + output logic [3*P.NF+3:0] Am, // addend aligned for addition in U(NF+5.2NF+1) output logic ASticky, // Sticky bit calculated from the aliged addend output logic KillProd // should the product be set to zero ); - logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format - logic [4*`NF+3:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) - logic [4*`NF+3:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) + logic [P.NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format + logic [4*P.NF+3:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) + logic [4*P.NF+3:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) logic KillZ; // should the addend be killed /////////////////////////////////////////////////////////////////////////////// @@ -51,16 +49,16 @@ module fmaalign( // - negitive means Z is larger, so shift Z left // - positive means the product is larger, so shift Z right // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed - assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+2) - {2'b0, Ze}; + assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)} + (P.NE+2)'(P.NF+2) - {2'b0, Ze}; // Defualt Addition with only inital left 
shift // | 53'b0 | 106'b(product) | 1'b0 | // | addnend | - assign ZmPreshifted = {Zm,(3*`NF+3)'(0)}; + assign ZmPreshifted = {Zm,(3*P.NF+3)'(0)}; - assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; - assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(3)); + assign KillProd = (ACnt[P.NE+1]&~ZZero)|XZero|YZero; + assign KillZ = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(3)); always_comb begin // If the product is too small to effect the sum, kill the product @@ -68,7 +66,7 @@ module fmaalign( // | 53'b0 | 106'b(product) | 1'b0 | // | addnend | if (KillProd) begin - ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+1)'(0)}; + ZmShifted = {(P.NF+2)'(0), Zm, (2*P.NF+1)'(0)}; ASticky = ~(XZero|YZero); // If the addend is too small to effect the addition @@ -86,12 +84,12 @@ module fmaalign( // | addnend | end else begin ZmShifted = ZmPreshifted >> ACnt; - ASticky = |(ZmShifted[`NF-1:0]); + ASticky = |(ZmShifted[P.NF-1:0]); end end - assign Am = ZmShifted[4*`NF+3:`NF]; + assign Am = ZmShifted[4*P.NF+3:P.NF]; endmodule diff --git a/src/fpu/fma/fmaexpadd.sv b/src/fpu/fma/fmaexpadd.sv index 3c615274f..d6ef8844c 100644 --- a/src/fpu/fma/fmaexpadd.sv +++ b/src/fpu/fma/fmaexpadd.sv @@ -26,18 +26,16 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fmaexpadd( - input logic [`NE-1:0] Xe, Ye, // input's exponents +module fmaexpadd import cvw::*; #(parameter cvw_t P) ( + input logic [P.NE-1:0] Xe, Ye, // input's exponents input logic XZero, YZero, // are the inputs zero - output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2 + output logic [P.NE+1:0] Pe // product's exponent B^(1023)NE+2 ); logic PZero; // is the product zero? // kill the exponent if the product is zero - either X or Y is 0 assign PZero = XZero | YZero; - assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)}); + assign Pe = PZero ? 
'0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)}); endmodule diff --git a/src/fpu/fma/fmalza.sv b/src/fpu/fma/fmalza.sv index 59fb3fc3e..b7ccbef8e 100644 --- a/src/fpu/fma/fmalza.sv +++ b/src/fpu/fma/fmalza.sv @@ -27,11 +27,9 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fmalza #(WIDTH) ( +module fmalza #(WIDTH, NF) ( input logic [WIDTH-1:0] A, // addend - input logic [2*`NF+1:0] Pm, // product + input logic [2*NF+1:0] Pm, // product input logic Cin, // carry in input logic sub, // subtraction output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result @@ -42,7 +40,7 @@ module fmalza #(WIDTH) ( logic [WIDTH-1:0] P, G, K; // propagate, generate, kill for each column logic [WIDTH-1:0] Pp1, Gm1, Km1; // propagate shifted right by 1, generate/kill shifted left 1 - assign B = {{(`NF+1){1'b0}}, Pm, 1'b0}; // Zero extend product + assign B = {{(NF+1){1'b0}}, Pm, 1'b0}; // Zero extend product assign P = A^B; assign G = A&B; diff --git a/src/fpu/fma/fmamult.sv b/src/fpu/fma/fmamult.sv index 541ba3687..7d01a22d6 100644 --- a/src/fpu/fma/fmamult.sv +++ b/src/fpu/fma/fmamult.sv @@ -26,11 +26,9 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fmamult( - input logic [`NF:0] Xm, Ym, // x and y significand - output logic [2*`NF+1:0] Pm // product's significand +module fmamult import cvw::*; #(parameter cvw_t P) ( + input logic [P.NF:0] Xm, Ym, // x and y significand + output logic [2*P.NF+1:0] Pm // product's significand ); assign Pm = Xm * Ym; diff --git a/src/fpu/fma/fmasign.sv b/src/fpu/fma/fmasign.sv index a2308da7f..0091778ee 100644 --- a/src/fpu/fma/fmasign.sv +++ b/src/fpu/fma/fmasign.sv @@ -26,8 +26,6 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module fmasign( input logic [2:0] OpCtrl, // opperation contol input logic Xs, Ys, Zs, // sign of the inputs diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 91d2d5354..338aa78a6 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -175,7 +175,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E); // FP register file - fregfile fregfile (.clk, .reset, .we4(FRegWriteW), + fregfile #(P.FLEN) fregfile (.clk, .reset, .we4(FRegWriteW), .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW), .wd4(FResultW), .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); @@ -225,7 +225,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( mux3 #(P.FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE); // unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infifnity - unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), + unpack #(P) unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE), .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE), @@ -233,7 +233,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE), .XPostBox(XPostBoxE)); // fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub - fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), + fma #(P) fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .OpCtrl(OpCtrlE), .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE)); diff --git a/src/fpu/fregfile.sv b/src/fpu/fregfile.sv index 
69961a847..74d4d84e1 100644 --- a/src/fpu/fregfile.sv +++ b/src/fpu/fregfile.sv @@ -26,17 +26,15 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fregfile ( +module fregfile #(parameter FLEN) ( input logic clk, reset, input logic we4, // write enable input logic [4:0] a1, a2, a3, a4, // adresses - input logic [`FLEN-1:0] wd4, // write data - output logic [`FLEN-1:0] rd1, rd2, rd3 // read data + input logic [FLEN-1:0] wd4, // write data + output logic [FLEN-1:0] rd1, rd2, rd3 // read data ); - logic [`FLEN-1:0] rf[31:0]; + logic [FLEN-1:0] rf[31:0]; integer i; // three ported register file diff --git a/src/fpu/unpack.sv b/src/fpu/unpack.sv index d52b454ea..14e9a6f66 100644 --- a/src/fpu/unpack.sv +++ b/src/fpu/unpack.sv @@ -25,41 +25,40 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" -module unpack ( - input logic [`FLEN-1:0] X, Y, Z, // inputs from register file - input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half +module unpack import cvw::*; #(parameter cvw_t P) ( + input logic [P.FLEN-1:0] X, Y, Z, // inputs from register file + input logic [P.FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half input logic XEn, YEn, ZEn, // input enables output logic Xs, Ys, Zs, // sign bits of XYZ - output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) - output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) + output logic [P.NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) + output logic [P.NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) output logic XNaN, YNaN, ZNaN, // is XYZ a NaN output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN output logic XSubnorm, // is X subnormal output logic XZero, YZero, ZZero, // is XYZ zero output logic XInf, YInf, ZInf, // is XYZ infinity output logic XExpMax, // does X have the maximum exponent (NaN or Inf) - output logic [`FLEN-1:0] XPostBox // X after being properly NaN-boxed + output logic [P.FLEN-1:0] XPostBox // X after being properly NaN-boxed ); logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero logic XFracZero, YFracZero, ZFracZero; // is the fraction zero logic YExpMax, ZExpMax; // is the exponent all 1s - unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), + unpackinput #(P) unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero), .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero), .Subnorm(XSubnorm), .PostBox(XPostBox)); - 
unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn), + unpackinput #(P) unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn), .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero), .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero), .Subnorm(), .PostBox()); - unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn), + unpackinput #(P) unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn), .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero), .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero), .Subnorm(), .PostBox()); - endmodule \ No newline at end of file + endmodule diff --git a/src/fpu/unpackinput.sv b/src/fpu/unpackinput.sv index 4f7852455..6e4841504 100644 --- a/src/fpu/unpackinput.sv +++ b/src/fpu/unpackinput.sv @@ -25,15 +25,14 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" -module unpackinput ( - input logic [`FLEN-1:0] In, // inputs from register file +module unpackinput import cvw::*; #(parameter cvw_t P) ( + input logic [P.FLEN-1:0] In, // inputs from register file input logic En, // enable the input - input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half + input logic [P.FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half output logic Sgn, // sign bits of the number - output logic [`NE-1:0] Exp, // exponent of the number (converted to largest supported precision) - output logic [`NF:0] Man, // mantissa of the number (converted to largest supported precision) + output logic [P.NE-1:0] Exp, // exponent of the number (converted to largest supported precision) + output logic [P.NF:0] Man, // mantissa of the number (converted to largest supported precision) output logic NaN, // is the 
number a NaN output logic SNaN, // is the number a signaling NaN output logic Zero, // is the number zero @@ -42,29 +41,29 @@ module unpackinput ( output logic FracZero, // is the fraction zero output logic ExpMax, // does In have the maximum exponent (NaN or Inf) output logic Subnorm, // is the number subnormal - output logic [`FLEN-1:0] PostBox // Number reboxed correctly as a NaN + output logic [P.FLEN-1:0] PostBox // Number reboxed correctly as a NaN ); - logic [`NF-1:0] Frac; // Fraction of XYZ + logic [P.NF-1:0] Frac; // Fraction of XYZ logic BadNaNBox; // incorrectly NaN Boxed - if (`FPSIZES == 1) begin // if there is only one floating point format supported + if (P.FPSIZES == 1) begin // if there is only one floating point format supported assign BadNaNBox = 0; - assign Sgn = In[`FLEN-1]; // sign bit - assign Frac = In[`NF-1:0]; // fraction (no assumed 1) - assign ExpNonZero = |In[`FLEN-2:`NF]; // is the exponent non-zero - assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; // exponent. subnormal numbers have effective biased exponent of 1 - assign ExpMax = &In[`FLEN-2:`NF]; // is the exponent all 1's + assign Sgn = In[P.FLEN-1]; // sign bit + assign Frac = In[P.NF-1:0]; // fraction (no assumed 1) + assign ExpNonZero = |In[P.FLEN-2:P.NF]; // is the exponent non-zero + assign Exp = {In[P.FLEN-2:P.NF+1], In[P.NF]|~ExpNonZero}; // exponent. 
subnormal numbers have effective biased exponent of 1 + assign ExpMax = &In[P.FLEN-2:P.NF]; // is the exponent all 1's assign PostBox = In; - end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported + end else if (P.FPSIZES == 2) begin // if there are 2 floating point formats supported // largest format | smaller format //---------------------------------- - // `FLEN | `LEN1 length of floating point number - // `NE | `NE1 length of exponent - // `NF | `NF1 length of fraction - // `BIAS | `BIAS1 exponent's bias value - // `FMT | `FMT1 precision's format value - Q=11 D=01 Sticky=00 H=10 + // P.FLEN | P.LEN1 length of floating point number + // P.NE | P.NE1 length of exponent + // P.NF | P.NF1 length of fraction + // P.BIAS | P.BIAS1 exponent's bias value + // P.FMT | P.FMT1 precision's format value - Q=11 D=01 Sticky=00 H=10 // Possible combinantions specified by spec: // double and single @@ -76,22 +75,22 @@ module unpackinput ( // quad and half // double and half - assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing + assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing always_comb if (BadNaNBox) begin -// PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, In[`LEN1-`NE1-3:0]}; - PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, {(`LEN1-`NE1-2){1'b0}}}; +// PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]}; + PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}}; end else PostBox = In; // choose sign bit depending on format - 1=larger precsion 0=smaller precision - assign Sgn = Fmt ? In[`FLEN-1] : (BadNaNBox ? 0 : In[`LEN1-1]); // improperly boxed NaNs are treated as positive + assign Sgn = Fmt ? In[P.FLEN-1] : (BadNaNBox ? 0 : In[P.LEN1-1]); // improperly boxed NaNs are treated as positive // extract the fraction, add trailing zeroes to the mantissa if nessisary - assign Frac = Fmt ? 
In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; + assign Frac = Fmt ? In[P.NF-1:0] : {In[P.NF1-1:0], (P.NF-P.NF1)'(0)}; // is the exponent non-zero - assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; + assign ExpNonZero = Fmt ? |In[P.FLEN-2:P.NF] : |In[P.LEN1-2:P.NF1]; // example double to single conversion: // 1023 = 0011 1111 1111 @@ -103,21 +102,21 @@ module unpackinput ( // extract the exponent, converting the smaller exponent into the larger precision if nessisary // - if the original precision had a Subnormal number convert the exponent value 1 - assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; + assign Exp = Fmt ? {In[P.FLEN-2:P.NF+1], In[P.NF]|~ExpNonZero} : {In[P.LEN1-2], {P.NE-P.NE1{~In[P.LEN1-2]}}, In[P.LEN1-3:P.NF1+1], In[P.NF1]|~ExpNonZero}; // is the exponent all 1's - assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1]; + assign ExpMax = Fmt ? &In[P.FLEN-2:P.NF] : &In[P.LEN1-2:P.NF1]; - end else if (`FPSIZES == 3) begin // three floating point precsions supported + end else if (P.FPSIZES == 3) begin // three floating point precsions supported // largest format | larger format | smallest format //--------------------------------------------------- - // `FLEN | `LEN1 | `LEN2 length of floating point number - // `NE | `NE1 | `NE2 length of exponent - // `NF | `NF1 | `NF2 length of fraction - // `BIAS | `BIAS1 | `BIAS2 exponent's bias value - // `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 Sticky=00 H=10 + // P.FLEN | P.LEN1 | P.LEN2 length of floating point number + // P.NE | P.NE1 | P.NE2 length of exponent + // P.NF | P.NF1 | P.NF2 length of fraction + // P.BIAS | P.BIAS1 | P.BIAS2 exponent's bias value + // P.FMT | P.FMT1 | P.FMT2 precision's format value - Q=11 D=01 Sticky=00 H=10 // Possible combinantions specified by spec: // quad and double and single @@ -130,20 +129,20 @@ module unpackinput ( // Check NaN boxing 
always_comb case (Fmt) - `FMT: BadNaNBox = 0; - `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1]; - `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2]; + P.FMT: BadNaNBox = 0; + P.FMT1: BadNaNBox = ~&In[P.FLEN-1:P.LEN1]; + P.FMT2: BadNaNBox = ~&In[P.FLEN-1:P.LEN2]; default: BadNaNBox = 1'bx; endcase always_comb if (BadNaNBox) begin case (Fmt) - `FMT: PostBox = In; -// `FMT1: PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, In[`LEN1-`NE1-3:0]}; -// `FMT2: PostBox = {{(`FLEN-`LEN2){1'b1}}, 1'b1, {(`NE2+1){1'b1}}, In[`LEN2-`NE2-3:0]}; - `FMT1: PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, {(`LEN1-`NE1-2){1'b0}}}; - `FMT2: PostBox = {{(`FLEN-`LEN2){1'b1}}, 1'b1, {(`NE2+1){1'b1}}, {(`LEN2-`NE2-2){1'b0}}}; + P.FMT: PostBox = In; +// P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]}; +// P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, In[P.LEN2-P.NE2-3:0]}; + P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}}; + P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}}; default: PostBox = 'x; endcase end else @@ -154,27 +153,27 @@ module unpackinput ( if (BadNaNBox) Sgn = 0; // improperly boxed NaNs are treated as positive else case (Fmt) - `FMT: Sgn = In[`FLEN-1]; - `FMT1: Sgn = In[`LEN1-1]; - `FMT2: Sgn = In[`LEN2-1]; + P.FMT: Sgn = In[P.FLEN-1]; + P.FMT1: Sgn = In[P.LEN1-1]; + P.FMT2: Sgn = In[P.LEN2-1]; default: Sgn = 1'bx; endcase // extract the fraction always_comb case (Fmt) - `FMT: Frac = In[`NF-1:0]; - `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)}; - `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)}; - default: Frac = {`NF{1'bx}}; + P.FMT: Frac = In[P.NF-1:0]; + P.FMT1: Frac = {In[P.NF1-1:0], (P.NF-P.NF1)'(0)}; + P.FMT2: Frac = {In[P.NF2-1:0], (P.NF-P.NF2)'(0)}; + default: Frac = {P.NF{1'bx}}; endcase // is the exponent non-zero always_comb case (Fmt) - `FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN 
- ie quad or double) - `FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single) - `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half) + P.FMT: ExpNonZero = |In[P.FLEN-2:P.NF]; // if input is largest precision (P.FLEN - ie quad or double) + P.FMT1: ExpNonZero = |In[P.LEN1-2:P.NF1]; // if input is larger precsion (P.LEN1 - double or single) + P.FMT2: ExpNonZero = |In[P.LEN2-2:P.NF2]; // if input is smallest precsion (P.LEN2 - single or half) default: ExpNonZero = 1'bx; endcase @@ -189,50 +188,50 @@ module unpackinput ( // convert the larger precision's exponent to use the largest precision's bias always_comb case (Fmt) - `FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; - `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; - `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; - default: Exp = {`NE{1'bx}}; + P.FMT: Exp = {In[P.FLEN-2:P.NF+1], In[P.NF]|~ExpNonZero}; + P.FMT1: Exp = {In[P.LEN1-2], {P.NE-P.NE1{~In[P.LEN1-2]}}, In[P.LEN1-3:P.NF1+1], In[P.NF1]|~ExpNonZero}; + P.FMT2: Exp = {In[P.LEN2-2], {P.NE-P.NE2{~In[P.LEN2-2]}}, In[P.LEN2-3:P.NF2+1], In[P.NF2]|~ExpNonZero}; + default: Exp = {P.NE{1'bx}}; endcase // is the exponent all 1's always_comb case (Fmt) - `FMT: ExpMax = &In[`FLEN-2:`NF]; - `FMT1: ExpMax = &In[`LEN1-2:`NF1]; - `FMT2: ExpMax = &In[`LEN2-2:`NF2]; + P.FMT: ExpMax = &In[P.FLEN-2:P.NF]; + P.FMT1: ExpMax = &In[P.LEN1-2:P.NF1]; + P.FMT2: ExpMax = &In[P.LEN2-2:P.NF2]; default: ExpMax = 1'bx; endcase - end else if (`FPSIZES == 4) begin // if all precsisons are supported - quad, double, single, and half + end else if (P.FPSIZES == 4) begin // if all precsisons are supported - quad, double, single, and half // quad | double | single | half //------------------------------------------------------------------- - // `Q_LEN | `D_LEN | `S_LEN | `H_LEN length of floating point number - // 
`Q_NE | `D_NE | `S_NE | `H_NE length of exponent - // `Q_NF | `D_NF | `S_NF | `H_NF length of fraction - // `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value - // `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 Sticky=00 H=10 + // P.Q_LEN | P.D_LEN | P.S_LEN | P.H_LEN length of floating point number + // P.Q_NE | P.D_NE | P.S_NE | P.H_NE length of exponent + // P.Q_NF | P.D_NF | P.S_NF | P.H_NF length of fraction + // P.Q_BIAS | P.D_BIAS | P.S_BIAS | P.H_BIAS exponent's bias value + // P.Q_FMT | P.D_FMT | P.S_FMT | P.H_FMT precision's format value - Q=11 D=01 Sticky=00 H=10 // Check NaN boxing always_comb case (Fmt) 2'b11: BadNaNBox = 0; - 2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN]; - 2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN]; - 2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN]; + 2'b01: BadNaNBox = ~&In[P.Q_LEN-1:P.D_LEN]; + 2'b00: BadNaNBox = ~&In[P.Q_LEN-1:P.S_LEN]; + 2'b10: BadNaNBox = ~&In[P.Q_LEN-1:P.H_LEN]; endcase always_comb if (BadNaNBox) begin case (Fmt) 2'b11: PostBox = In; -// 2'b01: PostBox = {{(`Q_LEN-`D_LEN){1'b1}}, 1'b1, {(`D_NE+1){1'b1}}, In[`D_LEN-`D_NE-3:0]}; -// 2'b00: PostBox = {{(`Q_LEN-`S_LEN){1'b1}}, 1'b1, {(`S_NE+1){1'b1}}, In[`S_LEN-`S_NE-3:0]}; -// 2'b10: PostBox = {{(`Q_LEN-`H_LEN){1'b1}}, 1'b1, {(`H_NE+1){1'b1}}, In[`H_LEN-`H_NE-3:0]}; - 2'b01: PostBox = {{(`Q_LEN-`D_LEN){1'b1}}, 1'b1, {(`D_NE+1){1'b1}}, {(`D_LEN-`D_NE-2){1'b0}}}; - 2'b00: PostBox = {{(`Q_LEN-`S_LEN){1'b1}}, 1'b1, {(`S_NE+1){1'b1}}, {(`S_LEN-`S_NE-2){1'b0}}}; - 2'b10: PostBox = {{(`Q_LEN-`H_LEN){1'b1}}, 1'b1, {(`H_NE+1){1'b1}}, {(`H_LEN-`H_NE-2){1'b0}}}; +// 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, In[P.D_LEN-P.D_NE-3:0]}; +// 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, In[P.S_LEN-P.S_NE-3:0]}; +// 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, In[P.H_LEN-P.H_NE-3:0]}; + 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}}; 
+ 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}}; + 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}}; endcase end else PostBox = In; @@ -242,29 +241,29 @@ module unpackinput ( if (BadNaNBox) Sgn = 0; // improperly boxed NaNs are treated as positive else case (Fmt) - 2'b11: Sgn = In[`Q_LEN-1]; - 2'b01: Sgn = In[`D_LEN-1]; - 2'b00: Sgn = In[`S_LEN-1]; - 2'b10: Sgn = In[`H_LEN-1]; + 2'b11: Sgn = In[P.Q_LEN-1]; + 2'b01: Sgn = In[P.D_LEN-1]; + 2'b00: Sgn = In[P.S_LEN-1]; + 2'b10: Sgn = In[P.H_LEN-1]; endcase // extract the fraction always_comb case (Fmt) - 2'b11: Frac = In[`Q_NF-1:0]; - 2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; - 2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; - 2'b10: Frac = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; + 2'b11: Frac = In[P.Q_NF-1:0]; + 2'b01: Frac = {In[P.D_NF-1:0], (P.Q_NF-P.D_NF)'(0)}; + 2'b00: Frac = {In[P.S_NF-1:0], (P.Q_NF-P.S_NF)'(0)}; + 2'b10: Frac = {In[P.H_NF-1:0], (P.Q_NF-P.H_NF)'(0)}; endcase // is the exponent non-zero always_comb case (Fmt) - 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF]; - 2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF]; - 2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; - 2'b10: ExpNonZero = |In[`H_LEN-2:`H_NF]; + 2'b11: ExpNonZero = |In[P.Q_LEN-2:P.Q_NF]; + 2'b01: ExpNonZero = |In[P.D_LEN-2:P.D_NF]; + 2'b00: ExpNonZero = |In[P.S_LEN-2:P.S_NF]; + 2'b10: ExpNonZero = |In[P.H_LEN-2:P.H_NF]; endcase @@ -280,20 +279,20 @@ module unpackinput ( // 1 is added to the exponent if the input is zero or subnormal always_comb case (Fmt) - 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero}; - 2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero}; - 2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero}; - 2'b10: Exp = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero}; + 2'b11: Exp = 
{In[P.Q_LEN-2:P.Q_NF+1], In[P.Q_NF]|~ExpNonZero}; + 2'b01: Exp = {In[P.D_LEN-2], {P.Q_NE-P.D_NE{~In[P.D_LEN-2]}}, In[P.D_LEN-3:P.D_NF+1], In[P.D_NF]|~ExpNonZero}; + 2'b00: Exp = {In[P.S_LEN-2], {P.Q_NE-P.S_NE{~In[P.S_LEN-2]}}, In[P.S_LEN-3:P.S_NF+1], In[P.S_NF]|~ExpNonZero}; + 2'b10: Exp = {In[P.H_LEN-2], {P.Q_NE-P.H_NE{~In[P.H_LEN-2]}}, In[P.H_LEN-3:P.H_NF+1], In[P.H_NF]|~ExpNonZero}; endcase // is the exponent all 1's always_comb case (Fmt) - 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF]; - 2'b01: ExpMax = &In[`D_LEN-2:`D_NF]; - 2'b00: ExpMax = &In[`S_LEN-2:`S_NF]; - 2'b10: ExpMax = &In[`H_LEN-2:`H_NF]; + 2'b11: ExpMax = &In[P.Q_LEN-2:P.Q_NF]; + 2'b01: ExpMax = &In[P.D_LEN-2:P.D_NF]; + 2'b00: ExpMax = &In[P.S_LEN-2:P.S_NF]; + 2'b10: ExpMax = &In[P.H_LEN-2:P.H_NF]; endcase end @@ -302,9 +301,9 @@ module unpackinput ( assign FracZero = ~|Frac & ~BadNaNBox; // is the fraction zero? assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if Subnormal or zero) to create the significand assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN? - assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN? + assign SNaN = NaN&~Frac[P.NF-1]&~BadNaNBox; // is the input a singnaling NaN? assign Inf = ExpMax & FracZero & En; // is the input infinity? assign Zero = ~ExpNonZero & FracZero; // is the input zero? assign Subnorm = ~ExpNonZero & ~FracZero & ~BadNaNBox; // is the input subnormal -endmodule \ No newline at end of file +endmodule From ef2bb7df93c9df5c1dbe5b4afeb0337c7dca255f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 26 May 2023 14:25:14 -0500 Subject: [PATCH 17/20] fdiv is now parameterized using Lim's method. 
--- src/fpu/fdivsqrt/fdivsqrt.sv | 42 +++++----- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 50 ++++++------ src/fpu/fdivsqrt/fdivsqrtexpcalc.sv | 52 ++++++------ src/fpu/fdivsqrt/fdivsqrtfgen2.sv | 10 +-- src/fpu/fdivsqrt/fdivsqrtfgen4.sv | 12 ++- src/fpu/fdivsqrt/fdivsqrtfsm.sv | 14 ++-- src/fpu/fdivsqrt/fdivsqrtiter.sv | 84 ++++++++++---------- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 82 ++++++++++--------- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 114 +++++++++++++-------------- src/fpu/fdivsqrt/fdivsqrtqsel2.sv | 2 - src/fpu/fdivsqrt/fdivsqrtqsel4.sv | 2 - src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv | 2 - src/fpu/fdivsqrt/fdivsqrtstage2.sv | 37 +++++---- src/fpu/fdivsqrt/fdivsqrtstage4.sv | 42 +++++----- src/fpu/fdivsqrt/fdivsqrtuotfc2.sv | 14 ++-- src/fpu/fdivsqrt/fdivsqrtuotfc4.sv | 12 ++- src/fpu/fpu.sv | 2 +- 17 files changed, 271 insertions(+), 302 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index f7a443639..1a1b893e0 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -26,15 +26,13 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fdivsqrt( +module fdivsqrt import cvw::*; #(parameter cvw_t P) ( input logic clk, input logic reset, - input logic [`FMTBITS-1:0] FmtE, + input logic [P.FMTBITS-1:0] FmtE, input logic XsE, - input logic [`NF:0] XmE, YmE, - input logic [`NE-1:0] XeE, YeE, + input logic [P.NF:0] XmE, YmE, + input logic [P.NE-1:0] XeE, YeE, input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, @@ -42,39 +40,39 @@ module fdivsqrt( input logic StallM, input logic FlushE, input logic SqrtE, SqrtM, - input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B + input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic [2:0] Funct3E, Funct3M, input logic IntDivE, W64E, output logic DivStickyM, output logic FDivBusyE, IFDivStartE, FDivDoneE, - output logic [`NE+1:0] QeM, - output logic [`DIVb:0] QmM, - output logic [`XLEN-1:0] FIntDivResultM + output logic [P.NE+1:0] QeM, + output logic [P.DIVb:0] QmM, + output logic [P.XLEN-1:0] FIntDivResultM ); // Floating-point division and square root module, with optional integer division and remainder // Computes X/Y, sqrt(X), A/B, or A%B - logic [`DIVb+3:0] WS, WC; // Partial remainder components - logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend) - logic [`DIVb+3:0] D; // Iterator Divisor - logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values - logic [`DIVb+1:0] FirstC; // Step tracker + logic [P.DIVb+3:0] WS, WC; // Partial remainder components + logic [P.DIVb+3:0] X; // Iterator Initial Value (from dividend) + logic [P.DIVb+3:0] D; // Iterator Divisor + logic [P.DIVb:0] FirstU, FirstUM; // Intermediate result values + logic [P.DIVb+1:0] FirstC; // Step tracker logic Firstun; // 
Quotient selection logic WZeroE; // Early termination flag - logic [`DURLEN-1:0] CyclesE; // FSM cycles + logic [P.DURLEN-1:0] CyclesE; // FSM cycles logic SpecialCaseM; // Divide by zero, square root of negative, etc. logic DivStartE; // Enable signal for flops during stall // Integer div/rem signals logic BZeroM; // Denominator is zero logic IntDivM; // Integer operation - logic [`DIVBLEN:0] nM, mM; // Shift amounts + logic [P.DIVBLEN:0] nM, mM; // Shift amounts logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor - logic [`XLEN-1:0] AM; // Original Numerator for postprocessor + logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor logic ISpecialCaseE; // Integer div/remainder special cases - fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor + fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE, // Int-specific @@ -82,18 +80,18 @@ module fdivsqrt( .BZeroM, .nM, .mM, .AM, .IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM); - fdivsqrtfsm fdivsqrtfsm( // FSM + fdivsqrtfsm #(P) fdivsqrtfsm( // FSM .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE, // Int-specific .IDivStartE, .ISpecialCaseE, .IntDivE); - fdivsqrtiter fdivsqrtiter( // CSA Iterator + fdivsqrtiter #(P) fdivsqrtiter( // CSA Iterator .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC)); - fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor + fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, .QmM, .WZeroE, .DivStickyM, diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index 2e17cc25b..ea89ce7c6 100644 --- 
a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -26,51 +26,49 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fdivsqrtcycles( - input logic [`FMTBITS-1:0] FmtE, +module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( + input logic [P.FMTBITS-1:0] FmtE, input logic SqrtE, input logic IntDivE, - input logic [`DIVBLEN:0] nE, - output logic [`DURLEN-1:0] CyclesE + input logic [P.DIVBLEN:0] nE, + output logic [P.DURLEN-1:0] CyclesE ); - logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits - // DIVN = `NF+3 + logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits + // DIVN = P.NF+3 // NS = NF + 1 // N = NS or NS+2 for div/sqrt. /* verilator lint_off WIDTH */ - if (`FPSIZES == 1) - assign Nf = `NF; - else if (`FPSIZES == 2) + if (P.FPSIZES == 1) + assign Nf = P.NF; + else if (P.FPSIZES == 2) always_comb case (FmtE) - 1'b0: Nf = `NF1; - 1'b1: Nf = `NF; + 1'b0: Nf = P.NF1; + 1'b1: Nf = P.NF; endcase - else if (`FPSIZES == 3) + else if (P.FPSIZES == 3) always_comb case (FmtE) - `FMT: Nf = `NF; - `FMT1: Nf = `NF1; - `FMT2: Nf = `NF2; + P.FMT: Nf = P.NF; + P.FMT1: Nf = P.NF1; + P.FMT2: Nf = P.NF2; endcase - else if (`FPSIZES == 4) + else if (P.FPSIZES == 4) always_comb case(FmtE) - `S_FMT: Nf = `S_NF; - `D_FMT: Nf = `D_NF; - `H_FMT: Nf = `H_NF; - `Q_FMT: Nf = `Q_NF; + P.S_FMT: Nf = P.S_NF; + P.D_FMT: Nf = P.D_NF; + P.H_FMT: Nf = P.H_NF; + P.Q_FMT: Nf = P.Q_NF; endcase always_comb begin if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 - else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (`IDIV_ON_FPU) CyclesE = IntDivE ? 
((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); - else CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs + if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES); + else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES); end /* verilator lint_on WIDTH */ -endmodule \ No newline at end of file +endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv index aa8ae051d..adc1d6bf5 100644 --- a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv @@ -26,49 +26,47 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fdivsqrtexpcalc( - input logic [`FMTBITS-1:0] Fmt, - input logic [`NE-1:0] Xe, Ye, +module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) ( + input logic [P.FMTBITS-1:0] Fmt, + input logic [P.NE-1:0] Xe, Ye, input logic Sqrt, input logic XZero, - input logic [`DIVBLEN:0] ell, m, - output logic [`NE+1:0] Qe + input logic [P.DIVBLEN:0] ell, m, + output logic [P.NE+1:0] Qe ); - logic [`NE-2:0] Bias; - logic [`NE+1:0] SXExp; - logic [`NE+1:0] SExp; - logic [`NE+1:0] DExp; + logic [P.NE-2:0] Bias; + logic [P.NE+1:0] SXExp; + logic [P.NE+1:0] SExp; + logic [P.NE+1:0] DExp; - if (`FPSIZES == 1) begin - assign Bias = (`NE-1)'(`BIAS); + if (P.FPSIZES == 1) begin + assign Bias = (P.NE-1)'(P.BIAS); - end else if (`FPSIZES == 2) begin - assign Bias = Fmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); + end else if (P.FPSIZES == 2) begin + assign Bias = Fmt ? 
(P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1); - end else if (`FPSIZES == 3) begin + end else if (P.FPSIZES == 3) begin always_comb case (Fmt) - `FMT: Bias = (`NE-1)'(`BIAS); - `FMT1: Bias = (`NE-1)'(`BIAS1); - `FMT2: Bias = (`NE-1)'(`BIAS2); + P.FMT: Bias = (P.NE-1)'(P.BIAS); + P.FMT1: Bias = (P.NE-1)'(P.BIAS1); + P.FMT2: Bias = (P.NE-1)'(P.BIAS2); default: Bias = 'x; endcase - end else if (`FPSIZES == 4) begin + end else if (P.FPSIZES == 4) begin always_comb case (Fmt) - 2'h3: Bias = (`NE-1)'(`Q_BIAS); - 2'h1: Bias = (`NE-1)'(`D_BIAS); - 2'h0: Bias = (`NE-1)'(`S_BIAS); - 2'h2: Bias = (`NE-1)'(`H_BIAS); + 2'h3: Bias = (P.NE-1)'(P.Q_BIAS); + 2'h1: Bias = (P.NE-1)'(P.D_BIAS); + 2'h0: Bias = (P.NE-1)'(P.S_BIAS); + 2'h2: Bias = (P.NE-1)'(P.H_BIAS); endcase end - assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS); - assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias}; + assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS); + assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias}; // correct exponent for subnormal input's normalization shifts - assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}); + assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias}); assign Qe = Sqrt ? SExp : DExp; endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtfgen2.sv b/src/fpu/fdivsqrt/fdivsqrtfgen2.sv index 250fb4fbd..73afeb527 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfgen2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfgen2.sv @@ -26,14 +26,12 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fdivsqrtfgen2 ( +module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) ( input logic up, uz, - input logic [`DIVb+3:0] C, U, UM, - output logic [`DIVb+3:0] F + input logic [P.DIVb+3:0] C, U, UM, + output logic [P.DIVb+3:0] F ); - logic [`DIVb+3:0] FP, FN, FZ; + logic [P.DIVb+3:0] FP, FN, FZ; // Generate for both positive and negative bits assign FP = ~(U << 1) & C; diff --git a/src/fpu/fdivsqrt/fdivsqrtfgen4.sv b/src/fpu/fdivsqrt/fdivsqrtfgen4.sv index a07e504f4..e0f19957e 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfgen4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfgen4.sv @@ -26,14 +26,12 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fdivsqrtfgen4 ( +module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) ( input logic [3:0] udigit, - input logic [`DIVb+3:0] C, U, UM, - output logic [`DIVb+3:0] F + input logic [P.DIVb+3:0] C, U, UM, + output logic [P.DIVb+3:0] F ); - logic [`DIVb+3:0] F2, F1, F0, FN1, FN2; + logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2; // Generate for both positive and negative bits assign F2 = (~U << 2) & (C << 2); @@ -49,4 +47,4 @@ module fdivsqrtfgen4 ( else if (udigit[1]) F = FN1; else if (udigit[0]) F = FN2; else F = F0; -endmodule \ No newline at end of file +endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index ba0758ee6..a10c9f6c9 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -26,9 +26,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fdivsqrtfsm( +module fdivsqrtfsm import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, input logic XInfE, YInfE, input logic XZeroE, YZeroE, @@ -39,7 +37,7 @@ module fdivsqrtfsm( input logic StallM, FlushE, input logic IntDivE, input logic ISpecialCaseE, - input logic [`DURLEN-1:0] CyclesE, + input logic [P.DURLEN-1:0] CyclesE, output logic IFDivStartE, output logic FDivBusyE, FDivDoneE, output logic SpecialCaseM @@ -49,16 +47,16 @@ module fdivsqrtfsm( statetype state; logic SpecialCaseE, FSpecialCaseE; - logic [`DURLEN-1:0] step; + logic [P.DURLEN-1:0] step; // FDivStartE and IDivStartE come from fctrl, reflecitng the start of floating-point and possibly integer division - assign IFDivStartE = (FDivStartE | (IDivStartE & `IDIV_ON_FPU)) & (state == IDLE) & ~StallM; + assign IFDivStartE = (FDivStartE | (IDivStartE & P.IDIV_ON_FPU)) & (state == IDLE) & ~StallM; assign FDivDoneE = (state == DONE); assign FDivBusyE = (state == BUSY) | IFDivStartE; // terminate immediately on special cases assign FSpecialCaseE = XZeroE | | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE; - if (`IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE; + if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE; else assign SpecialCaseE = FSpecialCaseE; flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc @@ -78,4 +76,4 @@ module fdivsqrtfsm( end end -endmodule \ No newline at end of file +endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index aeb4bcc4d..0d9600ad5 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -26,38 +26,36 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fdivsqrtiter( +module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( input logic clk, input logic IFDivStartE, input logic FDivBusyE, input logic SqrtE, - input logic [`DIVb+3:0] X, D, - output logic [`DIVb:0] FirstU, FirstUM, - output logic [`DIVb+1:0] FirstC, + input logic [P.DIVb+3:0] X, D, + output logic [P.DIVb:0] FirstU, FirstUM, + output logic [P.DIVb+1:0] FirstC, output logic Firstun, - output logic [`DIVb+3:0] FirstWS, FirstWC + output logic [P.DIVb+3:0] FirstWS, FirstWC ); /* verilator lint_off UNOPTFLAT */ - logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b - logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b - logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b - logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b - logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b - logic [`DIVb:0] UM[`DIVCOPIES:0]; // U1.b - logic [`DIVb:0] UNext[`DIVCOPIES-1:0]; // U1.b - logic [`DIVb:0] UMNext[`DIVCOPIES-1:0]; // U1.b - logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b - logic [`DIVb+1:0] initC; // Q2.b - logic [`DIVCOPIES-1:0] un; + logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.b + logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.b + logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.b + logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.b + logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.b + logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.b + logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.b + logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.b + logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.b + logic [P.DIVb+1:0] initC; // Q2.b + logic [P.DIVCOPIES-1:0] un; - logic [`DIVb+3:0] WSN, WCN; // Q4.b - logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b - logic [`DIVb+1:0] NextC; - logic [`DIVb:0] UMux, UMMux; - logic [`DIVb:0] initU, initUM; + logic [P.DIVb+3:0] WSN, WCN; // Q4.b + logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.b + logic [P.DIVb+1:0] NextC; + logic [P.DIVb:0] UMux, UMMux; + logic 
[P.DIVb:0] initU, initUM; /* verilator lint_on UNOPTFLAT */ // Top Muxes and Registers @@ -66,36 +64,36 @@ module fdivsqrtiter( // are fed back for the next iteration. // Residual WS/SC registers/initializaiton mux - mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN); - mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN); - flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]); - flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]); + mux2 #(P.DIVb+4) wsmux(WS[P.DIVCOPIES], X, IFDivStartE, WSN); + mux2 #(P.DIVb+4) wcmux(WC[P.DIVCOPIES], '0, IFDivStartE, WCN); + flopen #(P.DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]); + flopen #(P.DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]); // UOTFC Result U and UM registers/initialization mux // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 otherwise - assign initU = {SqrtE, {(`DIVb){1'b0}}}; - assign initUM = {~SqrtE, {(`DIVb){1'b0}}}; - mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux); - mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux); - flopen #(`DIVb+1) UReg(clk, FDivBusyE, UMux, U[0]); - flopen #(`DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]); + assign initU = {SqrtE, {(P.DIVb){1'b0}}}; + assign initUM = {~SqrtE, {(P.DIVb){1'b0}}}; + mux2 #(P.DIVb+1) Umux(UNext[P.DIVCOPIES-1], initU, IFDivStartE, UMux); + mux2 #(P.DIVb+1) UMmux(UMNext[P.DIVCOPIES-1], initUM, IFDivStartE, UMMux); + flopen #(P.DIVb+1) UReg(clk, FDivBusyE, UMux, U[0]); + flopen #(P.DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]); // C register/initialization mux // Initialize C to -1 for sqrt and -R for division logic [1:0] initCUpper; - if(`RADIX == 4) begin + if(P.RADIX == 4) begin mux2 #(2) cuppermux4(2'b00, 2'b11, SqrtE, initCUpper); end else begin mux2 #(2) cuppermux2(2'b10, 2'b11, SqrtE, initCUpper); end - assign initC = {initCUpper, {`DIVb{1'b0}}}; - mux2 #(`DIVb+2) cmux(C[`DIVCOPIES], initC, IFDivStartE, NextC); - flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]); + assign initC = 
{initCUpper, {P.DIVb{1'b0}}}; + mux2 #(P.DIVb+2) cmux(C[P.DIVCOPIES], initC, IFDivStartE, NextC); + flopen #(P.DIVb+2) creg(clk, FDivBusyE, NextC, C[0]); // Divisor Selections assign DBar = ~D; // for -D - if(`RADIX == 4) begin : d2 + if(P.RADIX == 4) begin : d2 assign D2 = D << 1; // for 2D, only used in R4 assign DBar2 = ~D2; // for -2D, only used in R4 end @@ -103,15 +101,15 @@ module fdivsqrtiter( // k=DIVCOPIES of the recurrence logic genvar i; generate - for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : iterations - if (`RADIX == 2) begin: stage - fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtE, + for(i=0; $unsigned(i)>> `LOGR; + assign W = $signed(Sum) >>> P.LOGR; assign UnsignedQuotM = {3'b000, PreQmM}; // Integer remainder: sticky and sign correction muxes - mux2 #(`DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM); - mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM); - mux2 #(`DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM); + mux2 #(P.DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM); + mux2 #(P.DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM); + mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM); // Select quotient or remainder and do normalization shift - mux2 #(`DIVBLEN+1) normshiftmux(((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))), (mM + (`DIVBLEN+1)'(`DIVa)), RemOpM, NormShiftM); - mux2 #(`DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM); + mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(P.DIVa)), RemOpM, NormShiftM); + mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM); assign PreIntResultM = $signed(PreResultM >>> NormShiftM); // special case logic @@ -119,18 +117,18 @@ module fdivsqrtpostproc( always_comb if (BZeroM) begin // Divide by zero if (RemOpM) IntDivResultM = AM; - else IntDivResultM = {(`XLEN){1'b1}}; + else IntDivResultM = {(P.XLEN){1'b1}}; end else if 
(ALTBM) begin // Numerator is zero if (RemOpM) IntDivResultM = AM; else IntDivResultM = '0; - end else IntDivResultM = PreIntResultM[`XLEN-1:0]; + end else IntDivResultM = PreIntResultM[P.XLEN-1:0]; // sign extend result for W64 - if (`XLEN==64) begin - mux2 #(64) resmux(IntDivResultM[`XLEN-1:0], - {{(`XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64 + if (P.XLEN==64) begin + mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0], + {{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64 W64M, FIntDivResultM); end else - assign FIntDivResultM = IntDivResultM[`XLEN-1:0]; + assign FIntDivResultM = IntDivResultM[P.XLEN-1:0]; end endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 3de4b252e..2d50b9299 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -26,56 +26,54 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fdivsqrtpreproc ( +module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( input logic clk, input logic IFDivStartE, - input logic [`NF:0] Xm, Ym, - input logic [`NE-1:0] Xe, Ye, - input logic [`FMTBITS-1:0] FmtE, + input logic [P.NF:0] Xm, Ym, + input logic [P.NE-1:0] Xe, Ye, + input logic [P.FMTBITS-1:0] FmtE, input logic SqrtE, input logic XZeroE, input logic [2:0] Funct3E, - output logic [`NE+1:0] QeM, - output logic [`DIVb+3:0] X, D, + output logic [P.NE+1:0] QeM, + output logic [P.DIVb+3:0] X, D, // Int-specific - input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B + input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic IntDivE, W64E, output logic ISpecialCaseE, - 
output logic [`DURLEN-1:0] CyclesE, - output logic [`DIVBLEN:0] nM, mM, + output logic [P.DURLEN-1:0] CyclesE, + output logic [P.DIVBLEN:0] nM, mM, output logic NegQuotM, ALTBM, IntDivM, W64M, output logic AsM, BZeroM, - output logic [`XLEN-1:0] AM + output logic [P.XLEN-1:0] AM ); - logic [`DIVb-1:0] Xfract, Dfract; - logic [`DIVb:0] PreSqrtX; - logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed - logic [`NE+1:0] QeE; // Quotient Exponent (FP only) - logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input - logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs + logic [P.DIVb-1:0] Xfract, Dfract; + logic [P.DIVb:0] PreSqrtX; + logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed + logic [P.NE+1:0] QeE; // Quotient Exponent (FP only) + logic [P.DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input + logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division logic SignedDivE; // signed division logic NegQuotE; // Integer quotient is negative logic AsE, BsE; // Signs of integer inputs - logic [`XLEN-1:0] AE; // input A after W64 adjustment + logic [P.XLEN-1:0] AE; // input A after W64 adjustment logic ALTBE; ////////////////////////////////////////////////////// // Integer Preprocessing ////////////////////////////////////////////////////// - if (`IDIV_ON_FPU) begin:intpreproc // Int Supported - logic [`XLEN-1:0] BE, PosA, PosB; + if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported + logic [P.XLEN-1:0] BE, PosA, PosB; // Extract inputs, signs, zero, depending on W64 mode if applicable assign SignedDivE = ~Funct3E[0]; // Source handling - if (`XLEN==64) begin // 64-bit, supports W64 + if (P.XLEN==64) begin // 64-bit, supports W64 mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & 
SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE); mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE); end else begin // 32 bits only @@ -84,21 +82,21 @@ module fdivsqrtpreproc ( end assign AZeroE = ~(|AE); assign BZeroE = ~(|BE); - assign AsE = AE[`XLEN-1] & SignedDivE; - assign BsE = BE[`XLEN-1] & SignedDivE; + assign AsE = AE[P.XLEN-1] & SignedDivE; + assign BsE = BE[P.XLEN-1] & SignedDivE; assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative // Force integer inputs to be postiive - mux2 #(`XLEN) posamux(AE, -AE, AsE, PosA); - mux2 #(`XLEN) posbmux(BE, -BE, BsE, PosB); + mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA); + mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB); // Select integer or floating point inputs - mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX); - mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD); + mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX); + mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD); mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE); end else begin // Int not supported - assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}}; - assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}}; + assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}}; + assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}}; assign NumerZeroE = XZeroE; end @@ -107,8 +105,8 @@ module fdivsqrtpreproc ( ////////////////////////////////////////////////////// // count leading zeros for Subnorm FP and to normalize integer inputs - lzc #(`DIVb) lzcX (IFX, ell); - lzc #(`DIVb) lzcY (IFD, mE); + lzc #(P.DIVb) lzcX (IFX, ell); + lzc #(P.DIVb) lzcY (IFD, mE); // Normalization shift: shift off leading one assign Xfract = (IFX << ell) << 1; @@ -122,28 +120,28 @@ module fdivsqrtpreproc ( // and nE (number of fractional digits) ////////////////////////////////////////////////////// 
- if (`IDIV_ON_FPU) begin:intrightshift // Int Supported - logic [`DIVBLEN:0] ZeroDiff, p; + if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported + logic [P.DIVBLEN:0] ZeroDiff, p; // calculate number of fractional bits p assign ZeroDiff = mE - ell; // Difference in number of leading zeros - assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B (A has more leading zeros) - mux2 #(`DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); + assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros) + mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); // Integer special cases (terminate immediately) assign ISpecialCaseE = BZeroE | ALTBE; // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps - if (`LOGRK > 0) begin // more than 1 bit per cycle - logic [`LOGRK-1:0] IntTrunc, RightShiftX; - logic [`DIVBLEN:0] TotalIntBits, IntSteps; + if (P.LOGRK > 0) begin // more than 1 bit per cycle + logic [P.LOGRK-1:0] IntTrunc, RightShiftX; + logic [P.DIVBLEN:0] TotalIntBits, IntSteps; /* verilator lint_off WIDTH */ - assign TotalIntBits = `LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) - assign IntTrunc = TotalIntBits % `RK; // Truncation check for ceiling operator - assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div - assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits - assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount + assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) + assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator + assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div + assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits + assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount assign DivXShifted = DivX >> RightShiftX; // shift X by up to 
R*K-1 to complete in nE steps /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting @@ -167,42 +165,42 @@ module fdivsqrtpreproc ( assign DivX = {3'b000, ~NumerZeroE, Xfract}; // Sqrt is initialized on step one as R(X-1), so depends on Radix - mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); - if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; + mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); + if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; - mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); + mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); ////////////////////////////////////////////////////// // Selet integer or floating-point operands ////////////////////////////////////////////////////// - if (`IDIV_ON_FPU) begin - mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); + if (P.IDIV_ON_FPU) begin + mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); end else begin assign X = PreShiftX; end // Divisior register - flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D); + flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D); // Floating-point exponent - fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); - flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); + fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); + flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM); // Number of FSM cycles (to FSM) - fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE); + fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE); - if (`IDIV_ON_FPU) begin:intpipelineregs + if (P.IDIV_ON_FPU) begin:intpipelineregs // pipeline registers flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); flopen #(1) 
altbreg(clk, IFDivStartE, ALTBE, ALTBM); flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); - flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); - flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); - flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); - if (`XLEN==64) + flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); + flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); + flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM); + if (P.XLEN==64) flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); end diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel2.sv b/src/fpu/fdivsqrt/fdivsqrtqsel2.sv index 18c577b97..fe32924e1 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtqsel2.sv @@ -26,8 +26,6 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module fdivsqrtqsel2 ( input logic [3:0] ps, pc, output logic up, uz, un diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel4.sv b/src/fpu/fdivsqrt/fdivsqrtqsel4.sv index 7a0db24d8..de520bef2 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtqsel4.sv @@ -26,8 +26,6 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module fdivsqrtqsel4 ( input logic [2:0] Dmsbs, input logic [4:0] Smsbs, diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv b/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv index e508a6d7c..8a3d535d3 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv +++ b/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv @@ -26,8 +26,6 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module fdivsqrtqsel4cmp ( input logic [2:0] Dmsbs, input logic [4:0] Smsbs, diff --git a/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/src/fpu/fdivsqrt/fdivsqrtstage2.sv index be62f8aa6..bb8d87234 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -26,27 +26,26 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" /* verilator lint_off UNOPTFLAT */ -module fdivsqrtstage2 ( - input logic [`DIVb+3:0] D, DBar, - input logic [`DIVb:0] U, UM, - input logic [`DIVb+3:0] WS, WC, - input logic [`DIVb+1:0] C, +module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) ( + input logic [P.DIVb+3:0] D, DBar, + input logic [P.DIVb:0] U, UM, + input logic [P.DIVb+3:0] WS, WC, + input logic [P.DIVb+1:0] C, input logic SqrtE, output logic un, - output logic [`DIVb+1:0] CNext, - output logic [`DIVb:0] UNext, UMNext, - output logic [`DIVb+3:0] WSNext, WCNext + output logic [P.DIVb+1:0] CNext, + output logic [P.DIVb:0] UNext, UMNext, + output logic [P.DIVb+3:0] WSNext, WCNext ); /* verilator lint_on UNOPTFLAT */ - logic [`DIVb+3:0] Dsel; + logic [P.DIVb+3:0] Dsel; logic up, uz; - logic [`DIVb+3:0] F; - logic [`DIVb+3:0] AddIn; - logic [`DIVb+3:0] WSA, WCA; + logic [P.DIVb+3:0] F; + logic [P.DIVb+3:0] AddIn; + logic [P.DIVb+3:0] WSA, WCA; // Qmient Selection logic // Given partial remainder, select digit of +1, 0, or -1 (up, uz, un) @@ -56,10 +55,10 @@ module fdivsqrtstage2 ( // 0000 = 0 // 0010 = -1 // 0001 = -2 - fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un); + fdivsqrtqsel2 qsel2(WS[P.DIVb+3:P.DIVb], WC[P.DIVb+3:P.DIVb], up, uz, un); // Sqrt F generation. 
Extend C, U, UM to Q4.k - fdivsqrtfgen2 fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); + fdivsqrtfgen2 #(P) fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); // Divisor multiple always_comb @@ -69,16 +68,16 @@ module fdivsqrtstage2 ( // Partial Product Generation // WSA, WCA = WS + WC - qD - mux2 #(`DIVb+4) addinmux(Dsel, F, SqrtE, AddIn); - csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA); + mux2 #(P.DIVb+4) addinmux(Dsel, F, SqrtE, AddIn); + csa #(P.DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA); assign WSNext = WSA << 1; assign WCNext = WCA << 1; // Shift thermometer code C - assign CNext = {1'b1, C[`DIVb+1:1]}; + assign CNext = {1'b1, C[P.DIVb+1:1]}; // Unified On-The-Fly Converter to accumulate result - fdivsqrtuotfc2 uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext); + fdivsqrtuotfc2 #(P) uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext); endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/src/fpu/fdivsqrt/fdivsqrtstage4.sv index 9464e6a88..c6477ec68 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -26,29 +26,27 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fdivsqrtstage4 ( - input logic [`DIVb+3:0] D, DBar, D2, DBar2, - input logic [`DIVb:0] U,UM, - input logic [`DIVb+3:0] WS, WC, - input logic [`DIVb+1:0] C, +module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) ( + input logic [P.DIVb+3:0] D, DBar, D2, DBar2, + input logic [P.DIVb:0] U,UM, + input logic [P.DIVb+3:0] WS, WC, + input logic [P.DIVb+1:0] C, input logic SqrtE, j1, - output logic [`DIVb+1:0] CNext, + output logic [P.DIVb+1:0] CNext, output logic un, - output logic [`DIVb:0] UNext, UMNext, - output logic [`DIVb+3:0] WSNext, WCNext + output logic [P.DIVb:0] UNext, UMNext, + output logic [P.DIVb+3:0] WSNext, WCNext ); - logic [`DIVb+3:0] Dsel; + logic [P.DIVb+3:0] Dsel; logic [3:0] udigit; - logic [`DIVb+3:0] F; - logic [`DIVb+3:0] AddIn; + logic [P.DIVb+3:0] F; + logic [P.DIVb+3:0] AddIn; logic [4:0] Smsbs; logic [2:0] Dmsbs; logic [7:0] WCmsbs, WSmsbs; logic CarryIn; - logic [`DIVb+3:0] WSA, WCA; + logic [P.DIVb+3:0] WSA, WCA; // Digit Selection logic // u encoding: @@ -57,16 +55,16 @@ module fdivsqrtstage4 ( // 0000 = 0 // 0010 = -1 // 0001 = -2 - assign Smsbs = U[`DIVb:`DIVb-4]; - assign Dmsbs = D[`DIVb-1:`DIVb-3]; - assign WCmsbs = WC[`DIVb+3:`DIVb-4]; - assign WSmsbs = WS[`DIVb+3:`DIVb-4]; + assign Smsbs = U[P.DIVb:P.DIVb-4]; + assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; + assign WCmsbs = WC[P.DIVb+3:P.DIVb-4]; + assign WSmsbs = WS[P.DIVb+3:P.DIVb-4]; fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit); assign un = 1'b0; // unused for radix 4 // F generation logic - fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); + fdivsqrtfgen4 #(P) fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); // Divisor multiple logic always_comb @@ -83,15 +81,15 @@ module fdivsqrtstage4 ( // {WS, WC}}Next = (WS + WC - qD or F) << 2 assign AddIn = SqrtE ? 
F : Dsel; assign CarryIn = ~SqrtE & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D - csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA); + csa #(P.DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA); assign WSNext = WSA << 2; assign WCNext = WCA << 2; // Shift thermometer code C - assign CNext = {2'b11, C[`DIVb+1:2]}; + assign CNext = {2'b11, C[P.DIVb+1:2]}; // On-the-fly converter to accumulate result - fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); + fdivsqrtuotfc4 #(P) fdivsqrtuotfc4(.udigit, .C(CNext[P.DIVb:0]), .U, .UM, .UNext, .UMNext); endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv b/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv index 33956a0fd..bde28cfba 100644 --- a/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv @@ -26,22 +26,20 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - /////////////////////////////// // Unified OTFC, Radix 2 // /////////////////////////////// -module fdivsqrtuotfc2( +module fdivsqrtuotfc2 import cvw::*; #(parameter cvw_t P) ( input logic up, un, - input logic [`DIVb+1:0] C, - input logic [`DIVb:0] U, UM, - output logic [`DIVb:0] UNext, UMNext + input logic [P.DIVb+1:0] C, + input logic [P.DIVb:0] U, UM, + output logic [P.DIVb:0] UNext, UMNext ); // The on-the-fly converter transfers the divsqrt // bits to the quotient as they come. - logic [`DIVb:0] K; + logic [P.DIVb:0] K; - assign K = (C[`DIVb:0] & ~(C[`DIVb:0] << 1)); // Thermometer to one hot encoding + assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding always_comb begin if (up) begin diff --git a/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv b/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv index 57298b4c8..403ccf051 100644 --- a/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv @@ -26,19 +26,17 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fdivsqrtuotfc4( +module fdivsqrtuotfc4 import cvw::*; #(parameter cvw_t P) ( input logic [3:0] udigit, - input logic [`DIVb:0] U, UM, - input logic [`DIVb:0] C, - output logic [`DIVb:0] UNext, UMNext + input logic [P.DIVb:0] U, UM, + input logic [P.DIVb:0] C, + output logic [P.DIVb:0] UNext, UMNext ); // The on-the-fly converter transfers the square root // bits to the quotient as they come. // Use this otfc for division and square root. - logic [`DIVb:0] K1, K2, K3; + logic [P.DIVb:0] K1, K2, K3; assign K1 = (C&~(C << 1)); // K assign K2 = ((C << 1)&~(C << 2)); // 2K assign K3 = (C & ~(C << 2)); // 3K diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 338aa78a6..22d2da35b 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -238,7 +238,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE)); // divide and square root: fdiv, fsqrt, optionally integer division - fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), + fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, From e6d25b7f70828507023bca24d8c36ead0cdfda4d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 26 May 2023 14:40:06 -0500 Subject: [PATCH 18/20] Finished fpu parameterization using Lim's method. 
--- src/fpu/fclassify.sv | 7 +- src/fpu/fcmp.sv | 72 ++++---- src/fpu/fcvt.sv | 106 ++++++------ src/fpu/fpu.sv | 10 +- src/fpu/fsgninj.sv | 40 ++--- src/fpu/postproc/cvtshiftcalc.sv | 56 +++--- src/fpu/postproc/divshiftcalc.sv | 30 ++-- src/fpu/postproc/flags.sv | 53 +++--- src/fpu/postproc/fmashiftcalc.sv | 104 ++++++----- src/fpu/postproc/negateintres.sv | 17 +- src/fpu/postproc/normshift.sv | 12 +- src/fpu/postproc/postprocess.sv | 100 ++++++----- src/fpu/postproc/resultsign.sv | 4 +- src/fpu/postproc/round.sv | 220 +++++++++++------------ src/fpu/postproc/roundsign.sv | 3 +- src/fpu/postproc/shiftcorrection.sv | 44 +++-- src/fpu/postproc/specialcase.sv | 260 ++++++++++++++-------------- 17 files changed, 556 insertions(+), 582 deletions(-) diff --git a/src/fpu/fclassify.sv b/src/fpu/fclassify.sv index af159878f..62b850d7f 100644 --- a/src/fpu/fclassify.sv +++ b/src/fpu/fclassify.sv @@ -25,16 +25,15 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" -module fclassify ( +module fclassify import cvw::*; #(parameter cvw_t P) ( input logic Xs, // sign bit input logic XNaN, // is NaN input logic XSNaN, // is signaling NaN input logic XSubnorm, // is Subnormal input logic XZero, // is zero input logic XInf, // is infinity - output logic [`XLEN-1:0] ClassRes // classify result + output logic [P.XLEN-1:0] ClassRes // classify result ); logic PInf, PZero, PNorm, PSubnorm; // is the input a positive infinity/zero/normal/subnormal @@ -63,6 +62,6 @@ module fclassify ( // bit 7 - +Inf // bit 8 - signaling NaN // bit 9 - quiet NaN - assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf}; + assign ClassRes = {{P.XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf}; endmodule diff --git a/src/fpu/fcmp.sv b/src/fpu/fcmp.sv index 63c234328..d470220e5 100755 --- a/src/fpu/fcmp.sv +++ b/src/fpu/fcmp.sv @@ -27,8 +27,6 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - // OpCtrl values // 110 min // 101 max @@ -36,23 +34,23 @@ // 001 less than // 011 less than or equal -module fcmp ( - input logic [`FMTBITS-1:0] Fmt, // format of fp number +module fcmp import cvw::*; #(parameter cvw_t P) ( + input logic [P.FMTBITS-1:0] Fmt, // format of fp number input logic [2:0] OpCtrl, // see above table input logic Xs, Ys, // input signs - input logic [`NE-1:0] Xe, Ye, // input exponents - input logic [`NF:0] Xm, Ym, // input mantissa + input logic [P.NE-1:0] Xe, Ye, // input exponents + input logic [P.NF:0] Xm, Ym, // input mantissa input logic XZero, YZero, // is zero input logic XNaN, YNaN, // is NaN input logic XSNaN, YSNaN, // is signaling NaN - input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker) + input logic [P.FLEN-1:0] X, Y, // original inputs (before unpacker) output logic CmpNV, // invalid flag - output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result - output logic [`XLEN-1:0] CmpIntRes // compare integer result + output logic [P.FLEN-1:0] CmpFpRes, // compare floating-point result + output logic [P.XLEN-1:0] CmpIntRes // compare integer result ); logic LTabs, LT, EQ; // is X < or > or = Y - logic [`FLEN-1:0] NaNRes; // NaN result + logic [P.FLEN-1:0] NaNRes; // NaN result logic BothZero; // are both inputs zero logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN @@ -85,44 +83,44 @@ module fcmp ( // for RISC-V, return the canonical NaN // select the NaN result - if (`FPSIZES == 1) - if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + if (P.FPSIZES == 1) + if(P.IEEE754) assign NaNRes = {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]}; + else assign NaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}}; - else if (`FPSIZES == 2) - if(`IEEE754) assign NaNRes = Fmt ? 
{Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - else assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + else if (P.FPSIZES == 2) + if(P.IEEE754) assign NaNRes = Fmt ? {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, Xs, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]}; + else assign NaNRes = Fmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)}; - else if (`FPSIZES == 3) + else if (P.FPSIZES == 3) always_comb case (Fmt) - `FMT: - if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - `FMT1: - if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - `FMT2: - if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; - else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; - default: NaNRes = {`FLEN{1'bx}}; + P.FMT: + if(P.IEEE754) NaNRes = {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]}; + else NaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}}; + P.FMT1: + if(P.IEEE754) NaNRes = {{P.FLEN-P.LEN1{1'b1}}, Xs, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]}; + else NaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)}; + P.FMT2: + if(P.IEEE754) NaNRes = {{P.FLEN-P.LEN2{1'b1}}, Xs, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]}; + else NaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)}; + default: NaNRes = {P.FLEN{1'bx}}; endcase - else if (`FPSIZES == 4) + else if (P.FPSIZES == 4) always_comb case (Fmt) 2'h3: - if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + if(P.IEEE754) NaNRes = {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]}; + else NaNRes 
= {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}}; 2'h1: - if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; - else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + if(P.IEEE754) NaNRes = {{P.FLEN-P.D_LEN{1'b1}}, Xs, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]}; + else NaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)}; 2'h0: - if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; - else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + if(P.IEEE754) NaNRes = {{P.FLEN-P.S_LEN{1'b1}}, Xs, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]}; + else NaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)}; 2'h2: - if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; - else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; + if(P.IEEE754) NaNRes = {{P.FLEN-P.H_LEN{1'b1}}, Xs, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]}; + else NaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)}; endcase @@ -155,6 +153,6 @@ module fcmp ( // - -0 = 0 // - inf = inf and -inf = -inf // - return 0 if comparison with NaN (unordered) - assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN}; + assign CmpIntRes = {(P.XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN}; endmodule diff --git a/src/fpu/fcvt.sv b/src/fpu/fcvt.sv index 32ca7542f..640e4e82d 100644 --- a/src/fpu/fcvt.sv +++ b/src/fpu/fcvt.sv @@ -27,23 +27,21 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fcvt ( +module fcvt import cvw::*; #(parameter cvw_t P) ( input logic Xs, // input's sign - input logic [`NE-1:0] Xe, // input's exponent - input logic [`NF:0] Xm, // input's fraction - input logic [`XLEN-1:0] Int, // integer input - from IEU + input logic [P.NE-1:0] Xe, // input's exponent + input logic [P.NF:0] Xm, // input's fraction + input logic [P.XLEN-1:0] Int, // integer input - from IEU input logic [2:0] OpCtrl, // choose which opperation (look below for values) input logic ToInt, // is fp->int (since it's writting to the integer register) input logic XZero, // is the input zero - input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half) - output logic [`NE:0] Ce, // the calculated expoent - output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by + input logic [P.FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half) + output logic [P.NE:0] Ce, // the calculated expoent + output logic [P.LOGCVTLEN-1:0] ShiftAmt, // how much to shift by output logic ResSubnormUf,// does the result underflow or is subnormal output logic Cs, // the result's sign output logic IntZero, // is the integer zero? 
- output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder) + output logic [P.CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder) ); // OpCtrls: @@ -56,16 +54,16 @@ module fcvt ( // bit 2 bit 1 bit 0 // for example: signed long -> single floating point has the OpCode 101 - logic [`FMTBITS-1:0] OutFmt; // format of the output - logic [`XLEN-1:0] PosInt; // the positive integer input - logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size - logic [`NE-2:0] NewBias; // the bias of the final result - logic [`NE-1:0] OldExp; // the old exponent + logic [P.FMTBITS-1:0] OutFmt; // format of the output + logic [P.XLEN-1:0] PosInt; // the positive integer input + logic [P.XLEN-1:0] TrimInt; // integer trimmed to the correct size + logic [P.NE-2:0] NewBias; // the bias of the final result + logic [P.NE-1:0] OldExp; // the old exponent logic Signed; // is the opperation with a signed integer? logic Int64; // is the integer 64 bits? logic IntToFp; // is the opperation an int->fp conversion? - logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder) - logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC + logic [P.CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder) + logic [P.LOGCVTLEN-1:0] LeadingZeros; // output from the LZC // seperate OpCtrl for code readability @@ -76,9 +74,9 @@ module fcvt ( // choose the ouptut format depending on the opperation // - fp -> fp: OpCtrl contains the percision of the output // - int -> fp: Fmt contains the percision of the output - if (`FPSIZES == 2) - assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT); - else if (`FPSIZES == 3 | `FPSIZES == 4) + if (P.FPSIZES == 2) + assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == P.FMT); + else if (P.FPSIZES == 3 | P.FPSIZES == 4) assign OutFmt = IntToFp ? 
Fmt : OpCtrl[1:0]; @@ -89,7 +87,7 @@ module fcvt ( // 2) trim the input to the proper size (kill the 32 most significant zeroes if needed) assign PosInt = Cs ? -Int : Int; - assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt; + assign TrimInt = {{P.XLEN-32{Int64}}, {32{1'b1}}} & PosInt; assign IntZero = ~|TrimInt; /////////////////////////////////////////////////////////////////////////// @@ -99,13 +97,13 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} : - {Xm, {`CVTLEN-`NF{1'b0}}}; + assign LzcInFull = IntToFp ? {TrimInt, {P.CVTLEN-P.XLEN+1{1'b0}}} : + {Xm, {P.CVTLEN-P.NF{1'b0}}}; // used as shifter input in postprocessor - assign LzcIn = LzcInFull[`CVTLEN-1:0]; + assign LzcIn = LzcInFull[P.CVTLEN-1:0]; - lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros)); + lzc #(P.CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros)); /////////////////////////////////////////////////////////////////////////// // exp calculations @@ -114,42 +112,42 @@ module fcvt ( // Select the bias of the output // fp -> int : select 1 // ??? -> fp : pick the new bias depending on the output format - if (`FPSIZES == 1) begin - assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS); + if (P.FPSIZES == 1) begin + assign NewBias = ToInt ? (P.NE-1)'(1) : (P.NE-1)'(P.BIAS); - end else if (`FPSIZES == 2) begin - logic [`NE-2:0] NewBiasToFp; - assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); - assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; + end else if (P.FPSIZES == 2) begin + logic [P.NE-2:0] NewBiasToFp; + assign NewBiasToFp = OutFmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1); + assign NewBias = ToInt ? 
(P.NE-1)'(1) : NewBiasToFp; - end else if (`FPSIZES == 3) begin - logic [`NE-2:0] NewBiasToFp; + end else if (P.FPSIZES == 3) begin + logic [P.NE-2:0] NewBiasToFp; always_comb case (OutFmt) - `FMT: NewBiasToFp = (`NE-1)'(`BIAS); - `FMT1: NewBiasToFp = (`NE-1)'(`BIAS1); - `FMT2: NewBiasToFp = (`NE-1)'(`BIAS2); - default: NewBiasToFp = {`NE-1{1'bx}}; + P.FMT: NewBiasToFp = (P.NE-1)'(P.BIAS); + P.FMT1: NewBiasToFp = (P.NE-1)'(P.BIAS1); + P.FMT2: NewBiasToFp = (P.NE-1)'(P.BIAS2); + default: NewBiasToFp = {P.NE-1{1'bx}}; endcase - assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; + assign NewBias = ToInt ? (P.NE-1)'(1) : NewBiasToFp; - end else if (`FPSIZES == 4) begin - logic [`NE-2:0] NewBiasToFp; + end else if (P.FPSIZES == 4) begin + logic [P.NE-2:0] NewBiasToFp; always_comb case (OutFmt) - 2'h3: NewBiasToFp = (`NE-1)'(`Q_BIAS); - 2'h1: NewBiasToFp = (`NE-1)'(`D_BIAS); - 2'h0: NewBiasToFp = (`NE-1)'(`S_BIAS); - 2'h2: NewBiasToFp = (`NE-1)'(`H_BIAS); + 2'h3: NewBiasToFp = (P.NE-1)'(P.Q_BIAS); + 2'h1: NewBiasToFp = (P.NE-1)'(P.D_BIAS); + 2'h0: NewBiasToFp = (P.NE-1)'(P.S_BIAS); + 2'h2: NewBiasToFp = (P.NE-1)'(P.H_BIAS); endcase - assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; + assign NewBias = ToInt ? (P.NE-1)'(1) : NewBiasToFp; end // select the old exponent // int -> fp : largest bias + XLEN-1 // fp -> ??? : XExp - assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe; + assign OldExp = IntToFp ? 
(P.NE)'(P.BIAS)+(P.NE)'(P.XLEN-1) : Xe; // calculate CalcExp // fp -> fp : @@ -159,13 +157,13 @@ module fcvt ( // - correct the expoent when there is a normalization shift ( + LeadingZeros+1) // - the plus 1 is built into the leading zeros by counting the leading zeroes in the mantissa rather than the fraction // fp -> int : XExp - Largest Bias + 1 - (LeadingZeros+1) - // | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp + // | P.XLEN zeros | Mantissa | 0's if nessisary | << CalcExp // process: // - start - // | `XLEN zeros | Mantissa | 0's if nessisary | + // | P.XLEN zeros | Mantissa | 0's if nessisary | // // - shift left 1 (1) - // | `XLEN-1 zeros |bit| frac | 0's if nessisary | + // | P.XLEN-1 zeros |bit| frac | 0's if nessisary | // . <- binary point // // - shift left till unbiased exponent is 0 (XExp - Largest Bias) @@ -185,13 +183,13 @@ module fcvt ( // - newBias to make the biased exponent // // oldexp - biasold - LeadingZeros + newbias - assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros)} + {2'b0, NewBias}; + assign Ce = {1'b0, OldExp} - (P.NE+1)'(P.BIAS) - {{P.NE-P.LOGCVTLEN+1{1'b0}}, (LeadingZeros)} + {2'b0, NewBias}; // find if the result is dnormal or underflows // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - can't underflow an integer to Fp conversion - assign ResSubnormUf = (~|Ce | Ce[`NE])&~XZero&~IntToFp; + assign ResSubnormUf = (~|Ce | Ce[P.NE])&~XZero&~IntToFp; /////////////////////////////////////////////////////////////////////////// @@ -211,8 +209,8 @@ module fcvt ( // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? 
always_comb - if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}}; - else if (ResSubnormUf) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0]; + if(ToInt) ShiftAmt = Ce[P.LOGCVTLEN-1:0]&{P.LOGCVTLEN{~Ce[P.NE]}}; + else if (ResSubnormUf) ShiftAmt = (P.LOGCVTLEN)'(P.NF-1)+Ce[P.LOGCVTLEN-1:0]; else ShiftAmt = LeadingZeros; @@ -227,7 +225,7 @@ module fcvt ( // - otherwise: the floating point input's sign always_comb if(IntToFp) - if(Int64) Cs = Int[`XLEN-1]&Signed; + if(Int64) Cs = Int[P.XLEN-1]&Signed; else Cs = Int[31]&Signed; else Cs = Xs; diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 22d2da35b..f69cc0ab6 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -245,20 +245,20 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .QmM, .FIntDivResultM); // compare: fmin/fmax, flt/fle/feq - fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), + fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE)); // sign injection: fsgnj/fsgnjx/fsgnjn - fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XPostBoxE), .Fmt(FmtE), .SgnRes(SgnResE)); + fsgninj #(P) fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XPostBoxE), .Fmt(FmtE), .SgnRes(SgnResE)); // classify: fclass - fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE), + fclassify #(P) fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE), .XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE)); // convert: fcvt.*.* - fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), + fcvt #(P) fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), .ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE), .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), 
.IntZero(IntZeroE), .LzcIn(CvtLzcInE)); @@ -325,7 +325,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( // Memory Stage: postprocessor and result muxes ////////////////////////////////////////////////////////////////////////////////////////// - postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), + postprocess #(P) postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), diff --git a/src/fpu/fsgninj.sv b/src/fpu/fsgninj.sv index f85206b41..d9cfc9d05 100755 --- a/src/fpu/fsgninj.sv +++ b/src/fpu/fsgninj.sv @@ -26,14 +26,12 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fsgninj ( +module fsgninj import cvw::*; #(parameter cvw_t P) ( input logic Xs, Ys, // X and Y sign bits - input logic [`FLEN-1:0] X, // X - input logic [`FMTBITS-1:0] Fmt, // format + input logic [P.FLEN-1:0] X, // X + input logic [P.FMTBITS-1:0] Fmt, // format input logic [1:0] OpCtrl, // operation control - output logic [`FLEN-1:0] SgnRes // result + output logic [P.FLEN-1:0] SgnRes // result ); logic ResSgn; // result sign @@ -50,30 +48,30 @@ module fsgninj ( // - uses NaN-blocking format // - if there are any unused bits the most significant bits are filled with 1s - if (`FPSIZES == 1) - assign SgnRes = {ResSgn, X[`FLEN-2:0]}; - else if (`FPSIZES == 2) - assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? 
X[`LEN1-1] : ResSgn, X[`LEN1-2:0]}; - else if (`FPSIZES == 3) begin + if (P.FPSIZES == 1) + assign SgnRes = {ResSgn, X[P.FLEN-2:0]}; + else if (P.FPSIZES == 2) + assign SgnRes = {~Fmt|ResSgn, X[P.FLEN-2:P.LEN1], Fmt ? X[P.LEN1-1] : ResSgn, X[P.LEN1-2:0]}; + else if (P.FPSIZES == 3) begin logic [2:0] SgnBits; always_comb case (Fmt) - `FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]}; - `FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]}; - `FMT2: SgnBits = {2'b11, ResSgn}; + P.FMT: SgnBits = {ResSgn, X[P.LEN1-1], X[P.LEN2-1]}; + P.FMT1: SgnBits = {1'b1, ResSgn, X[P.LEN2-1]}; + P.FMT2: SgnBits = {2'b11, ResSgn}; default: SgnBits = {3{1'bx}}; endcase - assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]}; - end else if (`FPSIZES == 4) begin + assign SgnRes = {SgnBits[2], X[P.FLEN-2:P.LEN1], SgnBits[1], X[P.LEN1-2:P.LEN2], SgnBits[0], X[P.LEN2-2:0]}; + end else if (P.FPSIZES == 4) begin logic [3:0] SgnBits; always_comb case (Fmt) - `Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]}; - `D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]}; - `S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]}; - `H_FMT: SgnBits = {3'b111, ResSgn}; + P.Q_FMT: SgnBits = {ResSgn, X[P.D_LEN-1], X[P.S_LEN-1], X[P.H_LEN-1]}; + P.D_FMT: SgnBits = {1'b1, ResSgn, X[P.S_LEN-1], X[P.H_LEN-1]}; + P.S_FMT: SgnBits = {2'b11, ResSgn, X[P.H_LEN-1]}; + P.H_FMT: SgnBits = {3'b111, ResSgn}; endcase - assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]}; + assign SgnRes = {SgnBits[3], X[P.Q_LEN-2:P.D_LEN], SgnBits[2], X[P.D_LEN-2:P.S_LEN], SgnBits[1], X[P.S_LEN-2:P.H_LEN], SgnBits[0], X[P.H_LEN-2:0]}; end endmodule diff --git a/src/fpu/postproc/cvtshiftcalc.sv b/src/fpu/postproc/cvtshiftcalc.sv index 7297824f3..105778d0c 100644 --- a/src/fpu/postproc/cvtshiftcalc.sv +++ b/src/fpu/postproc/cvtshiftcalc.sv @@ -26,22 +26,20 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module cvtshiftcalc( +module cvtshiftcalc import cvw::*; #(parameter cvw_t P) ( input logic XZero, // is the input zero? input logic ToInt, // to integer conversion? input logic IntToFp, // interger to floating point conversion? - input logic [`FMTBITS-1:0] OutFmt, // output format - input logic [`NE:0] CvtCe, // the calculated expoent - input logic [`NF:0] Xm, // input mantissas - input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb) + input logic [P.FMTBITS-1:0] OutFmt, // output format + input logic [P.NE:0] CvtCe, // the calculated expoent + input logic [P.NF:0] Xm, // input mantissas + input logic [P.CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb) input logic CvtResSubnormUf, // is the conversion result subnormal or underlows output logic CvtResUf, // does the cvt result unerflow - output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted + output logic [P.CVTLEN+P.NF:0] CvtShiftIn // number to be shifted ); - logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF) + logic [$clog2(P.NF):0] ResNegNF; // the result's fraction length negated (-NF) /////////////////////////////////////////////////////////////////////////// // shifter @@ -49,7 +47,7 @@ module cvtshiftcalc( // seclect the input to the shifter // fp -> int: - // | `XLEN zeros | mantissa | 0's if nessisary | + // | P.XLEN zeros | mantissa | 0's if nessisary | // . // Other problems: // - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding) @@ -57,7 +55,7 @@ module cvtshiftcalc( // - ex: for the case 0010000.... (double) // ??? -> fp: // - if result is subnormal or underflowed then we want to shift right i.e. 
shift right then shift left: - // | `NF-1 zeros | mantissa | 0's if nessisary | + // | P.NF-1 zeros | mantissa | 0's if nessisary | // . // - otherwise: // | LzcInM | 0's if nessisary | @@ -67,33 +65,33 @@ module cvtshiftcalc( // get rid of round bit if needed // | add sticky bit if needed // | | - if (ToInt) CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}}; - else if (CvtResSubnormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}}; - else CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}}; + if (ToInt) CvtShiftIn = {{P.XLEN{1'b0}}, Xm[P.NF]&~CvtCe[P.NE], Xm[P.NF-1]|(CvtCe[P.NE]&Xm[P.NF]), Xm[P.NF-2:0], {P.CVTLEN-P.XLEN{1'b0}}}; + else if (CvtResSubnormUf) CvtShiftIn = {{P.NF-1{1'b0}}, Xm, {P.CVTLEN-P.NF+1{1'b0}}}; + else CvtShiftIn = {CvtLzcIn, {P.NF+1{1'b0}}}; // choose the negative of the fraction size - if (`FPSIZES == 1) begin - assign ResNegNF = -($clog2(`NF)+1)'(`NF); + if (P.FPSIZES == 1) begin + assign ResNegNF = -($clog2(P.NF)+1)'(P.NF); - end else if (`FPSIZES == 2) begin - assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1); + end else if (P.FPSIZES == 2) begin + assign ResNegNF = OutFmt ? 
-($clog2(P.NF)+1)'(P.NF) : -($clog2(P.NF)+1)'(P.NF1); - end else if (`FPSIZES == 3) begin + end else if (P.FPSIZES == 3) begin always_comb case (OutFmt) - `FMT: ResNegNF = -($clog2(`NF)+1)'(`NF); - `FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1); - `FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2); + P.FMT: ResNegNF = -($clog2(P.NF)+1)'(P.NF); + P.FMT1: ResNegNF = -($clog2(P.NF)+1)'(P.NF1); + P.FMT2: ResNegNF = -($clog2(P.NF)+1)'(P.NF2); default: ResNegNF = 1'bx; endcase - end else if (`FPSIZES == 4) begin + end else if (P.FPSIZES == 4) begin always_comb case (OutFmt) - 2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF); - 2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF); - 2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF); - 2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF); + 2'h3: ResNegNF = -($clog2(P.NF)+1)'(P.Q_NF); + 2'h1: ResNegNF = -($clog2(P.NF)+1)'(P.D_NF); + 2'h0: ResNegNF = -($clog2(P.NF)+1)'(P.S_NF); + 2'h2: ResNegNF = -($clog2(P.NF)+1)'(P.H_NF); endcase end @@ -102,6 +100,6 @@ module cvtshiftcalc( // determine if the result underflows ??? -> fp // - if the first 1 is shifted out of the result then the result underflows // - can't underflow an integer to fp conversions - assign CvtResUf = ($signed(CvtCe) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp; + assign CvtResUf = ($signed(CvtCe) < $signed({{P.NE-$clog2(P.NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp; -endmodule \ No newline at end of file +endmodule diff --git a/src/fpu/postproc/divshiftcalc.sv b/src/fpu/postproc/divshiftcalc.sv index 76668516c..8ac85b992 100644 --- a/src/fpu/postproc/divshiftcalc.sv +++ b/src/fpu/postproc/divshiftcalc.sv @@ -26,24 +26,22 @@ // and limitations under the License. 
////////////////////////////////////////////////////////////////////////////////////////////////`include "wally-config.vh" -`include "wally-config.vh" - -module divshiftcalc( - input logic [`DIVb:0] DivQm, // divsqrt significand - input logic [`NE+1:0] DivQe, // divsqrt exponent - output logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount - output logic [`NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input +module divshiftcalc import cvw::*; #(parameter cvw_t P) ( + input logic [P.DIVb:0] DivQm, // divsqrt significand + input logic [P.NE+1:0] DivQe, // divsqrt exponent + output logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount + output logic [P.NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input output logic DivResSubnorm, // is the divsqrt result subnormal output logic DivSubnormShiftPos // is the subnormal shift amount positive ); - logic [`LOGNORMSHIFTSZ-1:0] NormShift; // normalized result shift amount - logic [`LOGNORMSHIFTSZ-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negitive) - logic [`NE+1:0] DivSubnormShift; // subnormal result shift amount + logic [P.LOGNORMSHIFTSZ-1:0] NormShift; // normalized result shift amount + logic [P.LOGNORMSHIFTSZ-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negitive) + logic [P.NE+1:0] DivSubnormShift; // subnormal result shift amount // is the result subnormal // if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes - assign DivResSubnorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]); + assign DivResSubnorm = DivQe[P.NE+1]|(~|DivQe[P.NE+1:0]); // if the result is subnormal // 00000000x.xxxxxx... Exp = DivQe @@ -51,8 +49,8 @@ module divshiftcalc( // .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1 // .0000xxxxxxxxxxx... 
>> 1 Exp = 1 // Left shift amount = DivQe+NF+1-1 - assign DivSubnormShift = (`NE+2)'(`NF)+DivQe; - assign DivSubnormShiftPos = ~DivSubnormShift[`NE+1]; + assign DivSubnormShift = (P.NE+2)'(P.NF)+DivQe; + assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1]; // if the result is normalized // 00000000x.xxxxxx... Exp = DivQe @@ -62,13 +60,13 @@ module divshiftcalc( // 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after) // inital Left shift amount = NF // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit - assign NormShift = (`LOGNORMSHIFTSZ)'(`NF); + assign NormShift = (P.LOGNORMSHIFTSZ)'(P.NF); // if the shift amount is negitive then don't shift (keep sticky bit) // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) - assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[`LOGNORMSHIFTSZ-1:0] : '0; + assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZ-1:0] : '0; assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift; // pre-shift the divider result for normalization - assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb-1-`NF{1'b0}}}; + assign DivShiftIn = {{P.NF{1'b0}}, DivQm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}}; endmodule diff --git a/src/fpu/postproc/flags.sv b/src/fpu/postproc/flags.sv index 701cf0524..d5745391f 100644 --- a/src/fpu/postproc/flags.sv +++ b/src/fpu/postproc/flags.sv @@ -25,18 +25,17 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" -module flags( +module flags import cvw::*; #(parameter cvw_t P) ( input logic Xs, // X sign - input logic [`FMTBITS-1:0] OutFmt, // output format + input logic [P.FMTBITS-1:0] OutFmt, // output format input logic InfIn, // is a Inf input being used input logic XInf, YInf, ZInf, // inputs are infinity input logic NaNIn, // is a NaN input being used input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs input logic XZero, YZero, // inputs are zero - input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow - input logic [`NE+1:0] Me, // exponent of the normalized sum + input logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow + input logic [P.NE+1:0] Me, // exponent of the normalized sum // rounding input logic Plus1, // do you add one for rounding input logic Round, Guard, Sticky, // bits used to determine rounding @@ -47,7 +46,7 @@ module flags( input logic IntToFp, // convert integer to floating point input logic Int64, // convert to 64 bit integer input logic Signed, // convert to a signed integer - input logic [`NE:0] CvtCe, // the calculated expoent - Cvt + input logic [P.NE:0] CvtCe, // the calculated exponent - Cvt input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits // divsqrt input logic DivOp, // conversion opperation? 
@@ -92,33 +91,33 @@ module flags( // - any of the bits after the most significan 1 is one // - the most signifcant in 65 or 33 is still a one in the number and // one of the later bits is one - if (`FPSIZES == 1) begin - assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; - assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); + if (P.FPSIZES == 1) begin + assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE]; + assign ShiftGtIntSz = (|FullRe[P.NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); - end else if (`FPSIZES == 2) begin - assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); + end else if (P.FPSIZES == 2) begin + assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]); - assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); - end else if (`FPSIZES == 3) begin + assign ShiftGtIntSz = (|FullRe[P.NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); + end else if (P.FPSIZES == 3) begin always_comb case (OutFmt) - `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; - `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); - `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]); + P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE]; + P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]); + P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]); default: ResExpGteMax = 1'bx; endcase - assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[P.NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); - 
end else if (`FPSIZES == 4) begin + end else if (P.FPSIZES == 4) begin always_comb case (OutFmt) - `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE]; - `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]); - `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]); - `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]); + P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE]; + P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]); + P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]); + P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]); endcase - assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[P.Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end @@ -127,7 +126,7 @@ module flags( // | and the exponent isn't negitive // | | if the input isnt infinity or NaN // | | | - assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero); + assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero); /////////////////////////////////////////////////////////////////////////////// // Underflow @@ -141,7 +140,7 @@ module flags( // | | | | and if the result is not exact // | | | | | and if the input isnt infinity or NaN // | | | | | | - assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid); + assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid); /////////////////////////////////////////////////////////////////////////////// @@ -156,7 +155,7 @@ module flags( // if the res is too small to be represented and not 0 // | and if the res is not invalid (outside the integer 
bounds) // | | - assign IntInexact = ((CvtCe[`NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid; + assign IntInexact = ((CvtCe[P.NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid; // select the inexact flag to output assign Inexact = ToInt ? IntInexact : FpInexact; @@ -178,7 +177,7 @@ module flags( // | | | | or the res rounds up out of bounds // | | | | and the res didn't underflow // | | | | | - assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); + assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[P.NE+1])|((Xs&~Signed)&(~((CvtCe[P.NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); // | // or when the positive res rounds up out of range diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv index dabf0dfde..41fb84309 100644 --- a/src/fpu/postproc/fmashiftcalc.sv +++ b/src/fpu/postproc/fmashiftcalc.sv @@ -26,21 +26,19 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module fmashiftcalc( - input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single - input logic [`NE+1:0] FmaSe, // sum's exponent - input logic [3*`NF+3:0] FmaSm, // the positive sum - input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // normalization shift count - output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results +module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( + input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single + input logic [P.NE+1:0] FmaSe, // sum's exponent + input logic [3*P.NF+3:0] FmaSm, // the positive sum + input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // normalization shift count + output logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results output logic FmaSZero, // is the result subnormal - 
calculated before LZA corection output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection - output logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt, // normalization shift count - output logic [3*`NF+5:0] FmaShiftIn // is the sum zero + output logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt, // normalization shift count + output logic [3*P.NF+5:0] FmaShiftIn // shift input: the sum zero-extended by two bits ); - logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias - logic [`NE+1:0] BiasCorr; // correction for bias + logic [P.NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the P.FLEN bias + logic [P.NE+1:0] BiasCorr; // correction for bias /////////////////////////////////////////////////////////////////////////////// // Normalization @@ -50,75 +48,75 @@ module fmashiftcalc( assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent - assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+5)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3); + assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+5)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+3); //convert the sum's exponent into the proper percision - if (`FPSIZES == 1) begin + if (P.FPSIZES == 1) begin assign NormSumExp = PreNormSumExp; - end else if (`FPSIZES == 2) begin - assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS); + end else if (P.FPSIZES == 2) begin + assign BiasCorr = Fmt ? 
(P.NE+2)'(0) : (P.NE+2)'(P.BIAS1-P.BIAS); assign NormSumExp = PreNormSumExp+BiasCorr; - end else if (`FPSIZES == 3) begin + end else if (P.FPSIZES == 3) begin always_comb begin case (Fmt) - `FMT: BiasCorr = '0; - `FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS); - `FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS); + P.FMT: BiasCorr = '0; + P.FMT1: BiasCorr = (P.NE+2)'(P.BIAS1-P.BIAS); + P.FMT2: BiasCorr = (P.NE+2)'(P.BIAS2-P.BIAS); default: BiasCorr = 'x; endcase end assign NormSumExp = PreNormSumExp+BiasCorr; - end else if (`FPSIZES == 4) begin + end else if (P.FPSIZES == 4) begin always_comb begin case (Fmt) 2'h3: BiasCorr = '0; - 2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS); - 2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS); - 2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS); + 2'h1: BiasCorr = (P.NE+2)'(P.D_BIAS-P.Q_BIAS); + 2'h0: BiasCorr = (P.NE+2)'(P.S_BIAS-P.Q_BIAS); + 2'h2: BiasCorr = (P.NE+2)'(P.H_BIAS-P.Q_BIAS); endcase end assign NormSumExp = PreNormSumExp+BiasCorr; end // determine if the result is subnormal: (NormSumExp <= 0) & (NormSumExp >= -FracLen) & ~FmaSZero - if (`FPSIZES == 1) begin + if (P.FPSIZES == 1) begin logic Sum0LEZ, Sum0GEFL; - assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; - assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2)); assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; - end else if (`FPSIZES == 2) begin + end else if (P.FPSIZES == 2) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; - assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; - assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); - assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp; + assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = 
$signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2)); + assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS1)); + assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF1-2+P.BIAS-P.BIAS1)) | ~|PreNormSumExp; assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero; - end else if (`FPSIZES == 3) begin + end else if (P.FPSIZES == 3) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; - assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; - assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); - assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp; - assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2)); - assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp; + assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2)); + assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS1)); + assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF1-2+P.BIAS-P.BIAS1)) | ~|PreNormSumExp; + assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS2)); + assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF2-2+P.BIAS-P.BIAS2)) | ~|PreNormSumExp; always_comb begin case (Fmt) - `FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; - `FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; - `FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; + P.FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + P.FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + P.FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; default: FmaPreResultSubnorm = 1'bx; endcase end - end else if (`FPSIZES == 4) begin + end else if 
(P.FPSIZES == 4) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL; - assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; - assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS)); - assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp; - assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS)); - assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp; - assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS)); - assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp; + assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2)); + assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.D_BIAS)); + assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.D_NF-2+P.BIAS-P.D_BIAS)) | ~|PreNormSumExp; + assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.S_BIAS)); + assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.S_NF-2+P.BIAS-P.S_BIAS)) | ~|PreNormSumExp; + assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.H_BIAS)); + assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.H_NF-2+P.BIAS-P.H_BIAS)) | ~|PreNormSumExp; always_comb begin case (Fmt) 2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; @@ -132,6 +130,6 @@ module fmashiftcalc( // set and calculate the shift input and amount // - shift once if killing a product and the result is subnormal assign FmaShiftIn = {2'b0, FmaSm}; - if (`FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2): FmaSCnt+1; - else assign FmaShiftAmt = FmaPreResultSubnorm ? 
FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2)+BiasCorr[$clog2(3*`NF+5)-1:0]: FmaSCnt+1; + if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+2): FmaSCnt+1; + else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+2)+BiasCorr[$clog2(3*P.NF+5)-1:0]: FmaSCnt+1; endmodule diff --git a/src/fpu/postproc/negateintres.sv b/src/fpu/postproc/negateintres.sv index 939bcc335..069a1a2b9 100644 --- a/src/fpu/postproc/negateintres.sv +++ b/src/fpu/postproc/negateintres.sv @@ -25,26 +25,25 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" -module negateintres( +module negateintres import cvw::*; #(parameter cvw_t P) ( input logic Signed, // is the integer input signed input logic Int64, // is the integer input 64-bits input logic Plus1, // should one be added for rounding? 
input logic Xs, // X sign - input logic [`NORMSHIFTSZ-1:0] Shifted, // output from normalization shifter + input logic [P.NORMSHIFTSZ-1:0] Shifted, // output from normalization shifter output logic [1:0] CvtNegResMsbs, // most signigficant bits of possibly negated result - output logic [`XLEN+1:0] CvtNegRes // possibly negated integer result + output logic [P.XLEN+1:0] CvtNegRes // possibly negated integer result ); - logic [`XLEN+1:0] CvtPreRes; // integer result with rounding + logic [P.XLEN+1:0] CvtPreRes; // integer result with rounding logic [2:0] CvtNegResMsbs3; // first three msbs of possibly negated result // round and negate the positive res if needed - assign CvtPreRes = {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}; - mux2 #(`XLEN+2) resmux(CvtPreRes, -CvtPreRes, Xs, CvtNegRes); + assign CvtPreRes = {2'b0, Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.XLEN]}+{{P.XLEN+1{1'b0}}, Plus1}; + mux2 #(P.XLEN+2) resmux(CvtPreRes, -CvtPreRes, Xs, CvtNegRes); // select 2 most significant bits - mux2 #(3) msb3mux(CvtNegRes[33:31], CvtNegRes[`XLEN+1:`XLEN-1], Int64, CvtNegResMsbs3); + mux2 #(3) msb3mux(CvtNegRes[33:31], CvtNegRes[P.XLEN+1:P.XLEN-1], Int64, CvtNegResMsbs3); mux2 #(2) msb2mux(CvtNegResMsbs3[2:1], CvtNegResMsbs3[1:0], Signed, CvtNegResMsbs); -endmodule \ No newline at end of file +endmodule diff --git a/src/fpu/postproc/normshift.sv b/src/fpu/postproc/normshift.sv index 44469e316..f80230219 100644 --- a/src/fpu/postproc/normshift.sv +++ b/src/fpu/postproc/normshift.sv @@ -25,8 +25,6 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - // convert shift // fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp @@ -72,11 +70,11 @@ // | Nf 0's | Qm | << calculated shift amount // . 
-module normshift( - input logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt, // shift amount - input logic [`NORMSHIFTSZ-1:0] ShiftIn, // number to be shifted - output logic [`NORMSHIFTSZ-1:0] Shifted // shifted result +module normshift import cvw::*; #(parameter cvw_t P) ( + input logic [P.LOGNORMSHIFTSZ-1:0] ShiftAmt, // shift amount + input logic [P.NORMSHIFTSZ-1:0] ShiftIn, // number to be shifted + output logic [P.NORMSHIFTSZ-1:0] Shifted // shifted result ); assign Shifted = ShiftIn << ShiftAmt; -endmodule \ No newline at end of file +endmodule diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index 07723d7f1..be06fbdd3 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -26,14 +26,12 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module postprocess ( +module postprocess import cvw::*; #(parameter cvw_t P) ( // general signals input logic Xs, Ys, // input signs - input logic [`NF:0] Xm, Ym, Zm, // input mantissas + input logic [P.NF:0] Xm, Ym, Zm, // input mantissas input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single + input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic [2:0] OpCtrl, // choose which opperation (look below for values) input logic XZero, YZero, // inputs are zero input logic XInf, YInf, ZInf, // inputs are infinity @@ -44,63 +42,63 @@ module postprocess ( input logic FmaAs, // the modified Z sign - depends on instruction input logic FmaPs, // the product's sign input logic FmaSs, // Sum sign - input logic [`NE+1:0] FmaSe, // the sum's exponent - input logic [3*`NF+3:0] FmaSm, // the positive sum + input logic [P.NE+1:0] FmaSe, // the sum's exponent + 
input logic [3*P.NF+3:0] FmaSm, // the positive sum input logic FmaASticky, // sticky bit that is calculated during alignment - input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // the normalization shift count + input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // the normalization shift count //divide signals input logic DivSticky, // divider sticky bit - input logic [`NE+1:0] DivQe, // divsqrt exponent - input logic [`DIVb:0] DivQm, // divsqrt significand + input logic [P.NE+1:0] DivQe, // divsqrt exponent + input logic [P.DIVb:0] DivQm, // divsqrt significand // conversion signals input logic CvtCs, // the result's sign - input logic [`NE:0] CvtCe, // the calculated expoent + input logic [P.NE:0] CvtCe, // the calculated expoent input logic CvtResSubnormUf, // the convert result is subnormal or underflows - input logic [`LOGCVTLEN-1:0] CvtShiftAmt,// how much to shift by + input logic [P.LOGCVTLEN-1:0] CvtShiftAmt,// how much to shift by input logic ToInt, // is fp->int (since it's writting to the integer register) - input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb) + input logic [P.CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb) input logic IntZero, // is the integer input zero // final results - output logic [`FLEN-1:0] PostProcRes,// postprocessor final result + output logic [P.FLEN-1:0] PostProcRes,// postprocessor final result output logic [4:0] PostProcFlg,// postprocesser flags - output logic [`XLEN-1:0] FCvtIntRes // the integer conversion result + output logic [P.XLEN-1:0] FCvtIntRes // the integer conversion result ); // general signals logic Rs; // result sign - logic [`NF-1:0] Rf; // Result fraction - logic [`NE-1:0] Re; // Result exponent + logic [P.NF-1:0] Rf; // Result fraction + logic [P.NE-1:0] Re; // Result exponent logic Ms; // norMalized sign - logic [`CORRSHIFTSZ-1:0] Mf; // norMalized fraction - logic [`NE+1:0] Me; // normalized exponent - logic [`NE+1:0] FullRe; // Re with bits to determine 
sign and overflow + logic [P.CORRSHIFTSZ-1:0] Mf; // norMalized fraction + logic [P.NE+1:0] Me; // normalized exponent + logic [P.NE+1:0] FullRe; // Re with bits to determine sign and overflow logic UfPlus1; // do you add one (for determining underflow flag) - logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt; // normalization shift amount - logic [`NORMSHIFTSZ-1:0] ShiftIn; // input to normalization shift - logic [`NORMSHIFTSZ-1:0] Shifted; // the ouput of the normalized shifter (before shift correction) + logic [P.LOGNORMSHIFTSZ-1:0] ShiftAmt; // normalization shift amount + logic [P.NORMSHIFTSZ-1:0] ShiftIn; // input to normalization shift + logic [P.NORMSHIFTSZ-1:0] Shifted; // the ouput of the normalized shifter (before shift correction) logic Plus1; // add one to the final result? logic Overflow; // overflow flag used to select results logic Invalid; // invalid flag used to select results logic Guard, Round, Sticky; // bits needed to determine rounding - logic [`FMTBITS-1:0] OutFmt; // output format + logic [P.FMTBITS-1:0] OutFmt; // output format // fma signals - logic [`NE+1:0] FmaMe; // exponent of the normalized sum + logic [P.NE+1:0] FmaMe; // exponent of the normalized sum logic FmaSZero; // is the sum zero - logic [3*`NF+5:0] FmaShiftIn; // fma shift input - logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results + logic [3*P.NF+5:0] FmaShiftIn; // fma shift input + logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection - logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma + logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma // division singals - logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount - logic [`NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input - logic [`NE+1:0] Qe; // divsqrt corrected 
exponent after corretion shift + logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount + logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input + logic [P.NE+1:0] Qe; // divsqrt corrected exponent after corretion shift logic DivByZero; // divide by zero flag logic DivResSubnorm; // is the divsqrt result subnormal logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed) // conversion signals - logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted for converter + logic [P.CVTLEN+P.NF:0] CvtShiftIn; // number to be shifted for converter logic [1:0] CvtNegResMsbs; // most significant bits of possibly negated int result - logic [`XLEN+1:0] CvtNegRes; // possibly negated integer result + logic [P.XLEN+1:0] CvtNegRes; // possibly negated integer result logic CvtResUf; // did the convert result underflow logic IntInvalid; // invalid integer flag // readability signals @@ -132,9 +130,9 @@ module postprocess ( // choose the ouptut format depending on the opperation // - fp -> fp: OpCtrl contains the percision of the output // - otherwise: Fmt contains the percision of the output - if (`FPSIZES == 2) - assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT); - else if (`FPSIZES == 3 | `FPSIZES == 4) + if (P.FPSIZES == 2) + assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == P.FMT); + else if (P.FPSIZES == 3 | P.FPSIZES == 4) assign OutFmt = IntToFp|~CvtOp ? 
Fmt : OpCtrl[1:0]; /////////////////////////////////////////////////////////////////////////////// @@ -142,40 +140,40 @@ module postprocess ( /////////////////////////////////////////////////////////////////////////////// // final claulations before shifting - cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn, + cvtshiftcalc #(P) cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe, + fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe, .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); + divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); // select which unit's output to shift always_comb case(PostProcSel) 2'b10: begin // fma - ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt}; - ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+6){1'b0}}}; + ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(3*P.NF+5){1'b0}}, FmaShiftAmt}; + ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+6){1'b0}}}; end 2'b00: begin // cvt - ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt}; - ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; + ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt}; + ShiftIn = {CvtShiftIn, {P.NORMSHIFTSZ-P.CVTLEN-P.NF-1{1'b0}}}; end 2'b01: begin //divsqrt ShiftAmt = DivShiftAmt; ShiftIn = DivShiftIn; end default: begin - ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}}; - ShiftIn = {`NORMSHIFTSZ{1'bx}}; + ShiftAmt = {P.LOGNORMSHIFTSZ{1'bx}}; + ShiftIn = {P.NORMSHIFTSZ{1'bx}}; end endcase // main normalization shift - normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); + normshift #(P) normshift (.ShiftIn, .ShiftAmt, .Shifted); // correct for LZA/divsqrt error - shiftcorrection 
shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp, + shiftcorrection #(P) shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp, .DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf); /////////////////////////////////////////////////////////////////////////////// @@ -191,7 +189,7 @@ module postprocess ( // calulate result sign used in rounding unit roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms); - round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe, + round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe, .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf, .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me); @@ -206,7 +204,7 @@ module postprocess ( // Flags /////////////////////////////////////////////////////////////////////////////// - flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, + flags #(P) flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, .NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero, .Guard, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, @@ -216,9 +214,9 @@ module postprocess ( // Select the result /////////////////////////////////////////////////////////////////////////////// - negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes); + negateintres #(P) negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes); - specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, + specialcase #(P) specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, 
.PostProcRes, .FCvtIntRes); diff --git a/src/fpu/postproc/resultsign.sv b/src/fpu/postproc/resultsign.sv index 7eeba9e8a..9701cb7a6 100644 --- a/src/fpu/postproc/resultsign.sv +++ b/src/fpu/postproc/resultsign.sv @@ -26,8 +26,6 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - module resultsign( input logic [2:0] Frm, // rounding mode input logic FmaOp, // is the operation an Fma @@ -77,4 +75,4 @@ module resultsign( else if(FmaSZero&FmaOp) Rs = Zeros; else Rs = Ms; -endmodule \ No newline at end of file +endmodule diff --git a/src/fpu/postproc/round.sv b/src/fpu/postproc/round.sv index c1914bb53..6de52f778 100644 --- a/src/fpu/postproc/round.sv +++ b/src/fpu/postproc/round.sv @@ -26,42 +26,33 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" -// what position is XLEN in? -// options: -// 1: XLEN > NF > NF1 -// 2: NF > XLEN > NF1 -// 3: NF > NF1 > XLEN -// single and double will always be smaller than XLEN -`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3) - -module round( - input logic [`FMTBITS-1:0] OutFmt, // output format +module round import cvw::*; #(parameter cvw_t P) ( + input logic [P.FMTBITS-1:0] OutFmt, // output format input logic [2:0] Frm, // rounding mode input logic [1:0] PostProcSel, // select the postprocessor output input logic Ms, // normalized sign - input logic [`CORRSHIFTSZ-1:0] Mf, // normalized fraction + input logic [P.CORRSHIFTSZ-1:0] Mf, // normalized fraction // fma input logic FmaOp, // is an fma opperation being done? 
- input logic [`NE+1:0] FmaMe, // exponent of the normalized sum for fma + input logic [P.NE+1:0] FmaMe, // exponent of the normalized sum for fma input logic FmaASticky, // addend's sticky bit // divsqrt input logic DivOp, // is a division opperation being done input logic DivSticky, // divsqrt sticky bit - input logic [`NE+1:0] Qe, // the divsqrt calculated expoent + input logic [P.NE+1:0] Qe, // the divsqrt calculated expoent // cvt input logic CvtOp, // is a convert opperation being done input logic ToInt, // is the cvt op a cvt to integer input logic CvtResSubnormUf, // is the cvt result subnormal or underflow input logic CvtResUf, // does the cvt result underflow - input logic [`NE:0] CvtCe, // the cvt calculated expoent + input logic [P.NE:0] CvtCe, // the cvt calculated expoent // outputs - output logic [`NE+1:0] Me, // normalied fraction + output logic [P.NE+1:0] Me, // normalied fraction output logic UfPlus1, // do you add one to the result if given an unbounded exponent - output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow - output logic [`NE-1:0] Re, // Result exponent - output logic [`NF-1:0] Rf, // Result fractionNormS + output logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow + output logic [P.NE-1:0] Re, // Result exponent + output logic [P.NF-1:0] Rf, // Result fractionNormS output logic Sticky, // sticky bit output logic Plus1, // do you add one to the final result output logic Round, Guard // bits needed to calculate rounding @@ -69,7 +60,7 @@ module round( logic UfCalcPlus1; // calculated plus one for unbounded exponent logic NormSticky; // normalized sum's sticky bit - logic [`NF-1:0] RoundFrac; // rounded fraction + logic [P.NF-1:0] RoundFrac; // rounded fraction logic FpRes; // is the result a floating point logic IntRes; // is the result an integer logic FpGuard, FpRound; // floating point round/guard bits @@ -77,8 +68,17 @@ module round( logic LsbRes; // lsb of result logic CalcPlus1; // 
calculated plus1 logic FpPlus1; // do you add one to the fp result - logic [`FLEN:0] RoundAdd; // how much to add to the result + logic [P.FLEN:0] RoundAdd; // how much to add to the result +// what position is XLEN in? +// options: +// 1: XLEN > NF > NF1 +// 2: NF > XLEN > NF1 +// 3: NF > NF1 > XLEN +// single and double will always be smaller than XLEN +//`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3) + localparam XLENPOS = P.XLEN > P.NF ? 1 : P.XLEN > P.NF1 ? 2 : 3; + /////////////////////////////////////////////////////////////////////////////// // Rounding /////////////////////////////////////////////////////////////////////////////// @@ -115,68 +115,68 @@ module round( assign FpRes = ~IntRes; // sticky bit calculation - if (`FPSIZES == 1) begin + if (P.FPSIZES == 1) begin // 1: XLEN > NF // | XLEN | // | NF |1|1| // ^ ^ if floating point result // ^ if not an FMA result - if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); + if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | + (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); // 2: NF > XLEN - if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) | - (|Mf[`CORRSHIFTSZ-`NF-2:0]); + if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&IntRes) | + (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); - end else if (`FPSIZES == 2) begin + end else if (P.FPSIZES == 2) begin // XLEN is either 64 or 32 // so half and single are always smaller then XLEN // 1: XLEN > NF > NF1 - if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) | - (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); + if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~OutFmt) | + (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | + 
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) | - (|Mf[`CORRSHIFTSZ-`NF-2:0]); + if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) | + (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) | + (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); // 3: NF > NF1 > XLEN - if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) | - (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) | - (|Mf[`CORRSHIFTSZ-`NF-2:0]); + if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&IntRes) | + (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) | + (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); - end else if (`FPSIZES == 3) begin + end else if (P.FPSIZES == 3) begin // 1: XLEN > NF > NF1 - if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | - (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) | - (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); + if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) | + (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) | + (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | + (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | - (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) | - (|Mf[`CORRSHIFTSZ-`NF-2:0]); + if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) | + 
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) | + (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) | + (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); // 3: NF > NF1 > XLEN - if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) | - (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) | - (|Mf[`CORRSHIFTSZ-`NF-2:0]); + if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT1)) | + (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT1)|IntRes)) | + (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) | + (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); - end else if (`FPSIZES == 4) begin + end else if (P.FPSIZES == 4) begin // Quad precision will always be greater than XLEN // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | - (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | - (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | - (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); + if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) | + (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | + (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) | + (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) | + (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]); // 3: NF > NF1 > XLEN // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer - if (`XLENPOS == 3) assign NormSticky = 
(|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | - (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) | - (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | - (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); + if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) | + (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | + (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) | + (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) | + (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]); end @@ -184,40 +184,40 @@ module round( // only add the Addend sticky if doing an FMA opperation // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) - assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivSticky&DivOp; + assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[P.NE+1]&FmaOp | DivSticky&DivOp; // determine round and LSB of the rounded value // - underflow round bit is used to determint the underflow flag - if (`FPSIZES == 1) begin - assign FpGuard = Mf[`CORRSHIFTSZ-`NF-1]; - assign FpLsbRes = Mf[`CORRSHIFTSZ-`NF]; - assign FpRound = Mf[`CORRSHIFTSZ-`NF-2]; + if (P.FPSIZES == 1) begin + assign FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1]; + assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF]; + assign FpRound = Mf[P.CORRSHIFTSZ-P.NF-2]; - end else if (`FPSIZES == 2) begin - assign FpGuard = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1]; - assign FpLsbRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1]; - assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2]; + end else if (P.FPSIZES == 2) begin + assign FpGuard = OutFmt ? 
Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1]; + assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1]; + assign FpRound = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2]; - end else if (`FPSIZES == 3) begin + end else if (P.FPSIZES == 3) begin always_comb case (OutFmt) - `FMT: begin - FpGuard = Mf[`CORRSHIFTSZ-`NF-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`NF]; - FpRound = Mf[`CORRSHIFTSZ-`NF-2]; + P.FMT: begin + FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1]; + FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF]; + FpRound = Mf[P.CORRSHIFTSZ-P.NF-2]; end - `FMT1: begin - FpGuard = Mf[`CORRSHIFTSZ-`NF1-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`NF1]; - FpRound = Mf[`CORRSHIFTSZ-`NF1-2]; + P.FMT1: begin + FpGuard = Mf[P.CORRSHIFTSZ-P.NF1-1]; + FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1]; + FpRound = Mf[P.CORRSHIFTSZ-P.NF1-2]; end - `FMT2: begin - FpGuard = Mf[`CORRSHIFTSZ-`NF2-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`NF2]; - FpRound = Mf[`CORRSHIFTSZ-`NF2-2]; + P.FMT2: begin + FpGuard = Mf[P.CORRSHIFTSZ-P.NF2-1]; + FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2]; + FpRound = Mf[P.CORRSHIFTSZ-P.NF2-2]; end default: begin FpGuard = 1'bx; @@ -225,35 +225,35 @@ module round( FpRound = 1'bx; end endcase - end else if (`FPSIZES == 4) begin + end else if (P.FPSIZES == 4) begin always_comb case (OutFmt) 2'h3: begin - FpGuard = Mf[`CORRSHIFTSZ-`Q_NF-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`Q_NF]; - FpRound = Mf[`CORRSHIFTSZ-`Q_NF-2]; + FpGuard = Mf[P.CORRSHIFTSZ-P.Q_NF-1]; + FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF]; + FpRound = Mf[P.CORRSHIFTSZ-P.Q_NF-2]; end 2'h1: begin - FpGuard = Mf[`CORRSHIFTSZ-`D_NF-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`D_NF]; - FpRound = Mf[`CORRSHIFTSZ-`D_NF-2]; + FpGuard = Mf[P.CORRSHIFTSZ-P.D_NF-1]; + FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF]; + FpRound = Mf[P.CORRSHIFTSZ-P.D_NF-2]; end 2'h0: begin - FpGuard = Mf[`CORRSHIFTSZ-`S_NF-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`S_NF]; - FpRound = Mf[`CORRSHIFTSZ-`S_NF-2]; + FpGuard = Mf[P.CORRSHIFTSZ-P.S_NF-1]; + FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF]; + FpRound = 
Mf[P.CORRSHIFTSZ-P.S_NF-2]; end 2'h2: begin - FpGuard = Mf[`CORRSHIFTSZ-`H_NF-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`H_NF]; - FpRound = Mf[`CORRSHIFTSZ-`H_NF-2]; + FpGuard = Mf[P.CORRSHIFTSZ-P.H_NF-1]; + FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF]; + FpRound = Mf[P.CORRSHIFTSZ-P.H_NF-2]; end endcase end - assign Guard = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpGuard; - assign LsbRes = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLsbRes; - assign Round = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpRound; + assign Guard = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN-1] : FpGuard; + assign LsbRes = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN] : FpLsbRes; + assign Round = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN-2] : FpRound; always_comb begin @@ -287,26 +287,26 @@ module round( // place Plus1 into the proper position for the format - if (`FPSIZES == 1) begin - assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1}; + if (P.FPSIZES == 1) begin + assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1}; - end else if (`FPSIZES == 2) begin + end else if (P.FPSIZES == 2) begin // \/FLEN+1 // | NE+2 | NF | // '-NE+2-^----NF1----^ - // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1 - assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt}; + // P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1 + assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt}; - end else if (`FPSIZES == 3) begin - assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)}; + end else if (P.FPSIZES == 3) begin + assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)}; - end else if (`FPSIZES == 4) - assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)}; + end else if 
(P.FPSIZES == 4) + assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)}; // trim unneeded bits from fraction - assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; + assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF]; @@ -314,7 +314,7 @@ module round( always_comb case(PostProcSel) 2'b10: Me = FmaMe; // fma - 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt + 2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt // 2'b01: Me = DivDone ? Qe : '0; // divide 2'b01: Me = Qe; // divide default: Me = '0; @@ -325,7 +325,7 @@ module round( // round the result // - if the fraction overflows one should be added to the exponent assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd; - assign Re = FullRe[`NE-1:0]; + assign Re = FullRe[P.NE-1:0]; endmodule diff --git a/src/fpu/postproc/roundsign.sv b/src/fpu/postproc/roundsign.sv index 8809f0a4b..6c1135602 100644 --- a/src/fpu/postproc/roundsign.sv +++ b/src/fpu/postproc/roundsign.sv @@ -25,7 +25,6 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" module roundsign( input logic Xs, // x sign @@ -47,4 +46,4 @@ module roundsign( // Select sign for rounding calulation assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); -endmodule \ No newline at end of file +endmodule diff --git a/src/fpu/postproc/shiftcorrection.sv b/src/fpu/postproc/shiftcorrection.sv index 2fb0b5d7e..1a4613d5f 100644 --- a/src/fpu/postproc/shiftcorrection.sv +++ b/src/fpu/postproc/shiftcorrection.sv @@ -26,53 +26,51 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module shiftcorrection( - input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction +module shiftcorrection import cvw::*; #(parameter cvw_t P) ( + input logic [P.NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction // divsqrt input logic DivOp, // is it a divsqrt opperation input logic DivResSubnorm, // is the divsqrt result subnormal - input logic [`NE+1:0] DivQe, // the divsqrt result's exponent + input logic [P.NE+1:0] DivQe, // the divsqrt result's exponent input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed) //fma input logic FmaOp, // is it an fma opperation - input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results + input logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results input logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection input logic FmaSZero, // output - output logic [`NE+1:0] FmaMe, // exponent of the normalized sum - output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction - output logic [`NE+1:0] Qe // corrected exponent for divider + output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum + output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction + output logic [P.NE+1:0] Qe // corrected exponent for divider ); - logic [3*`NF+3:0] CorrSumShifted; // the shifted sum after LZA correction - logic [`CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted - logic [`CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift + logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction + logic [P.CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for 
CorrQmShifted + logic [P.CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift logic ResSubnorm; // is the result Subnormal logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction logic LeftShiftQm; // should the divsqrt result be shifted one to the left // LZA correction - assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1]; + assign LZAPlus1 = Shifted[P.NORMSHIFTSZ-1]; // correct the shifting error caused by the LZA // - the only possible mantissa for a plus two is all zeroes // - a one has to propigate all the way through a sum. so we can leave the bottom statement alone - mux2 #(`NORMSHIFTSZ-2) lzacorrmux(Shifted[`NORMSHIFTSZ-3:0], Shifted[`NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted); + mux2 #(P.NORMSHIFTSZ-2) lzacorrmux(Shifted[P.NORMSHIFTSZ-3:0], Shifted[P.NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted); // correct the shifting of the divsqrt caused by producing a result in (2, .5] range // condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm) assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1)); - assign CorrQm0 = Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; - assign CorrQm1 = Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1]; - mux2 #(`CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted); + assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2]; + assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1]; + mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted); // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits always_comb - if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}}; + if(FmaOp) Mf = {CorrSumShifted, {P.CORRSHIFTSZ-(3*P.NF+4){1'b0}}}; else if (DivOp&~DivResSubnorm) Mf = CorrQmShifted; - else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; + else Mf = 
Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.CORRSHIFTSZ]; // Determine sum's exponent // main exponent issues: @@ -82,12 +80,12 @@ module shiftcorrection( // - if the result was calulated to be subnorm but it's norm and the LZA was off by 2 // if plus1 If plus2 kill if the result Zero or actually subnormal // | | | - assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, FmaPreResultSubnorm}) & {`NE+2{~(FmaSZero|ResSubnorm)}}; + assign FmaMe = (NormSumExp+{{P.NE+1{1'b0}}, LZAPlus1} +{{P.NE+1{1'b0}}, FmaPreResultSubnorm}) & {P.NE+2{~(FmaSZero|ResSubnorm)}}; // recalculate if the result is subnormal after LZA correction - assign ResSubnorm = FmaPreResultSubnorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1]; + assign ResSubnorm = FmaPreResultSubnorm&~Shifted[P.NORMSHIFTSZ-2]&~Shifted[P.NORMSHIFTSZ-1]; // the quotent is in the range [.5,2) if there is no early termination // if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift - assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(`NE+1)'(0), ~LZAPlus1}; -endmodule \ No newline at end of file + assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1}; +endmodule diff --git a/src/fpu/postproc/specialcase.sv b/src/fpu/postproc/specialcase.sv index 6b2985c07..3825a0f78 100644 --- a/src/fpu/postproc/specialcase.sv +++ b/src/fpu/postproc/specialcase.sv @@ -26,14 +26,12 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module specialcase( +module specialcase import cvw::*; #(parameter cvw_t P) ( input logic Xs, // X sign - input logic [`NF:0] Xm, Ym, Zm, // input significand's + input logic [P.NF:0] Xm, Ym, Zm, // input significand's input logic XNaN, YNaN, ZNaN, // are the inputs NaN input logic [2:0] Frm, // rounding mode - input logic [`FMTBITS-1:0] OutFmt, // output format + input logic [P.FMTBITS-1:0] OutFmt, // output format input logic InfIn, // are any inputs infinity input logic NaNIn, // are any input NaNs input logic XInf, YInf, // are X or Y inifnity @@ -41,9 +39,9 @@ module specialcase( input logic Plus1, // do you add one for rounding input logic Rs, // the result's sign input logic Invalid, Overflow, // flags to choose the result - input logic [`NE-1:0] Re, // Result exponent - input logic [`NE+1:0] FullRe, // Result full exponent - input logic [`NF-1:0] Rf, // Result fraction + input logic [P.NE-1:0] Re, // Result exponent + input logic [P.NE+1:0] FullRe, // Result full exponent + input logic [P.NF-1:0] Rf, // Result fraction // fma input logic FmaOp, // is it a fma opperation // divsqrt @@ -55,23 +53,23 @@ module specialcase( input logic IntToFp, // is cvt int -> fp opperation input logic Int64, // is the integer 64 bits input logic Signed, // is the integer signed - input logic [`NE:0] CvtCe, // the calculated expoent for cvt + input logic [P.NE:0] CvtCe, // the calculated expoent for cvt input logic IntInvalid, // integer invalid flag to choose the result input logic CvtResUf, // does the convert result underflow - input logic [`XLEN+1:0] CvtNegRes, // the possibly negated of the integer result + input logic [P.XLEN+1:0] CvtNegRes, // the possibly negated of the integer result // outputs - output logic [`FLEN-1:0] PostProcRes,// final result - output logic [`XLEN-1:0] FCvtIntRes // final integer result + output logic [P.FLEN-1:0] 
PostProcRes,// final result + output logic [P.XLEN-1:0] FCvtIntRes // final integer result ); - logic [`FLEN-1:0] XNaNRes; // X is NaN result - logic [`FLEN-1:0] YNaNRes; // Y is NaN result - logic [`FLEN-1:0] ZNaNRes; // Z is NaN result - logic [`FLEN-1:0] InvalidRes; // Invalid result result - logic [`FLEN-1:0] UfRes; // underflowed result result - logic [`FLEN-1:0] OfRes; // overflowed result result - logic [`FLEN-1:0] NormRes; // normal result - logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output + logic [P.FLEN-1:0] XNaNRes; // X is NaN result + logic [P.FLEN-1:0] YNaNRes; // Y is NaN result + logic [P.FLEN-1:0] ZNaNRes; // Z is NaN result + logic [P.FLEN-1:0] InvalidRes; // Invalid result result + logic [P.FLEN-1:0] UfRes; // underflowed result result + logic [P.FLEN-1:0] OfRes; // overflowed result result + logic [P.FLEN-1:0] NormRes; // normal result + logic [P.XLEN-1:0] OfIntRes; // the overflow result for integer output logic OfResMax; // does the of result output maximum norm fp number logic KillRes; // kill the result for underflow logic SelOfRes; // should the overflow result be selected @@ -82,158 +80,158 @@ module specialcase( assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs)); // select correct outputs for special cases - if (`FPSIZES == 1) begin + if (P.FPSIZES == 1) begin //NaN res selection depending on standard - if(`IEEE754) begin - assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + if(P.IEEE754) begin + assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]}; + assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]}; + assign ZNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]}; + assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}}; end else begin - assign 
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}}; end - assign OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; - assign UfRes = {Rs, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)}; + assign OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}}; + assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)}; assign NormRes = {Rs, Re, Rf}; - end else if (`FPSIZES == 2) begin - if(`IEEE754) begin - assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; - assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; - assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end else if (P.FPSIZES == 2) begin + if(P.IEEE754) begin + assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]}; + assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]}; + assign ZNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.NF1]}; + assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)}; end else begin - assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + assign InvalidRes = OutFmt ? 
{1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)}; end always_comb if(OutFmt) - if(OfResMax) OfRes = {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}}; - else OfRes = {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; + if(OfResMax) OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}}; + else OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}}; else - if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}}; - else OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)}; - assign UfRes = OutFmt ? {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - assign NormRes = OutFmt ? {Rs, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; + if(OfResMax) OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}}; + else OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)}; + assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]}; - end else if (`FPSIZES == 3) begin + end else if (P.FPSIZES == 3) begin always_comb case (OutFmt) - `FMT: begin - if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + P.FMT: begin + if(P.IEEE754) begin + XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]}; + YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]}; + ZNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]}; + InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}}; end else begin - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}}; end - OfRes = OfResMax ? 
{Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}}; + UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; NormRes = {Rs, Re, Rf}; end - `FMT1: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; - ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; - InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + P.FMT1: begin + if(P.IEEE754) begin + XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]}; + YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]}; + ZNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.NF1]}; + InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)}; end else begin - InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)}; end - OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)}; - UfRes = {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; + OfRes = OfResMax ? 
{{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)}; + UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]}; end - `FMT2: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; - YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]}; - ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]}; - InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; + P.FMT2: begin + if(P.IEEE754) begin + XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]}; + YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]}; + ZNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.NF2]}; + InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)}; end else begin - InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; + InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)}; end - OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2{1'b1}}, (`NF2)'(0)}; - UfRes = {{`FLEN-`LEN2{1'b1}}, Rs, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`LEN2{1'b1}}, Rs, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]}; + OfRes = OfResMax ? 
{{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)}; + UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]}; end default: begin - if(`IEEE754) begin - XNaNRes = (`FLEN)'(0); - YNaNRes = (`FLEN)'(0); - ZNaNRes = (`FLEN)'(0); - InvalidRes = (`FLEN)'(0); + if(P.IEEE754) begin + XNaNRes = (P.FLEN)'(0); + YNaNRes = (P.FLEN)'(0); + ZNaNRes = (P.FLEN)'(0); + InvalidRes = (P.FLEN)'(0); end else begin - InvalidRes = (`FLEN)'(0); + InvalidRes = (P.FLEN)'(0); end - OfRes = (`FLEN)'(0); - UfRes = (`FLEN)'(0); - NormRes = (`FLEN)'(0); + OfRes = (P.FLEN)'(0); + UfRes = (P.FLEN)'(0); + NormRes = (P.FLEN)'(0); end endcase - end else if (`FPSIZES == 4) begin + end else if (P.FPSIZES == 4) begin always_comb case (OutFmt) 2'h3: begin - if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + if(P.IEEE754) begin + XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]}; + YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]}; + ZNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]}; + InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}}; end else begin - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}}; end - OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + OfRes = OfResMax ? 
{Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}}; + UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; NormRes = {Rs, Re, Rf}; end 2'h1: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; - YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]}; - ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]}; - InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + if(P.IEEE754) begin + XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]}; + YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]}; + ZNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.D_NF]}; + InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)}; end else begin - InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)}; end - OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE{1'b1}}, (`D_NF)'(0)}; - UfRes = {{`FLEN-`D_LEN{1'b1}}, Rs, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`D_LEN{1'b1}}, Rs, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]}; + OfRes = OfResMax ? 
{{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)}; + UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]}; end 2'h0: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; - YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]}; - ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]}; - InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + if(P.IEEE754) begin + XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]}; + YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]}; + ZNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.S_NF]}; + InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)}; end else begin - InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)}; end - OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE{1'b1}}, (`S_NF)'(0)}; - UfRes = {{`FLEN-`S_LEN{1'b1}}, Rs, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`S_LEN{1'b1}}, Rs, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]}; + OfRes = OfResMax ? 
{{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)}; + UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]}; end 2'h2: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; - YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]}; - ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]}; - InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; + if(P.IEEE754) begin + XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]}; + YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]}; + ZNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.H_NF]}; + InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)}; end else begin - InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; + InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)}; end - OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE{1'b1}}, (`H_NF)'(0)}; + OfRes = OfResMax ? 
{{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)}; // zero is exact if dividing by infinity so don't add 1 - UfRes = {{`FLEN-`H_LEN{1'b1}}, Rs, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`H_LEN{1'b1}}, Rs, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]}; + UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]}; end endcase end @@ -242,13 +240,13 @@ module specialcase( // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 // - dont set to zero if fp input is zero but not using the fp input // - dont set to zero if int input is zero but not using the int input - assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1); + assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1); // calculate if the overflow result should be selected assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); // output infinity with result sign if divide by zero - if(`IEEE754) + if(P.IEEE754) always_comb if(XNaN&~(IntToFp&CvtOp)) PostProcRes = XNaNRes; else if(YNaN&~CvtOp) PostProcRes = YNaNRes; @@ -283,14 +281,14 @@ module specialcase( always_comb if(Signed) if(Xs&~NaNIn) // signed negitive - if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}}; - else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}}; + if(Int64) OfIntRes = {1'b1, {P.XLEN-1{1'b0}}}; + else OfIntRes = {{P.XLEN-32{1'b1}}, 1'b1, {31{1'b0}}}; else // signed positive - if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}}; - else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}}; + if(Int64) OfIntRes = {1'b0, {P.XLEN-1{1'b1}}}; + else OfIntRes = {{P.XLEN-32{1'b0}}, 1'b0, {31{1'b1}}}; else - if(Xs&~NaNIn) 
OfIntRes = {`XLEN{1'b0}}; // unsigned negitive - else OfIntRes = {`XLEN{1'b1}}; // unsigned positive + if(Xs&~NaNIn) OfIntRes = {P.XLEN{1'b0}}; // unsigned negitive + else OfIntRes = {P.XLEN{1'b1}}; // unsigned positive // select the integer output @@ -301,9 +299,9 @@ module specialcase( // - otherwise output the normal res (trmined and sign extended if nessisary) always_comb if(IntInvalid) FCvtIntRes = OfIntRes; - else if(CvtCe[`NE]) - if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}}; - else FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1}; - else if(Int64) FCvtIntRes = CvtNegRes[`XLEN-1:0]; - else FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; -endmodule \ No newline at end of file + else if(CvtCe[P.NE]) + if(Xs&Signed&Plus1) FCvtIntRes = {{P.XLEN{1'b1}}}; + else FCvtIntRes = {{P.XLEN-1{1'b0}}, Plus1}; + else if(Int64) FCvtIntRes = CvtNegRes[P.XLEN-1:0]; + else FCvtIntRes = {{P.XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; +endmodule From 340aac0934384c07e2c526929e4e5120e7a54324 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 26 May 2023 16:00:14 -0500 Subject: [PATCH 19/20] Got the branch predictor parameterized using Lim's method. Also had to add a global enum included in both cvw.sv and the configs which defines the branch predictor types. This should be synthesizable, but I'll need to double check. 
--- config/buildroot/config.vh | 3 +- config/rv32e/config.vh | 4 +- config/rv32gc/config.vh | 6 +- config/rv32i/config.vh | 4 +- config/rv32imc/config.vh | 4 +- config/rv64fpquad/config.vh | 4 +- config/rv64gc/config.vh | 5 +- config/rv64gc/wally-config.vh | 2 +- config/rv64i/config.vh | 4 +- config/shared/BranchPredictorType.vh | 3 + config/shared/parameter-defs.vh | 1 + src/fpu/postproc/round.sv | 1 - src/ifu/bpred/RASPredictor.sv | 12 ++-- src/ifu/bpred/bpred.sv | 95 ++++++++++++++-------------- src/ifu/bpred/btb.sv | 38 ++++++----- src/ifu/bpred/gshare.sv | 6 +- src/ifu/bpred/gsharebasic.sv | 7 +- src/ifu/bpred/icpred.sv | 17 ++--- src/ifu/bpred/localaheadbp.sv | 11 ++-- src/ifu/bpred/localbpbasic.sv | 7 +- src/ifu/bpred/localrepairbp.sv | 11 ++-- src/ifu/bpred/satCounter2.sv | 2 - src/ifu/bpred/twoBitPredictor.sv | 7 +- src/ifu/ifu.sv | 2 +- src/wally/cvw.sv | 5 +- 25 files changed, 132 insertions(+), 129 deletions(-) create mode 100644 config/shared/BranchPredictorType.vh diff --git a/config/buildroot/config.vh b/config/buildroot/config.vh index 59848a39a..f5a1b6421 100644 --- a/config/buildroot/config.vh +++ b/config/buildroot/config.vh @@ -27,6 +27,7 @@ // include shared configuration `include "wally-shared.vh" +`include "BranchPredictorType.vh" localparam FPGA = 1; localparam QEMU = 0; @@ -131,7 +132,7 @@ localparam PLIC_UART_ID = 32'd10; localparam PLIC_GPIO_ID = 32'd3; localparam BPRED_SUPPORTED = 1; -localparam BPRED_TYPE = "GSHARE_N"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT; +localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT localparam BPRED_SIZE = 32'd10; localparam BPRED_NUM_LHR = 32'd6; localparam BTB_SIZE = 32'd10; diff --git a/config/rv32e/config.vh b/config/rv32e/config.vh index e41db6068..b2e45562e 100644 --- a/config/rv32e/config.vh +++ b/config/rv32e/config.vh @@ -25,6 +25,8 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// +`include "BranchPredictorType.vh" + localparam FPGA = 0; localparam QEMU = 0; @@ -131,7 +133,7 @@ localparam PLIC_GPIO_ID = 32'd3; localparam PLIC_UART_ID = 32'd10; localparam BPRED_SUPPORTED = 0; -localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N +localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT localparam BPRED_SIZE = 32'd10; localparam BPRED_NUM_LHR = 32'd6; localparam BTB_SIZE = 32'd10; diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index 855a374ae..23c865c8c 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -27,6 +27,7 @@ // include shared configuration // `include "wally-shared.vh" +`include "BranchPredictorType.vh" localparam FPGA = 0; localparam QEMU = 0; @@ -133,10 +134,9 @@ localparam PLIC_GPIO_ID = 32'd3; localparam PLIC_UART_ID = 32'd10; localparam BPRED_SUPPORTED = 1; -// BP_GSHARE, BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT -// GSHARE_N, GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N -localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N +localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT localparam BPRED_SIZE = 32'd16; +localparam BPRED_NUM_LHR = 32'd6; localparam BTB_SIZE = 32'd10; localparam SVADU_SUPPORTED = 1; diff --git a/config/rv32i/config.vh b/config/rv32i/config.vh index 54f9791a5..bd307f17d 100644 --- a/config/rv32i/config.vh +++ b/config/rv32i/config.vh @@ -25,6 +25,8 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// +`include "BranchPredictorType.vh" + localparam FPGA = 0; localparam QEMU = 0; @@ -131,7 +133,7 @@ localparam PLIC_GPIO_ID = 32'd3; localparam PLIC_UART_ID = 32'd10; localparam BPRED_SUPPORTED = 0; -localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N +localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT localparam BPRED_SIZE = 32'd10; localparam BPRED_NUM_LHR = 32'd6; localparam BTB_SIZE = 32'd10; diff --git a/config/rv32imc/config.vh b/config/rv32imc/config.vh index 5b0535ad9..d49fa7ec3 100644 --- a/config/rv32imc/config.vh +++ b/config/rv32imc/config.vh @@ -25,6 +25,8 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// +`include "BranchPredictorType.vh" + localparam FPGA = 0; localparam QEMU = 0; @@ -130,7 +132,7 @@ localparam PLIC_GPIO_ID = 32'd3; localparam PLIC_UART_ID = 32'd10; localparam BPRED_SUPPORTED = 0; -localparam BPRED_TYPE = "GSHARE_N"; // GSHARE_B, GLOBAL_N, GLOBAL_B, TWOBIT_N +localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT localparam BPRED_SIZE = 32'd10; localparam BPRED_NUM_LHR = 32'd6; localparam BTB_SIZE = 32'd10; diff --git a/config/rv64fpquad/config.vh b/config/rv64fpquad/config.vh index 656e5c90c..655fab9b6 100644 --- a/config/rv64fpquad/config.vh +++ b/config/rv64fpquad/config.vh @@ -25,6 +25,8 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// +`include "BranchPredictorType.vh" + localparam FPGA = 0; localparam QEMU = 0; @@ -133,7 +135,7 @@ localparam PLIC_GPIO_ID = 32'd3; localparam PLIC_UART_ID = 32'd10; localparam BPRED_SUPPORTED = 1; -localparam BPRED_TYPE = "GSHARE_N"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT +localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT localparam BPRED_SIZE = 32'd10; localparam BPRED_NUM_LHR = 32'd6; localparam BTB_SIZE = 32'd10; diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index 3e6ba806e..923cd08d3 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -28,6 +28,8 @@ // include shared configuration // `include "wally-shared.vh" +`include "BranchPredictorType.vh" + localparam FPGA = 0; localparam QEMU = 0; @@ -136,7 +138,8 @@ localparam PLIC_GPIO_ID = 32'd3; localparam PLIC_UART_ID = 32'd10; localparam BPRED_SUPPORTED = 1; -localparam BPRED_TYPE = "GSHARE_N"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT +localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT +localparam BPRED_NUM_LHR = 32'd6; localparam BPRED_SIZE = 32'd10; localparam BTB_SIZE = 32'd10; diff --git a/config/rv64gc/wally-config.vh b/config/rv64gc/wally-config.vh index c933d87ae..1dd3d7f4f 100644 --- a/config/rv64gc/wally-config.vh +++ b/config/rv64gc/wally-config.vh @@ -138,7 +138,7 @@ `define BPRED_SUPPORTED 1 //`define BPRED_TYPE "BP_GLOBAL_BASIC" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT `define BPRED_TYPE "BP_GSHARE" // "BP_LOCAL_REPAIR" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT -`define BPRED_SIZE 6 +`define BPRED_SIZE 10 `define BPRED_NUM_LHR 4 `define BTB_SIZE 10 diff --git a/config/rv64i/config.vh b/config/rv64i/config.vh index 15c0e7994..d2914c966 100644 --- a/config/rv64i/config.vh +++ 
b/config/rv64i/config.vh @@ -25,6 +25,8 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// +`include "BranchPredictorType.vh" + localparam FPGA = 0; localparam QEMU = 0; @@ -133,7 +135,7 @@ localparam PLIC_GPIO_ID = 32'd3; localparam PLIC_UART_ID = 32'd10; localparam BPRED_SUPPORTED = 0; -localparam BPRED_TYPE = "GSHARE_N"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT +localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT localparam BPRED_SIZE = 32'd10; localparam BPRED_NUM_LHR = 32'd6; localparam BTB_SIZE = 32'd10; diff --git a/config/shared/BranchPredictorType.vh b/config/shared/BranchPredictorType.vh new file mode 100644 index 000000000..0f62fb954 --- /dev/null +++ b/config/shared/BranchPredictorType.vh @@ -0,0 +1,3 @@ +typedef enum {BP_TWOBIT, BP_GSHARE, BP_GLOBAL, BP_GSHARE_BASIC, + BP_GLOBAL_BASIC, BP_LOCAL_BASIC, BP_LOCAL_AHEAD, BP_LOCAL_REPAIR} BranchPredictorType; + diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index 58e061fbc..1dd28b746 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -74,6 +74,7 @@ parameter cvw_t P = '{ BPRED_SUPPORTED : BPRED_SUPPORTED, BPRED_TYPE : BPRED_TYPE, BPRED_SIZE : BPRED_SIZE, + BPRED_NUM_LHR : BPRED_NUM_LHR, BTB_SIZE : BTB_SIZE, RADIX : RADIX, DIVCOPIES : DIVCOPIES, diff --git a/src/fpu/postproc/round.sv b/src/fpu/postproc/round.sv index 6de52f778..4f6ea819d 100644 --- a/src/fpu/postproc/round.sv +++ b/src/fpu/postproc/round.sv @@ -26,7 +26,6 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// - module round import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] OutFmt, // output format input logic [2:0] Frm, // rounding mode diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 21cad922f..85b300fd0 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -27,9 +27,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module RASPredictor #(parameter int StackSize = 16 )( +module RASPredictor import cvw::*; #(parameter cvw_t P, StackSize = 16 )( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, @@ -37,15 +35,15 @@ module RASPredictor #(parameter int StackSize = 16 )( input logic ReturnD, input logic ReturnE, CallE, // Instr class input logic BPReturnF, - input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a call - output logic [`XLEN-1:0] RASPCF // Top of the stack + input logic [P.XLEN-1:0] PCLinkE, // PC of instruction after a call + output logic [P.XLEN-1:0] RASPCF // Top of the stack ); logic CounterEn; localparam Depth = $clog2(StackSize); logic [Depth-1:0] NextPtr, Ptr, P1, M1, IncDecPtr; - logic [StackSize-1:0] [`XLEN-1:0] memory; + logic [StackSize-1:0] [P.XLEN-1:0] memory; integer index; logic PopF; @@ -85,7 +83,7 @@ module RASPredictor #(parameter int StackSize = 16 )( always_ff @ (posedge clk) begin if(reset) begin for(index=0; index Date: Fri, 26 May 2023 16:24:12 -0500 Subject: [PATCH 20/20] Uncore is now parameterized. 
--- src/uncore/ahbapbbridge.sv | 18 +++++------ src/uncore/clint_apb.sv | 46 ++++++++++++++-------------- src/uncore/gpio_apb.sv | 14 ++++----- src/uncore/plic_apb.sv | 44 ++++++++++++--------------- src/uncore/ram_ahb.sv | 23 +++++++------- src/uncore/rom_ahb.sv | 12 +++----- src/uncore/uartPC16550D.sv | 13 ++++---- src/uncore/uart_apb.sv | 14 ++++----- src/uncore/uncore.sv | 62 ++++++++++++++++++-------------------- 9 files changed, 114 insertions(+), 132 deletions(-) diff --git a/src/uncore/ahbapbbridge.sv b/src/uncore/ahbapbbridge.sv index 011356ba2..bfbe9caaf 100644 --- a/src/uncore/ahbapbbridge.sv +++ b/src/uncore/ahbapbbridge.sv @@ -25,31 +25,29 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module ahbapbbridge #(PERIPHS = 2) ( +module ahbapbbridge import cvw::*; #(parameter cvw_t P, PERIPHS = 2) ( input logic HCLK, HRESETn, input logic [PERIPHS-1:0] HSEL, - input logic [`PA_BITS-1:0] HADDR, - input logic [`XLEN-1:0] HWDATA, - input logic [`XLEN/8-1:0] HWSTRB, + input logic [P.PA_BITS-1:0] HADDR, + input logic [P.XLEN-1:0] HWDATA, + input logic [P.XLEN/8-1:0] HWSTRB, input logic HWRITE, input logic [1:0] HTRANS, input logic HREADY, // input logic [3:0] HPROT, // not used - output logic [`XLEN-1:0] HRDATA, + output logic [P.XLEN-1:0] HRDATA, output logic HRESP, HREADYOUT, output logic PCLK, PRESETn, output logic [PERIPHS-1:0] PSEL, output logic PWRITE, output logic PENABLE, output logic [31:0] PADDR, - output logic [`XLEN-1:0] PWDATA, + output logic [P.XLEN-1:0] PWDATA, // output logic [2:0] PPROT, // not used - output logic [`XLEN/8-1:0] PSTRB, + output logic [P.XLEN/8-1:0] PSTRB, // output logic PWAKEUP // not used input logic [PERIPHS-1:0] PREADY, - input var [PERIPHS-1:0][`XLEN-1:0] PRDATA + input var [PERIPHS-1:0][P.XLEN-1:0] PRDATA ); logic initTrans, initTransSel, initTransSelD; diff --git a/src/uncore/clint_apb.sv 
b/src/uncore/clint_apb.sv index 7b6bf676b..fdbe8f640 100644 --- a/src/uncore/clint_apb.sv +++ b/src/uncore/clint_apb.sv @@ -27,17 +27,15 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -`include "wally-config.vh" - -module clint_apb ( +module clint_apb import cvw::*; #(parameter cvw_t P) ( input logic PCLK, PRESETn, input logic PSEL, input logic [15:0] PADDR, - input logic [`XLEN-1:0] PWDATA, - input logic [`XLEN/8-1:0] PSTRB, + input logic [P.XLEN-1:0] PWDATA, + input logic [P.XLEN/8-1:0] PSTRB, input logic PWRITE, input logic PENABLE, - output logic [`XLEN-1:0] PRDATA, + output logic [P.XLEN-1:0] PRDATA, output logic PREADY, output logic [63:0] MTIME, output logic MTimerInt, MSwInt @@ -53,7 +51,7 @@ module clint_apb ( assign PREADY = 1'b1; // CLINT never takes >1 cycle to respond // word aligned reads - if (`XLEN==64) assign #2 entry = {PADDR[15:3], 3'b000}; + if (P.XLEN==64) assign #2 entry = {PADDR[15:3], 3'b000}; else assign #2 entry = {PADDR[15:2], 2'b00}; // DH 2/20/21: Eventually allow MTIME to run off a separate clock @@ -63,7 +61,7 @@ module clint_apb ( // Use req and ack signals synchronized across the clock domains. 
// register access - if (`XLEN==64) begin:clint // 64-bit + if (P.XLEN==64) begin:clint // 64-bit always @(posedge PCLK) begin case(entry) 16'h0000: PRDATA <= {63'b0, MSIP}; @@ -79,7 +77,7 @@ module clint_apb ( end else if (memwrite) begin if (entry == 16'h0000) MSIP <= PWDATA[0]; if (entry == 16'h4000) begin - for(i=0;i<`XLEN/8;i++) + for(i=0;i APB bridge - ahbapbbridge #(4) ahbapbbridge ( + ahbapbbridge #(P, 4) ahbapbbridge ( .HCLK, .HRESETn, .HSEL({HSELUART, HSELPLIC, HSELCLINT, HSELGPIO}), .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HTRANS, .HREADY, .HRDATA(HREADBRIDGE), .HRESP(HRESPBRIDGE), .HREADYOUT(HREADYBRIDGE), .PCLK, .PRESETn, .PSEL, .PWRITE, .PENABLE, .PADDR, .PWDATA, .PSTRB, .PREADY, .PRDATA); assign HSELBRIDGE = HSELGPIO | HSELCLINT | HSELPLIC | HSELUART; // if any of the bridge signals are selected // on-chip RAM - if (`UNCORE_RAM_SUPPORTED) begin : ram - ram_ahb #(.BASE(`UNCORE_RAM_BASE), .RANGE(`UNCORE_RAM_RANGE)) ram ( + if (P.UNCORE_RAM_SUPPORTED) begin : ram + ram_ahb #(.P(P), .BASE(P.UNCORE_RAM_BASE), .RANGE(P.UNCORE_RAM_RANGE)) ram ( .HCLK, .HRESETn, .HSELRam, .HADDR, .HWRITE, .HREADY, .HTRANS, .HWDATA, .HWSTRB, .HREADRam, .HRESPRam, .HREADYRam); end - if (`BOOTROM_SUPPORTED) begin : bootrom - rom_ahb #(.BASE(`BOOTROM_BASE), .RANGE(`BOOTROM_RANGE)) + if (P.BOOTROM_SUPPORTED) begin : bootrom + rom_ahb #(.P(P), .BASE(P.BOOTROM_BASE), .RANGE(P.BOOTROM_RANGE)) bootrom(.HCLK, .HRESETn, .HSELRom(HSELBootRom), .HADDR, .HREADY, .HTRANS, .HREADRom(HREADBootRom), .HRESPRom(HRESPBootRom), .HREADYRom(HREADYBootRom)); end // memory-mapped I/O peripherals - if (`CLINT_SUPPORTED == 1) begin : clint - clint_apb clint(.PCLK, .PRESETn, .PSEL(PSEL[1]), .PADDR(PADDR[15:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, + if (P.CLINT_SUPPORTED == 1) begin : clint + clint_apb #(P) clint(.PCLK, .PRESETn, .PSEL(PSEL[1]), .PADDR(PADDR[15:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, .PRDATA(PRDATA[1]), .PREADY(PREADY[1]), .MTIME(MTIME_CLINT), .MTimerInt, .MSwInt); end else begin : clint 
assign MTIME_CLINT = 0; assign MTimerInt = 0; assign MSwInt = 0; end - if (`PLIC_SUPPORTED == 1) begin : plic - plic_apb plic(.PCLK, .PRESETn, .PSEL(PSEL[2]), .PADDR(PADDR[27:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, + if (P.PLIC_SUPPORTED == 1) begin : plic + plic_apb #(P) plic(.PCLK, .PRESETn, .PSEL(PSEL[2]), .PADDR(PADDR[27:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, .PRDATA(PRDATA[2]), .PREADY(PREADY[2]), .UARTIntr, .GPIOIntr, .MExtInt, .SExtInt); end else begin : plic assign MExtInt = 0; assign SExtInt = 0; end - if (`GPIO_SUPPORTED == 1) begin : gpio - gpio_apb gpio( + if (P.GPIO_SUPPORTED == 1) begin : gpio + gpio_apb #(P) gpio( .PCLK, .PRESETn, .PSEL(PSEL[0]), .PADDR(PADDR[7:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, .PRDATA(PRDATA[0]), .PREADY(PREADY[0]), .iof0(), .iof1(), .GPIOIN, .GPIOOUT, .GPIOEN, .GPIOIntr); end else begin : gpio assign GPIOOUT = 0; assign GPIOEN = 0; assign GPIOIntr = 0; end - if (`UART_SUPPORTED == 1) begin : uart - uart_apb uart( + if (P.UART_SUPPORTED == 1) begin : uart + uart_apb #(P) uart( .PCLK, .PRESETn, .PSEL(PSEL[3]), .PADDR(PADDR[2:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, .PRDATA(PRDATA[3]), .PREADY(PREADY[3]), .SIN(UARTSin), .DSRb(1'b1), .DCDb(1'b1), .CTSb(1'b0), .RIb(1'b1), // from E1A driver from RS232 interface @@ -147,7 +145,7 @@ module uncore import cvw::*; #(parameter cvw_t P)( end else begin : uart assign UARTSout = 0; assign UARTIntr = 0; end - if (`SDC_SUPPORTED == 1) begin : sdc + if (P.SDC_SUPPORTED == 1) begin : sdc SDC SDC(.HCLK, .HRESETn, .HSELSDC, .HADDR(HADDR[4:0]), .HWRITE, .HREADY, .HTRANS, .HWDATA, .HREADSDC, .HRESPSDC, .HREADYSDC, // sdc interface @@ -162,11 +160,11 @@ module uncore import cvw::*; #(parameter cvw_t P)( end // AHB Read Multiplexer - assign HRDATA = ({`XLEN{HSELRamD}} & HREADRam) | - ({`XLEN{HSELEXTD}} & HRDATAEXT) | - ({`XLEN{HSELBRIDGED}} & HREADBRIDGE) | - ({`XLEN{HSELBootRomD}} & HREADBootRom) | - ({`XLEN{HSELSDCD}} & HREADSDC); + assign HRDATA = ({P.XLEN{HSELRamD}} & HREADRam) | + 
({P.XLEN{HSELEXTD}} & HRDATAEXT) | + ({P.XLEN{HSELBRIDGED}} & HREADBRIDGE) | + ({P.XLEN{HSELBootRomD}} & HREADBootRom) | + ({P.XLEN{HSELSDCD}} & HREADSDC); assign HRESP = HSELRamD & HRESPRam | HSELEXTD & HRESPEXT |