Merge branch 'main' of https://github.com/openhwgroup/cvw into dev

2025-02-11 06:05:49 +00:00 · 2023-05-29 23:24:54 -07:00 · 2023-05-29 23:24:54 -07:00 · 9b8a2303a9
commit 9b8a2303a9
parent 4357cc579c aa95d1957b
159 changed files with 4869 additions and 3066 deletions
--- a/benchmarks/coremark/riscv64-baremetal/syscalls.c
+++ b/benchmarks/coremark/riscv64-baremetal/syscalls.c
@ -84,6 +84,11 @@ void setStats(int enable)
  READ_CTR(mhpmcounter10);
  READ_CTR(mhpmcounter11);
  READ_CTR(mhpmcounter12);  
+  READ_CTR(mhpmcounter13);  
+  READ_CTR(mhpmcounter14);  
+  READ_CTR(mhpmcounter15);  
+  READ_CTR(mhpmcounter16);  
+  READ_CTR(mhpmcounter17);  

 #undef READ_CTR
 }
@ -167,18 +172,21 @@ void _init(int cid, int nc)
  counters[12] = read_csr(mhpmcounter12) - counters[12];
  counters[13] = read_csr(mhpmcounter13) - counters[13];
  counters[14] = read_csr(mhpmcounter14) - counters[14];
+  counters[15] = read_csr(mhpmcounter15) - counters[15];
+  counters[16] = read_csr(mhpmcounter16) - counters[16];
+  counters[17] = read_csr(mhpmcounter17) - counters[17];

-  ee_printf("Load Stalls %d\n", counters[3]);
-  ee_printf("D-Cache Accesses %d\n", counters[11]);
-  ee_printf("D-Cache Misses %d\n", counters[12]); 
-  ee_printf("I-Cache Accesses %d\n", counters[13]);
-  ee_printf("I-Cache Misses %d\n", counters[14]);
-  ee_printf("Branches %d\n", counters[5]);
-  ee_printf("Branches Miss Predictions %d\n", counters[4]);
-  ee_printf("BTB Misses %d\n", counters[6]);
-  ee_printf("Jump, JAL, JALR %d\n", counters[7]);
-  ee_printf("RAS Wrong %d\n", counters[8]);
-  ee_printf("Returns %d\n", counters[9]);
+  ee_printf("Load Stalls %d\n", counters[11]);
+  ee_printf("D-Cache Accesses %d\n", counters[13]);
+  ee_printf("D-Cache Misses %d\n", counters[14]); 
+  ee_printf("I-Cache Accesses %d\n", counters[16]);
+  ee_printf("I-Cache Misses %d\n", counters[17]);
+  ee_printf("Branches %d\n", counters[3]);
+  ee_printf("Branches Miss Predictions %d\n", counters[7]);
+  ee_printf("BTB Misses %d\n", counters[8]);
+  ee_printf("Jump and JR %d\n", counters[4]);
+  ee_printf("RAS Wrong %d\n", counters[9]);
+  ee_printf("Returns %d\n", counters[5]);
  ee_printf("BP Class Wrong %d\n", counters[10]);
  ee_printf("Done printing performance counters\n");

--- a/bin/parseHPMC.py
+++ b/bin/parseHPMC.py
@ -279,12 +279,13 @@ if(sys.argv[1] == '-b'):
                    dct[PredType] = (currSize, currPercent)
        print(dct)
        fig, axes = plt.subplots()
-        marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*'}
-        colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue'}
+        marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*', 'tenlocal' : '.', 'eightlocal' : ',', 'fourlocal' : 'x', 'tenlocalahead' : '.', 'eightlocalahead' : ',', 'fourlocalahead' : 'x', 'tenlocalrepair' : 'x'}
+        colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue', 'tenlocal' : 'lightblue', 'eightlocal' : 'lightblue', 'fourlocal' : 'lightblue', 'tenlocalahead' : 'lightblue', 'eightlocalahead' : 'lightblue', 'fourlocalahead' : 'lightblue', 'tenlocalrepair' : 'lightblue'}
        for cat in dct:
            (x, y) = dct[cat]
            x=[int(2**int(v)) for v in x]
-            print(x, y)
+            #print(x, y)
+            print(cat)
            axes.plot(x,y, color=colors[cat])
            axes.scatter(x,y, label=cat, marker=marker[cat], color=colors[cat])
            #plt.scatter(x, y, label=cat)
--- a/config/buildroot/config.vh
+++ b/config/buildroot/config.vh
@ -0,0 +1,157 @@
+//////////////////////////////////////////
+// config.vh
+//
+// Written: David_Harris@hmc.edu 4 January 2021
+// Modified: 
+//
+// Purpose: Specify which features are configured
+//          Macros to determine which modes are supported based on MISA
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// include shared configuration
+`include "wally-shared.vh"
+`include "BranchPredictorType.vh"
+
+localparam FPGA = 1;
+localparam QEMU = 0;
+// RV32 or RV64: XLEN = 32 or 64
+localparam XLEN = 32'd64;
+
+// IEEE 754 compliance
+localparam IEEE754 = 0;
+
+localparam MISA = (32'h0014112D);
+localparam ZICSR_SUPPORTED = 1;
+localparam ZIFENCEI_SUPPORTED = 1;
+localparam ZICOUNTERS_SUPPORTED = 1;
+localparam COUNTERS = 12'd32;
+localparam ZFH_SUPPORTED = 0;
+localparam SSTC_SUPPORTED = 0;
+
+// LSU microarchitectural Features
+localparam BUS_SUPPORTED = 1;
+localparam DCACHE_SUPPORTED = 1;
+localparam ICACHE_SUPPORTED = 1;
+localparam VIRTMEM_SUPPORTED = 1;
+localparam VECTORED_INTERRUPTS_SUPPORTED = 1 ;
+localparam BIGENDIAN_SUPPORTED = 1;
+
+// TLB configuration.  Entries should be a power of 2
+localparam ITLB_ENTRIES = 32'd32;
+localparam DTLB_ENTRIES = 32'd32;
+
+// Cache configuration.  Sizes should be a power of two
+// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
+localparam DCACHE_NUMWAYS = 32'd4;
+localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
+localparam DCACHE_LINELENINBITS = 32'd512;
+localparam ICACHE_NUMWAYS = 32'd4;
+localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
+localparam ICACHE_LINELENINBITS = 32'd512;
+
+// Integer Divider Configuration
+// IDIV_BITSPERCYCLE must be 1, 2, or 4
+localparam IDIV_BITSPERCYCLE = 32'd4;
+localparam IDIV_ON_FPU = 1;
+
+// Legal numbers of PMP entries are 0, 16, or 64
+localparam PMP_ENTRIES = 32'd16;
+
+// Address space
+localparam RESET_VECTOR = 64'h0000000000001000;
+
+// WFI Timeout Wait
+localparam WFI_TIMEOUT_BIT = 32'd16;
+
+// Peripheral Addresses
+// Peripheral memory space extends from BASE to BASE+RANGE
+// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
+localparam DTIM_SUPPORTED = 1'b0;
+localparam DTIM_BASE =       64'h80000000;
+localparam DTIM_RANGE =      64'h00001FFF;
+localparam IROM_SUPPORTED = 1'b0;
+localparam IROM_BASE =       64'h80000000;
+localparam IROM_RANGE =      64'h00001FFF;
+localparam BOOTROM_SUPPORTED = 1'b1;
+localparam BOOTROM_BASE =   64'h00001000 ;
+localparam BOOTROM_RANGE =  64'h00000FFF;
+localparam UNCORE_RAM_SUPPORTED = 1'b1;
+localparam UNCORE_RAM_BASE =       64'h80000000;
+localparam UNCORE_RAM_RANGE =      64'h07FFFFFF;
+localparam EXT_MEM_SUPPORTED = 1'b0;
+localparam EXT_MEM_BASE =       64'h80000000;
+localparam EXT_MEM_RANGE =      64'h07FFFFFF;
+localparam CLINT_SUPPORTED = 1'b1;
+localparam CLINT_BASE =  64'h02000000;
+localparam CLINT_RANGE = 64'h0000FFFF;
+localparam GPIO_SUPPORTED = 1'b1;
+localparam GPIO_BASE =   64'h10060000;
+localparam GPIO_RANGE =  64'h000000FF;
+localparam UART_SUPPORTED = 1'b1;
+localparam UART_BASE =   64'h10000000;
+localparam UART_RANGE =  64'h00000007;
+localparam PLIC_SUPPORTED = 1'b1;
+localparam PLIC_BASE =   64'h0C000000;
+localparam PLIC_RANGE =  64'h03FFFFFF;
+localparam SDC_SUPPORTED = 1'b0;
+localparam SDC_BASE =   64'h00012100;
+localparam SDC_RANGE =  64'h0000001F;
+
+// Bus Interface width
+localparam AHBW = 32'd64;
+
+// Test modes
+
+// Tie GPIO outputs back to inputs
+localparam GPIO_LOOPBACK_TEST = 0;
+
+// Hardware configuration
+localparam UART_PRESCALE = 32'd0;
+
+// Interrupt configuration
+localparam PLIC_NUM_SRC = 32'd53;
+localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
+localparam PLIC_UART_ID = 32'd10;
+localparam PLIC_GPIO_ID = 32'd3;
+
+localparam BPRED_SUPPORTED = 1;
+localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
+localparam BPRED_SIZE = 32'd10;
+localparam BPRED_NUM_LHR = 32'd6;
+localparam BTB_SIZE = 32'd10;
+
+
+localparam SVADU_SUPPORTED = 1;
+localparam ZMMUL_SUPPORTED = 0;
+
+// FPU division architecture
+localparam RADIX = 32'h4;
+localparam DIVCOPIES = 32'h4;
+
+// bit manipulation
+localparam ZBA_SUPPORTED = 0;
+localparam ZBB_SUPPORTED = 0;
+localparam ZBC_SUPPORTED = 0;
+localparam ZBS_SUPPORTED = 0;
+
+// Memory synthesis configuration
+localparam USE_SRAM = 0;
+
+`include "test-shared.vh"
--- a/config/buildroot/wally-config.vh
+++ b/config/buildroot/wally-config.vh
@ -132,6 +132,7 @@
 `define BPRED_SUPPORTED 1
 `define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
 `define BPRED_SIZE 10
+`define BPRED_NUM_LHR 6
 `define BTB_SIZE 10


--- a/config/fpga/wally-config.vh
+++ b/config/fpga/wally-config.vh
@ -141,6 +141,7 @@
 `define BPRED_SUPPORTED 1
 `define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
 `define BPRED_SIZE 12
+`define BPRED_NUM_LHR 6
 `define BTB_SIZE 10


--- a/config/rv32e/config.vh
+++ b/config/rv32e/config.vh
@ -0,0 +1,158 @@
+//////////////////////////////////////////
+// config.vh
+//
+// Written: David_Harris@hmc.edu 4 January 2021
+// Modified: 
+//
+// Purpose: Specify which features are configured
+//          Macros to determine which modes are supported based on MISA
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "BranchPredictorType.vh"
+
+localparam FPGA = 0;
+localparam QEMU = 0;
+
+// RV32 or RV64: XLEN = 32 or 64
+localparam XLEN = 32'd32;
+
+// IEEE 754 compliance
+localparam IEEE754 = 0;
+
+// E
+localparam MISA = (32'h00000010); 
+localparam ZICSR_SUPPORTED = 0;
+localparam ZIFENCEI_SUPPORTED = 0;
+localparam COUNTERS = 12'd0;
+localparam ZICOUNTERS_SUPPORTED = 0;
+localparam ZFH_SUPPORTED = 0;
+localparam SSTC_SUPPORTED = 0;
+
+// LSU microarchitectural Features
+localparam BUS_SUPPORTED = 1;
+localparam DCACHE_SUPPORTED = 0;
+localparam ICACHE_SUPPORTED = 0;
+localparam VIRTMEM_SUPPORTED = 0;
+localparam VECTORED_INTERRUPTS_SUPPORTED = 0; 
+localparam BIGENDIAN_SUPPORTED = 0;
+
+// TLB configuration.  Entries should be a power of 2
+localparam ITLB_ENTRIES = 32'd0;
+localparam DTLB_ENTRIES = 32'd0;
+
+// Cache configuration.  Sizes should be a power of two
+// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
+localparam DCACHE_NUMWAYS = 32'd4;
+localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
+localparam DCACHE_LINELENINBITS = 32'd512;
+localparam ICACHE_NUMWAYS = 32'd4;
+localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
+localparam ICACHE_LINELENINBITS = 32'd512;
+
+// Integer Divider Configuration
+// IDIV_BITSPERCYCLE must be 1, 2, or 4
+localparam IDIV_BITSPERCYCLE = 32'd1;
+localparam IDIV_ON_FPU = 0;
+
+// Legal numbers of PMP entries are 0, 16, or 64
+localparam PMP_ENTRIES = 32'd0;
+
+// Address space
+localparam RESET_VECTOR = 64'h80000000;
+
+// WFI Timeout Wait
+localparam WFI_TIMEOUT_BIT = 32'd16;
+
+// Peripheral Addresses
+// Peripheral memory space extends from BASE to BASE+RANGE
+// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
+localparam DTIM_SUPPORTED = 1'b0;
+localparam DTIM_BASE = 64'h80000000;      
+localparam DTIM_RANGE = 64'h007FFFFF;     
+localparam IROM_SUPPORTED = 1'b0;
+localparam IROM_BASE = 64'h80000000;     
+localparam IROM_RANGE = 64'h007FFFFF;     
+localparam BOOTROM_SUPPORTED = 1'b1;
+localparam BOOTROM_BASE = 64'h00001000;  
+localparam BOOTROM_RANGE = 64'h00000FFF; 
+localparam UNCORE_RAM_SUPPORTED = 1'b1;
+localparam UNCORE_RAM_BASE = 64'h80000000;      
+localparam UNCORE_RAM_RANGE = 64'h07FFFFFF;     
+localparam EXT_MEM_SUPPORTED = 1'b0;
+localparam EXT_MEM_BASE = 64'h80000000;      
+localparam EXT_MEM_RANGE = 64'h07FFFFFF;     
+localparam CLINT_SUPPORTED = 1'b0;
+localparam CLINT_BASE = 64'h02000000; 
+localparam CLINT_RANGE = 64'h0000FFFF;
+localparam GPIO_SUPPORTED = 1'b0;
+localparam GPIO_BASE = 64'h10060000;  
+localparam GPIO_RANGE = 64'h000000FF; 
+localparam UART_SUPPORTED = 1'b0;
+localparam UART_BASE = 64'h10000000;  
+localparam UART_RANGE = 64'h00000007; 
+localparam PLIC_SUPPORTED = 1'b0;
+localparam PLIC_BASE = 64'h0C000000;  
+localparam PLIC_RANGE = 64'h03FFFFFF; 
+localparam SDC_SUPPORTED = 1'b0;
+localparam SDC_BASE = 64'h00012100;  
+localparam SDC_RANGE = 64'h0000001F; 
+
+// Bus Interface width
+localparam AHBW = 32'd32;
+
+// Test modes
+
+// Tie GPIO outputs back to inputs
+localparam GPIO_LOOPBACK_TEST = 1;
+
+// Hardware configuration
+localparam UART_PRESCALE = 32'd1;
+
+// Interrupt configuration
+localparam PLIC_NUM_SRC = 32'd10; 
+// PLIC_NUM_SRC_LT_32 is computed from PLIC_NUM_SRC: 1 when there are fewer than 32 sources
+localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
+localparam PLIC_GPIO_ID = 32'd3;
+localparam PLIC_UART_ID = 32'd10;
+
+localparam BPRED_SUPPORTED = 0;
+localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
+localparam BPRED_SIZE = 32'd10;
+localparam BPRED_NUM_LHR = 32'd6;
+localparam BTB_SIZE = 32'd10;
+
+localparam SVADU_SUPPORTED = 0;
+localparam ZMMUL_SUPPORTED = 0;
+
+// FPU division architecture
+localparam RADIX = 32'd4;
+localparam DIVCOPIES = 32'd4;
+
+// bit manipulation
+localparam ZBA_SUPPORTED = 0;
+localparam ZBB_SUPPORTED = 0;
+localparam ZBC_SUPPORTED = 0;
+localparam ZBS_SUPPORTED = 0;
+
+// Memory synthesis configuration
+localparam USE_SRAM = 0;
+
+`include "test-shared.vh"
+ 
--- a/config/rv32e/rv32e-config.vh
+++ b/config/rv32e/rv32e-config.vh
@ -0,0 +1,178 @@
+//////////////////////////////////////////
+// rv32e-config.vh
+//
+// Written: David_Harris@hmc.edu 4 January 2021
+// Modified: 
+//
+// Purpose: Specify which features are configured
+//          Macros to determine which modes are supported based on MISA
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+localparam PA_BITS = 34;
+//localparam AHBW = 32;
+//localparam XLEN = 32;
+//localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 );
+////localparam    BUS_SUPPORTED = 1'b1;
+//localparam    ZICSR_SUPPORTED = 1'b0;
+localparam    M_SUPPORTED = 1'b0;
+localparam    F_SUPPORTED = 1'b0;
+//localparam    ZMMUL_SUPPORTED = 1'b0;
+//localparam    F_SUPPORTED = 1'b0;
+//localparam    PMP_ENTRIES = 0;
+localparam    LLEN =     32;
+//localparam    FPGA =     1'b0;
+//localparam    QEMU =     1'b0;
+ //   //VPN_SEGMENT_BITS: (LLEN == 32 ? 10 : 9),
+   // `include "test-shared.vh"
+localparam    FLEN =     32;
+
+`include "test-shared.vh"
+ 
+
+ 
+// include shared configuration
+//`include "wally-shared.vh"
+
+localparam FPGA = 0;
+localparam QEMU = 0;
+
+// RV32 or RV64: XLEN = 32 or 64
+localparam XLEN = 32;
+
+// IEEE 754 compliance
+localparam IEEE754 = 0;
+
+// E
+localparam MISA = (32'h00000010); 
+localparam ZICSR_SUPPORTED = 0;
+localparam ZIFENCEI_SUPPORTED = 0;
+localparam COUNTERS = 0;
+localparam ZICOUNTERS_SUPPORTED = 0;
+localparam ZFH_SUPPORTED = 0;
+localparam SSTC_SUPPORTED = 0;
+
+// LSU microarchitectural Features
+localparam BUS_SUPPORTED = 1;
+localparam DCACHE_SUPPORTED = 0;
+localparam ICACHE_SUPPORTED = 0;
+localparam VIRTMEM_SUPPORTED = 0;
+localparam VECTORED_INTERRUPTS_SUPPORTED = 0; 
+localparam BIGENDIAN_SUPPORTED = 0;
+
+// TLB configuration.  Entries should be a power of 2
+localparam ITLB_ENTRIES = 0;
+localparam DTLB_ENTRIES = 0;
+
+// Cache configuration.  Sizes should be a power of two
+// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
+localparam DCACHE_NUMWAYS = 4;
+localparam DCACHE_WAYSIZEINBYTES = 4096;
+localparam DCACHE_LINELENINBITS = 512;
+localparam ICACHE_NUMWAYS = 4;
+localparam ICACHE_WAYSIZEINBYTES = 4096;
+localparam ICACHE_LINELENINBITS = 512;
+
+// Integer Divider Configuration
+// IDIV_BITSPERCYCLE must be 1, 2, or 4
+localparam IDIV_BITSPERCYCLE = 1;
+localparam IDIV_ON_FPU = 0;
+
+// Legal numbers of PMP entries are 0, 16, or 64
+localparam PMP_ENTRIES = 0;
+
+// Address space
+localparam RESET_VECTOR = 32'h80000000;
+
+// WFI Timeout Wait
+localparam WFI_TIMEOUT_BIT = 16;
+
+// Peripheral Addresses
+// Peripheral memory space extends from BASE to BASE+RANGE
+// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
+localparam DTIM_SUPPORTED = 1'b0;
+localparam DTIM_BASE = 34'h80000000;      
+localparam DTIM_RANGE = 34'h007FFFFF;     
+localparam IROM_SUPPORTED = 1'b0;
+localparam IROM_BASE = 34'h80000000;     
+localparam IROM_RANGE = 34'h007FFFFF;     
+localparam BOOTROM_SUPPORTED = 1'b1;
+localparam BOOTROM_BASE = 34'h00001000;  
+localparam BOOTROM_RANGE = 34'h00000FFF; 
+localparam UNCORE_RAM_SUPPORTED = 1'b1;
+localparam UNCORE_RAM_BASE = 34'h80000000;      
+localparam UNCORE_RAM_RANGE = 34'h07FFFFFF;     
+localparam EXT_MEM_SUPPORTED = 1'b0;
+localparam EXT_MEM_BASE = 34'h80000000;      
+localparam EXT_MEM_RANGE = 34'h07FFFFFF;     
+localparam CLINT_SUPPORTED = 1'b0;
+localparam CLINT_BASE = 34'h02000000; 
+localparam CLINT_RANGE = 34'h0000FFFF;
+localparam GPIO_SUPPORTED = 1'b0;
+localparam GPIO_BASE = 34'h10060000;  
+localparam GPIO_RANGE = 34'h000000FF; 
+localparam UART_SUPPORTED = 1'b0;
+localparam UART_BASE = 34'h10000000;  
+localparam UART_RANGE = 34'h00000007; 
+localparam PLIC_SUPPORTED = 1'b0;
+localparam PLIC_BASE = 34'h0C000000;  
+localparam PLIC_RANGE = 34'h03FFFFFF; 
+localparam SDC_SUPPORTED = 1'b0;
+localparam SDC_BASE = 34'h00012100;  
+localparam SDC_RANGE = 34'h0000001F; 
+
+// Bus Interface width
+localparam AHBW = 32;
+
+// Test modes
+
+// Tie GPIO outputs back to inputs
+localparam GPIO_LOOPBACK_TEST = 1;
+
+// Hardware configuration
+localparam UART_PRESCALE = 1;
+
+// Interrupt configuration
+localparam PLIC_NUM_SRC = 10; 
+// PLIC_NUM_SRC_LT_32 is computed from PLIC_NUM_SRC: 1 when there are fewer than 32 sources
+localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
+localparam PLIC_GPIO_ID = 3;
+localparam PLIC_UART_ID = 10;
+
+localparam BPRED_SUPPORTED = 0;
+localparam BPRED_TYPE = "BP_GSHARE"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
+localparam BPRED_SIZE = 10;
+localparam BTB_SIZE = 10;
+
+localparam SVADU_SUPPORTED = 0;
+localparam ZMMUL_SUPPORTED = 0;
+
+// FPU division architecture
+localparam RADIX = 4;
+localparam DIVCOPIES = 4;
+
+// bit manipulation
+localparam ZBA_SUPPORTED = 0;
+localparam ZBB_SUPPORTED = 0;
+localparam ZBC_SUPPORTED = 0;
+localparam ZBS_SUPPORTED = 0;
+
+// Memory synthesis configuration
+localparam USE_SRAM = 0;
+ 
--- a/config/rv32e/wally-config.vh
+++ b/config/rv32e/wally-config.vh
@ -136,6 +136,7 @@
 `define BPRED_SUPPORTED 0
 `define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
 `define BPRED_SIZE 10
+`define BPRED_NUM_LHR 6
 `define BTB_SIZE 10

 `define SVADU_SUPPORTED 0
--- a/config/rv32gc/config.vh
+++ b/config/rv32gc/config.vh
@ -0,0 +1,158 @@
+//////////////////////////////////////////
+// config.vh
+//
+// Written: David_Harris@hmc.edu 4 January 2021
+// Modified: 
+//
+// Purpose: Specify which features are configured
+//          Macros to determine which modes are supported based on MISA
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// include shared configuration
+// `include "wally-shared.vh"
+`include "BranchPredictorType.vh"
+
+localparam FPGA = 0;
+localparam QEMU = 0;
+
+// RV32 or RV64: XLEN = 32 or 64
+localparam XLEN = 32'd32;
+
+// IEEE 754 compliance
+localparam IEEE754 = 0;
+
+localparam MISA = (32'h00000104 | 1 << 20 | 1 << 18 | 1 << 12 | 1 << 0 | 1 <<3 | 1 << 5);
+localparam ZICSR_SUPPORTED = 1;
+localparam ZIFENCEI_SUPPORTED = 1;
+localparam COUNTERS = 12'd32;
+localparam ZICOUNTERS_SUPPORTED = 1;
+localparam ZFH_SUPPORTED = 0;
+localparam SSTC_SUPPORTED = 1;
+
+// LSU microarchitectural Features
+localparam BUS_SUPPORTED = 1;
+localparam DCACHE_SUPPORTED = 1;
+localparam ICACHE_SUPPORTED = 1;
+localparam VIRTMEM_SUPPORTED = 1;
+localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
+localparam BIGENDIAN_SUPPORTED = 1;
+
+// TLB configuration.  Entries should be a power of 2
+localparam ITLB_ENTRIES = 32'd32;
+localparam DTLB_ENTRIES = 32'd32;
+
+// Cache configuration.  Sizes should be a power of two
+// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
+localparam DCACHE_NUMWAYS = 32'd4;
+localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
+localparam DCACHE_LINELENINBITS = 32'd512;
+localparam ICACHE_NUMWAYS = 32'd4;
+localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
+localparam ICACHE_LINELENINBITS = 32'd512;
+
+// Integer Divider Configuration
+// IDIV_BITSPERCYCLE must be 1, 2, or 4
+localparam IDIV_BITSPERCYCLE = 32'd4;
+localparam IDIV_ON_FPU = 1;
+
+// Legal numbers of PMP entries are 0, 16, or 64
+localparam PMP_ENTRIES = 32'd16;
+
+// Address space
+localparam RESET_VECTOR = 64'h80000000;
+
+// WFI Timeout Wait
+localparam WFI_TIMEOUT_BIT = 32'd16;
+
+// Peripheral Addresses
+// Peripheral memory space extends from BASE to BASE+RANGE
+// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
+localparam DTIM_SUPPORTED = 1'b0;
+localparam DTIM_BASE       = 64'h80000000;
+localparam DTIM_RANGE      = 64'h007FFFFF;
+localparam IROM_SUPPORTED = 1'b0;
+localparam IROM_BASE       = 64'h80000000;
+localparam IROM_RANGE      = 64'h007FFFFF;
+localparam BOOTROM_SUPPORTED = 1'b1;
+localparam BOOTROM_BASE   = 64'h00001000;
+localparam BOOTROM_RANGE  = 64'h00000FFF;
+localparam UNCORE_RAM_SUPPORTED = 1'b1;
+localparam UNCORE_RAM_BASE       = 64'h80000000;
+localparam UNCORE_RAM_RANGE      = 64'h07FFFFFF;
+localparam EXT_MEM_SUPPORTED = 1'b0;
+localparam EXT_MEM_BASE       = 64'h80000000;
+localparam EXT_MEM_RANGE      = 64'h07FFFFFF;
+localparam CLINT_SUPPORTED = 1'b1;
+localparam CLINT_BASE  = 64'h02000000;
+localparam CLINT_RANGE = 64'h0000FFFF;
+localparam GPIO_SUPPORTED = 1'b1;
+localparam GPIO_BASE   = 64'h10060000;
+localparam GPIO_RANGE  = 64'h000000FF;
+localparam UART_SUPPORTED = 1'b1;
+localparam UART_BASE   = 64'h10000000;
+localparam UART_RANGE  = 64'h00000007;
+localparam PLIC_SUPPORTED = 1'b1;
+localparam PLIC_BASE   = 64'h0C000000;
+localparam PLIC_RANGE  = 64'h03FFFFFF;
+localparam SDC_SUPPORTED = 1'b0;
+localparam SDC_BASE   = 64'h00012100;
+localparam SDC_RANGE  = 64'h0000001F;
+
+// Bus Interface width
+localparam AHBW = 32'd32;
+
+// Test modes
+
+// Tie GPIO outputs back to inputs
+localparam GPIO_LOOPBACK_TEST = 1;
+
+// Hardware configuration
+localparam UART_PRESCALE = 32'd1;
+
+// Interrupt configuration
+localparam PLIC_NUM_SRC = 32'd10;
+// PLIC_NUM_SRC_LT_32 is computed from PLIC_NUM_SRC: 1 when there are fewer than 32 sources
+localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
+localparam PLIC_GPIO_ID = 32'd3;
+localparam PLIC_UART_ID = 32'd10;
+
+localparam BPRED_SUPPORTED = 1;
+localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
+localparam BPRED_SIZE = 32'd16;
+localparam BPRED_NUM_LHR = 32'd6;
+localparam BTB_SIZE = 32'd10;
+
+localparam SVADU_SUPPORTED = 1;
+localparam ZMMUL_SUPPORTED = 0;
+
+// FPU division architecture
+localparam RADIX = 32'd4;
+localparam DIVCOPIES = 32'd4;
+
+// bit manipulation
+localparam ZBA_SUPPORTED = 1;
+localparam ZBB_SUPPORTED = 1;
+localparam ZBC_SUPPORTED = 1;
+localparam ZBS_SUPPORTED = 1;
+
+// Memory synthesis configuration
+localparam USE_SRAM = 0;
+
+`include "test-shared.vh"
--- a/config/rv32gc/wally-config.vh
+++ b/config/rv32gc/wally-config.vh
@ -133,8 +133,9 @@
 `define PLIC_UART_ID 10

 `define BPRED_SUPPORTED 1
-`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
+`define BPRED_TYPE "BP_GSHARE" // "BP_LOCAL_REPAIR" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
 `define BPRED_SIZE 16
+`define BPRED_NUM_LHR 8
 `define BTB_SIZE 10

 `define SVADU_SUPPORTED 1
--- a/config/rv32i/config.vh
+++ b/config/rv32i/config.vh
@ -0,0 +1,157 @@
+//////////////////////////////////////////
+// config.vh
+//
+// Written: David_Harris@hmc.edu 4 January 2021
+// Modified: 
+//
+// Purpose: Specify which features are configured
+//          Macros to determine which modes are supported based on MISA
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "BranchPredictorType.vh"
+
+localparam FPGA = 0;
+localparam QEMU = 0;
+
+// RV32 or RV64: XLEN = 32 or 64
+localparam XLEN = 32'd32;
+
+// IEEE 754 compliance
+localparam IEEE754 = 0;
+
+// I
+localparam MISA = (32'h00000104);
+localparam ZICSR_SUPPORTED = 0;
+localparam ZIFENCEI_SUPPORTED = 0;
+localparam COUNTERS = 12'd32;
+localparam ZICOUNTERS_SUPPORTED = 0;
+localparam ZFH_SUPPORTED = 0;
+localparam SSTC_SUPPORTED = 0;
+
+// LSU microarchitectural Features
+localparam BUS_SUPPORTED = 0;
+localparam DCACHE_SUPPORTED = 0;
+localparam ICACHE_SUPPORTED = 0;
+localparam VIRTMEM_SUPPORTED = 0;
+localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
+localparam BIGENDIAN_SUPPORTED = 0;
+
+// TLB configuration.  Entries should be a power of 2
+localparam ITLB_ENTRIES = 32'd32;
+localparam DTLB_ENTRIES = 32'd32;
+
+// Cache configuration.  Sizes should be a power of two
+// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
+localparam DCACHE_NUMWAYS = 32'd4;
+localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
+localparam DCACHE_LINELENINBITS = 32'd512;
+localparam ICACHE_NUMWAYS = 32'd4;
+localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
+localparam ICACHE_LINELENINBITS = 32'd512;
+
+// Integer Divider Configuration
+// IDIV_BITSPERCYCLE must be 1, 2, or 4
+localparam IDIV_BITSPERCYCLE = 32'd4;
+localparam IDIV_ON_FPU = 0;
+
+// Legal numbers of PMP entries are 0, 16, or 64
+localparam PMP_ENTRIES = 32'd0;
+
+// Address space
+localparam RESET_VECTOR = 64'h80000000;
+
+// WFI Timeout Wait
+localparam WFI_TIMEOUT_BIT = 32'd16;
+
+// Peripheral Addresses
+// Peripheral memory space extends from BASE to BASE+RANGE
+// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
+localparam DTIM_SUPPORTED = 1'b1;
+localparam DTIM_BASE       = 64'h80000000;
+localparam DTIM_RANGE      = 64'h007FFFFF;
+localparam IROM_SUPPORTED = 1'b1;
+localparam IROM_BASE       = 64'h80000000;
+localparam IROM_RANGE      = 64'h007FFFFF;
+localparam BOOTROM_SUPPORTED = 1'b0;
+localparam BOOTROM_BASE   = 64'h00001000;
+localparam BOOTROM_RANGE  = 64'h00000FFF;
+localparam UNCORE_RAM_SUPPORTED = 1'b0;
+localparam UNCORE_RAM_BASE       = 64'h80000000;
+localparam UNCORE_RAM_RANGE      = 64'h07FFFFFF;
+localparam EXT_MEM_SUPPORTED = 1'b0;
+localparam EXT_MEM_BASE       = 64'h80000000;
+localparam EXT_MEM_RANGE      = 64'h07FFFFFF;
+localparam CLINT_SUPPORTED = 1'b0;
+localparam CLINT_BASE  = 64'h02000000;
+localparam CLINT_RANGE = 64'h0000FFFF;
+localparam GPIO_SUPPORTED = 1'b0;
+localparam GPIO_BASE   = 64'h10060000;
+localparam GPIO_RANGE  = 64'h000000FF;
+localparam UART_SUPPORTED = 1'b0;
+localparam UART_BASE   = 64'h10000000;
+localparam UART_RANGE  = 64'h00000007;
+localparam PLIC_SUPPORTED = 1'b0;
+localparam PLIC_BASE   = 64'h0C000000;
+localparam PLIC_RANGE  = 64'h03FFFFFF;
+localparam SDC_SUPPORTED = 1'b0;
+localparam SDC_BASE   = 64'h00012100;
+localparam SDC_RANGE  = 64'h0000001F;
+
+// Bus Interface width
+localparam AHBW = 32'd32;
+
+// Test modes
+
+// Tie GPIO outputs back to inputs
+localparam GPIO_LOOPBACK_TEST = 1;
+
+// Hardware configuration
+localparam UART_PRESCALE = 32'd1;
+
+// Interrupt configuration
+localparam PLIC_NUM_SRC = 32'd10;
+// PLIC_NUM_SRC_LT_32 is computed from PLIC_NUM_SRC: 1 when there are fewer than 32 sources
+localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
+localparam PLIC_GPIO_ID = 32'd3;
+localparam PLIC_UART_ID = 32'd10;
+
+localparam BPRED_SUPPORTED = 0;
+localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
+localparam BPRED_SIZE = 32'd10;
+localparam BPRED_NUM_LHR = 32'd6;
+localparam BTB_SIZE = 32'd10;
+
+localparam SVADU_SUPPORTED = 0;
+localparam ZMMUL_SUPPORTED = 0;
+
+// FPU division architecture
+localparam RADIX = 32'h4;
+localparam DIVCOPIES = 32'h4;
+
+// bit manipulation
+localparam ZBA_SUPPORTED = 0;
+localparam ZBB_SUPPORTED = 0;
+localparam ZBC_SUPPORTED = 0;
+localparam ZBS_SUPPORTED = 0;
+
+// Memory synthesis configuration
+localparam USE_SRAM = 0;
+
+`include "test-shared.vh"
--- a/config/rv32i/wally-config.vh
+++ b/config/rv32i/wally-config.vh
@ -136,6 +136,7 @@
 `define BPRED_SUPPORTED 0
 `define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
 `define BPRED_SIZE 10
+`define BPRED_NUM_LHR 6
 `define BTB_SIZE 10

 `define SVADU_SUPPORTED 0
--- a/config/rv32imc/config.vh
+++ b/config/rv32imc/config.vh
@ -0,0 +1,156 @@
+//////////////////////////////////////////
+// config.vh
+//
+// Written: David_Harris@hmc.edu 4 January 2021
+// Modified: 
+//
+// Purpose: Specify which features are configured
+//          Macros to determine which modes are supported based on MISA
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "BranchPredictorType.vh"
+
+localparam FPGA = 0;
+localparam QEMU = 0;
+
+// RV32 or RV64: XLEN = 32 or 64
+localparam XLEN = 32'd32;
+
+// IEEE 754 compliance
+localparam IEEE754 = 0;
+
+localparam MISA = (32'h00000104 | 1 << 20 | 1 << 18 | 1 << 12);
+localparam ZICSR_SUPPORTED = 1;
+localparam ZIFENCEI_SUPPORTED = 1;
+localparam COUNTERS = 12'd32;
+localparam ZICOUNTERS_SUPPORTED = 1;
+localparam ZFH_SUPPORTED = 0;
+localparam SSTC_SUPPORTED = 0;
+
+// LSU microarchitectural Features
+localparam BUS_SUPPORTED = 1;
+localparam DCACHE_SUPPORTED = 0;
+localparam ICACHE_SUPPORTED = 0;
+localparam VIRTMEM_SUPPORTED = 0;
+localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
+localparam BIGENDIAN_SUPPORTED = 0;
+
+// TLB configuration.  Entries should be a power of 2
+localparam ITLB_ENTRIES = 32'd0;
+localparam DTLB_ENTRIES = 32'd0;
+
+// Cache configuration.  Sizes should be a power of two
+// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
+localparam DCACHE_NUMWAYS = 32'd4;
+localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
+localparam DCACHE_LINELENINBITS = 32'd512;
+localparam ICACHE_NUMWAYS = 32'd4;
+localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
+localparam ICACHE_LINELENINBITS = 32'd512;
+
+// Integer Divider Configuration
+// IDIV_BITSPERCYCLE must be 1, 2, or 4
+localparam IDIV_BITSPERCYCLE = 32'd2;
+localparam IDIV_ON_FPU = 0;
+
+// Legal number of PMP entries are 0, 16, or 64
+localparam PMP_ENTRIES = 32'd0;
+
+// Address space
+localparam RESET_VECTOR = 64'h80000000;
+
+// WFI Timeout Wait
+localparam WFI_TIMEOUT_BIT = 32'd16;
+
+// Peripheral Addresses
+// Peripheral memory space extends from BASE to BASE+RANGE
+// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
+localparam DTIM_SUPPORTED = 1'b1;
+localparam DTIM_BASE       = 64'h80000000;
+localparam DTIM_RANGE      = 64'h007FFFFF;
+localparam IROM_SUPPORTED = 1'b1;
+localparam IROM_BASE       = 64'h80000000;
+localparam IROM_RANGE      = 64'h007FFFFF;
+localparam BOOTROM_SUPPORTED = 1'b0;
+localparam BOOTROM_BASE   = 64'h00001000;
+localparam BOOTROM_RANGE  = 64'h00000FFF;
+localparam UNCORE_RAM_SUPPORTED = 1'b0;
+localparam UNCORE_RAM_BASE       = 64'h80000000;
+localparam UNCORE_RAM_RANGE      = 64'h07FFFFFF;
+localparam EXT_MEM_SUPPORTED = 1'b0;
+localparam EXT_MEM_BASE       = 64'h80000000;
+localparam EXT_MEM_RANGE      = 64'h07FFFFFF;
+localparam CLINT_SUPPORTED = 1'b1;
+localparam CLINT_BASE  = 64'h02000000;
+localparam CLINT_RANGE = 64'h0000FFFF;
+localparam GPIO_SUPPORTED = 1'b1;
+localparam GPIO_BASE   = 64'h10060000;
+localparam GPIO_RANGE  = 64'h000000FF;
+localparam UART_SUPPORTED = 1'b1;
+localparam UART_BASE   = 64'h10000000;
+localparam UART_RANGE  = 64'h00000007;
+localparam PLIC_SUPPORTED = 1'b1;
+localparam PLIC_BASE   = 64'h0C000000;
+localparam PLIC_RANGE  = 64'h03FFFFFF;
+localparam SDC_SUPPORTED = 1'b0;
+localparam SDC_BASE   = 64'h00012100;
+localparam SDC_RANGE  = 64'h0000001F;
+
+// Bus Interface width
+localparam AHBW = 32'd32;
+
+// Test modes
+
+// Tie GPIO outputs back to inputs
+localparam GPIO_LOOPBACK_TEST = 1;
+
+// Hardware configuration
+localparam UART_PRESCALE = 32'd1;
+
+// Interrupt configuration
+localparam PLIC_NUM_SRC = 32'd10;
+// derived flag: evaluates to 1 when PLIC_NUM_SRC is below 32, 0 otherwise (no manual edit needed)
+localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
+localparam PLIC_GPIO_ID = 32'd3;
+localparam PLIC_UART_ID = 32'd10;
+
+localparam BPRED_SUPPORTED = 0;
+localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
+localparam BPRED_SIZE = 32'd10;
+localparam BPRED_NUM_LHR = 32'd6;
+localparam BTB_SIZE = 32'd10;
+
+localparam SVADU_SUPPORTED = 0;
+localparam ZMMUL_SUPPORTED = 0;
+
+// FPU division architecture
+localparam RADIX = 32'h4;
+localparam DIVCOPIES = 32'h4;
+
+// bit manipulation
+localparam ZBA_SUPPORTED = 0;
+localparam ZBB_SUPPORTED = 0;
+localparam ZBC_SUPPORTED = 0;
+localparam ZBS_SUPPORTED = 0;
+
+// Memory synthesis configuration
+localparam USE_SRAM = 0;
+
+`include "test-shared.vh"
--- a/config/rv32imc/wally-config.vh
+++ b/config/rv32imc/wally-config.vh
@ -135,6 +135,7 @@
 `define BPRED_SUPPORTED 0
 `define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
 `define BPRED_SIZE 10
+`define BPRED_NUM_LHR 6
 `define BTB_SIZE 10

 `define SVADU_SUPPORTED 0
--- a/config/rv64fpquad/config.vh
+++ b/config/rv64fpquad/config.vh
@ -0,0 +1,159 @@
+//////////////////////////////////////////
+// config.vh
+//
+// Written: David_Harris@hmc.edu 4 January 2021
+// Modified: 
+//
+// Purpose: Specify which features are configured
+//          Macros to determine which modes are supported based on MISA
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "BranchPredictorType.vh"
+
+localparam FPGA = 0;
+localparam QEMU = 0;
+
+// RV32 or RV64: XLEN = 32 or 64
+localparam XLEN = 32'd64;
+
+// IEEE 754 compliance
+localparam IEEE754 = 0;
+
+// MISA RISC-V configuration per specification
+localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 );
+localparam ZICSR_SUPPORTED = 1;
+localparam ZIFENCEI_SUPPORTED = 1;
+localparam COUNTERS = 12'd32;
+localparam ZICOUNTERS_SUPPORTED = 1;
+localparam ZFH_SUPPORTED = 1;
+localparam SSTC_SUPPORTED = 0;
+
+// LSU microarchitectural Features
+localparam BUS_SUPPORTED = 1;
+localparam DCACHE_SUPPORTED = 1;
+localparam ICACHE_SUPPORTED = 1;
+localparam VIRTMEM_SUPPORTED = 1;
+localparam VECTORED_INTERRUPTS_SUPPORTED = 1 ;
+localparam BIGENDIAN_SUPPORTED = 1;
+
+// TLB configuration.  Entries should be a power of 2
+localparam ITLB_ENTRIES = 32'd32;
+localparam DTLB_ENTRIES = 32'd32;
+
+// Cache configuration.  Sizes should be a power of two
+// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
+localparam DCACHE_NUMWAYS = 32'd4;
+localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
+localparam DCACHE_LINELENINBITS = 32'd512;
+localparam ICACHE_NUMWAYS = 32'd4;
+localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
+localparam ICACHE_LINELENINBITS = 32'd512;
+
+// Integer Divider Configuration
+// IDIV_BITSPERCYCLE must be 1, 2, or 4
+localparam IDIV_BITSPERCYCLE = 32'd4;
+localparam IDIV_ON_FPU = 1;
+
+// Legal number of PMP entries are 0, 16, or 64
+localparam PMP_ENTRIES = 32'd16;
+
+// Address space
+localparam RESET_VECTOR = 64'h0000000080000000;
+
+// Bus Interface width
+localparam AHBW = 32'd64;
+
+// WFI Timeout Wait
+localparam WFI_TIMEOUT_BIT = 32'd16;
+
+// Peripheral Physical Addresses
+// Peripheral memory space extends from BASE to BASE+RANGE
+// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
+
+// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file?
+localparam DTIM_SUPPORTED = 1'b0;
+localparam DTIM_BASE =       64'h80000000;
+localparam DTIM_RANGE =      64'h007FFFFF;
+localparam IROM_SUPPORTED = 1'b0;
+localparam IROM_BASE =       64'h80000000;
+localparam IROM_RANGE =      64'h007FFFFF;
+localparam BOOTROM_SUPPORTED = 1'b1;
+localparam BOOTROM_BASE =   64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
+localparam BOOTROM_RANGE =  64'h00000FFF;
+localparam UNCORE_RAM_SUPPORTED = 1'b1;
+localparam UNCORE_RAM_BASE =       64'h80000000;
+localparam UNCORE_RAM_RANGE =      64'h7FFFFFFF;
+localparam EXT_MEM_SUPPORTED = 1'b0;
+localparam EXT_MEM_BASE =       64'h80000000;
+localparam EXT_MEM_RANGE =      64'h07FFFFFF;
+localparam CLINT_SUPPORTED = 1'b1;
+localparam CLINT_BASE =  64'h02000000;
+localparam CLINT_RANGE = 64'h0000FFFF;
+localparam GPIO_SUPPORTED = 1'b1;
+localparam GPIO_BASE =   64'h10060000;
+localparam GPIO_RANGE =  64'h000000FF;
+localparam UART_SUPPORTED = 1'b1;
+localparam UART_BASE =   64'h10000000;
+localparam UART_RANGE =  64'h00000007;
+localparam PLIC_SUPPORTED = 1'b1;
+localparam PLIC_BASE =   64'h0C000000;
+localparam PLIC_RANGE =  64'h03FFFFFF;
+localparam SDC_SUPPORTED = 1'b0;
+localparam SDC_BASE =   64'h00012100;
+localparam SDC_RANGE =  64'h0000001F;
+
+// Test modes
+
+// Tie GPIO outputs back to inputs
+localparam GPIO_LOOPBACK_TEST = 1;
+
+// Hardware configuration
+localparam UART_PRESCALE = 32'd1;
+
+// Interrupt configuration
+localparam PLIC_NUM_SRC = 32'd10;
+// derived flag: evaluates to 1 when PLIC_NUM_SRC is below 32, 0 otherwise (no manual edit needed)
+localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
+localparam PLIC_GPIO_ID = 32'd3;
+localparam PLIC_UART_ID = 32'd10;
+
+localparam BPRED_SUPPORTED = 1;
+localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
+localparam BPRED_SIZE = 32'd10;
+localparam BPRED_NUM_LHR = 32'd6;
+localparam BTB_SIZE = 32'd10;
+
+localparam SVADU_SUPPORTED = 0;
+localparam ZMMUL_SUPPORTED = 0;
+
+// FPU division architecture
+localparam RADIX = 32'h4;
+localparam DIVCOPIES = 32'h4;
+
+// bit manipulation
+localparam ZBA_SUPPORTED = 0;
+localparam ZBB_SUPPORTED = 0;
+localparam ZBC_SUPPORTED = 0;
+localparam ZBS_SUPPORTED = 0;
+
+// Memory synthesis configuration
+localparam USE_SRAM = 0;
+
+`include "test-shared.vh"
--- a/config/rv64fpquad/wally-config.vh
+++ b/config/rv64fpquad/wally-config.vh
@ -138,6 +138,7 @@
 `define BPRED_SUPPORTED 1
 `define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
 `define BPRED_SIZE 10
+`define BPRED_NUM_LHR 6
 `define BTB_SIZE 10

 `define SVADU_SUPPORTED 0
--- a/config/rv64gc/config.vh
+++ b/config/rv64gc/config.vh
@ -0,0 +1,162 @@
+//////////////////////////////////////////
+// config.vh
+//
+// Written: David_Harris@hmc.edu 4 January 2021
+// Modified: 
+//
+// Purpose: Specify which features are configured
+//          Macros to determine which modes are supported based on MISA
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// include shared configuration
+// `include "wally-shared.vh"
+
+`include "BranchPredictorType.vh"
+
+localparam FPGA = 0;
+localparam QEMU = 0;
+
+// RV32 or RV64: XLEN = 32 or 64
+localparam XLEN = 32'd64;
+
+// IEEE 754 compliance
+localparam IEEE754 = 0;
+
+// MISA RISC-V configuration per specification
+localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0);
+localparam ZICSR_SUPPORTED = 1;
+localparam ZIFENCEI_SUPPORTED = 1;
+localparam COUNTERS = 12'd32;
+localparam ZICOUNTERS_SUPPORTED = 1;
+localparam ZFH_SUPPORTED = 0;
+localparam SSTC_SUPPORTED = 1;
+
+// LSU microarchitectural Features
+localparam BUS_SUPPORTED = 1;
+localparam DCACHE_SUPPORTED = 1;
+localparam ICACHE_SUPPORTED = 1;
+localparam VIRTMEM_SUPPORTED = 1;
+localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
+localparam BIGENDIAN_SUPPORTED = 1;
+
+// TLB configuration.  Entries should be a power of 2
+localparam ITLB_ENTRIES = 32'd32;
+localparam DTLB_ENTRIES = 32'd32;
+
+// Cache configuration.  Sizes should be a power of two
+// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
+localparam DCACHE_NUMWAYS = 32'd4;
+localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
+localparam DCACHE_LINELENINBITS = 32'd512;
+localparam ICACHE_NUMWAYS = 32'd4;
+localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
+localparam ICACHE_LINELENINBITS = 32'd512;
+
+// Integer Divider Configuration
+// IDIV_BITSPERCYCLE must be 1, 2, or 4
+localparam IDIV_BITSPERCYCLE = 32'd4;
+localparam IDIV_ON_FPU = 1;
+
+// Legal number of PMP entries are 0, 16, or 64
+localparam PMP_ENTRIES = 32'd16;
+
+// Address space
+localparam RESET_VECTOR = 64'h0000000080000000;
+
+// Bus Interface width
+localparam AHBW = 32'd64;
+
+// WFI Timeout Wait
+localparam WFI_TIMEOUT_BIT = 32'd16;
+
+// Peripheral Physical Addresses
+// Peripheral memory space extends from BASE to BASE+RANGE
+// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
+
+// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file?
+localparam DTIM_SUPPORTED = 1'b0;
+localparam DTIM_BASE =       64'h80000000;
+localparam DTIM_RANGE =      64'h007FFFFF;
+localparam IROM_SUPPORTED = 1'b0;
+localparam IROM_BASE =       64'h80000000;
+localparam IROM_RANGE =      64'h007FFFFF;
+localparam BOOTROM_SUPPORTED = 1'b1;
+localparam BOOTROM_BASE =   64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder;
+localparam BOOTROM_RANGE =  64'h00000FFF;
+localparam UNCORE_RAM_SUPPORTED = 1'b1;
+localparam UNCORE_RAM_BASE =       64'h80000000;
+localparam UNCORE_RAM_RANGE =      64'h7FFFFFFF;
+localparam EXT_MEM_SUPPORTED = 1'b0;
+localparam EXT_MEM_BASE =       64'h80000000;
+localparam EXT_MEM_RANGE =      64'h07FFFFFF;
+localparam CLINT_SUPPORTED = 1'b1;
+localparam CLINT_BASE =  64'h02000000;
+localparam CLINT_RANGE = 64'h0000FFFF;
+localparam GPIO_SUPPORTED = 1'b1;
+localparam GPIO_BASE =   64'h10060000;
+localparam GPIO_RANGE =  64'h000000FF;
+localparam UART_SUPPORTED = 1'b1;
+localparam UART_BASE =   64'h10000000;
+localparam UART_RANGE =  64'h00000007;
+localparam PLIC_SUPPORTED = 1'b1;
+localparam PLIC_BASE =   64'h0C000000;
+localparam PLIC_RANGE =  64'h03FFFFFF;
+localparam SDC_SUPPORTED = 1'b0;
+localparam SDC_BASE =   64'h00012100;
+localparam SDC_RANGE =  64'h0000001F;
+
+// Test modes
+
+// Tie GPIO outputs back to inputs
+localparam GPIO_LOOPBACK_TEST = 1;
+
+// Hardware configuration
+localparam UART_PRESCALE = 32'd1;
+
+// Interrupt configuration
+localparam PLIC_NUM_SRC = 32'd10;
+// derived flag: evaluates to 1 when PLIC_NUM_SRC is below 32, 0 otherwise (no manual edit needed)
+localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
+localparam PLIC_GPIO_ID = 32'd3;
+localparam PLIC_UART_ID = 32'd10;
+
+localparam BPRED_SUPPORTED = 1;
+localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
+localparam BPRED_NUM_LHR = 32'd6;
+localparam BPRED_SIZE = 32'd10;
+localparam BTB_SIZE = 32'd10;
+
+localparam SVADU_SUPPORTED = 1;
+localparam ZMMUL_SUPPORTED = 0;
+
+// FPU division architecture
+localparam RADIX = 32'h4;
+localparam DIVCOPIES = 32'h4;
+
+// bit manipulation
+localparam ZBA_SUPPORTED = 1;
+localparam ZBB_SUPPORTED = 1;
+localparam ZBC_SUPPORTED = 1;
+localparam ZBS_SUPPORTED = 1;
+
+// Memory synthesis configuration
+localparam USE_SRAM = 0;
+
+`include "test-shared.vh"
--- a/config/rv64gc/wally-config.vh
+++ b/config/rv64gc/wally-config.vh
@ -136,8 +136,10 @@
 `define PLIC_UART_ID 10

 `define BPRED_SUPPORTED 1
-`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
+//`define BPRED_TYPE "BP_GLOBAL_BASIC" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
+`define BPRED_TYPE "BP_GSHARE" // "BP_LOCAL_REPAIR" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
 `define BPRED_SIZE 10
+`define BPRED_NUM_LHR 4
 `define BTB_SIZE 10

 `define SVADU_SUPPORTED 1
--- a/config/rv64i/config.vh
+++ b/config/rv64i/config.vh
@ -0,0 +1,159 @@
+//////////////////////////////////////////
+// config.vh
+//
+// Written: David_Harris@hmc.edu 4 January 2021
+// Modified: 
+//
+// Purpose: Specify which features are configured
+//          Macros to determine which modes are supported based on MISA
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "BranchPredictorType.vh"
+
+localparam FPGA = 0;
+localparam QEMU = 0;
+
+// RV32 or RV64: XLEN = 32 or 64
+localparam XLEN = 32'd64;
+
+// IEEE 754 compliance
+localparam IEEE754 = 0;
+
+// MISA RISC-V configuration per specification
+localparam MISA = (32'h00000104);
+localparam ZICSR_SUPPORTED = 0;
+localparam ZIFENCEI_SUPPORTED = 0;
+localparam COUNTERS = 12'd32;
+localparam ZICOUNTERS_SUPPORTED = 0;
+localparam ZFH_SUPPORTED = 0;
+localparam SSTC_SUPPORTED = 0;
+
+// LSU microarchitectural Features
+localparam BUS_SUPPORTED = 0;
+localparam DCACHE_SUPPORTED = 0;
+localparam ICACHE_SUPPORTED = 0;
+localparam VIRTMEM_SUPPORTED = 0;
+localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
+localparam BIGENDIAN_SUPPORTED = 0;
+
+// TLB configuration.  Entries should be a power of 2
+localparam ITLB_ENTRIES = 32'd0;
+localparam DTLB_ENTRIES = 32'd0;
+
+// Cache configuration.  Sizes should be a power of two
+// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
+localparam DCACHE_NUMWAYS = 32'd4;
+localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
+localparam DCACHE_LINELENINBITS = 32'd512;
+localparam ICACHE_NUMWAYS = 32'd4;
+localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
+localparam ICACHE_LINELENINBITS = 32'd512;
+
+// Integer Divider Configuration
+// IDIV_BITSPERCYCLE must be 1, 2, or 4
+localparam IDIV_BITSPERCYCLE = 32'd4;
+localparam IDIV_ON_FPU = 0;
+
+// Legal number of PMP entries are 0, 16, or 64
+localparam PMP_ENTRIES = 32'd0;
+
+// Address space
+localparam RESET_VECTOR = 64'h0000000080000000;
+
+// Bus Interface width
+localparam AHBW = (XLEN);
+
+// WFI Timeout Wait
+localparam WFI_TIMEOUT_BIT = 32'd16;
+
+// Peripheral Physical Addresses
+// Peripheral memory space extends from BASE to BASE+RANGE
+// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
+
+// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file?
+localparam DTIM_SUPPORTED = 1'b1;
+localparam DTIM_BASE =       64'h80000000;
+localparam DTIM_RANGE =      64'h007FFFFF;
+localparam IROM_SUPPORTED = 1'b1;
+localparam IROM_BASE =       64'h80000000;
+localparam IROM_RANGE =      64'h007FFFFF;
+localparam BOOTROM_SUPPORTED = 1'b0;
+localparam BOOTROM_BASE =   64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
+localparam BOOTROM_RANGE =  64'h00000FFF;
+localparam UNCORE_RAM_SUPPORTED = 1'b0;
+localparam UNCORE_RAM_BASE =       64'h80000000;
+localparam UNCORE_RAM_RANGE =      64'h7FFFFFFF;
+localparam EXT_MEM_SUPPORTED = 1'b0;
+localparam EXT_MEM_BASE =       64'h80000000;
+localparam EXT_MEM_RANGE =      64'h07FFFFFF;
+localparam CLINT_SUPPORTED = 1'b0;
+localparam CLINT_BASE =  64'h02000000;
+localparam CLINT_RANGE = 64'h0000FFFF;
+localparam GPIO_SUPPORTED = 1'b0;
+localparam GPIO_BASE =   64'h10060000;
+localparam GPIO_RANGE =  64'h000000FF;
+localparam UART_SUPPORTED = 1'b0;
+localparam UART_BASE =   64'h10000000;
+localparam UART_RANGE =  64'h00000007;
+localparam PLIC_SUPPORTED = 1'b0;
+localparam PLIC_BASE =   64'h0C000000;
+localparam PLIC_RANGE =  64'h03FFFFFF;
+localparam SDC_SUPPORTED = 1'b0;
+localparam SDC_BASE =   64'h00012100;
+localparam SDC_RANGE =  64'h0000001F;
+
+// Test modes
+
+// Tie GPIO outputs back to inputs
+localparam GPIO_LOOPBACK_TEST = 1;
+
+// Hardware configuration
+localparam UART_PRESCALE = 32'd1;
+
+// Interrupt configuration
+localparam PLIC_NUM_SRC = 32'd10;
+// derived flag: evaluates to 1 when PLIC_NUM_SRC is below 32, 0 otherwise (no manual edit needed)
+localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
+localparam PLIC_GPIO_ID = 32'd3;
+localparam PLIC_UART_ID = 32'd10;
+
+localparam BPRED_SUPPORTED = 0;
+localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
+localparam BPRED_SIZE = 32'd10;
+localparam BPRED_NUM_LHR = 32'd6;
+localparam BTB_SIZE = 32'd10;
+
+localparam SVADU_SUPPORTED = 0;
+localparam ZMMUL_SUPPORTED = 0;
+
+// FPU division architecture
+localparam RADIX = 32'h4;
+localparam DIVCOPIES = 32'h4;
+
+// bit manipulation
+localparam ZBA_SUPPORTED = 0;
+localparam ZBB_SUPPORTED = 0;
+localparam ZBC_SUPPORTED = 0;
+localparam ZBS_SUPPORTED = 0;
+
+// Memory synthesis configuration
+localparam USE_SRAM = 0;
+
+`include "test-shared.vh"
--- a/config/rv64i/wally-config.vh
+++ b/config/rv64i/wally-config.vh
@ -138,6 +138,7 @@
 `define BPRED_SUPPORTED 0
 `define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
 `define BPRED_SIZE 10
+`define BPRED_NUM_LHR 6
 `define BTB_SIZE 10

 `define SVADU_SUPPORTED 0
--- a/config/shared/BranchPredictorType.vh
+++ b/config/shared/BranchPredictorType.vh
@ -0,0 +1,3 @@
+typedef enum  {BP_TWOBIT, BP_GSHARE, BP_GLOBAL, BP_GSHARE_BASIC, 
+               BP_GLOBAL_BASIC, BP_LOCAL_BASIC, BP_LOCAL_AHEAD, BP_LOCAL_REPAIR} BranchPredictorType;
+
--- a/config/shared/parameter-defs.vh
+++ b/config/shared/parameter-defs.vh
@ -0,0 +1,165 @@
+
+// Populate parameter structure with values specific to the current configuration
+
+parameter cvw_t P = '{ 
+  FPGA :                 FPGA,  
+  QEMU :                 QEMU,  
+  XLEN :                 XLEN,  
+  IEEE754 :              IEEE754, 
+  MISA :                 MISA, 
+  AHBW :                 AHBW, 
+  ZICSR_SUPPORTED :      ZICSR_SUPPORTED,
+  ZIFENCEI_SUPPORTED :   ZIFENCEI_SUPPORTED,
+  COUNTERS :             COUNTERS,
+  ZICOUNTERS_SUPPORTED : ZICOUNTERS_SUPPORTED,
+  ZFH_SUPPORTED :        ZFH_SUPPORTED,
+  SSTC_SUPPORTED :       SSTC_SUPPORTED,
+  VIRTMEM_SUPPORTED :        VIRTMEM_SUPPORTED,
+  VECTORED_INTERRUPTS_SUPPORTED :        VECTORED_INTERRUPTS_SUPPORTED,
+  BIGENDIAN_SUPPORTED :        BIGENDIAN_SUPPORTED,
+  SVADU_SUPPORTED :        SVADU_SUPPORTED,
+  ZMMUL_SUPPORTED :        ZMMUL_SUPPORTED,
+  BUS_SUPPORTED :        BUS_SUPPORTED,
+  DCACHE_SUPPORTED :        DCACHE_SUPPORTED,
+  ICACHE_SUPPORTED :        ICACHE_SUPPORTED,
+  ITLB_ENTRIES :        ITLB_ENTRIES,
+  DTLB_ENTRIES :        DTLB_ENTRIES,
+  DCACHE_NUMWAYS :        DCACHE_NUMWAYS,
+  DCACHE_WAYSIZEINBYTES :        DCACHE_WAYSIZEINBYTES,
+  DCACHE_LINELENINBITS :        DCACHE_LINELENINBITS,
+  ICACHE_NUMWAYS :        ICACHE_NUMWAYS,
+  ICACHE_WAYSIZEINBYTES :        ICACHE_WAYSIZEINBYTES,
+  ICACHE_LINELENINBITS :        ICACHE_LINELENINBITS,
+  IDIV_BITSPERCYCLE :        IDIV_BITSPERCYCLE,
+  IDIV_ON_FPU :        IDIV_ON_FPU,
+  PMP_ENTRIES :        PMP_ENTRIES,
+  RESET_VECTOR :        RESET_VECTOR,
+  WFI_TIMEOUT_BIT :        WFI_TIMEOUT_BIT,
+  DTIM_SUPPORTED :        DTIM_SUPPORTED,
+  DTIM_BASE :        DTIM_BASE,
+  DTIM_RANGE :        DTIM_RANGE,
+  IROM_SUPPORTED :        IROM_SUPPORTED,
+  IROM_BASE :        IROM_BASE,
+  IROM_RANGE :        IROM_RANGE,
+  BOOTROM_SUPPORTED :        BOOTROM_SUPPORTED,
+  BOOTROM_BASE :        BOOTROM_BASE,
+  BOOTROM_RANGE :        BOOTROM_RANGE,
+  UNCORE_RAM_SUPPORTED :        UNCORE_RAM_SUPPORTED,
+  UNCORE_RAM_BASE :        UNCORE_RAM_BASE,
+  UNCORE_RAM_RANGE :        UNCORE_RAM_RANGE,
+  EXT_MEM_SUPPORTED :        EXT_MEM_SUPPORTED,
+  EXT_MEM_BASE :        EXT_MEM_BASE,
+  EXT_MEM_RANGE :        EXT_MEM_RANGE,
+  CLINT_SUPPORTED :        CLINT_SUPPORTED,
+  CLINT_BASE :        CLINT_BASE,
+  CLINT_RANGE :        CLINT_RANGE,
+  GPIO_SUPPORTED :        GPIO_SUPPORTED,
+  GPIO_BASE :        GPIO_BASE,
+  GPIO_RANGE :        GPIO_RANGE,
+  UART_SUPPORTED :        UART_SUPPORTED,
+  UART_BASE :        UART_BASE,
+  UART_RANGE :        UART_RANGE,
+  PLIC_SUPPORTED :        PLIC_SUPPORTED,
+  PLIC_BASE :        PLIC_BASE,
+  PLIC_RANGE :        PLIC_RANGE,
+  SDC_SUPPORTED :        SDC_SUPPORTED,
+  SDC_BASE :        SDC_BASE,
+  SDC_RANGE :        SDC_RANGE,
+  GPIO_LOOPBACK_TEST :        GPIO_LOOPBACK_TEST,
+  UART_PRESCALE :        UART_PRESCALE ,
+  PLIC_NUM_SRC :        PLIC_NUM_SRC,
+  PLIC_NUM_SRC_LT_32 :        PLIC_NUM_SRC_LT_32,
+  PLIC_GPIO_ID :        PLIC_GPIO_ID,
+  PLIC_UART_ID :        PLIC_UART_ID,
+  BPRED_SUPPORTED :        BPRED_SUPPORTED,
+  BPRED_TYPE :        BPRED_TYPE,
+  BPRED_SIZE :        BPRED_SIZE,
+  BPRED_NUM_LHR : BPRED_NUM_LHR,                       
+  BTB_SIZE :        BTB_SIZE,
+  RADIX :        RADIX,
+  DIVCOPIES :        DIVCOPIES,
+  ZBA_SUPPORTED :        ZBA_SUPPORTED,
+  ZBB_SUPPORTED :        ZBB_SUPPORTED,
+  ZBC_SUPPORTED :        ZBC_SUPPORTED,
+  ZBS_SUPPORTED :        ZBS_SUPPORTED,
+  USE_SRAM :        USE_SRAM,
+  M_MODE  : M_MODE, 
+  S_MODE  : S_MODE, 
+  U_MODE  : U_MODE, 
+  VPN_SEGMENT_BITS : VPN_SEGMENT_BITS,
+  VPN_BITS : VPN_BITS,
+  PPN_BITS : PPN_BITS,
+  PA_BITS : PA_BITS,
+  SVMODE_BITS : SVMODE_BITS,
+  ASID_BASE : ASID_BASE,
+  ASID_BITS : ASID_BITS,
+  NO_TRANSLATE : NO_TRANSLATE,
+  SV32 : SV32,
+  SV39 : SV39,
+  SV48 : SV48,
+  A_SUPPORTED : A_SUPPORTED,
+  B_SUPPORTED : B_SUPPORTED,
+  C_SUPPORTED : C_SUPPORTED,
+  D_SUPPORTED : D_SUPPORTED,
+  E_SUPPORTED : E_SUPPORTED,
+  F_SUPPORTED : F_SUPPORTED,
+  I_SUPPORTED : I_SUPPORTED,
+  M_SUPPORTED : M_SUPPORTED,
+  Q_SUPPORTED : Q_SUPPORTED,
+  S_SUPPORTED : S_SUPPORTED,
+  U_SUPPORTED : U_SUPPORTED,
+  LOG_XLEN : LOG_XLEN,
+  PMPCFG_ENTRIES : PMPCFG_ENTRIES,
+  Q_LEN : Q_LEN,
+  Q_NE : Q_NE,
+  Q_NF : Q_NF,
+  Q_BIAS : Q_BIAS,
+  Q_FMT : Q_FMT,
+  D_LEN : D_LEN,
+  D_NE : D_NE,
+  D_NF : D_NF,
+  D_BIAS : D_BIAS,
+  D_FMT : D_FMT,
+  S_LEN : S_LEN,
+  S_NE : S_NE,
+  S_NF : S_NF,
+  S_BIAS : S_BIAS,
+  S_FMT : S_FMT,
+  H_LEN : H_LEN,
+  H_NE : H_NE,
+  H_NF : H_NF,
+  H_BIAS : H_BIAS,
+  H_FMT : H_FMT,
+  FLEN : FLEN,
+  NE   : NE  ,
+  NF   : NF  ,
+  FMT  : FMT ,
+  BIAS : BIAS,
+  FPSIZES : FPSIZES,
+  FMTBITS : FMTBITS,
+  LEN1  : LEN1 ,
+  NE1   : NE1  ,
+  NF1   : NF1  ,
+  FMT1  : FMT1 ,
+  BIAS1 : BIAS1,
+  LEN2  : LEN2 ,
+  NE2   : NE2  ,
+  NF2   : NF2  ,
+  FMT2  : FMT2 ,
+  BIAS2 : BIAS2,
+  CVTLEN : CVTLEN,
+  LLEN : LLEN,
+  LOGCVTLEN : LOGCVTLEN,
+  NORMSHIFTSZ : NORMSHIFTSZ,
+  LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
+  CORRSHIFTSZ : CORRSHIFTSZ,
+  DIVN        : DIVN,
+  LOGR        : LOGR,
+  RK          : RK,
+  LOGRK       : LOGRK,
+  FPDUR       : FPDUR,
+  DURLEN      : DURLEN,
+  DIVb        : DIVb,
+  DIVBLEN     : DIVBLEN,
+  DIVa        : DIVa
+};
--- a/config/shared/test-shared.vh
+++ b/config/shared/test-shared.vh
@ -0,0 +1,119 @@
+// constants defining different privilege modes
+// defined in Table 1.1 of the privileged spec
+localparam M_MODE  = (2'b11);
+localparam S_MODE  = (2'b01);
+localparam U_MODE  = (2'b00);
+
+// Virtual Memory Constants
+localparam VPN_SEGMENT_BITS = (XLEN == 32 ? 32'd10 : 32'd9);
+localparam VPN_BITS = (XLEN==32 ? (2*VPN_SEGMENT_BITS) : (4*VPN_SEGMENT_BITS));
+localparam PPN_BITS = (XLEN==32 ? 32'd22 : 32'd44);
+localparam PA_BITS = (XLEN==32 ? 32'd34 : 32'd56);
+localparam SVMODE_BITS = (XLEN==32 ? 32'd1 : 32'd4);
+localparam ASID_BASE = (XLEN==32 ? 32'd22 : 32'd44);
+localparam ASID_BITS = (XLEN==32 ? 32'd9 : 32'd16);
+
+// constants to check SATP_MODE against
+// defined in Table 4.3 of the privileged spec
+localparam NO_TRANSLATE = 4'd0;
+localparam SV32 = 4'd1;
+localparam SV39 = 4'd8;
+localparam SV48 = 4'd9;
+
+// macros to define supported modes
+localparam A_SUPPORTED = ((MISA >> 0) % 2 == 1);
+localparam B_SUPPORTED = ((ZBA_SUPPORTED | ZBB_SUPPORTED | ZBC_SUPPORTED | ZBS_SUPPORTED));// not based on MISA
+localparam C_SUPPORTED = ((MISA >> 2) % 2 == 1);
+localparam D_SUPPORTED = ((MISA >> 3) % 2 == 1);
+localparam E_SUPPORTED = ((MISA >> 4) % 2 == 1);
+localparam F_SUPPORTED = ((MISA >> 5) % 2 == 1);
+localparam I_SUPPORTED = ((MISA >> 8) % 2 == 1);
+localparam M_SUPPORTED = ((MISA >> 12) % 2 == 1);
+localparam Q_SUPPORTED = ((MISA >> 16) % 2 == 1);
+localparam S_SUPPORTED = ((MISA >> 18) % 2 == 1);
+localparam U_SUPPORTED = ((MISA >> 20) % 2 == 1);
+// N-mode user-level interrupts are deprecated per Andrew Waterman 1/13/21
+
+// logarithm of XLEN, used for number of index bits to select
+localparam LOG_XLEN = (XLEN == 32 ? 32'd5 : 32'd6);
+
+// Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries)
+localparam PMPCFG_ENTRIES = (PMP_ENTRIES/32'd8);
+
+// Floating point constants for Quad, Double, Single, and Half precisions
+// Lim: I've made some of these 64 bit to avoid width warnings. 
+// If errors crop up, try downsizing back to 32.
+localparam Q_LEN = 32'd128;
+localparam Q_NE = 32'd15;
+localparam Q_NF = 32'd112;
+localparam Q_BIAS = 32'd16383;
+localparam Q_FMT = 2'd3;
+localparam D_LEN = 32'd64;
+localparam D_NE = 32'd11;
+localparam D_NF = 32'd52;
+localparam D_BIAS = 32'd1023;
+localparam D_FMT = 2'd1;
+localparam S_LEN = 32'd32;
+localparam S_NE = 32'd8;
+localparam S_NF = 32'd23;
+localparam S_BIAS = 32'd127;
+localparam S_FMT = 2'd0;
+localparam H_LEN = 32'd16;
+localparam H_NE = 32'd5;
+localparam H_NF = 32'd10;
+localparam H_BIAS = 32'd15;
+localparam H_FMT = 2'd2;
+
+// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
+localparam FLEN = (Q_SUPPORTED ? Q_LEN  : D_SUPPORTED ? D_LEN  : S_LEN);
+localparam NE   = (Q_SUPPORTED ? Q_NE   : D_SUPPORTED ? D_NE   : S_NE);
+localparam NF   = (Q_SUPPORTED ? Q_NF   : D_SUPPORTED ? D_NF   : S_NF);
+localparam FMT  = (Q_SUPPORTED ? 2'd3    : D_SUPPORTED ? 2'd1    : 2'd0);
+localparam BIAS = (Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : S_BIAS);
+/* Delete once tested dh 10/10/22
+
+localparam FLEN = (Q_SUPPORTED ? Q_LEN  : D_SUPPORTED ? D_LEN  : F_SUPPORTED ? S_LEN  : H_LEN);
+localparam NE   = (Q_SUPPORTED ? Q_NE   : D_SUPPORTED ? D_NE   : F_SUPPORTED ? S_NE   : H_NE);
+localparam NF   = (Q_SUPPORTED ? Q_NF   : D_SUPPORTED ? D_NF   : F_SUPPORTED ? S_NF   : H_NF); 
+localparam FMT  = (Q_SUPPORTED ? 2'd3       : D_SUPPORTED ? 2'd1       : F_SUPPORTED ? 2'd0       : 2'd2);
+localparam BIAS = (Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : F_SUPPORTED ? S_BIAS : H_BIAS);*/
+
+// Floating point constants needed for FPU parameterization
+localparam FPSIZES = ((32)'(Q_SUPPORTED)+(32)'(D_SUPPORTED)+(32)'(F_SUPPORTED)+(32)'(ZFH_SUPPORTED));
+localparam FMTBITS = ((32)'(FPSIZES>=3)+1);
+localparam LEN1  = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_LEN  : (F_SUPPORTED & (FLEN != S_LEN)) ? S_LEN  : H_LEN);
+localparam NE1   = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_NE   : (F_SUPPORTED & (FLEN != S_LEN)) ? S_NE   : H_NE);
+localparam NF1   = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_NF   : (F_SUPPORTED & (FLEN != S_LEN)) ? S_NF   : H_NF);
+localparam FMT1  = ((D_SUPPORTED & (FLEN != D_LEN)) ? 2'd1    : (F_SUPPORTED & (FLEN != S_LEN)) ? 2'd0    : 2'd2);
+localparam BIAS1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_BIAS : (F_SUPPORTED & (FLEN != S_LEN)) ? S_BIAS : H_BIAS);
+localparam LEN2  = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_LEN  : H_LEN);
+localparam NE2   = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NE   : H_NE);
+localparam NF2   = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF   : H_NF);
+localparam FMT2  = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0    : 2'd2);
+localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS);
+
+// division constants
+localparam DIVN        = (((NF<XLEN) & IDIV_ON_FPU) ? XLEN : NF+2); // standard length of input
+localparam LOGR        = ($clog2(RADIX));           // r = log(R)
+localparam RK          = (LOGR*DIVCOPIES);         // r*k used for intdiv preproc
+localparam LOGRK       = ($clog2(RK));               // log2(r*k)
+localparam FPDUR       = ((DIVN+1+(LOGR*DIVCOPIES))/(LOGR*DIVCOPIES)+(RADIX/4));
+localparam DURLEN      = ($clog2(FPDUR+1));
+localparam DIVb        = (FPDUR*LOGR*DIVCOPIES-1); // canonical fdiv size (b)
+localparam DIVBLEN     = ($clog2(DIVb+1)-1);
+localparam DIVa        = (DIVb+1-XLEN); // used for idiv on fpu
+
+// largest length in IEU/FPU
+localparam CVTLEN = ((NF<XLEN) ? (XLEN) : (NF));
+localparam LLEN = (($unsigned(FLEN)<$unsigned(XLEN)) ? ($unsigned(XLEN)) : ($unsigned(FLEN)));
+localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
+localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6)));
+localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ));
+localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVN+1+NF) > (3*NF+4) ? (DIVN+1+NF) : (3*NF+4)));
+
+
+// Disable spurious Verilator warnings
+
+/* verilator lint_off STMTDLY */
+/* verilator lint_off ASSIGNDLY */
+/* verilator lint_off PINCONNECTEMPTY */
--- a/sim/bpred-sim.py
+++ b/sim/bpred-sim.py
@ -46,18 +46,33 @@ configs = [
    )
 ]

+# bpdSize = [6, 8, 10, 12, 14, 16]
+# bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic', 'local_basic']
+# for CurrBPType in bpdType:
+#     for CurrBPSize in bpdSize:
+#         name = CurrBPType+str(CurrBPSize)
+#         configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize)
+#         tc = TestCase(
+#             name=name,
+#             variant="rv32gc",
+#             cmd="vsim > {} -c <<!\ndo wally-batch.do  rv32gc configOptions " + name + " embench " + configOptions,
+#             grepstr="")
+#         configs.append(tc)
+
 bpdSize = [6, 8, 10, 12, 14, 16]
-bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic']
+LHRSize = [4, 8, 10]
+bpdType = ['local_repair']
 for CurrBPType in bpdType:
    for CurrBPSize in bpdSize:
-        name = CurrBPType+str(CurrBPSize)
-        configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize)
-        tc = TestCase(
-            name=name,
-            variant="rv32gc",
-            cmd="vsim > {} -c <<!\ndo wally-batch.do  rv32gc configOptions " + name + " embench " + configOptions,
-            grepstr="")
-        configs.append(tc)
+        for CurrLHRSize in  LHRSize:
+            name = str(CurrLHRSize)+CurrBPType+str(CurrBPSize)
+            configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize) + " +define+BPRED_NUM_LHR=" + str(CurrLHRSize) + " "
+            tc = TestCase(
+                name=name,
+                variant="rv32gc",
+                cmd="vsim > {} -c <<!\ndo wally-batch.do  rv32gc configOptions " + name + " embench " + configOptions,
+                grepstr="")
+            configs.append(tc)

 import os
 from multiprocessing import Pool, TimeoutError
--- a/sim/lint-wally
+++ b/sim/lint-wally
@ -8,7 +8,7 @@ basepath=$(dirname $0)/..
 for config in rv32e rv64gc rv32gc rv32imc rv32i rv64i rv64fpquad; do
 #for config in  rv64gc; do
    echo "$config linting..."
-    if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes ); then
+    if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/wally/cvw.sv $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes ); then
        echo "Exiting after $config lint due to errors or warnings"
        exit 1
    fi
--- a/sim/wally-batch.do
+++ b/sim/wally-batch.do
@ -59,7 +59,7 @@ if {$argc >= 3} {
 # default to config/rv64ic, but allow this to be overridden at the command line.  For example:
 # do wally-pipelined-batch.do ../config/rv32imc rv32imc
 if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
-    vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
+    vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
    # start and run simulation
    if { $coverage } {
        echo "wally-batch buildroot coverage"
@ -88,7 +88,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
    exec ./slack-notifier/slack-notifier.py

 } elseif {$2 eq "ahb"} {
-    vlog -lint -work wkdir/work_${1}_${2}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv   ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286  +define+RAM_LATENCY=$3 +define+BURST_EN=$4
+    vlog -lint -work wkdir/work_${1}_${2}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv   ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286  +define+RAM_LATENCY=$3 +define+BURST_EN=$4
    # start and run simulation
    # remove +acc flag for faster sim during regressions if there is no need to access internal signals
    vopt wkdir/work_${1}_${2}_${3}_${4}.testbench -work wkdir/work_${1}_${2}_${3}_${4} -G TEST=$2 -o testbenchopt
@ -112,7 +112,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
    # **** fix this so we can pass any number of +defines.
    # only allows 3 right now

-    vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv   ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7
+    vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv   ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7
    # start and run simulation
    # remove +acc flag for faster sim during regressions if there is no need to access internal signals
    vopt wkdir/work_${1}_${3}_${4}.testbench -work wkdir/work_${1}_${3}_${4} -G TEST=$4 -o testbenchopt
@ -126,7 +126,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
    # power off -r /dut/core/*

 } else {
-    vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv   ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286
+    vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv   ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286
    # start and run simulation
    # remove +acc flag for faster sim during regressions if there is no need to access internal signals
    if {$coverage} {
--- a/sim/wave.do
+++ b/sim/wave.do
@ -6,12 +6,6 @@ add wave -noupdate /testbench/reset
 add wave -noupdate /testbench/reset_ext
 add wave -noupdate /testbench/memfilename
 add wave -noupdate /testbench/dut/core/SATP_REGW
-add wave -noupdate /testbench/FunctionName/FunctionName/PCD
-add wave -noupdate /testbench/FunctionName/FunctionName/PCE
-add wave -noupdate /testbench/FunctionName/FunctionName/PCF
-add wave -noupdate /testbench/FunctionName/FunctionName/PCM
-add wave -noupdate /testbench/FunctionName/FunctionName/PCM_temp
-add wave -noupdate /testbench/FunctionName/FunctionName/PCMOld
 add wave -noupdate /testbench/dut/core/InstrValidM
 add wave -noupdate /testbench/FunctionName/FunctionName/FunctionAddr
 add wave -noupdate /testbench/FunctionName/FunctionName/ProgramAddrIndex
@ -38,15 +32,16 @@ add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv
 add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/LoadPageFaultM
 add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/StoreAmoPageFaultM
 add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InterruptM
-add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/core/FlushD
-add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/core/FlushE
-add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/core/FlushM
-add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/core/FlushW
-add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallF
-add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallD
-add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallE
-add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallM
-add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallW
+add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/HPTWInstrAccessFaultM
+add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD
+add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE
+add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM
+add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW
+add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallF
+add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallD
+add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallE
+add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallM
+add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallW
 add wave -noupdate -group {instruction pipeline} /testbench/InstrFName
 add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/PostSpillInstrRawF
 add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD
@ -66,10 +61,10 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/c/RegWriteD
 add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/RdD
 add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D
 add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D
-add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE
-add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE
-add wave -noupdate -group {Execution Stage} /testbench/InstrEName
-add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE
+add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/PCE
+add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/InstrE
+add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName
+add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE
 add wave -noupdate -expand -group {Memory Stage} /testbench/FunctionName/FunctionName/FunctionName
 add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrValidM
 add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM
@ -99,13 +94,13 @@ add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/STVEC_REGW
 add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/csr/csru/csru/FRM_REGW
 add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/csr/csru/csru/FFLAGS_REGW
 add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/csr/csru/csru/STATUS_FS
-add wave -noupdate -group Bpred -group {branch update selection inputs} -divider {class check}
-add wave -noupdate -group Bpred -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF
-add wave -noupdate -group Bpred -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/NextValidPCE
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCF
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F
+add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} -divider {class check}
+add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF
+add wave -noupdate -expand -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE
+add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF
+add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/NextValidPCE
+add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/PCF
+add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F
 add wave -noupdate -group RegFile -expand /testbench/dut/core/ieu/dp/regf/rf
 add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a1
 add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a2
@ -120,8 +115,8 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core
 add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultW
 add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/A
 add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/B
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/Result
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/ALUControl
+add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/ALUResult
+add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/BALUControl
 add wave -noupdate -group alu -divider internals
 add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs1D
 add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs2D
@ -152,23 +147,24 @@ add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core
 add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core/ebu/ebu/LSUDisable
 add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core/ebu/ebu/IFUSelect
 add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core/ebu/ebu/LSUSelect
+add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core/ebu/ebu/ebufsmarb/CurrState
 add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HTRANS
 add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HBURST
-add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/ebu/ebu/IFUHTRANS
-add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/ebu/ebu/IFUHADDR
-add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/ebu/ebu/IFUHBURST
-add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/ebu/ebu/IFUHREADY
-add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/HRDATA
-add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUReq
-add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHTRANS
-add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHSIZE
-add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHBURST
-add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHADDR
-add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/HRDATA
-add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHWRITE
-add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHWSTRB
-add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHWDATA
-add wave -noupdate -group AHB -expand -group LSU -color Pink /testbench/dut/core/lsu/LSUHREADY
+add wave -noupdate -group AHB -group IFU /testbench/dut/core/ebu/ebu/IFUHTRANS
+add wave -noupdate -group AHB -group IFU /testbench/dut/core/ebu/ebu/IFUHADDR
+add wave -noupdate -group AHB -group IFU /testbench/dut/core/ebu/ebu/IFUHBURST
+add wave -noupdate -group AHB -group IFU /testbench/dut/core/ebu/ebu/IFUHREADY
+add wave -noupdate -group AHB -group IFU /testbench/dut/core/HRDATA
+add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUReq
+add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHTRANS
+add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHSIZE
+add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHBURST
+add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHADDR
+add wave -noupdate -group AHB -group LSU /testbench/dut/core/HRDATA
+add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHWRITE
+add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHWSTRB
+add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHWDATA
+add wave -noupdate -group AHB -group LSU -color Pink /testbench/dut/core/lsu/LSUHREADY
 add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HCLK
 add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HRESETn
 add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HREADY
@ -210,12 +206,12 @@ add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/d
 add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr
 add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/IEUAdrE
 add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/IEUAdrM
-add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CAdr
+add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet
 add wave -noupdate -group lsu -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay}
 add wave -noupdate -group lsu -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
 add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/HitWay
 add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUWriteEn
-add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CAdr
+add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CacheSet
 add wave -noupdate -group lsu -group dcache -group {replacement policy} -color {Orange Red} {/testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUMemory[0]}
 add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CurrLRU
 add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/NextLRU
@ -235,7 +231,7 @@ add wave -noupdate -group lsu -group dcache -group flush /testbench/dut/core/lsu
 add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay
 add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr
 add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/PAdr
-add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/CAdr
+add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet
 add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/NextLRU
 add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CurrLRU
 add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUWriteEn
@ -252,60 +248,59 @@ add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/RAM}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/RAM[62]}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/RAM[62]}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/wordram/CacheDataMem/RAM}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/RAM}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/wordram/CacheDataMem/RAM}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/RAM}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/wordram/CacheDataMem/RAM}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/RAM}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/we}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/RAM}
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/wordram/CacheDataMem/we}
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/wordram/CacheDataMem/RAM}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
-add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/CAdr
+add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidWay}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
@ -323,14 +318,7 @@ add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way3
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag}
 add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
-add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr
-add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr
-add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache
-add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall
-add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM
-add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteData
-add wave -noupdate -group lsu -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
-add wave -noupdate -group lsu -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit
+add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextSet
 add wave -noupdate -group lsu -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr
 add wave -noupdate -group lsu -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck
 add wave -noupdate -group lsu -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord
@ -362,22 +350,27 @@ add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMASt
 add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF
 add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM
 add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM
-add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/DTLBWalk
-add wave -noupdate -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/WalkerState
-add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/HPTWAdr
-add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/PTE
-add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/NextPageType
-add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/PageType
-add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/ValidNonLeafPTE
+add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/SelHPTW
+add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/HPTWStall
+add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/DTLBWalk
+add wave -noupdate -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/hptw/hptw/WalkerState
+add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/HPTWAdr
+add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/PTE
+add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/NextPageType
+add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/PageType
+add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/ValidNonLeafPTE
 add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/ITLBMissF
 add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/DTLBMissM
-add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/ITLBWriteF
-add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/DTLBWriteM
-add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/LSULoadAccessFaultM
-add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/LSUStoreAmoAccessFaultM
-add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/HPTWInstrAccessFaultM
-add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/LoadAccessFaultM
-add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/StoreAmoAccessFaultM
+add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/hptw/ITLBWriteF
+add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/hptw/DTLBWriteM
+add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSUAccessFaultM
+add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/DCacheStallM
+add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/HPTWInstrAccessFaultF
+add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSULoadAccessFaultM
+add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSUStoreAmoAccessFaultM
+add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LoadAccessFaultM
+add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/StoreAmoAccessFaultM
+add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/HPTWInstrAccessFault
 add wave -noupdate -group plic /testbench/dut/uncore/uncore/plic/plic/UARTIntr
 add wave -noupdate -group plic /testbench/dut/uncore/uncore/plic/plic/GPIOIntr
 add wave -noupdate -group plic /testbench/dut/uncore/uncore/plic/plic/MExtInt
@ -396,9 +389,9 @@ add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/un
 add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/uncore/plic/plic/max_priority_with_irqs
 add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/uncore/plic/plic/irqs_at_max_priority
 add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/uncore/plic/plic/threshMask
-add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOPinsIn
-add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOPinsOut
-add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOPinsEn
+add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOIN
+add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOOUT
+add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOEN
 add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOIntr
 add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/PSEL
 add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/PADDR
@ -462,7 +455,7 @@ add wave -noupdate -group ifu /testbench/dut/core/ifu/PostSpillInstrRawF
 add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUStallF
 add wave -noupdate -group ifu -group Spill /testbench/dut/core/ifu/Spill/spill/CurrState
 add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/SpillF
-add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/IFUCacheBusStallD
+add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/IFUCacheBusStallF
 add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/ITLBMissF
 add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/TakeSpillF
 add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HSIZE
@ -483,54 +476,38 @@ add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and
 add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr
 add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck
 add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/VictimWay
-add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/RAM}
-add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/bwe}
-add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/dout}
-add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/wordram/CacheDataMem/RAM}
+add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/wordram/CacheDataMem/dout}
+add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/wordram/CacheDataMem/RAM}
 add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite
 add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/ITLBMissF
 add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/VAdr
@ -556,21 +533,21 @@ add wave -noupdate -group ifu -group itlb -expand -group key19 {/testbench/dut/c
 add wave -noupdate -expand -group {Performance Counters} -label MCYCLE -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[0]}
 add wave -noupdate -expand -group {Performance Counters} -label MINSTRET -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[2]}
 add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label Branch -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[3]}
+add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Dir Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[7]}
 add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {Jump (Not Return)} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[4]}
 add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label Return -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[5]}
 add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[6]}
-add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Dir Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[7]}
 add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BTA Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[8]}
 add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {RAS Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[9]}
 add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP CLASS WRONG} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[10]}
-add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Access} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[16]}
-add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Miss} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[17]}
-add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Miss Cycles} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[18]}
-add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {Load Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]}
-add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {Store Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]}
-add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]}
-add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[13]}
-add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {D Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[15]}
+add wave -noupdate -expand -group {Performance Counters} -expand -group ICACHE -label {I Cache Access} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[16]}
+add wave -noupdate -expand -group {Performance Counters} -expand -group ICACHE -label {I Cache Miss} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[17]}
+add wave -noupdate -expand -group {Performance Counters} -expand -group ICACHE -label {I Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[18]}
+add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {Load Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]}
+add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {Store Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]}
+add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]}
+add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[13]}
+add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {D Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[15]}
 add wave -noupdate -expand -group {Performance Counters} -group Privileged -label {CSR Write} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[19]}
 add wave -noupdate -expand -group {Performance Counters} -group Privileged -label Fence.I {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[20]}
 add wave -noupdate -expand -group {Performance Counters} -group Privileged -label sfence.VMA {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[21]}
@ -609,34 +586,22 @@ add wave -noupdate -group uncore /testbench/dut/uncore/uncore/HSELNoneD
 add wave -noupdate -group uncore /testbench/dut/uncore/uncore/HSELPLICD
 add wave -noupdate -group uncore /testbench/dut/uncore/uncore/HRDATA
 add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/rd
-add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/IndexNextF
 add wave -noupdate -group {branch direction} -expand -group {branch outcome} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCSrcE
 add wave -noupdate -group {branch direction} -expand -group conditions /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushM
 add wave -noupdate -group {branch direction} -expand -group conditions /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushE
-add wave -noupdate -group {branch direction} -expand -group ghr /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRF
-add wave -noupdate -group {branch direction} -expand -group ghr /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRD
-add wave -noupdate -group {branch direction} -expand -group ghr /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRE
 add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushD
-add wave -noupdate -group {branch direction} -expand -group nextghr2 /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRNextF
-add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/IndexE
 add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/StallM
 add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushM
-add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRF
 add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCNextF
-add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRNextF
-add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/IndexNextF
-add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/DCacheAccess
-add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/ICacheMiss
-add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/ICacheAccess
-add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/DCacheMiss
-add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/InstrValidNotFlushedM
-add wave -noupdate /testbench/clk
-add wave -noupdate /testbench/HPMCSample/InitialHPMCOUNTERH
-add wave -noupdate /testbench/HPMCSample/EndSample
-add wave -noupdate /testbench/HPMCSample/StartSample
+add wave -noupdate /testbench/dut/core/ifu/PCF
+add wave -noupdate /testbench/reset
+add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPDirPredD
+add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BranchM
+add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/NewBPDirPredM
+add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/mem
 TreeUpdate [SetDefaultTree]
-WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {23 ns} 0} {{Cursor 5} {394987 ns} 1}
-quietly wave cursor active 4
+WaveRestoreCursors {{Cursor 4} {12208 ns} 1} {{Cursor 4} {435726 ns} 0}
+quietly wave cursor active 2
 configure wave -namecolwidth 250
 configure wave -valuecolwidth 194
 configure wave -justifyvalue left
@ -651,4 +616,4 @@ configure wave -griddelta 40
 configure wave -timeline 0
 configure wave -timelineunits ns
 update
-WaveRestoreZoom {0 ns} {52 ns}
+WaveRestoreZoom {435627 ns} {435795 ns}
--- a/src/cache/cache.sv
+++ b/src/cache/cache.sv
@ -27,9 +27,7 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, READ_ONLY_CACHE) (
+module cache #(parameter PA_BITS, XLEN, LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, READ_ONLY_CACHE) (
  input  logic                   clk,
  input  logic                   reset,
  input  logic                   Stall,             // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY
@ -40,7 +38,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
  input  logic                   FlushCache,        // Flush all dirty lines back to memory
  input  logic                   InvalidateCache,   // Clear all valid bits
  input  logic [11:0]            NextSet,           // Virtual address, but we only use the lower 12 bits.
-  input  logic [`PA_BITS-1:0]    PAdr,              // Physical address
+  input  logic [PA_BITS-1:0]     PAdr,              // Physical address
  input  logic [(WORDLEN-1)/8:0] ByteMask,          // Which bytes to write (D$ only)
  input  logic [WORDLEN-1:0]     CacheWriteData,    // Data to write to cache (D$ only)
  output logic                   CacheCommitted,    // Cache has started bus operation that shouldn't be interrupted
@ -57,7 +55,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
  input  logic [LOGBWPL-1:0]     BeatCount,         // Beat in burst
  input  logic [LINELEN-1:0]     FetchBuffer,       // Buffer long enough to hold entire cache line arriving from bus
  output logic [1:0]             CacheBusRW,        // [1] Read (cache line fetch) or [0] write bus (cache line writeback)
-  output logic [`PA_BITS-1:0]    CacheBusAdr        // Address for bus access
+  output logic [PA_BITS-1:0]     CacheBusAdr        // Address for bus access
 );

  // Cache parameters
@ -65,7 +63,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
  localparam                     OFFSETLEN = $clog2(LINEBYTELEN);    // Number of bits in offset field
  localparam                     SETLEN = $clog2(NUMLINES);          // Number of set bits
  localparam                     SETTOP = SETLEN+OFFSETLEN;          // Number of set plus offset bits
-  localparam                     TAGLEN = `PA_BITS - SETTOP;         // Number of tag bits
+  localparam                     TAGLEN = PA_BITS - SETTOP;         // Number of tag bits
  localparam                     CACHEWORDSPERLINE = LINELEN/WORDLEN;// Number of words in cache line
  localparam                     LOGCWPL = $clog2(CACHEWORDSPERLINE);// Log2 of ^
  localparam                     FLUSHADRTHRESHOLD = NUMLINES - 1;   // Used to determine when flush is complete
@ -114,7 +112,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
    AdrSelMuxSel, CacheSet);

  // Array of cache ways, along with victim, hit, dirty, and read merging logic
-  cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0](
+  cacheway #(PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0](
    .clk, .reset, .CacheEn, .CacheSet, .PAdr, .LineWriteData, .LineByteMask,
    .SetValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay,
    .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);
@ -152,7 +150,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
    .PAdr(WordOffsetAddr), .ReadDataLine, .ReadDataWord);
  
  // Bus address for fetch, writeback, or flush writeback
-  mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
+  mux3 #(PA_BITS) CacheBusAdrMux(.d0({PAdr[PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
    .d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
    .d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
    .s({SelFlush, SelWriteback}), .y(CacheBusAdr));
--- a/src/cache/cacheLRU.sv
+++ b/src/cache/cacheLRU.sv
@ -113,7 +113,7 @@ module cacheLRU

  // The root node of the LRU tree will always be selected in LRUUpdate. No mux needed.
  assign NextLRU[NUMWAYS-2] = ~WayExpanded[NUMWAYS-2];
-  mux2 #(1) LRUMuxes[NUMWAYS-3:0](CurrLRU[NUMWAYS-3:0], ~WayExpanded[NUMWAYS-3:0], LRUUpdate[NUMWAYS-3:0], NextLRU[NUMWAYS-3:0]);
+  if (NUMWAYS > 2) mux2 #(1) LRUMuxes[NUMWAYS-3:0](CurrLRU[NUMWAYS-3:0], ~WayExpanded[NUMWAYS-3:0], LRUUpdate[NUMWAYS-3:0], NextLRU[NUMWAYS-3:0]);

  // Compute next victim way.
  for(node = NUMWAYS-2; node >= NUMWAYS/2; node--) begin
--- a/src/cache/cacheway.sv
+++ b/src/cache/cacheway.sv
@ -27,16 +27,14 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
+module cacheway #(parameter PA_BITS, XLEN, NUMLINES=512, LINELEN = 256, TAGLEN = 26,
                  OFFSETLEN = 5, INDEXLEN = 9, READ_ONLY_CACHE = 0) (
  input  logic                        clk,
  input  logic                        reset,
  input  logic                        FlushStage,     // Pipeline flush of second stage (prevent writes and bus operations)
  input  logic                        CacheEn,        // Enable the cache memory arrays.  Disable hold read data constant
  input  logic [$clog2(NUMLINES)-1:0] CacheSet,       // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
-  input  logic [`PA_BITS-1:0]         PAdr,           // Physical address 
+  input  logic [PA_BITS-1:0]          PAdr,           // Physical address 
  input  logic [LINELEN-1:0]          LineWriteData,  // Final data written to cache (D$ only)
  input  logic                        SetValid,       // Set the valid bit in the selected way and set
  input  logic                        SetDirty,       // Set the dirty bit in the selected way and set
@ -54,11 +52,11 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
  output logic                        DirtyWay,       // This way is dirty
  output logic [TAGLEN-1:0]           TagWay);        // This way's tag if valid

-  localparam                          WORDSPERLINE = LINELEN/`XLEN;
+  localparam                          WORDSPERLINE = LINELEN/XLEN;
  localparam                          BYTESPERLINE = LINELEN/8;
  localparam                          LOGWPL = $clog2(WORDSPERLINE);
-  localparam                          LOGXLENBYTES = $clog2(`XLEN/8);
-  localparam                          BYTESPERWORD = `XLEN/8;
+  localparam                          LOGXLENBYTES = $clog2(XLEN/8);
+  localparam                          BYTESPERWORD = XLEN/8;

  logic [NUMLINES-1:0]                ValidBits;
  logic [NUMLINES-1:0]                DirtyBits;
@ -113,12 +111,12 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,

  ram1p1rwe #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn),
    .addr(CacheSet), .dout(ReadTag),
-    .din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));
+    .din(PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));

  // AND portion of distributed tag multiplexer
  assign TagWay = SelTag ? ReadTag : '0; // AND part of AOMux
  assign DirtyWay = SelTag & Dirty & ValidWay;
-  assign HitWay = ValidWay & (ReadTag == PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]);
+  assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]);

  /////////////////////////////////////////////////////////////////////////////////////////////
  // Data Array
--- a/src/ebu/ahbcacheinterface.sv
+++ b/src/ebu/ahbcacheinterface.sv
@ -27,9 +27,10 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module ahbcacheinterface #(
+  parameter AHBW,
+  parameter LLEN,
+  parameter PA_BITS,
  parameter BEATSPERLINE,  // Number of AHBW words (beats) in cacheline
  parameter AHBWLOGBWPL,   // Log2 of ^
  parameter LINELEN,       // Number of bits in cacheline
@ -44,14 +45,14 @@ module ahbcacheinterface #(
  output logic [2:0]          HSIZE,                   // AHB transaction width
  output logic [2:0]          HBURST,                  // AHB burst length
  // bus interface buses
-  input  logic [`AHBW-1:0]    HRDATA,                  // AHB read data
-  output logic [`PA_BITS-1:0] HADDR,                   // AHB address
-  output logic [`AHBW-1:0]    HWDATA,                  // AHB write data
-  output logic [`AHBW/8-1:0]  HWSTRB,                  // AHB byte mask
+  input  logic [AHBW-1:0]    HRDATA,                  // AHB read data
+  output logic [PA_BITS-1:0] HADDR,                   // AHB address
+  output logic [AHBW-1:0]    HWDATA,                  // AHB write data
+  output logic [AHBW/8-1:0]  HWSTRB,                  // AHB byte mask
  
  // cache interface
-  input  logic [`PA_BITS-1:0] CacheBusAdr,            // Address of cache line
-  input  logic [`LLEN-1:0]    CacheReadDataWordM,     // One word of cache line during a writeback
+  input  logic [PA_BITS-1:0] CacheBusAdr,            // Address of cache line
+  input  logic [LLEN-1:0]    CacheReadDataWordM,     // One word of cache line during a writeback
  input  logic                CacheableOrFlushCacheM, // Memory operation is cacheable or flushing D$
  input  logic                Cacheable,              // Memory operation is cachable
  input  logic [1:0]          CacheBusRW,             // Cache bus operation, 01: writeback, 10: fetch
@ -61,8 +62,8 @@ module ahbcacheinterface #(
  output logic                SelBusBeat,             // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr

  // uncached interface 
-  input logic [`PA_BITS-1:0]  PAdr,                    // Physical address of uncached memory operation
-  input logic [`LLEN-1:0]     WriteDataM,              // IEU write data for uncached store
+  input logic [PA_BITS-1:0]  PAdr,                    // Physical address of uncached memory operation
+  input logic [LLEN-1:0]     WriteDataM,              // IEU write data for uncached store
  input logic [1:0]           BusRW,                   // Uncached memory operation read/write control: 10: read, 01: write
  input logic [2:0]           Funct3,                  // Size of uncached memory operation

@ -74,11 +75,11 @@ module ahbcacheinterface #(
  

  localparam                  BeatCountThreshold = BEATSPERLINE - 1;  // Largest beat index
-  logic [`PA_BITS-1:0]        LocalHADDR;                             // Address after selecting between cached and uncached operation
+  logic [PA_BITS-1:0]        LocalHADDR;                             // Address after selecting between cached and uncached operation
  logic [AHBWLOGBWPL-1:0]     BeatCountDelayed;                       // Beat within the cache line in the second (Data) cache stage
  logic                       CaptureEn;                              // Enable updating the Fetch buffer with valid data from HRDATA
-  logic [`AHBW/8-1:0]         BusByteMaskM;                           // Byte enables within a word. For cache request all 1s
-  logic [`AHBW-1:0]           PreHWDATA;                              // AHB Address phase write data
+  logic [AHBW/8-1:0]         BusByteMaskM;                           // Byte enables within a word. For cache request all 1s
+  logic [AHBW-1:0]           PreHWDATA;                              // AHB Address phase write data

  genvar                      index;

@ -86,35 +87,35 @@ module ahbcacheinterface #(
  for (index = 0; index < BEATSPERLINE; index++) begin:fetchbuffer
    logic [BEATSPERLINE-1:0] CaptureBeat;
    assign CaptureBeat[index] = CaptureEn & (index == BeatCountDelayed);
-    flopen #(`AHBW) fb(.clk(HCLK), .en(CaptureBeat[index]), .d(HRDATA),
-      .q(FetchBuffer[(index+1)*`AHBW-1:index*`AHBW]));
+    flopen #(AHBW) fb(.clk(HCLK), .en(CaptureBeat[index]), .d(HRDATA),
+      .q(FetchBuffer[(index+1)*AHBW-1:index*AHBW]));
  end

-  mux2 #(`PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR);
-  assign HADDR = ({{`PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(`AHBW/8)) + LocalHADDR;
+  mux2 #(PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR);
+  assign HADDR = ({{PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(AHBW/8)) + LocalHADDR;

-  mux2 #(3) sizemux(.d0(Funct3), .d1(`AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE));
+  mux2 #(3) sizemux(.d0(Funct3), .d1(AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE));

  // When AHBW is less than LLEN need extra muxes to select the subword from cache's read data.
-  logic [`AHBW-1:0]          CacheReadDataWordAHB;
+  logic [AHBW-1:0]          CacheReadDataWordAHB;
  if(LLENPOVERAHBW > 1) begin
-    logic [`AHBW-1:0]          AHBWordSets [(LLENPOVERAHBW)-1:0];
+    logic [AHBW-1:0]          AHBWordSets [(LLENPOVERAHBW)-1:0];
    genvar                     index;
    for (index = 0; index < LLENPOVERAHBW; index++) begin:readdatalinesetsmux
-        assign AHBWordSets[index] = CacheReadDataWordM[(index*`AHBW)+`AHBW-1: (index*`AHBW)];
+        assign AHBWordSets[index] = CacheReadDataWordM[(index*AHBW)+AHBW-1: (index*AHBW)];
    end
    assign CacheReadDataWordAHB = AHBWordSets[BeatCount[$clog2(LLENPOVERAHBW)-1:0]];
-  end else assign CacheReadDataWordAHB = CacheReadDataWordM[`AHBW-1:0];      
+  end else assign CacheReadDataWordAHB = CacheReadDataWordM[AHBW-1:0];      
  
-  mux2 #(`AHBW) HWDATAMux(.d0(CacheReadDataWordAHB), .d1(WriteDataM[`AHBW-1:0]),
+  mux2 #(AHBW) HWDATAMux(.d0(CacheReadDataWordAHB), .d1(WriteDataM[AHBW-1:0]),
    .s(~(CacheableOrFlushCacheM)), .y(PreHWDATA));
-  flopen #(`AHBW) wdreg(HCLK, HREADY, PreHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec
+  flopen #(AHBW) wdreg(HCLK, HREADY, PreHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec

  // *** bummer need a second byte mask for bus as it is AHBW rather than LLEN.
  // probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0.
-  swbytemask #(`AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(`AHBW/8)-1:0]), .ByteMask(BusByteMaskM));
+  swbytemask #(AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(AHBW/8)-1:0]), .ByteMask(BusByteMaskM));
  
-  flopen #(`AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[`AHBW/8-1:0], HWSTRB);
+  flopen #(AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[AHBW/8-1:0], HWSTRB);
  
  buscachefsm #(BeatCountThreshold, AHBWLOGBWPL, READ_ONLY_CACHE) AHBBuscachefsm(
    .HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat,
--- a/src/ebu/ahbinterface.sv
+++ b/src/ebu/ahbinterface.sv
@ -27,9 +27,8 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module ahbinterface #(
+  parameter XLEN,
  parameter LSU = 0                                   // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits
 )( 
  input  logic                          HCLK, HRESETn,
@ -37,30 +36,30 @@ module ahbinterface #(
  input  logic                          HREADY,       // AHB peripheral ready
  output logic [1:0]                    HTRANS,       // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
  output logic                          HWRITE,       // AHB 0: Read operation 1: Write operation 
-  input  logic [`XLEN-1:0]              HRDATA,       // AHB read data
-  output logic [`XLEN-1:0]              HWDATA,       // AHB write data
-  output logic [`XLEN/8-1:0]            HWSTRB,       // AHB byte mask
+  input  logic [XLEN-1:0]              HRDATA,       // AHB read data
+  output logic [XLEN-1:0]              HWDATA,       // AHB write data
+  output logic [XLEN/8-1:0]            HWSTRB,       // AHB byte mask
  
  // lsu/ifu interface
  input  logic                          Stall,        // Core pipeline is stalled
  input  logic                          Flush,        // Pipeline stage flush. Prevents bus transaction from starting
  input  logic [1:0]                    BusRW,        // Memory operation read/write control: 10: read, 01: write
-  input  logic [`XLEN/8-1:0]            ByteMask,     // Bytes enables within a word
-  input  logic [`XLEN-1:0]              WriteData,    // IEU write data for a store
+  input  logic [XLEN/8-1:0]            ByteMask,     // Bytes enables within a word
+  input  logic [XLEN-1:0]              WriteData,    // IEU write data for a store
  output logic                          BusStall,     // Bus is busy with an in flight memory operation
  output logic                          BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
-  output logic [(LSU ? `XLEN : 32)-1:0] FetchBuffer   // Register to hold HRDATA after arriving from the bus
+  output logic [(LSU ? XLEN : 32)-1:0] FetchBuffer   // Register to hold HRDATA after arriving from the bus
 );
  
  logic                                 CaptureEn;
-  localparam                            LEN = (LSU ? `XLEN : 32);   // 32 bits for IFU, XLEN for LSU
+  localparam                            LEN = (LSU ? XLEN : 32);   // 32 bits for IFU, XLEN for LSU
  
  flopen #(LEN) fb(.clk(HCLK), .en(CaptureEn), .d(HRDATA[LEN-1:0]), .q(FetchBuffer));

  if(LSU) begin
    // delay HWDATA by 1 cycle per spec; assumes AHBW = XLEN    
-    flop #(`XLEN)   wdreg(HCLK, WriteData, HWDATA); 
-    flop #(`XLEN/8) HWSTRBReg(HCLK, ByteMask, HWSTRB);
+    flop #(XLEN)   wdreg(HCLK, WriteData, HWDATA); 
+    flop #(XLEN/8) HWSTRBReg(HCLK, ByteMask, HWSTRB);
  end else begin
    assign HWDATA = '0;
    assign HWSTRB = '0;
--- a/src/ebu/buscachefsm.sv
+++ b/src/ebu/buscachefsm.sv
@ -27,7 +27,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
 `define BURST_EN 1         // Enables burst mode.  Disable to show the lost performance.

 // HCLK and clk must be the same clock!
--- a/src/ebu/busfsm.sv
+++ b/src/ebu/busfsm.sv
@ -27,8 +27,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 // HCLK and clk must be the same clock!
 module busfsm (
  input  logic       HCLK,
--- a/src/ebu/controllerinput.sv
+++ b/src/ebu/controllerinput.sv
@ -31,9 +31,8 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module controllerinput #(
+  parameter PA_BITS, 
  parameter SAVE_ENABLED = 1           // 1: Save manager inputs if Save = 1, 0: Don't save inputs
 )(
  input  logic                HCLK, 
@ -47,14 +46,14 @@ module controllerinput #(
  input  logic                HWRITEIn,  // Manager input. AHB 0: Read operation 1: Write operation 
  input  logic [2:0]          HSIZEIn,   // Manager input. AHB transaction width
  input  logic [2:0]          HBURSTIn,  // Manager input. AHB burst length
-  input  logic [`PA_BITS-1:0] HADDRIn,   // Manager input. AHB address
+  input  logic [PA_BITS-1:0] HADDRIn,   // Manager input. AHB address
  output logic                HREADYOut, // Indicate to manager the peripheral is not busy and another manager does not have priority
  // controller output
  output logic [1:0]          HTRANSOut, // Arbitrated manager transaction. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
  output logic                HWRITEOut, // Arbitrated manager transaction. AHB 0: Read operation 1: Write operation 
  output logic [2:0]          HSIZEOut,  // Arbitrated manager transaction. AHB transaction width
  output logic [2:0]          HBURSTOut, // Arbitrated manager transaction. AHB burst length 
-  output logic [`PA_BITS-1:0] HADDROut,  // Arbitrated manager transaction. AHB address
+  output logic [PA_BITS-1:0] HADDROut,  // Arbitrated manager transaction. AHB address
  input  logic                HREADYIn   // Peripheral ready
 );

@ -62,13 +61,13 @@ module controllerinput #(
  logic [2:0]                 HSIZESave;
  logic [2:0]                 HBURSTSave;
  logic [1:0]                 HTRANSSave;
-  logic [`PA_BITS-1:0]        HADDRSave;
+  logic [PA_BITS-1:0]        HADDRSave;

  if (SAVE_ENABLED) begin
-    flopenr #(1+3+3+2+`PA_BITS) SaveReg(HCLK, ~HRESETn, Save,
+    flopenr #(1+3+3+2+PA_BITS) SaveReg(HCLK, ~HRESETn, Save,
      {HWRITEIn, HSIZEIn, HBURSTIn, HTRANSIn, HADDRIn}, 
      {HWRITESave, HSIZESave, HBURSTSave, HTRANSSave, HADDRSave});
-    mux2 #(1+3+3+2+`PA_BITS) RestorMux({HWRITEIn, HSIZEIn, HBURSTIn, HTRANSIn, HADDRIn}, 
+    mux2 #(1+3+3+2+PA_BITS) RestorMux({HWRITEIn, HSIZEIn, HBURSTIn, HTRANSIn, HADDRIn}, 
      {HWRITESave, HSIZESave, HBURSTSave, HTRANSSave, HADDRSave},
      Restore,
      {HWRITEOut, HSIZEOut, HBURSTOut, HTRANSOut, HADDROut});
--- a/src/ebu/ebu.sv
+++ b/src/ebu/ebu.sv
@ -31,33 +31,31 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module ebu (
+module ebu #(parameter XLEN, PA_BITS, AHBW)(
  input  logic                clk, reset,
  // Signals from IFU
  input  logic [1:0]          IFUHTRANS, // IFU AHB transaction request
  input  logic [2:0]          IFUHSIZE,  // IFU AHB transaction size
  input  logic [2:0]          IFUHBURST, // IFU AHB burst length
-  input  logic [`PA_BITS-1:0] IFUHADDR,  // IFU AHB address
+  input  logic [PA_BITS-1:0] IFUHADDR,  // IFU AHB address
  output logic                IFUHREADY, // AHB peripheral ready gated by possible non-grant
  // Signals from LSU
  input  logic [1:0]          LSUHTRANS, // LSU AHB transaction request
  input  logic                LSUHWRITE, // LSU AHB transaction direction. 1: write, 0: read
  input  logic [2:0]          LSUHSIZE,  // LSU AHB size
  input  logic [2:0]          LSUHBURST, // LSU AHB burst length
-  input  logic [`PA_BITS-1:0] LSUHADDR,  // LSU AHB address
-  input  logic [`XLEN-1:0]    LSUHWDATA, // initially support AHBW = XLEN
-  input  logic [`XLEN/8-1:0]  LSUHWSTRB, // AHB byte mask
+  input  logic [PA_BITS-1:0] LSUHADDR,  // LSU AHB address
+  input  logic [XLEN-1:0]    LSUHWDATA, // initially support AHBW = XLEN
+  input  logic [XLEN/8-1:0]  LSUHWSTRB, // AHB byte mask
  output logic                LSUHREADY, // AHB peripheral. Never gated as LSU always has priority

  // AHB-Lite external signals
  output logic                HCLK, HRESETn, 
  input  logic                HREADY,    // AHB peripheral ready
  input  logic                HRESP,     // AHB peripheral response. 0: OK 1: Error
-  output logic [`PA_BITS-1:0] HADDR,     // AHB address to peripheral after arbitration
-  output logic [`AHBW-1:0]    HWDATA,    // AHB Write data after arbitration
-  output logic [`XLEN/8-1:0]  HWSTRB,    // AHB byte write enables after arbitration
+  output logic [PA_BITS-1:0] HADDR,     // AHB address to peripheral after arbitration
+  output logic [AHBW-1:0]    HWDATA,    // AHB Write data after arbitration
+  output logic [XLEN/8-1:0]  HWSTRB,    // AHB byte write enables after arbitration
  output logic                HWRITE,    // AHB transaction direction after arbitration
  output logic [2:0]          HSIZE,     // AHB transaction size after arbitration
  output logic [2:0]          HBURST,    // AHB burst length after arbitration
@ -73,13 +71,13 @@ module ebu (
  logic                       IFUDisable;
  logic                       IFUSelect;

-  logic [`PA_BITS-1:0]        IFUHADDROut;
+  logic [PA_BITS-1:0]        IFUHADDROut;
  logic [1:0]                 IFUHTRANSOut;
  logic [2:0]                 IFUHBURSTOut;
  logic [2:0]                 IFUHSIZEOut;
  logic                       IFUHWRITEOut;
  
-  logic [`PA_BITS-1:0]        LSUHADDROut;
+  logic [PA_BITS-1:0]        LSUHADDROut;
  logic [1:0]                 LSUHTRANSOut;
  logic [2:0]                 LSUHBURSTOut;
  logic [2:0]                 LSUHSIZEOut;
@ -98,14 +96,14 @@ module ebu (
  // input stages and muxing for IFU and LSU
  ////////////////////////////////////////////////////////////////////////////////////////////////////

-  controllerinput IFUInput(.HCLK, .HRESETn, .Save(IFUSave), .Restore(IFURestore), .Disable(IFUDisable),
+  controllerinput #(PA_BITS) IFUInput(.HCLK, .HRESETn, .Save(IFUSave), .Restore(IFURestore), .Disable(IFUDisable),
    .Request(IFUReq),
    .HWRITEIn(1'b0), .HSIZEIn(IFUHSIZE), .HBURSTIn(IFUHBURST), .HTRANSIn(IFUHTRANS), .HADDRIn(IFUHADDR),
    .HWRITEOut(IFUHWRITEOut), .HSIZEOut(IFUHSIZEOut), .HBURSTOut(IFUHBURSTOut), .HREADYOut(IFUHREADY),
    .HTRANSOut(IFUHTRANSOut), .HADDROut(IFUHADDROut), .HREADYIn(HREADY));

  // LSU always has priority so there should never be a need to save and restore the address phase inputs.
-  controllerinput #(0) LSUInput(.HCLK, .HRESETn, .Save(1'b0), .Restore(1'b0), .Disable(LSUDisable),
+  controllerinput #(PA_BITS, 0) LSUInput(.HCLK, .HRESETn, .Save(1'b0), .Restore(1'b0), .Disable(LSUDisable),
    .Request(LSUReq),
    .HWRITEIn(LSUHWRITE), .HSIZEIn(LSUHSIZE), .HBURSTIn(LSUHBURST), .HTRANSIn(LSUHTRANS), .HADDRIn(LSUHADDR), .HREADYOut(LSUHREADY),
    .HWRITEOut(LSUHWRITEOut), .HSIZEOut(LSUHSIZEOut), .HBURSTOut(LSUHBURSTOut),
--- a/src/ebu/ebufsmarb.sv
+++ b/src/ebu/ebufsmarb.sv
@ -28,8 +28,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module ebufsmarb (
  input  logic       HCLK,
  input  logic       HRESETn,
--- a/src/fpu/fclassify.sv
+++ b/src/fpu/fclassify.sv
@ -25,16 +25,15 @@
 // either express or implied. See the License for the specific language governing permissions 
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
-`include "wally-config.vh"

-module fclassify (
+module fclassify import cvw::*;  #(parameter cvw_t P) (
  input  logic                Xs,         // sign bit
  input  logic                XNaN,       // is NaN
  input  logic                XSNaN,      // is signaling NaN
  input  logic                XSubnorm,   // is Subnormal
  input  logic                XZero,      // is zero
  input  logic                XInf,       // is infinity
-  output logic [`XLEN-1:0]    ClassRes    // classify result
+  output logic [P.XLEN-1:0]    ClassRes    // classify result
 );

  logic PInf, PZero, PNorm, PSubnorm;     // is the input a positive infinity/zero/normal/subnormal
@ -63,6 +62,6 @@ module fclassify (
  //  bit 7 - +Inf
  //  bit 8 - signaling NaN
  //  bit 9 - quiet NaN
-  assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf};
+  assign ClassRes = {{P.XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf};

 endmodule
--- a/src/fpu/fcmp.sv
+++ b/src/fpu/fcmp.sv
@ -27,8 +27,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 // OpCtrl values
 //    110   min
 //    101   max
@ -36,23 +34,23 @@
 //    001   less than
 //    011   less than or equal

-module fcmp (   
-  input  logic [`FMTBITS-1:0]   Fmt,           // format of fp number
+module fcmp import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.FMTBITS-1:0]   Fmt,           // format of fp number
  input  logic [2:0]            OpCtrl,        // see above table
  input  logic                  Xs, Ys,        // input signs
-  input  logic [`NE-1:0]        Xe, Ye,        // input exponents
-  input  logic [`NF:0]          Xm, Ym,        // input mantissa
+  input  logic [P.NE-1:0]        Xe, Ye,        // input exponents
+  input  logic [P.NF:0]          Xm, Ym,        // input mantissa
  input  logic                  XZero, YZero,  // is zero
  input  logic                  XNaN, YNaN,    // is NaN
  input  logic                  XSNaN, YSNaN,  // is signaling NaN
-  input  logic [`FLEN-1:0]      X, Y,          // original inputs (before unpacker)
+  input  logic [P.FLEN-1:0]      X, Y,          // original inputs (before unpacker)
  output logic                  CmpNV,         // invalid flag
-  output logic [`FLEN-1:0]      CmpFpRes,      // compare floating-point result
-  output logic [`XLEN-1:0]      CmpIntRes      // compare integer result
+  output logic [P.FLEN-1:0]      CmpFpRes,      // compare floating-point result
+  output logic [P.XLEN-1:0]      CmpIntRes      // compare integer result
 );

  logic LTabs, LT, EQ;         // is X < or > or = Y
-  logic [`FLEN-1:0] NaNRes;    // NaN result
+  logic [P.FLEN-1:0] NaNRes;    // NaN result
  logic BothZero;              // are both inputs zero
  logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN
  
@ -85,44 +83,44 @@ module fcmp (
  // for RISC-V, return the canonical NaN

  // select the NaN result
-  if (`FPSIZES == 1)
-    if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-    else         assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+  if (P.FPSIZES == 1)
+    if(P.IEEE754) assign NaNRes = {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+    else         assign NaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};

-  else if (`FPSIZES == 2) 
-    if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
-    else         assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+  else if (P.FPSIZES == 2) 
+    if(P.IEEE754) assign NaNRes = Fmt ? {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, Xs, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+    else         assign NaNRes = Fmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
  
-  else if (`FPSIZES == 3)
+  else if (P.FPSIZES == 3)
    always_comb
          case (Fmt)
-              `FMT:  
-                if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-                else         NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-              `FMT1:
-                if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
-                else         NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-              `FMT2:
-                if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
-                else         NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
-              default:        NaNRes = {`FLEN{1'bx}};
+              P.FMT:  
+                if(P.IEEE754) NaNRes = {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+                else         NaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+              P.FMT1:
+                if(P.IEEE754) NaNRes = {{P.FLEN-P.LEN1{1'b1}}, Xs, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+                else         NaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+              P.FMT2:
+                if(P.IEEE754) NaNRes = {{P.FLEN-P.LEN2{1'b1}}, Xs, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
+                else         NaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
+              default:        NaNRes = {P.FLEN{1'bx}};
          endcase

-  else if (`FPSIZES == 4)
+  else if (P.FPSIZES == 4)
    always_comb
          case (Fmt)
              2'h3:  
-                if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-                else         NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                if(P.IEEE754) NaNRes = {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+                else         NaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
              2'h1:  
-                if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
-                else         NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                if(P.IEEE754) NaNRes = {{P.FLEN-P.D_LEN{1'b1}}, Xs, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
+                else         NaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
              2'h0: 
-                if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
-                else         NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                if(P.IEEE754) NaNRes = {{P.FLEN-P.S_LEN{1'b1}}, Xs, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
+                else         NaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
              2'h2:
-                if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
-                else         NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                if(P.IEEE754) NaNRes = {{P.FLEN-P.H_LEN{1'b1}}, Xs, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
+                else         NaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
          endcase


@ -155,6 +153,6 @@ module fcmp (
  //    - -0 = 0
  //    - inf = inf and -inf = -inf
  //    - return 0 if comparison with NaN (unordered)
-  assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
+  assign CmpIntRes = {(P.XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
  
 endmodule
--- a/src/fpu/fctrl.sv
+++ b/src/fpu/fctrl.sv
@ -25,9 +25,8 @@
 // either express or implied. See the License for the specific language governing permissions 
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
-`include "wally-config.vh"

-module fctrl (
+module fctrl import cvw::*;  #(parameter cvw_t P) (
  input  logic                clk,
  input  logic                reset,
  // input control signals
@ -49,7 +48,7 @@ module fctrl (
  // opperation mux selections                                    
  output logic                FCvtIntE, FCvtIntW,                 // convert to integer opperation
  output logic [2:0]          FrmM,                               // FP rounding mode
-  output logic [`FMTBITS-1:0] FmtE, FmtM,                         // FP format
+  output logic [P.FMTBITS-1:0] FmtE, FmtM,                         // FP format
  output logic [2:0]          OpCtrlE, OpCtrlM,                   // Select which opperation to do in each component
  output logic                FpLoadStoreM,                       // FP load or store instruction
  output logic [1:0]          PostProcSelE, PostProcSelM,         // select result in the post processing unit
@ -74,7 +73,7 @@ module fctrl (
  logic [1:0]                 PostProcSelD;       // select result in the post processing unit
  logic [1:0]                 FResSelD;           // Select one of the results that finish in the memory stage
  logic [2:0]                 FrmD, FrmE;         // FP rounding mode
-  logic [`FMTBITS-1:0]        FmtD;               // FP format
+  logic [P.FMTBITS-1:0]        FmtD;               // FP format
  logic [1:0]                 Fmt, Fmt2;          // format - before possible reduction
  logic                       SupportedFmt;       // is the format supported
  logic                       SupportedFmt2;      // is the source format supported for fp -> fp
@ -84,10 +83,10 @@ module fctrl (
  assign Fmt = Funct7D[1:0];
  assign Fmt2 = Rs2D[1:0]; // source format for fcvt fp->fp

-  assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & `D_SUPPORTED) |
-                         (Fmt == 2'b10 & `ZFH_SUPPORTED) | (Fmt == 2'b11 & `Q_SUPPORTED));
-  assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & `D_SUPPORTED) |
-                         (Fmt2 == 2'b10 & `ZFH_SUPPORTED) | (Fmt2 == 2'b11 & `Q_SUPPORTED));
+  assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & P.D_SUPPORTED) |
+                         (Fmt == 2'b10 & P.ZFH_SUPPORTED) | (Fmt == 2'b11 & P.Q_SUPPORTED));
+  assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & P.D_SUPPORTED) |
+                         (Fmt2 == 2'b10 & P.ZFH_SUPPORTED) | (Fmt2 == 2'b11 & P.Q_SUPPORTED));

  // decode the instruction                       
  // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt
@ -102,15 +101,15 @@ module fctrl (
      case(OpD)
        7'b0000111: case(Funct3D)
                      3'b010:                      ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flw
-                      3'b011:  if (`D_SUPPORTED)   ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld
-                      3'b100:  if (`Q_SUPPORTED)   ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq
-                      3'b001:  if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh
+                      3'b011:  if (P.D_SUPPORTED)   ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld
+                      3'b100:  if (P.Q_SUPPORTED)   ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq
+                      3'b001:  if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh
                    endcase
        7'b0100111: case(Funct3D)
                      3'b010:                      ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsw
-                      3'b011:  if (`D_SUPPORTED)   ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd
-                      3'b100:  if (`Q_SUPPORTED)   ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq
-                      3'b001:  if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh
+                      3'b011:  if (P.D_SUPPORTED)   ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd
+                      3'b100:  if (P.Q_SUPPORTED)   ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq
+                      3'b001:  if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh
                    endcase
        7'b1000011:   ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd
        7'b1000111:   ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub
@ -227,14 +226,14 @@ module fctrl (
  //    10 - half
  //    11 - quad
  
-    if (`FPSIZES == 1)
+    if (P.FPSIZES == 1)
      assign FmtD = 0;
-    else if (`FPSIZES == 2)begin
+    else if (P.FPSIZES == 2)begin
      logic [1:0] FmtTmp;
      assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
-      assign FmtD = (`FMT == FmtTmp);
+      assign FmtD = (P.FMT == FmtTmp);
    end
-    else if (`FPSIZES == 3|`FPSIZES == 4)
+    else if (P.FPSIZES == 3|P.FPSIZES == 4)
      assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];

  // Enables indicate that a source register is used and may need stalls. Also indicate special cases for infinity or NaN.
@ -313,7 +312,7 @@ module fctrl (
  assign Adr3D = InstrD[31:27];
 
  // D/E pipleine register
-  flopenrc #(13+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, 
+  flopenrc #(13+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, 
              {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD},
              {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE});
  flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E});
@ -321,11 +320,11 @@ module fctrl (
  flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE});

  // Integer division on FPU divider
-  if (`M_SUPPORTED & `IDIV_ON_FPU) assign IDivStartE = IntDivE;
+  if (P.M_SUPPORTED & P.IDIV_ON_FPU) assign IDivStartE = IntDivE;
  else                             assign IDivStartE = 0; 

  // E/M pipleine register
-  flopenrc #(13+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
+  flopenrc #(13+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
              {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE},
              {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM});
  
--- a/src/fpu/fcvt.sv
+++ b/src/fpu/fcvt.sv
@ -27,23 +27,21 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fcvt (
+module fcvt import cvw::*;  #(parameter cvw_t P) (
  input  logic                    Xs,          // input's sign
-  input  logic [`NE-1:0]          Xe,          // input's exponent
-  input  logic [`NF:0]            Xm,          // input's fraction
-  input  logic [`XLEN-1:0]        Int,         // integer input - from IEU
+  input  logic [P.NE-1:0]          Xe,          // input's exponent
+  input  logic [P.NF:0]            Xm,          // input's fraction
+  input  logic [P.XLEN-1:0]        Int,         // integer input - from IEU
  input  logic [2:0]              OpCtrl,      // choose which opperation (look below for values)
  input  logic                    ToInt,       // is fp->int (since it's writting to the integer register)
  input  logic                    XZero,       // is the input zero
-  input  logic [`FMTBITS-1:0]     Fmt,         // the input's precision (11=quad 01=double 00=single 10=half)
-  output logic [`NE:0]            Ce,          // the calculated expoent
-  output logic [`LOGCVTLEN-1:0]   ShiftAmt,    // how much to shift by
+  input  logic [P.FMTBITS-1:0]     Fmt,         // the input's precision (11=quad 01=double 00=single 10=half)
+  output logic [P.NE:0]            Ce,          // the calculated exponent
+  output logic [P.LOGCVTLEN-1:0]   ShiftAmt,    // how much to shift by
  output logic                    ResSubnormUf,// does the result underflow or is subnormal
  output logic                    Cs,          // the result's sign
  output logic                    IntZero,     // is the integer zero?
-  output logic [`CVTLEN-1:0]      LzcIn        // input to the Leading Zero Counter (priority encoder)
+  output logic [P.CVTLEN-1:0]      LzcIn        // input to the Leading Zero Counter (priority encoder)
  );

  // OpCtrls:
@ -56,16 +54,16 @@ module fcvt (
  //                            bit 2              bit 1                   bit 0
  //      for example: signed long -> single floating point has the OpCode 101

-  logic [`FMTBITS-1:0]    OutFmt;     // format of the output
-  logic [`XLEN-1:0]       PosInt;     // the positive integer input
-  logic [`XLEN-1:0]       TrimInt;    // integer trimmed to the correct size
-  logic [`NE-2:0]         NewBias;    // the bias of the final result
-  logic [`NE-1:0]         OldExp;     // the old exponent
+  logic [P.FMTBITS-1:0]    OutFmt;     // format of the output
+  logic [P.XLEN-1:0]       PosInt;     // the positive integer input
+  logic [P.XLEN-1:0]       TrimInt;    // integer trimmed to the correct size
+  logic [P.NE-2:0]         NewBias;    // the bias of the final result
+  logic [P.NE-1:0]         OldExp;     // the old exponent
  logic                   Signed;     // is the opperation with a signed integer?
  logic                   Int64;      // is the integer 64 bits?
  logic                   IntToFp;    // is the opperation an int->fp conversion?
-  logic [`CVTLEN:0]       LzcInFull;  // input to the Leading Zero Counter (priority encoder)
-  logic [`LOGCVTLEN-1:0]  LeadingZeros; // output from the LZC
+  logic [P.CVTLEN:0]       LzcInFull;  // input to the Leading Zero Counter (priority encoder)
+  logic [P.LOGCVTLEN-1:0]  LeadingZeros; // output from the LZC


  // seperate OpCtrl for code readability
@ -76,9 +74,9 @@ module fcvt (
  // choose the ouptut format depending on the opperation
  //      - fp -> fp: OpCtrl contains the percision of the output
  //      - int -> fp: Fmt contains the percision of the output
-  if (`FPSIZES == 2) 
-      assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT); 
-  else if (`FPSIZES == 3 | `FPSIZES == 4) 
+  if (P.FPSIZES == 2) 
+      assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == P.FMT); 
+  else if (P.FPSIZES == 3 | P.FPSIZES == 4) 
      assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0]; 


@ -89,7 +87,7 @@ module fcvt (
  // 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)

  assign PosInt = Cs ? -Int : Int;
-  assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
+  assign TrimInt = {{P.XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
  assign IntZero = ~|TrimInt;

  ///////////////////////////////////////////////////////////////////////////
@ -99,13 +97,13 @@ module fcvt (
  // choose the input to the leading zero counter i.e. priority encoder
  //             int -> fp : | positive integer | 00000... (if needed) | 
  //             fp  -> fp : | fraction         | 00000... (if needed) | 
-  assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} :
-                            {Xm, {`CVTLEN-`NF{1'b0}}};
+  assign LzcInFull = IntToFp ? {TrimInt, {P.CVTLEN-P.XLEN+1{1'b0}}} :
+                            {Xm, {P.CVTLEN-P.NF{1'b0}}};

  // used as shifter input in postprocessor
-  assign LzcIn = LzcInFull[`CVTLEN-1:0];
+  assign LzcIn = LzcInFull[P.CVTLEN-1:0];
  
-  lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
+  lzc #(P.CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
  
  ///////////////////////////////////////////////////////////////////////////
  // exp calculations
@ -114,42 +112,42 @@ module fcvt (
  // Select the bias of the output
  //      fp -> int : select 1
  //      ??? -> fp : pick the new bias depending on the output format 
-  if (`FPSIZES == 1) begin
-      assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS); 
+  if (P.FPSIZES == 1) begin
+      assign NewBias = ToInt ? (P.NE-1)'(1) : (P.NE-1)'(P.BIAS); 

-  end else if (`FPSIZES == 2) begin
-      logic [`NE-2:0] NewBiasToFp;
-      assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
-      assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; 
+  end else if (P.FPSIZES == 2) begin
+      logic [P.NE-2:0] NewBiasToFp;
+      assign NewBiasToFp = OutFmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1); 
+      assign NewBias = ToInt ? (P.NE-1)'(1) : NewBiasToFp; 

-  end else if (`FPSIZES == 3) begin
-      logic [`NE-2:0] NewBiasToFp;
+  end else if (P.FPSIZES == 3) begin
+      logic [P.NE-2:0] NewBiasToFp;
      always_comb
          case (OutFmt)
-              `FMT: NewBiasToFp =  (`NE-1)'(`BIAS);
-              `FMT1: NewBiasToFp = (`NE-1)'(`BIAS1);
-              `FMT2: NewBiasToFp = (`NE-1)'(`BIAS2);
-              default: NewBiasToFp = {`NE-1{1'bx}};
+              P.FMT: NewBiasToFp =  (P.NE-1)'(P.BIAS);
+              P.FMT1: NewBiasToFp = (P.NE-1)'(P.BIAS1);
+              P.FMT2: NewBiasToFp = (P.NE-1)'(P.BIAS2);
+              default: NewBiasToFp = {P.NE-1{1'bx}};
          endcase
-      assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; 
+      assign NewBias = ToInt ? (P.NE-1)'(1) : NewBiasToFp; 

-  end else if (`FPSIZES == 4) begin        
-      logic [`NE-2:0] NewBiasToFp;
+  end else if (P.FPSIZES == 4) begin        
+      logic [P.NE-2:0] NewBiasToFp;
      always_comb
          case (OutFmt)
-              2'h3: NewBiasToFp =  (`NE-1)'(`Q_BIAS);
-              2'h1: NewBiasToFp =  (`NE-1)'(`D_BIAS);
-              2'h0: NewBiasToFp =  (`NE-1)'(`S_BIAS);
-              2'h2: NewBiasToFp =  (`NE-1)'(`H_BIAS);
+              2'h3: NewBiasToFp =  (P.NE-1)'(P.Q_BIAS);
+              2'h1: NewBiasToFp =  (P.NE-1)'(P.D_BIAS);
+              2'h0: NewBiasToFp =  (P.NE-1)'(P.S_BIAS);
+              2'h2: NewBiasToFp =  (P.NE-1)'(P.H_BIAS);
          endcase
-      assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; 
+      assign NewBias = ToInt ? (P.NE-1)'(1) : NewBiasToFp; 
  end


  // select the old exponent
  //      int -> fp : largest bias + XLEN-1
  //      fp -> ??? : XExp
-  assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe;
+  assign OldExp = IntToFp ? (P.NE)'(P.BIAS)+(P.NE)'(P.XLEN-1) : Xe;
  
  // calculate CalcExp
  //      fp -> fp : 
@ -159,13 +157,13 @@ module fcvt (
  //              - correct the expoent when there is a normalization shift ( + LeadingZeros+1) 
  //              - the plus 1 is built into the leading zeros by counting the leading zeroes in the mantissa rather than the fraction
  //      fp -> int : XExp - Largest Bias + 1 - (LeadingZeros+1)
-  //          |  `XLEN  zeros |     Mantissa      | 0's if nessisary | << CalcExp
+  //          |  P.XLEN  zeros |     Mantissa      | 0's if necessary | << CalcExp
  //          process:
  //              - start
-  //                  |  `XLEN  zeros     |     Mantissa      | 0's if nessisary |
+  //                  |  P.XLEN  zeros     |     Mantissa      | 0's if necessary |
  //
  //              - shift left 1 (1)
-  //                  | `XLEN-1 zeros |bit|     frac      | 0's if nessisary |
+  //                  | P.XLEN-1 zeros |bit|     frac      | 0's if necessary |
  //                                      . <- binary point
  //
  //              - shift left till unbiased exponent is 0 (XExp - Largest Bias)
@ -185,13 +183,13 @@ module fcvt (
  //                  - newBias to make the biased exponent
  //
  //          oldexp         - biasold         - LeadingZeros                               + newbias
-  assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros)} + {2'b0, NewBias};
+  assign Ce = {1'b0, OldExp} - (P.NE+1)'(P.BIAS) - {{P.NE-P.LOGCVTLEN+1{1'b0}}, (LeadingZeros)} + {2'b0, NewBias};


  // find if the result is dnormal or underflows
  //      - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
  //      - can't underflow an integer to Fp conversion
-  assign ResSubnormUf = (~|Ce | Ce[`NE])&~XZero&~IntToFp;
+  assign ResSubnormUf = (~|Ce | Ce[P.NE])&~XZero&~IntToFp;


  ///////////////////////////////////////////////////////////////////////////
@ -211,8 +209,8 @@ module fcvt (
  //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
  //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
  always_comb
-      if(ToInt)                       ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}};
-      else if (ResSubnormUf)  ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0];
+      if(ToInt)                       ShiftAmt = Ce[P.LOGCVTLEN-1:0]&{P.LOGCVTLEN{~Ce[P.NE]}};
+      else if (ResSubnormUf)  ShiftAmt = (P.LOGCVTLEN)'(P.NF-1)+Ce[P.LOGCVTLEN-1:0];
      else                            ShiftAmt = LeadingZeros;

      
@ -227,7 +225,7 @@ module fcvt (
  //      - otherwise: the floating point input's sign
  always_comb
      if(IntToFp)
-          if(Int64)   Cs = Int[`XLEN-1]&Signed;
+          if(Int64)   Cs = Int[P.XLEN-1]&Signed;
          else        Cs = Int[31]&Signed;
      else            Cs = Xs;

--- a/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/src/fpu/fdivsqrt/fdivsqrt.sv
@ -26,15 +26,13 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fdivsqrt(
+module fdivsqrt import cvw::*;  #(parameter cvw_t P) (
  input  logic                clk, 
  input  logic                reset, 
-  input  logic [`FMTBITS-1:0] FmtE,
+  input  logic [P.FMTBITS-1:0] FmtE,
  input  logic                XsE,
-  input  logic [`NF:0]        XmE, YmE,
-  input  logic [`NE-1:0]      XeE, YeE,
+  input  logic [P.NF:0]        XmE, YmE,
+  input  logic [P.NE-1:0]      XeE, YeE,
  input  logic                XInfE, YInfE, 
  input  logic                XZeroE, YZeroE, 
  input  logic                XNaNE, YNaNE, 
@ -42,39 +40,39 @@ module fdivsqrt(
  input  logic                StallM,
  input  logic                FlushE,
  input  logic                SqrtE, SqrtM,
-  input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
  input  logic [2:0]          Funct3E, Funct3M,
  input  logic                IntDivE, W64E,
  output logic                DivStickyM,
  output logic                FDivBusyE, IFDivStartE, FDivDoneE,
-  output logic [`NE+1:0]      QeM,
-  output logic [`DIVb:0]      QmM,
-  output logic [`XLEN-1:0]    FIntDivResultM
+  output logic [P.NE+1:0]      QeM,
+  output logic [P.DIVb:0]      QmM,
+  output logic [P.XLEN-1:0]    FIntDivResultM
 );

  // Floating-point division and square root module, with optional integer division and remainder
  // Computes X/Y, sqrt(X), A/B, or A%B

-  logic [`DIVb+3:0]           WS, WC;                       // Partial remainder components
-  logic [`DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
-  logic [`DIVb+3:0]           D;                            // Iterator Divisor
-  logic [`DIVb:0]             FirstU, FirstUM;              // Intermediate result values
-  logic [`DIVb+1:0]           FirstC;                       // Step tracker
+  logic [P.DIVb+3:0]           WS, WC;                       // Partial remainder components
+  logic [P.DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
+  logic [P.DIVb+3:0]           D;                            // Iterator Divisor
+  logic [P.DIVb:0]             FirstU, FirstUM;              // Intermediate result values
+  logic [P.DIVb+1:0]           FirstC;                       // Step tracker
  logic                       Firstun;                      // Quotient selection
  logic                       WZeroE;                       // Early termination flag
-  logic [`DURLEN-1:0]         CyclesE;                      // FSM cycles
+  logic [P.DURLEN-1:0]         CyclesE;                      // FSM cycles
  logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
  logic                       DivStartE;                    // Enable signal for flops during stall
                                                            
  // Integer div/rem signals                                
  logic                       BZeroM;                       // Denominator is zero
  logic                       IntDivM;                      // Integer operation
-  logic [`DIVBLEN:0]          nM, mM;                       // Shift amounts
+  logic [P.DIVBLEN:0]          nM, mM;                       // Shift amounts
  logic                       NegQuotM, ALTBM, AsM, W64M;   // Special handling for postprocessor
-  logic [`XLEN-1:0]           AM;                           // Original Numerator for postprocessor
+  logic [P.XLEN-1:0]           AM;                           // Original Numerator for postprocessor
  logic                       ISpecialCaseE;                // Integer div/remainder special cases

-  fdivsqrtpreproc fdivsqrtpreproc(                          // Preprocessor
+  fdivsqrtpreproc #(P) fdivsqrtpreproc(                          // Preprocessor
    .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
    .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
    // Int-specific 
@ -82,18 +80,18 @@ module fdivsqrt(
    .BZeroM, .nM, .mM, .AM, 
    .IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);

-  fdivsqrtfsm fdivsqrtfsm(                                  // FSM
+  fdivsqrtfsm #(P) fdivsqrtfsm(                                  // FSM
    .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, 
    .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, 
    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
    // Int-specific 
    .IDivStartE, .ISpecialCaseE, .IntDivE);

-  fdivsqrtiter fdivsqrtiter(                                // CSA Iterator
+  fdivsqrtiter #(P) fdivsqrtiter(                                // CSA Iterator
    .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, 
    .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));

-  fdivsqrtpostproc fdivsqrtpostproc(                        // Postprocessor
+  fdivsqrtpostproc #(P) fdivsqrtpostproc(                        // Postprocessor
    .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, 
    .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, 
    .QmM, .WZeroE, .DivStickyM, 
--- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv
@ -26,51 +26,49 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fdivsqrtcycles(
-  input  logic [`FMTBITS-1:0] FmtE,
+module fdivsqrtcycles import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.FMTBITS-1:0] FmtE,
  input  logic                SqrtE,
  input  logic                IntDivE,
-  input  logic [`DIVBLEN:0]   nE,
-  output logic [`DURLEN-1:0]  CyclesE
+  input  logic [P.DIVBLEN:0]   nE,
+  output logic [P.DURLEN-1:0]  CyclesE
 );
-  logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits
-  // DIVN = `NF+3
+  logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits
+  // DIVN = P.NF+3
  // NS = NF + 1
  // N = NS or NS+2 for div/sqrt.

  /* verilator lint_off WIDTH */
-  if (`FPSIZES == 1)
-    assign Nf = `NF;
-  else if (`FPSIZES == 2)
+  if (P.FPSIZES == 1)
+    assign Nf = P.NF;
+  else if (P.FPSIZES == 2)
    always_comb
      case (FmtE)
-        1'b0: Nf = `NF1;
-        1'b1: Nf = `NF;
+        1'b0: Nf = P.NF1;
+        1'b1: Nf = P.NF;
      endcase
-  else if (`FPSIZES == 3)
+  else if (P.FPSIZES == 3)
    always_comb
      case (FmtE)
-        `FMT:  Nf = `NF;
-        `FMT1: Nf = `NF1;
-        `FMT2: Nf = `NF2; 
+        P.FMT:  Nf = P.NF;
+        P.FMT1: Nf = P.NF1;
+        P.FMT2: Nf = P.NF2; 
      endcase
-  else if (`FPSIZES == 4)  
+  else if (P.FPSIZES == 4)  
    always_comb
      case(FmtE)
-        `S_FMT: Nf = `S_NF;
-        `D_FMT: Nf = `D_NF;
-        `H_FMT: Nf = `H_NF;
-        `Q_FMT: Nf = `Q_NF;
+        P.S_FMT: Nf = P.S_NF;
+        P.D_FMT: Nf = P.D_NF;
+        P.H_FMT: Nf = P.H_NF;
+        P.Q_FMT: Nf = P.Q_NF;
      endcase 

  always_comb begin 
    if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
-    else       fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
-    if (`IDIV_ON_FPU) CyclesE =  IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
-    else              CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
+    else       fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
+    if (P.IDIV_ON_FPU) CyclesE =  IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
+    else              CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
  end 
  /* verilator lint_on WIDTH */

-endmodule
+endmodule
--- a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv
@ -26,49 +26,47 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fdivsqrtexpcalc(
-  input  logic [`FMTBITS-1:0] Fmt,
-  input  logic [`NE-1:0]      Xe, Ye,
+module fdivsqrtexpcalc import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.FMTBITS-1:0] Fmt,
+  input  logic [P.NE-1:0]      Xe, Ye,
  input  logic                Sqrt,
  input  logic                XZero, 
-  input  logic [`DIVBLEN:0]   ell, m,
-  output logic [`NE+1:0]      Qe
+  input  logic [P.DIVBLEN:0]   ell, m,
+  output logic [P.NE+1:0]      Qe
  );
-  logic [`NE-2:0] Bias;
-  logic [`NE+1:0] SXExp;
-  logic [`NE+1:0] SExp;
-  logic [`NE+1:0] DExp;
+  logic [P.NE-2:0] Bias;
+  logic [P.NE+1:0] SXExp;
+  logic [P.NE+1:0] SExp;
+  logic [P.NE+1:0] DExp;
  
-  if (`FPSIZES == 1) begin
-    assign Bias = (`NE-1)'(`BIAS); 
+  if (P.FPSIZES == 1) begin
+    assign Bias = (P.NE-1)'(P.BIAS); 

-  end else if (`FPSIZES == 2) begin
-    assign Bias = Fmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
+  end else if (P.FPSIZES == 2) begin
+    assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1); 

-  end else if (`FPSIZES == 3) begin
+  end else if (P.FPSIZES == 3) begin
    always_comb
      case (Fmt)
-        `FMT: Bias  =  (`NE-1)'(`BIAS);
-        `FMT1: Bias = (`NE-1)'(`BIAS1);
-        `FMT2: Bias = (`NE-1)'(`BIAS2);
+        P.FMT: Bias  =  (P.NE-1)'(P.BIAS);
+        P.FMT1: Bias = (P.NE-1)'(P.BIAS1);
+        P.FMT2: Bias = (P.NE-1)'(P.BIAS2);
        default: Bias = 'x;
      endcase

-  end else if (`FPSIZES == 4) begin        
+  end else if (P.FPSIZES == 4) begin        
  always_comb
    case (Fmt)
-      2'h3: Bias =  (`NE-1)'(`Q_BIAS);
-      2'h1: Bias =  (`NE-1)'(`D_BIAS);
-      2'h0: Bias =  (`NE-1)'(`S_BIAS);
-      2'h2: Bias =  (`NE-1)'(`H_BIAS);
+      2'h3: Bias =  (P.NE-1)'(P.Q_BIAS);
+      2'h1: Bias =  (P.NE-1)'(P.D_BIAS);
+      2'h0: Bias =  (P.NE-1)'(P.S_BIAS);
+      2'h2: Bias =  (P.NE-1)'(P.H_BIAS);
    endcase
  end
-  assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS);
-  assign SExp  = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
+  assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
+  assign SExp  = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
  
  // correct exponent for subnormal input's normalization shifts
-  assign DExp  = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}); 
+  assign DExp  = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias}); 
  assign Qe = Sqrt ? SExp : DExp;
 endmodule
--- a/src/fpu/fdivsqrt/fdivsqrtfgen2.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtfgen2.sv
@ -26,14 +26,12 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fdivsqrtfgen2 (
+module fdivsqrtfgen2 import cvw::*;  #(parameter cvw_t P) (
  input  logic             up, uz,
-  input  logic [`DIVb+3:0] C, U, UM,
-  output logic [`DIVb+3:0] F
+  input  logic [P.DIVb+3:0] C, U, UM,
+  output logic [P.DIVb+3:0] F
 );
-  logic [`DIVb+3:0] FP, FN, FZ;
+  logic [P.DIVb+3:0] FP, FN, FZ;

  // Generate for both positive and negative bits
  assign FP = ~(U << 1) & C;
--- a/src/fpu/fdivsqrt/fdivsqrtfgen4.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtfgen4.sv
@ -26,14 +26,12 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fdivsqrtfgen4 (
+module fdivsqrtfgen4 import cvw::*;  #(parameter cvw_t P) (
  input  logic [3:0]       udigit,
-  input  logic [`DIVb+3:0] C, U, UM,
-  output logic [`DIVb+3:0] F
+  input  logic [P.DIVb+3:0] C, U, UM,
+  output logic [P.DIVb+3:0] F
 );
-  logic [`DIVb+3:0] F2, F1, F0, FN1, FN2;
+  logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2;
  
  // Generate for both positive and negative bits
  assign F2  = (~U << 2) & (C << 2);
@ -49,4 +47,4 @@ module fdivsqrtfgen4 (
    else if (udigit[1])  F = FN1;
    else if (udigit[0])  F = FN2;
    else                 F = F0;
-endmodule
+endmodule
--- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@ -26,9 +26,7 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fdivsqrtfsm(
+module fdivsqrtfsm import cvw::*;  #(parameter cvw_t P) (
  input  logic               clk, reset, 
  input  logic               XInfE, YInfE, 
  input  logic               XZeroE, YZeroE, 
@ -39,7 +37,7 @@ module fdivsqrtfsm(
  input  logic               StallM, FlushE,
  input  logic               IntDivE,
  input  logic               ISpecialCaseE,
-  input  logic [`DURLEN-1:0] CyclesE,
+  input  logic [P.DURLEN-1:0] CyclesE,
  output logic               IFDivStartE,
  output logic               FDivBusyE, FDivDoneE,
  output logic               SpecialCaseM
@ -49,16 +47,16 @@ module fdivsqrtfsm(
  statetype state;

  logic SpecialCaseE, FSpecialCaseE;
-  logic [`DURLEN-1:0] step;
+  logic [P.DURLEN-1:0] step;

  // FDivStartE and IDivStartE come from fctrl, reflecitng the start of floating-point and possibly integer division
-  assign IFDivStartE = (FDivStartE | (IDivStartE & `IDIV_ON_FPU)) & (state == IDLE) & ~StallM;
+  assign IFDivStartE = (FDivStartE | (IDivStartE & P.IDIV_ON_FPU)) & (state == IDLE) & ~StallM;
  assign FDivDoneE = (state == DONE);
  assign FDivBusyE = (state == BUSY) | IFDivStartE; 
 
  // terminate immediately on special cases
  assign FSpecialCaseE = XZeroE | | XInfE  | XNaNE |  (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE;
-  if (`IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
+  if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
  else              assign SpecialCaseE = FSpecialCaseE;
  flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc

@ -78,4 +76,4 @@ module fdivsqrtfsm(
      end 
  end

-endmodule
+endmodule
--- a/src/fpu/fdivsqrt/fdivsqrtiter.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv
@ -26,38 +26,36 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fdivsqrtiter(
+module fdivsqrtiter import cvw::*;  #(parameter cvw_t P) (
  input  logic             clk,
  input  logic             IFDivStartE, 
  input  logic             FDivBusyE, 
  input  logic             SqrtE,
-  input  logic [`DIVb+3:0] X, D,
-  output logic [`DIVb:0]   FirstU, FirstUM,
-  output logic [`DIVb+1:0] FirstC,
+  input  logic [P.DIVb+3:0] X, D,
+  output logic [P.DIVb:0]   FirstU, FirstUM,
+  output logic [P.DIVb+1:0] FirstC,
  output logic             Firstun,
-  output logic [`DIVb+3:0] FirstWS, FirstWC
+  output logic [P.DIVb+3:0] FirstWS, FirstWC
 );

  /* verilator lint_off UNOPTFLAT */
-  logic [`DIVb+3:0]      WSNext[`DIVCOPIES-1:0]; // Q4.b
-  logic [`DIVb+3:0]      WCNext[`DIVCOPIES-1:0]; // Q4.b
-  logic [`DIVb+3:0]      WS[`DIVCOPIES:0];       // Q4.b
-  logic [`DIVb+3:0]      WC[`DIVCOPIES:0];       // Q4.b
-  logic [`DIVb:0]        U[`DIVCOPIES:0];        // U1.b
-  logic [`DIVb:0]        UM[`DIVCOPIES:0];       // U1.b
-  logic [`DIVb:0]        UNext[`DIVCOPIES-1:0];  // U1.b
-  logic [`DIVb:0]        UMNext[`DIVCOPIES-1:0]; // U1.b
-  logic [`DIVb+1:0]      C[`DIVCOPIES:0];        // Q2.b
-  logic [`DIVb+1:0]      initC;                  // Q2.b
-  logic [`DIVCOPIES-1:0] un; 
+  logic [P.DIVb+3:0]      WSNext[P.DIVCOPIES-1:0]; // Q4.b
+  logic [P.DIVb+3:0]      WCNext[P.DIVCOPIES-1:0]; // Q4.b
+  logic [P.DIVb+3:0]      WS[P.DIVCOPIES:0];       // Q4.b
+  logic [P.DIVb+3:0]      WC[P.DIVCOPIES:0];       // Q4.b
+  logic [P.DIVb:0]        U[P.DIVCOPIES:0];        // U1.b
+  logic [P.DIVb:0]        UM[P.DIVCOPIES:0];       // U1.b
+  logic [P.DIVb:0]        UNext[P.DIVCOPIES-1:0];  // U1.b
+  logic [P.DIVb:0]        UMNext[P.DIVCOPIES-1:0]; // U1.b
+  logic [P.DIVb+1:0]      C[P.DIVCOPIES:0];        // Q2.b
+  logic [P.DIVb+1:0]      initC;                  // Q2.b
+  logic [P.DIVCOPIES-1:0] un; 

-  logic [`DIVb+3:0]      WSN, WCN;               // Q4.b
-  logic [`DIVb+3:0]      DBar, D2, DBar2;        // Q4.b
-  logic [`DIVb+1:0]      NextC;
-  logic [`DIVb:0]        UMux, UMMux;
-  logic [`DIVb:0]        initU, initUM;
+  logic [P.DIVb+3:0]      WSN, WCN;               // Q4.b
+  logic [P.DIVb+3:0]      DBar, D2, DBar2;        // Q4.b
+  logic [P.DIVb+1:0]      NextC;
+  logic [P.DIVb:0]        UMux, UMMux;
+  logic [P.DIVb:0]        initU, initUM;
  /* verilator lint_on UNOPTFLAT */

  // Top Muxes and Registers
@ -66,36 +64,36 @@ module fdivsqrtiter(
  // are fed back for the next iteration.
 
  // Residual WS/SC registers/initializaiton mux
-  mux2   #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN);
-  mux2   #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN);
-  flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
-  flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
+  mux2   #(P.DIVb+4) wsmux(WS[P.DIVCOPIES], X, IFDivStartE, WSN);
+  mux2   #(P.DIVb+4) wcmux(WC[P.DIVCOPIES], '0, IFDivStartE, WCN);
+  flopen #(P.DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
+  flopen #(P.DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);

  // UOTFC Result U and UM registers/initialization mux
  // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 otherwise
-  assign initU  = {SqrtE, {(`DIVb){1'b0}}};
-  assign initUM = {~SqrtE, {(`DIVb){1'b0}}};
-  mux2   #(`DIVb+1)  Umux(UNext[`DIVCOPIES-1],  initU,  IFDivStartE, UMux);
-  mux2   #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
-  flopen #(`DIVb+1)  UReg(clk, FDivBusyE, UMux,  U[0]);
-  flopen #(`DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]);
+  assign initU  = {SqrtE, {(P.DIVb){1'b0}}};
+  assign initUM = {~SqrtE, {(P.DIVb){1'b0}}};
+  mux2   #(P.DIVb+1)  Umux(UNext[P.DIVCOPIES-1],  initU,  IFDivStartE, UMux);
+  mux2   #(P.DIVb+1) UMmux(UMNext[P.DIVCOPIES-1], initUM, IFDivStartE, UMMux);
+  flopen #(P.DIVb+1)  UReg(clk, FDivBusyE, UMux,  U[0]);
+  flopen #(P.DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]);

  // C register/initialization mux
  // Initialize C to -1 for sqrt and -R for division
  logic [1:0] initCUpper;
-  if(`RADIX == 4) begin
+  if(P.RADIX == 4) begin
    mux2 #(2) cuppermux4(2'b00, 2'b11, SqrtE, initCUpper);
  end else begin
    mux2 #(2) cuppermux2(2'b10, 2'b11, SqrtE, initCUpper);
  end
  
-  assign initC = {initCUpper, {`DIVb{1'b0}}};
-  mux2   #(`DIVb+2) cmux(C[`DIVCOPIES], initC, IFDivStartE, NextC); 
-  flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);
+  assign initC = {initCUpper, {P.DIVb{1'b0}}};
+  mux2   #(P.DIVb+2) cmux(C[P.DIVCOPIES], initC, IFDivStartE, NextC); 
+  flopen #(P.DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);

  // Divisor Selections
  assign DBar    = ~D;        // for -D
-  if(`RADIX == 4) begin : d2
+  if(P.RADIX == 4) begin : d2
    assign D2    = D << 1;    // for 2D,  only used in R4
    assign DBar2 = ~D2;       // for -2D, only used in R4
  end
@ -103,15 +101,15 @@ module fdivsqrtiter(
  // k=DIVCOPIES of the recurrence logic
  genvar i;
  generate
-    for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : iterations
-      if (`RADIX == 2) begin: stage
-        fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtE,
+    for(i=0; $unsigned(i)<P.DIVCOPIES; i++) begin : iterations
+      if (P.RADIX == 2) begin: stage
+        fdivsqrtstage2 #(P) fdivsqrtstage(.D, .DBar, .SqrtE,
        .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
        .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
      end else begin: stage
        logic j1;
-        assign j1 = (i == 0 & ~C[0][`DIVb-1]);
-        fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
+        assign j1 = (i == 0 & ~C[0][P.DIVb-1]);
+        fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
        .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), 
        .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
      end
--- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@ -26,51 +26,49 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fdivsqrtpostproc(
+module fdivsqrtpostproc import cvw::*;  #(parameter cvw_t P) (
  input  logic              clk, reset,
  input  logic              StallM,
-  input  logic [`DIVb+3:0]  WS, WC,
-  input  logic [`DIVb+3:0]  D, 
-  input  logic [`DIVb:0]    FirstU, FirstUM, 
-  input  logic [`DIVb+1:0]  FirstC,
+  input  logic [P.DIVb+3:0]  WS, WC,
+  input  logic [P.DIVb+3:0]  D, 
+  input  logic [P.DIVb:0]    FirstU, FirstUM, 
+  input  logic [P.DIVb+1:0]  FirstC,
  input  logic              SqrtE,
  input  logic              Firstun, SqrtM, SpecialCaseM, NegQuotM,
-  input  logic [`XLEN-1:0]  AM,
+  input  logic [P.XLEN-1:0]  AM,
  input  logic              RemOpM, ALTBM, BZeroM, AsM, W64M,
-  input  logic [`DIVBLEN:0] nM, mM,
-  output logic [`DIVb:0]    QmM, 
+  input  logic [P.DIVBLEN:0] nM, mM,
+  output logic [P.DIVb:0]    QmM, 
  output logic              WZeroE,
  output logic              DivStickyM,
-  output logic [`XLEN-1:0]  FIntDivResultM
+  output logic [P.XLEN-1:0]  FIntDivResultM
 );
  
-  logic [`DIVb+3:0]         W, Sum;
-  logic [`DIVb:0]           PreQmM;
+  logic [P.DIVb+3:0]         W, Sum;
+  logic [P.DIVb:0]           PreQmM;
  logic                     NegStickyM;
  logic                     weq0E, WZeroM;
-  logic [`XLEN-1:0]         IntDivResultM;
+  logic [P.XLEN-1:0]         IntDivResultM;

  //////////////////////////
  // Execute Stage: Detect early termination for an exact result
  //////////////////////////

  // check for early termination on an exact result. 
-  aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0E);
+  aplusbeq0 #(P.DIVb+4) wspluswceq0(WS, WC, weq0E);

-  if (`RADIX == 2) begin: R2EarlyTerm
-    logic [`DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;
-    logic [`DIVb+2:0] FirstK;
+  if (P.RADIX == 2) begin: R2EarlyTerm
+    logic [P.DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;
+    logic [P.DIVb+2:0] FirstK;
    logic wfeq0E;
-    logic [`DIVb+3:0] WCF, WSF;
+    logic [P.DIVb+3:0] WCF, WSF;

    assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));
-    assign FZeroSqrtE = {FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0};    // F for square root
+    assign FZeroSqrtE = {FirstUM[P.DIVb], FirstUM, 2'b0} | {FirstK,1'b0};    // F for square root
    assign FZeroDivE =  D << 1;                                    // F for divide
-    mux2 #(`DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
-    csa #(`DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
-    aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
+    mux2 #(P.DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
+    csa #(P.DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
+    aplusbeq0 #(P.DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
    assign WZeroE = weq0E|(wfeq0E & Firstun);
  end else begin
    assign WZeroE = weq0E;
@ -91,27 +89,27 @@ module fdivsqrtpostproc(

  // Determine if sticky bit is negative  // *** look for ways to optimize this.  Shift shouldn't be needed.
  assign Sum = WC + WS;
-  assign NegStickyM = Sum[`DIVb+3];
-  mux2 #(`DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
-  mux2 #(`DIVb+1)    qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
+  assign NegStickyM = Sum[P.DIVb+3];
+  mux2 #(P.DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
+  mux2 #(P.DIVb+1)    qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);

  // Integer quotient or remainder correction, normalization, and special cases
-  if (`IDIV_ON_FPU) begin:intpostproc // Int supported
-    logic [`DIVBLEN:0] NormShiftM;
-    logic [`DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
-    logic signed [`DIVb+3:0] PreResultM, PreIntResultM;
+  if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
+    logic [P.DIVBLEN:0] NormShiftM;
+    logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
+    logic signed [P.DIVb+3:0] PreResultM, PreIntResultM;

-    assign W = $signed(Sum) >>> `LOGR;
+    assign W = $signed(Sum) >>> P.LOGR;
    assign UnsignedQuotM = {3'b000, PreQmM};

    // Integer remainder: sticky and sign correction muxes
-    mux2 #(`DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM);
-    mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
-    mux2 #(`DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
+    mux2 #(P.DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM);
+    mux2 #(P.DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
+    mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);

    // Select quotient or remainder and do normalization shift
-    mux2 #(`DIVBLEN+1) normshiftmux(((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))), (mM + (`DIVBLEN+1)'(`DIVa)), RemOpM, NormShiftM);
-    mux2 #(`DIVb+4)    presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
+    mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(P.DIVa)), RemOpM, NormShiftM);
+    mux2 #(P.DIVb+4)    presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
    assign PreIntResultM = $signed(PreResultM >>> NormShiftM); 

    // special case logic
@ -119,18 +117,18 @@ module fdivsqrtpostproc(
    always_comb
      if (BZeroM) begin         // Divide by zero
        if (RemOpM) IntDivResultM = AM;  
-        else        IntDivResultM = {(`XLEN){1'b1}};
+        else        IntDivResultM = {(P.XLEN){1'b1}};
     end else if (ALTBM) begin // Numerator is zero
        if (RemOpM) IntDivResultM = AM;
        else        IntDivResultM = '0;
-     end else       IntDivResultM = PreIntResultM[`XLEN-1:0];
+     end else       IntDivResultM = PreIntResultM[P.XLEN-1:0];

    // sign extend result for W64
-    if (`XLEN==64) begin
-      mux2 #(64) resmux(IntDivResultM[`XLEN-1:0], 
-        {{(`XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
+    if (P.XLEN==64) begin
+      mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0], 
+        {{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
        W64M, FIntDivResultM);
    end else 
-      assign FIntDivResultM = IntDivResultM[`XLEN-1:0];
+      assign FIntDivResultM = IntDivResultM[P.XLEN-1:0];
  end
 endmodule
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@ -26,56 +26,54 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fdivsqrtpreproc (
+module fdivsqrtpreproc import cvw::*;  #(parameter cvw_t P) (
  input  logic                clk,
  input  logic                IFDivStartE, 
-  input  logic [`NF:0]        Xm, Ym,
-  input  logic [`NE-1:0]      Xe, Ye,
-  input  logic [`FMTBITS-1:0] FmtE,
+  input  logic [P.NF:0]        Xm, Ym,
+  input  logic [P.NE-1:0]      Xe, Ye,
+  input  logic [P.FMTBITS-1:0] FmtE,
  input  logic                SqrtE,
  input  logic                XZeroE,
  input  logic [2:0]          Funct3E,
-  output logic [`NE+1:0]      QeM,
-  output logic [`DIVb+3:0]    X, D,
+  output logic [P.NE+1:0]      QeM,
+  output logic [P.DIVb+3:0]    X, D,
  // Int-specific
-  input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
  input  logic                IntDivE, W64E,
  output logic                ISpecialCaseE,
-  output logic [`DURLEN-1:0]  CyclesE,
-  output logic [`DIVBLEN:0]   nM, mM,
+  output logic [P.DURLEN-1:0]  CyclesE,
+  output logic [P.DIVBLEN:0]   nM, mM,
  output logic                NegQuotM, ALTBM, IntDivM, W64M,
  output logic                AsM, BZeroM,
-  output logic [`XLEN-1:0]    AM
+  output logic [P.XLEN-1:0]    AM
 );

-  logic [`DIVb-1:0]           Xfract, Dfract;
-  logic [`DIVb:0]             PreSqrtX;
-  logic [`DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
-  logic [`NE+1:0]             QeE;                                 // Quotient Exponent (FP only)
-  logic [`DIVb-1:0]           IFX, IFD;                            // Correctly-sized inputs for iterator, selected from int or fp input
-  logic [`DIVBLEN:0]          mE, nE, ell;                         // Leading zeros of inputs
+  logic [P.DIVb-1:0]           Xfract, Dfract;
+  logic [P.DIVb:0]             PreSqrtX;
+  logic [P.DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
+  logic [P.NE+1:0]             QeE;                                 // Quotient Exponent (FP only)
+  logic [P.DIVb-1:0]           IFX, IFD;                            // Correctly-sized inputs for iterator, selected from int or fp input
+  logic [P.DIVBLEN:0]          mE, nE, ell;                         // Leading zeros of inputs
  logic                       NumerZeroE;                          // Numerator is zero (X or A)
  logic                       AZeroE, BZeroE;                      // A or B is Zero for integer division
  logic                       SignedDivE;                          // signed division
  logic                       NegQuotE;                            // Integer quotient is negative
  logic                       AsE, BsE;                            // Signs of integer inputs
-  logic [`XLEN-1:0]           AE;                                  // input A after W64 adjustment
+  logic [P.XLEN-1:0]           AE;                                  // input A after W64 adjustment
  logic  ALTBE;

  //////////////////////////////////////////////////////
  // Integer Preprocessing
  //////////////////////////////////////////////////////

-  if (`IDIV_ON_FPU) begin:intpreproc // Int Supported
-    logic [`XLEN-1:0] BE, PosA, PosB;
+  if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
+    logic [P.XLEN-1:0] BE, PosA, PosB;

    // Extract inputs, signs, zero, depending on W64 mode if applicable
    assign SignedDivE = ~Funct3E[0];
  
    // Source handling
-    if (`XLEN==64) begin // 64-bit, supports W64
+    if (P.XLEN==64) begin // 64-bit, supports W64
      mux2 #(64)    amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
      mux2 #(64)    bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
    end else begin // 32 bits only
@ -84,21 +82,21 @@ module fdivsqrtpreproc (
     end
    assign AZeroE = ~(|AE);
    assign BZeroE = ~(|BE);
-    assign AsE = AE[`XLEN-1] & SignedDivE;
-    assign BsE = BE[`XLEN-1] & SignedDivE; 
+    assign AsE = AE[P.XLEN-1] & SignedDivE;
+    assign BsE = BE[P.XLEN-1] & SignedDivE; 
    assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative

    // Force integer inputs to be positive
-    mux2 #(`XLEN) posamux(AE, -AE, AsE, PosA);
-    mux2 #(`XLEN) posbmux(BE, -BE, BsE, PosB);
+    mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
+    mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);

    // Select integer or floating point inputs
-    mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX);
-    mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD);
+    mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX);
+    mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD);
    mux2 #(1)    numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
  end else begin // Int not supported
-    assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}};
-    assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}};
+    assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}};
+    assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}};
    assign NumerZeroE = XZeroE;
  end

@ -107,8 +105,8 @@ module fdivsqrtpreproc (
  //////////////////////////////////////////////////////

  // count leading zeros for Subnorm FP and to normalize integer inputs
-  lzc #(`DIVb) lzcX (IFX, ell);
-  lzc #(`DIVb) lzcY (IFD, mE);
+  lzc #(P.DIVb) lzcX (IFX, ell);
+  lzc #(P.DIVb) lzcY (IFD, mE);

  // Normalization shift: shift off leading one
  assign Xfract = (IFX << ell) << 1;
@ -122,28 +120,28 @@ module fdivsqrtpreproc (
  //  and nE (number of fractional digits)
  //////////////////////////////////////////////////////

-  if (`IDIV_ON_FPU) begin:intrightshift // Int Supported
-    logic [`DIVBLEN:0] ZeroDiff, p;
+  if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
+    logic [P.DIVBLEN:0] ZeroDiff, p;

    // calculate number of fractional bits p
    assign ZeroDiff = mE - ell;         // Difference in number of leading zeros
-    assign ALTBE = ZeroDiff[`DIVBLEN];  // A less than B (A has more leading zeros)
-    mux2 #(`DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);              
+    assign ALTBE = ZeroDiff[P.DIVBLEN];  // A less than B (A has more leading zeros)
+    mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);              

    // Integer special cases (terminate immediately)
    assign ISpecialCaseE = BZeroE | ALTBE;

    // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps

-    if (`LOGRK > 0) begin // more than 1 bit per cycle
-      logic [`LOGRK-1:0] IntTrunc, RightShiftX;
-      logic [`DIVBLEN:0] TotalIntBits, IntSteps;
+    if (P.LOGRK > 0) begin // more than 1 bit per cycle
+      logic [P.LOGRK-1:0] IntTrunc, RightShiftX;
+      logic [P.DIVBLEN:0] TotalIntBits, IntSteps;
      /* verilator lint_off WIDTH */
-      assign TotalIntBits = `LOGR + p;                            // Total number of result bits (r integer bits plus p fractional bits)
-      assign IntTrunc = TotalIntBits % `RK;                       // Truncation check for ceiling operator
-      assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc;     // Number of steps for int div
-      assign nE = (IntSteps * `DIVCOPIES) - 1;                    // Fractional digits
-      assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK);  // Right shift amount
+      assign TotalIntBits = P.LOGR + p;                            // Total number of result bits (r integer bits plus p fractional bits)
+      assign IntTrunc = TotalIntBits % P.RK;                       // Truncation check for ceiling operator
+      assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc;     // Number of steps for int div
+      assign nE = (IntSteps * P.DIVCOPIES) - 1;                    // Fractional digits
+      assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK);  // Right shift amount
      assign DivXShifted = DivX >> RightShiftX;                   // shift X by up to R*K-1 to complete in nE steps
      /* verilator lint_on WIDTH */
    end else begin // radix 2 1 copy doesn't require shifting
@ -167,42 +165,42 @@ module fdivsqrtpreproc (
  assign DivX = {3'b000, ~NumerZeroE, Xfract};

  // Sqrt is initialized on step one as R(X-1), so depends on Radix
-  mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
-  if (`RADIX == 2)  assign SqrtX = {3'b111, PreSqrtX};
+  mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
+  if (P.RADIX == 2)  assign SqrtX = {3'b111, PreSqrtX};
  else              assign SqrtX = {2'b11, PreSqrtX, 1'b0};
-  mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
+  mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
  
  //////////////////////////////////////////////////////
  // Select integer or floating-point operands
  //////////////////////////////////////////////////////

-  if (`IDIV_ON_FPU) begin
-    mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
+  if (P.IDIV_ON_FPU) begin
+    mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
  end else begin
    assign X = PreShiftX;
  end

   // Divisor register
-  flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
+  flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
 
  // Floating-point exponent
-  fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
-  flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
+  fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
+  flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM);

  // Number of FSM cycles (to FSM)
-  fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
+  fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);

-  if (`IDIV_ON_FPU) begin:intpipelineregs
+  if (P.IDIV_ON_FPU) begin:intpipelineregs
    // pipeline registers
    flopen #(1)        mdureg(clk, IFDivStartE, IntDivE,  IntDivM);
    flopen #(1)       altbreg(clk, IFDivStartE, ALTBE,    ALTBM);
    flopen #(1)    negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
    flopen #(1)      bzeroreg(clk, IFDivStartE, BZeroE,   BZeroM);
    flopen #(1)      asignreg(clk, IFDivStartE, AsE,      AsM);
-    flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE,       nM); 
-    flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE,       mM);
-    flopen #(`XLEN)   srcareg(clk, IFDivStartE, AE,       AM);
-    if (`XLEN==64) 
+    flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE,       nM); 
+    flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE,       mM);
+    flopen #(P.XLEN)   srcareg(clk, IFDivStartE, AE,       AM);
+    if (P.XLEN==64) 
      flopen #(1)      w64reg(clk, IFDivStartE, W64E,     W64M);
  end

--- a/src/fpu/fdivsqrt/fdivsqrtqsel2.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtqsel2.sv
@ -26,8 +26,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module fdivsqrtqsel2 ( 
  input  logic [3:0] ps, pc, 
  output logic       up, uz, un
--- a/src/fpu/fdivsqrt/fdivsqrtqsel4.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtqsel4.sv
@ -26,8 +26,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module fdivsqrtqsel4 (
  input  logic [2:0] Dmsbs,
  input  logic [4:0] Smsbs,
--- a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv
@ -26,8 +26,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module fdivsqrtqsel4cmp (
  input  logic [2:0] Dmsbs,
  input  logic [4:0] Smsbs,
--- a/src/fpu/fdivsqrt/fdivsqrtstage2.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtstage2.sv
@ -26,27 +26,26 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"

 /* verilator lint_off UNOPTFLAT */
-module fdivsqrtstage2 (
-  input  logic [`DIVb+3:0] D, DBar, 
-  input  logic [`DIVb:0]   U, UM,
-  input  logic [`DIVb+3:0] WS, WC,
-  input  logic [`DIVb+1:0] C,
+module fdivsqrtstage2 import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.DIVb+3:0] D, DBar, 
+  input  logic [P.DIVb:0]   U, UM,
+  input  logic [P.DIVb+3:0] WS, WC,
+  input  logic [P.DIVb+1:0] C,
  input  logic             SqrtE,
  output logic             un,
-  output logic [`DIVb+1:0] CNext,
-  output logic [`DIVb:0]   UNext, UMNext, 
-  output logic [`DIVb+3:0] WSNext, WCNext
+  output logic [P.DIVb+1:0] CNext,
+  output logic [P.DIVb:0]   UNext, UMNext, 
+  output logic [P.DIVb+3:0] WSNext, WCNext
 );
 /* verilator lint_on UNOPTFLAT */

-  logic [`DIVb+3:0]        Dsel;
+  logic [P.DIVb+3:0]        Dsel;
  logic                    up, uz;
-  logic [`DIVb+3:0]        F;
-  logic [`DIVb+3:0]        AddIn;
-  logic [`DIVb+3:0]        WSA, WCA;
+  logic [P.DIVb+3:0]        F;
+  logic [P.DIVb+3:0]        AddIn;
+  logic [P.DIVb+3:0]        WSA, WCA;

  // Quotient Selection logic
  // Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
@ -56,10 +55,10 @@ module fdivsqrtstage2 (
  // 0000 =  0
  // 0010 = -1
  // 0001 = -2
-  fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un);
+  fdivsqrtqsel2 qsel2(WS[P.DIVb+3:P.DIVb], WC[P.DIVb+3:P.DIVb], up, uz, un);

  // Sqrt F generation.  Extend C, U, UM to Q4.k
-  fdivsqrtfgen2 fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
+  fdivsqrtfgen2 #(P) fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);

  // Divisor multiple
  always_comb
@ -69,16 +68,16 @@ module fdivsqrtstage2 (

  // Partial Product Generation
  //  WSA, WCA = WS + WC - qD
-  mux2 #(`DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
-  csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
+  mux2 #(P.DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
+  csa #(P.DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
  assign WSNext = WSA << 1;
  assign WCNext = WCA << 1;

  // Shift thermometer code C
-  assign CNext = {1'b1, C[`DIVb+1:1]};
+  assign CNext = {1'b1, C[P.DIVb+1:1]};

  // Unified On-The-Fly Converter to accumulate result
-  fdivsqrtuotfc2 uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext);
+  fdivsqrtuotfc2 #(P) uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext);
 endmodule


--- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv
@ -26,29 +26,27 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fdivsqrtstage4 (
-  input  logic [`DIVb+3:0] D, DBar, D2, DBar2,
-  input  logic [`DIVb:0]   U,UM,
-  input  logic [`DIVb+3:0] WS, WC,
-  input  logic [`DIVb+1:0] C,
+module fdivsqrtstage4 import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.DIVb+3:0] D, DBar, D2, DBar2,
+  input  logic [P.DIVb:0]   U,UM,
+  input  logic [P.DIVb+3:0] WS, WC,
+  input  logic [P.DIVb+1:0] C,
  input  logic             SqrtE, j1,
-  output logic [`DIVb+1:0] CNext,
+  output logic [P.DIVb+1:0] CNext,
  output logic             un,
-  output logic [`DIVb:0]   UNext, UMNext, 
-  output logic [`DIVb+3:0] WSNext, WCNext
+  output logic [P.DIVb:0]   UNext, UMNext, 
+  output logic [P.DIVb+3:0] WSNext, WCNext
 );

-  logic [`DIVb+3:0]        Dsel;
+  logic [P.DIVb+3:0]        Dsel;
  logic [3:0]              udigit;
-  logic [`DIVb+3:0]        F;
-  logic [`DIVb+3:0]        AddIn;
+  logic [P.DIVb+3:0]        F;
+  logic [P.DIVb+3:0]        AddIn;
  logic [4:0]              Smsbs;
  logic [2:0]              Dmsbs;
  logic [7:0]              WCmsbs, WSmsbs;
  logic                    CarryIn;
-  logic [`DIVb+3:0]        WSA, WCA;
+  logic [P.DIVb+3:0]        WSA, WCA;

  // Digit Selection logic
  // u encoding:
@ -57,16 +55,16 @@ module fdivsqrtstage4 (
  // 0000 =  0
  // 0010 = -1
  // 0001 = -2
-  assign Smsbs  = U[`DIVb:`DIVb-4];
-  assign Dmsbs  = D[`DIVb-1:`DIVb-3];
-  assign WCmsbs = WC[`DIVb+3:`DIVb-4];
-  assign WSmsbs = WS[`DIVb+3:`DIVb-4];
+  assign Smsbs  = U[P.DIVb:P.DIVb-4];
+  assign Dmsbs  = D[P.DIVb-1:P.DIVb-3];
+  assign WCmsbs = WC[P.DIVb+3:P.DIVb-4];
+  assign WSmsbs = WS[P.DIVb+3:P.DIVb-4];

  fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
  assign un = 1'b0; // unused for radix 4

  // F generation logic
-  fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
+  fdivsqrtfgen4 #(P) fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);

  // Divisor multiple logic
  always_comb
@ -83,15 +81,15 @@ module fdivsqrtstage4 (
  //  {WS, WC}}Next = (WS + WC - qD or F) << 2
  assign AddIn = SqrtE ? F : Dsel;
  assign CarryIn = ~SqrtE & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D 
-  csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
+  csa #(P.DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
  assign WSNext = WSA << 2;
  assign WCNext = WCA << 2;

  // Shift thermometer code C
-  assign CNext = {2'b11, C[`DIVb+1:2]};
+  assign CNext = {2'b11, C[P.DIVb+1:2]};
 
  // On-the-fly converter to accumulate result
-  fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);
+  fdivsqrtuotfc4 #(P) fdivsqrtuotfc4(.udigit, .C(CNext[P.DIVb:0]), .U, .UM, .UNext, .UMNext);
 endmodule


--- a/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv
@ -26,22 +26,20 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 ///////////////////////////////
 // Unified OTFC, Radix 2 //
 ///////////////////////////////
-module fdivsqrtuotfc2(
+module fdivsqrtuotfc2 import cvw::*;  #(parameter cvw_t P) (
  input  logic             up, un,
-  input  logic [`DIVb+1:0] C,
-  input  logic [`DIVb:0]   U, UM,
-  output logic [`DIVb:0]   UNext, UMNext
+  input  logic [P.DIVb+1:0] C,
+  input  logic [P.DIVb:0]   U, UM,
+  output logic [P.DIVb:0]   UNext, UMNext
 );
  //  The on-the-fly converter transfers the divsqrt
  //  bits to the quotient as they come.
-  logic [`DIVb:0] K;
+  logic [P.DIVb:0] K;

-  assign K = (C[`DIVb:0] & ~(C[`DIVb:0] << 1)); // Thermometer to one hot encoding
+  assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding

  always_comb begin
    if (up) begin
--- a/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv
@ -26,19 +26,17 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fdivsqrtuotfc4(
+module fdivsqrtuotfc4 import cvw::*;  #(parameter cvw_t P) (
  input  logic [3:0]     udigit,
-  input  logic [`DIVb:0] U, UM,
-  input  logic [`DIVb:0] C,
-  output logic [`DIVb:0] UNext, UMNext
+  input  logic [P.DIVb:0] U, UM,
+  input  logic [P.DIVb:0] C,
+  output logic [P.DIVb:0] UNext, UMNext
 );
  //  The on-the-fly converter transfers the square root 
  //  bits to the quotient as they come.
  //  Use this otfc for division and square root.

-  logic [`DIVb:0] K1, K2, K3;       
+  logic [P.DIVb:0] K1, K2, K3;       
  assign K1 = (C&~(C << 1));        // K
  assign K2 = ((C << 1)&~(C << 2)); // 2K
  assign K3 = (C & ~(C << 2));      // 3K
--- a/src/fpu/fhazard.sv
+++ b/src/fpu/fhazard.sv
@ -26,8 +26,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module fhazard(
  input  logic [4:0]  Adr1D, Adr2D, Adr3D,                // read data adresses
  input  logic [4:0]  Adr1E, Adr2E, Adr3E,                // read data adresses
--- a/src/fpu/fma/fma.sv
+++ b/src/fpu/fma/fma.sv
@ -26,22 +26,20 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fma(
+module fma import cvw::*;  #(parameter cvw_t P) (
  input  logic                        Xs, Ys, Zs,             // input's signs
-  input  logic [`NE-1:0]              Xe, Ye, Ze,             // input's biased exponents in B(NE.0) format
-  input  logic [`NF:0]                Xm, Ym, Zm,             // input's significands in U(0.NF) format
+  input  logic [P.NE-1:0]              Xe, Ye, Ze,             // input's biased exponents in B(NE.0) format
+  input  logic [P.NF:0]                Xm, Ym, Zm,             // input's significands in U(0.NF) format
  input  logic                        XZero, YZero, ZZero,    // is the input zero
  input  logic [2:0]                  OpCtrl,                 // operation control
  output logic                        ASticky,                // sticky bit that is calculated during alignment
-  output logic [3*`NF+3:0]            Sm,                     // the positive sum's significand
+  output logic [3*P.NF+3:0]            Sm,                     // the positive sum's significand
  output logic                        InvA,                   // Was A inverted for effective subtraction (P-A or -P+A)
  output logic                        As,                     // the aligned addend's sign (modified Z sign for other opperations)
  output logic                        Ps,                     // the product's sign
  output logic                        Ss,                     // the sum's sign
-  output logic [`NE+1:0]              Se,                     // the sum's exponent
-  output logic [$clog2(3*`NF+5)-1:0]  SCnt                    // normalization shift count
+  output logic [P.NE+1:0]              Se,                     // the sum's exponent
+  output logic [$clog2(3*P.NF+5)-1:0]  SCnt                    // normalization shift count
 );

  //  OpCtrl:
@ -54,12 +52,12 @@ module fma(
  //        110 - add
  //        111 - sub

-  logic [2*`NF+1:0]   Pm;         // the product's significand in U(2.2Nf) format
-  logic [3*`NF+3:0]   Am;         // addend aligned's mantissa for addition in U(NF+4.2NF)
-  logic [3*`NF+3:0]   AmInv;      // aligned addend's mantissa possibly inverted
-  logic [2*`NF+1:0]   PmKilled;   // the product's mantissa possibly killed U(2.2Nf)
+  logic [2*P.NF+1:0]   Pm;         // the product's significand in U(2.2Nf) format
+  logic [3*P.NF+3:0]   Am;         // addend aligned's mantissa for addition in U(NF+4.2NF)
+  logic [3*P.NF+3:0]   AmInv;      // aligned addend's mantissa possibly inverted
+  logic [2*P.NF+1:0]   PmKilled;   // the product's mantissa possibly killed U(2.2Nf)
  logic               KillProd;   // set the product to zero before addition if the product is too small to matter
-  logic [`NE+1:0]     Pe;         // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
+  logic [P.NE+1:0]     Pe;         // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign

  ///////////////////////////////////////////////////////////////////////////////
  // Calculate the product
@ -71,10 +69,10 @@ module fma(
  

  // calculate the product's exponent 
-  fmaexpadd expadd(.Xe, .Ye, .XZero, .YZero, .Pe);
+  fmaexpadd #(P) expadd(.Xe, .Ye, .XZero, .YZero, .Pe);

  // multiplication of the mantissas
-  fmamult mult(.Xm, .Ym, .Pm);
+  fmamult #(P) mult(.Xm, .Ym, .Pm);
  
  // calculate the signs and take the operation into account
  fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA);
@ -82,15 +80,15 @@ module fma(
  ///////////////////////////////////////////////////////////////////////////////
  // Alignment shifter
  ///////////////////////////////////////////////////////////////////////////////
-  fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd);
+  fmaalign #(P) align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd);
                      
  // ///////////////////////////////////////////////////////////////////////////////
  // // Addition/LZA
  // ///////////////////////////////////////////////////////////////////////////////
      
-  fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);
+  fmaadd #(P) add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);

-  fmalza #(3*`NF+4) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt);
+  fmalza #(3*P.NF+4, P.NF) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt);
  
 endmodule

--- a/src/fpu/fma/fmaadd.sv
+++ b/src/fpu/fma/fmaadd.sv
@ -26,25 +26,23 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fmaadd(
-  input  logic [3*`NF+3:0]    Am,         // aligned addend's mantissa for addition in U(NF+5.2NF+1)
-  input  logic [`NE-1:0]      Ze,         // exponent of Z
+module fmaadd import cvw::*;  #(parameter cvw_t P) (
+  input  logic [3*P.NF+3:0]    Am,         // aligned addend's mantissa for addition in U(NF+5.2NF+1)
+  input  logic [P.NE-1:0]      Ze,         // exponent of Z
  input  logic                Ps,         // the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
-  input  logic [`NE+1:0]      Pe,         // product's exponet
-  input  logic [2*`NF+1:0]    Pm,         // the product's mantissa
+  input  logic [P.NE+1:0]      Pe,         // product's exponent
+  input  logic [2*P.NF+1:0]    Pm,         // the product's mantissa
  input  logic                InvA,       // invert the aligned addend
  input  logic                KillProd,   // should the product be set to 0
  input  logic                ASticky,    // Alighed addend's sticky bit
-  output logic [3*`NF+3:0]    AmInv,      // aligned addend possibly inverted
-  output logic [2*`NF+1:0]    PmKilled,   // the product's mantissa possibly killed
+  output logic [3*P.NF+3:0]    AmInv,      // aligned addend possibly inverted
+  output logic [2*P.NF+1:0]    PmKilled,   // the product's mantissa possibly killed
  output logic                Ss,         // sum's sign    
-  output logic [`NE+1:0]      Se,         // sum's exponent
-  output logic [3*`NF+3:0]    Sm          // the positive sum
+  output logic [P.NE+1:0]      Se,         // sum's exponent
+  output logic [3*P.NF+3:0]    Sm          // the positive sum
 );

-  logic [3*`NF+3:0]    PreSum, NegPreSum; // possibly negitive sum
+  logic [3*P.NF+3:0]    PreSum, NegPreSum; // possibly negative sum
  logic                NegSum;            // was the sum negitive

  ///////////////////////////////////////////////////////////////////////////////
@ -52,9 +50,9 @@ module fmaadd(
  ///////////////////////////////////////////////////////////////////////////////
  
  // Choose an inverted or non-inverted addend.  Put carry into adder/LZA for addition
-  assign AmInv = {3*`NF+4{InvA}}^Am;
+  assign AmInv = {3*P.NF+4{InvA}}^Am;
  // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
-  assign PmKilled = {2*`NF+2{~KillProd}}&Pm;
+  assign PmKilled = {2*P.NF+2{~KillProd}}&Pm;
  // Do the addition
  //      - calculate a positive and negitive sum in parallel
  // if there was a small negitive number killed in the alignment stage one needs to be subtracted from the sum
@ -63,8 +61,8 @@ module fmaadd(
  //      addend - prod where product is killed (and not exactly zero) then don't add +1 from negation 
  //          ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA
  //          in this case this result is only ever selected when InvA=1 so we can remove &InvA
-  assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*`NF+4{1'b0}}, (~ASticky|KillProd)&InvA};
-  assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*`NF+2)'(0), ~ASticky|~KillProd, 1'b0};
+  assign {NegSum, PreSum} = {{P.NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*P.NF+4{1'b0}}, (~ASticky|KillProd)&InvA};
+  assign NegPreSum = Am + {{P.NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*P.NF+2)'(0), ~ASticky|~KillProd, 1'b0};
    
  // Choose the positive sum and accompanying LZA result.
  assign Sm = NegSum ? NegPreSum : PreSum;
--- a/src/fpu/fma/fmaalign.sv
+++ b/src/fpu/fma/fmaalign.sv
@ -27,20 +27,18 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fmaalign(
-  input  logic [`NE-1:0]      Xe, Ye, Ze,         // biased exponents in B(NE.0) format
-  input  logic [`NF:0]        Zm,                 // significand in U(0.NF) format]
+module fmaalign import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.NE-1:0]      Xe, Ye, Ze,         // biased exponents in B(NE.0) format
+  input  logic [P.NF:0]        Zm,                 // significand in U(0.NF) format
  input  logic                XZero, YZero, ZZero,// is the input zero
-  output logic [3*`NF+3:0]    Am,                 // addend aligned for addition in U(NF+5.2NF+1)
+  output logic [3*P.NF+3:0]    Am,                 // addend aligned for addition in U(NF+5.2NF+1)
  output logic                ASticky,            // Sticky bit calculated from the aliged addend
  output logic                KillProd            // should the product be set to zero
 );

-  logic [`NE+1:0]             ACnt;           // how far to shift the addend to align with the product in Q(NE+2.0) format
-  logic [4*`NF+3:0]           ZmShifted;      // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
-  logic [4*`NF+3:0]           ZmPreshifted;   // input to the alignment shifter U(NF+5.3NF+1)
+  logic [P.NE+1:0]             ACnt;           // how far to shift the addend to align with the product in Q(NE+2.0) format
+  logic [4*P.NF+3:0]           ZmShifted;      // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
+  logic [4*P.NF+3:0]           ZmPreshifted;   // input to the alignment shifter U(NF+5.3NF+1)
  logic                       KillZ;          // should the addend be killed

  ///////////////////////////////////////////////////////////////////////////////
@ -51,16 +49,16 @@ module fmaalign(
  //      - negitive means Z is larger, so shift Z left
  //      - positive means the product is larger, so shift Z right
  // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
-  assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+2) - {2'b0, Ze};
+  assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)} + (P.NE+2)'(P.NF+2) - {2'b0, Ze};

  // Defualt Addition with only inital left shift
  //          |   53'b0    |  106'b(product)  | 1'b0 |
  //          | addnend |

-  assign ZmPreshifted = {Zm,(3*`NF+3)'(0)};
+  assign ZmPreshifted = {Zm,(3*P.NF+3)'(0)};
  
-  assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
-  assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(3));
+  assign KillProd = (ACnt[P.NE+1]&~ZZero)|XZero|YZero;
+  assign KillZ = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(3));

  always_comb begin
    // If the product is too small to effect the sum, kill the product
@ -68,7 +66,7 @@ module fmaalign(
    //          |   53'b0    |  106'b(product)  | 1'b0 |
    //  | addnend |
    if (KillProd) begin
-        ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+1)'(0)};
+        ZmShifted = {(P.NF+2)'(0), Zm, (2*P.NF+1)'(0)};
        ASticky = ~(XZero|YZero);

    // If the addend is too small to effect the addition        
@ -86,12 +84,12 @@ module fmaalign(
    //                                    | addnend |
    end else begin
        ZmShifted = ZmPreshifted >> ACnt;
-        ASticky = |(ZmShifted[`NF-1:0]); 
+        ASticky = |(ZmShifted[P.NF-1:0]); 

    end
  end

-  assign Am = ZmShifted[4*`NF+3:`NF];
+  assign Am = ZmShifted[4*P.NF+3:P.NF];

 endmodule

--- a/src/fpu/fma/fmaexpadd.sv
+++ b/src/fpu/fma/fmaexpadd.sv
@ -26,18 +26,16 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fmaexpadd(    
-  input  logic [`NE-1:0]      Xe, Ye,         // input's exponents
+module fmaexpadd import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.NE-1:0]      Xe, Ye,         // input's exponents
  input  logic                XZero, YZero,   // are the inputs zero
-  output logic [`NE+1:0]      Pe              // product's exponent B^(1023)NE+2
+  output logic [P.NE+1:0]      Pe              // product's exponent B^(1023)NE+2
 );

  logic                       PZero;          // is the product zero?
  
  // kill the exponent if the product is zero - either X or Y is 0
  assign PZero = XZero | YZero;
-  assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)});
+  assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)});

 endmodule
--- a/src/fpu/fma/fmalza.sv
+++ b/src/fpu/fma/fmalza.sv
@ -27,11 +27,9 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fmalza #(WIDTH) ( 
+module fmalza #(WIDTH, NF) ( 
  input logic [WIDTH-1:0]             A,      // addend
-  input logic [2*`NF+1:0]             Pm,     // product
+  input logic [2*NF+1:0]             Pm,     // product
  input logic                         Cin,    // carry in
  input logic                         sub,    // subtraction
  output logic [$clog2(WIDTH+1)-1:0]  SCnt    // normalization shift count for the positive result
@ -42,7 +40,7 @@ module fmalza #(WIDTH) (
  logic [WIDTH-1:0]                   P, G, K;        // propagate, generate, kill for each column
  logic [WIDTH-1:0]                   Pp1, Gm1, Km1;  // propagate shifted right by 1, generate/kill shifted left 1

-  assign B = {{(`NF+1){1'b0}}, Pm, 1'b0}; // Zero extend product
+  assign B = {{(NF+1){1'b0}}, Pm, 1'b0}; // Zero extend product

  assign P = A^B;
  assign G = A&B;
--- a/src/fpu/fma/fmamult.sv
+++ b/src/fpu/fma/fmamult.sv
@ -26,11 +26,9 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fmamult(
-  input  logic [`NF:0]     Xm, Ym, // x and y significand
-  output logic [2*`NF+1:0] Pm      // product's significand
+module fmamult import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.NF:0]     Xm, Ym, // x and y significand
+  output logic [2*P.NF+1:0] Pm      // product's significand
 );

  assign Pm = Xm * Ym;
--- a/src/fpu/fma/fmasign.sv
+++ b/src/fpu/fma/fmasign.sv
@ -26,8 +26,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module fmasign(    
  input  logic [2:0]  OpCtrl,     // opperation contol
  input  logic        Xs, Ys, Zs, // sign of the inputs
--- a/src/fpu/fpu.sv
+++ b/src/fpu/fpu.sv
@ -26,9 +26,7 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fpu (
+module fpu import cvw::*;  #(parameter cvw_t P) (
  input  logic             clk,
  input  logic             reset,
  // Hazards
@ -44,7 +42,7 @@ module fpu (
  // Execute stage                                         
  input  logic [2:0]       Funct3E,                        // Funct fields of instruction specify type of operations
  input  logic             IntDivE, W64E,                  // Integer division on FPU
-  input  logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU)
+  input  logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU)
  input  logic [4:0]       RdE,                            // which FP register to write to (from IEU)
  output logic             FWriteIntE,                     // integer register write enable (to IEU)
  output logic             FCvtIntE,                       // Convert to int (to IEU)
@ -53,16 +51,16 @@ module fpu (
  input  logic [4:0]       RdM,                            // which FP register to write to (from IEU)
  output logic             FRegWriteM,                     // FP register write enable (to privileged unit)
  output logic             FpLoadStoreM,                   // Fp load instruction? (to LSU)
-  output logic [`FLEN-1:0] FWriteDataM,                    // Data to be written to memory (to LSU) 
-  output logic [`XLEN-1:0] FIntResM,                       // data to be written to integer register (to IEU)
+  output logic [P.FLEN-1:0] FWriteDataM,                    // Data to be written to memory (to LSU) 
+  output logic [P.XLEN-1:0] FIntResM,                       // data to be written to integer register (to IEU)
  output logic             IllegalFPUInstrD,               // Is the instruction an illegal fpu instruction (to IFU)
  output logic [4:0]       SetFflagsM,                     // FPU flags (to privileged unit)
  // Writeback stage                                       
  input  logic [4:0]       RdW,                            // which FP register to write to (from IEU)
-  input  logic [`FLEN-1:0] ReadDataW,                      // Read data (from LSU)
-  output logic [`XLEN-1:0] FCvtIntResW,                    // convert result to to be written to integer register (to IEU)
+  input  logic [P.FLEN-1:0] ReadDataW,                      // Read data (from LSU)
+  output logic [P.XLEN-1:0] FCvtIntResW,                    // convert result to be written to integer register (to IEU)
  output logic             FCvtIntW,                       // select FCvtIntRes (to IEU)
-  output logic [`XLEN-1:0] FIntDivResultW                  // Result from integer division (to IEU)
+  output logic [P.XLEN-1:0] FIntDivResultW                  // Result from integer division (to IEU)
 );

  // RISC-V FPU specifics:
@ -72,7 +70,7 @@ module fpu (
  // control signals
  logic                    FRegWriteW;                        // FP register write enable
  logic [2:0]              FrmM;                              // FP rounding mode
-  logic [`FMTBITS-1:0]     FmtE, FmtM;                        // FP precision 0-single 1-double
+  logic [P.FMTBITS-1:0]     FmtE, FmtM;                        // FP precision 0-single 1-double
  logic                    FDivStartE, IDivStartE;            // Start division or squareroot
  logic                    FWriteIntM;                        // Write to integer register
  logic [1:0]              ForwardXE, ForwardYE, ForwardZE;   // forwarding mux control signals
@ -86,20 +84,20 @@ module fpu (
  logic                    FRegWriteE;                        // Write floating-point register

  // regfile signals
-  logic [`FLEN-1:0]        FRD1D, FRD2D, FRD3D;                  // Read Data from FP register - decode stage
-  logic [`FLEN-1:0]        FRD1E, FRD2E, FRD3E;                  // Read Data from FP register - execute stage
-  logic [`FLEN-1:0]        XE;                                   // Input 1 to the various units (after forwarding)
-  logic [`XLEN-1:0]        IntSrcXE;                             // Input 1 to the various units (after forwarding)
-  logic [`FLEN-1:0]        PreYE, YE;                            // Input 2 to the various units (after forwarding)
-  logic [`FLEN-1:0]        PreZE, ZE;                            // Input 3 to the various units (after forwarding)
+  logic [P.FLEN-1:0]        FRD1D, FRD2D, FRD3D;                  // Read Data from FP register - decode stage
+  logic [P.FLEN-1:0]        FRD1E, FRD2E, FRD3E;                  // Read Data from FP register - execute stage
+  logic [P.FLEN-1:0]        XE;                                   // Input 1 to the various units (after forwarding)
+  logic [P.XLEN-1:0]        IntSrcXE;                             // Input 1 to the various units (after forwarding)
+  logic [P.FLEN-1:0]        PreYE, YE;                            // Input 2 to the various units (after forwarding)
+  logic [P.FLEN-1:0]        PreZE, ZE;                            // Input 3 to the various units (after forwarding)

  // unpacking signals
  logic                    XsE, YsE, ZsE;                        // input's sign - execute stage
  logic                    XsM, YsM;                             // input's sign - memory stage
-  logic [`NE-1:0]          XeE, YeE, ZeE;                        // input's exponent - execute stage
-  logic [`NE-1:0]          ZeM;                                  // input's exponent - memory stage
-  logic [`NF:0]            XmE, YmE, ZmE;                        // input's significand - execute stage
-  logic [`NF:0]            XmM, YmM, ZmM;                        // input's significand - memory stage
+  logic [P.NE-1:0]          XeE, YeE, ZeE;                        // input's exponent - execute stage
+  logic [P.NE-1:0]          ZeM;                                  // input's exponent - memory stage
+  logic [P.NF:0]            XmE, YmE, ZmE;                        // input's significand - execute stage
+  logic [P.NF:0]            XmM, YmM, ZmM;                        // input's significand - memory stage
  logic                    XNaNE, YNaNE, ZNaNE;                  // is the input a NaN - execute stage
  logic                    XNaNM, YNaNM, ZNaNM;                  // is the input a NaN - memory stage
  logic                    XSNaNE, YSNaNE, ZSNaNE;               // is the input a signaling NaN - execute stage
@ -110,56 +108,56 @@ module fpu (
  logic                    XInfE, YInfE, ZInfE;                  // is the input infinity - execute stage
  logic                    XInfM, YInfM, ZInfM;                  // is the input infinity - memory stage
  logic                    XExpMaxE;                             // is the exponent all ones (max value)
-  logic [`FLEN-1:0]        XPostBoxE;                            // X after fixing bad NaN box.  Needed for 1-input operations
+  logic [P.FLEN-1:0]        XPostBoxE;                            // X after fixing bad NaN box.  Needed for 1-input operations

  // Fma Signals
  logic                    FmaAddSubE;                           // Multiply by 1.0 when adding or subtracting
  logic [1:0]              FmaZSelE;                             // Select Z = Y when adding or subtracting, 0 when multiplying
-  logic [3*`NF+3:0]        SmE, SmM;                             // Sum significand
+  logic [3*P.NF+3:0]        SmE, SmM;                             // Sum significand
  logic                    FmaAStickyE, FmaAStickyM;             // FMA addend sticky bit output
-  logic [`NE+1:0]          SeE,SeM;                              // Sum exponent
+  logic [P.NE+1:0]          SeE,SeM;                              // Sum exponent
  logic                    InvAE, InvAM;                         // Invert addend
  logic                    AsE, AsM;                             // Addend sign
  logic                    PsE, PsM;                             // Product sign
  logic                    SsE, SsM;                             // Sum sign
-  logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM;                      // LZA sum leading zero count
+  logic [$clog2(3*P.NF+5)-1:0] SCntE, SCntM;                      // LZA sum leading zero count
  
  // Cvt Signals
-  logic [`NE:0]               CeE, CeM;                           // convert intermediate expoent
-  logic [`LOGCVTLEN-1:0]      CvtShiftAmtE, CvtShiftAmtM;         // how much to shift by
+  logic [P.NE:0]               CeE, CeM;                           // convert intermediate exponent
+  logic [P.LOGCVTLEN-1:0]      CvtShiftAmtE, CvtShiftAmtM;         // how much to shift by
  logic                       CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal
  logic                       CsE, CsM;                           // convert result sign
  logic                       IntZeroE, IntZeroM;                 // is the integer zero?
-  logic [`CVTLEN-1:0]         CvtLzcInE, CvtLzcInM;               // input to the Leading Zero Counter (priority encoder)
-  logic [`XLEN-1:0]           FCvtIntResM;                        // fcvt integer result (for IEU)
+  logic [P.CVTLEN-1:0]         CvtLzcInE, CvtLzcInM;               // input to the Leading Zero Counter (priority encoder)
+  logic [P.XLEN-1:0]           FCvtIntResM;                        // fcvt integer result (for IEU)
  
  // divide signals
-  logic [`DIVb:0]             QmM;                                // fdivsqrt signifcand
-  logic [`NE+1:0]             QeM;                                // fdivsqrt exponent
+  logic [P.DIVb:0]             QmM;                                // fdivsqrt significand
+  logic [P.NE+1:0]             QeM;                                // fdivsqrt exponent
  logic                       DivStickyM;                         // fdivsqrt sticky bit
  logic                       FDivDoneE, IFDivStartE;             // fdivsqrt control signals
-  logic [`XLEN-1:0]           FIntDivResultM;                     // fdivsqrt integer division result (for IEU)
+  logic [P.XLEN-1:0]           FIntDivResultM;                     // fdivsqrt integer division result (for IEU)

  // result and flag signals
-  logic [`XLEN-1:0]           ClassResE;                          // classify result
-  logic [`FLEN-1:0]           CmpFpResE;                          // compare result to FPU (min/max)
-  logic [`XLEN-1:0]           CmpIntResE;                         // compare result to IEU (eq/lt/le)
+  logic [P.XLEN-1:0]           ClassResE;                          // classify result
+  logic [P.FLEN-1:0]           CmpFpResE;                          // compare result to FPU (min/max)
+  logic [P.XLEN-1:0]           CmpIntResE;                         // compare result to IEU (eq/lt/le)
  logic                       CmpNVE;                             // compare invalid flag (Not Valid)     
-  logic [`FLEN-1:0]           SgnResE;                            // sign injection result
-  logic [`XLEN-1:0]           FIntResE;                           // FPU to IEU E-stage result (classify, compare, move)
-  logic [`FLEN-1:0]           PostProcResM;                       // Postprocessor output
+  logic [P.FLEN-1:0]           SgnResE;                            // sign injection result
+  logic [P.XLEN-1:0]           FIntResE;                           // FPU to IEU E-stage result (classify, compare, move)
+  logic [P.FLEN-1:0]           PostProcResM;                       // Postprocessor output
  logic [4:0]                 PostProcFlgM;                       // Postprocessor flags
  logic                       PreNVE, PreNVM;                     // selected flag that is ready in the memory stage     
-  logic [`FLEN-1:0]           FpResM, FpResW;                     // FPU preliminary result
-  logic [`FLEN-1:0]           PreFpResE, PreFpResM;               // selected result that is ready in the memory stage
-  logic [`FLEN-1:0]           FResultW;                           // final FP result being written to the FP register   
+  logic [P.FLEN-1:0]           FpResM, FpResW;                     // FPU preliminary result
+  logic [P.FLEN-1:0]           PreFpResE, PreFpResM;               // selected result that is ready in the memory stage
+  logic [P.FLEN-1:0]           FResultW;                           // final FP result being written to the FP register   

  // other signals
-  logic [`FLEN-1:0]           AlignedSrcAE;                       // align SrcA from IEU to the floating point format for fmv
-  logic [`FLEN-1:0]           BoxedZeroE;                         // Zero value for Z for multiplication, with NaN boxing if needed
-  logic [`FLEN-1:0]           BoxedOneE;                          // One value for Z for multiplication, with NaN boxing if needed
+  logic [P.FLEN-1:0]           AlignedSrcAE;                       // align SrcA from IEU to the floating point format for fmv
+  logic [P.FLEN-1:0]           BoxedZeroE;                         // Zero value for Z for multiplication, with NaN boxing if needed
+  logic [P.FLEN-1:0]           BoxedOneE;                          // One value for Z for multiplication, with NaN boxing if needed
  logic                       StallUnpackedM;                     // Stall unpacker outputs during multicycle fdivsqrt
-  logic [`FLEN-1:0]           SgnExtXE;                           // Sign-extended X input for move to integer
+  logic [P.FLEN-1:0]           SgnExtXE;                           // Sign-extended X input for move to integer
  logic                       mvsgn;                              // sign bit for extending move

  //////////////////////////////////////////////////////////////////////////////////////////
@ -167,7 +165,7 @@ module fpu (
  //////////////////////////////////////////////////////////////////////////////////////////

  // calculate FP control signals
-  fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), 
+  fctrl #(P) fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), 
              .Funct3E, .IntDivE, .InstrD,
              .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
              .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
@ -177,15 +175,15 @@ module fpu (
              .Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E);

  // FP register file
-  fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
+  fregfile #(P.FLEN) fregfile (.clk, .reset, .we4(FRegWriteW),
    .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), 
    .a4(RdW), .wd4(FResultW),
    .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));  

  // D/E pipeline registers  
-  flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
-  flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
-  flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
+  flopenrc #(P.FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
+  flopenrc #(P.FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
+  flopenrc #(P.FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);

  //////////////////////////////////////////////////////////////////////////////////////////
  // Execute Stage: hazards, forwarding, unpacking, execution units
@ -197,37 +195,37 @@ module fpu (
    .XEnD, .YEnD, .ZEnD, .FPUStallD, .ForwardXE, .ForwardYE, .ForwardZE);

  // forwarding muxs
-  mux3  #(`FLEN)  fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE);
-  mux3  #(`FLEN)  fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE);
-  mux3  #(`FLEN)  fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE);
+  mux3  #(P.FLEN)  fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE);
+  mux3  #(P.FLEN)  fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE);
+  mux3  #(P.FLEN)  fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE);

  // Select NAN-boxed value of Y = 1.0 in proper format for fma to add/subtract X*Y+Z
-  if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)};
-  else if(`FPSIZES == 2) 
-      mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
-  else if(`FPSIZES == 3 | `FPSIZES == 4) 
-      mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)}, 
-                          {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)}, 
-                          {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)}, 
-                          {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
+  if(P.FPSIZES == 1) assign BoxedOneE = {2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)};
+  else if(P.FPSIZES == 2) 
+      mux2 #(P.FLEN) fonemux ({{P.FLEN-P.LEN1{1'b1}}, 2'b0, {P.NE1-1{1'b1}}, (P.NF1)'(0)}, {2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
+  else if(P.FPSIZES == 3 | P.FPSIZES == 4) 
+      mux4 #(P.FLEN) fonemux ({{P.FLEN-P.S_LEN{1'b1}}, 2'b0, {P.S_NE-1{1'b1}}, (P.S_NF)'(0)}, 
+                          {{P.FLEN-P.D_LEN{1'b1}}, 2'b0, {P.D_NE-1{1'b1}}, (P.D_NF)'(0)}, 
+                          {{P.FLEN-P.H_LEN{1'b1}}, 2'b0, {P.H_NE-1{1'b1}}, (P.H_NF)'(0)}, 
+                          {2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
  assign FmaAddSubE = OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10);
-  mux2  #(`FLEN)  fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract
+  mux2  #(P.FLEN)  fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract
  
  // Select NAN-boxed value of Z = 0.0 in proper format for FMA for multiply X*Y+Z
  // For add and subtract, Z comes from second source operand
- if(`FPSIZES == 1) assign BoxedZeroE = 0;
-  else if(`FPSIZES == 2) 
-    mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
-  else if(`FPSIZES == 3 | `FPSIZES == 4)
-    mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}}, 
-                                {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}}, 
-                                {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}}, 
-                                (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
+ if(P.FPSIZES == 1) assign BoxedZeroE = 0;
+  else if(P.FPSIZES == 2) 
+    mux2 #(P.FLEN) fmulzeromux ({{P.FLEN-P.LEN1{1'b1}}, {P.LEN1{1'b0}}}, (P.FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
+  else if(P.FPSIZES == 3 | P.FPSIZES == 4)
+    mux4 #(P.FLEN) fmulzeromux ({{P.FLEN-P.S_LEN{1'b1}}, {P.S_LEN{1'b0}}}, 
+                                {{P.FLEN-P.D_LEN{1'b1}}, {P.D_LEN{1'b0}}}, 
+                                {{P.FLEN-P.H_LEN{1'b1}}, {P.H_LEN{1'b0}}}, 
+                                (P.FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
  assign FmaZSelE = {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]};
-  mux3  #(`FLEN)  fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE);
+  mux3  #(P.FLEN)  fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE);

  // unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infifnity
-  unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), 
+  unpack #(P) unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), 
    .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE),
    .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), 
    .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE), 
@ -235,99 +233,99 @@ module fpu (
    .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE), .XPostBox(XPostBoxE));
  
  // fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub
-  fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), 
+  fma #(P) fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), 
    .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .OpCtrl(OpCtrlE), 
    .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE)); 

  // divide and square root: fdiv, fsqrt, optionally integer division
-  fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
+  fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
    .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
    .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
    .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, 
    .QmM, .FIntDivResultM);

  // compare: fmin/fmax, flt/fle/feq
-  fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), 
+  fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), 
    .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), 
    .XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), 
    .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));

  // sign injection: fsgnj/fsgnjx/fsgnjn
-  fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XPostBoxE), .Fmt(FmtE), .SgnRes(SgnResE));
+  fsgninj #(P) fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XPostBoxE), .Fmt(FmtE), .SgnRes(SgnResE));

  // classify: fclass
-  fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE), 
+  fclassify #(P) fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE), 
    .XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));

  // convert: fcvt.*.*
-  fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), 
+  fcvt #(P) fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), 
    .ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE), 
    .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE));


  // NaN Box SrcA to convert integer to requested FP size
-  if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE};
-  else if(`FPSIZES == 2) 
-    mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
-  else if(`FPSIZES == 3 | `FPSIZES == 4)
-    mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ForwardedSrcAE[`S_LEN-1:0]}, 
-                            {{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]}, 
-                            {{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]}, 
-                            {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
+  if(P.FPSIZES == 1) assign AlignedSrcAE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE};
+  else if(P.FPSIZES == 2) 
+    mux2 #(P.FLEN) SrcAMux ({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
+  else if(P.FPSIZES == 3 | P.FPSIZES == 4)
+    mux4 #(P.FLEN) SrcAMux ({{P.FLEN-P.S_LEN{1'b1}}, ForwardedSrcAE[P.S_LEN-1:0]}, 
+                            {{P.FLEN-P.D_LEN{1'b1}}, ForwardedSrcAE[P.D_LEN-1:0]}, 
+                            {{P.FLEN-P.H_LEN{1'b1}}, ForwardedSrcAE[P.H_LEN-1:0]}, 
+                            {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes

  // select a result that may be written to the FP register
-  mux3  #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
+  mux3  #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
  assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);

  // select the result that may be written to the integer register with fmv - to IEU
-  if(`FPSIZES == 1) begin
-    assign mvsgn = XE[`FLEN-1];
+  if(P.FPSIZES == 1) begin
+    assign mvsgn = XE[P.FLEN-1];
    assign SgnExtXE = XE;
-  end else if(`FPSIZES == 2) begin
-    mux2 #(1)     sgnmux (XE[`LEN1-1], XE[`FLEN-1],FmtE, mvsgn);
-    mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{mvsgn}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE);
-  end else if(`FPSIZES == 3 | `FPSIZES == 4) begin
-    mux4 #(1)     sgnmux (XE[`H_LEN-1], XE[`S_LEN-1], XE[`D_LEN-1], XE[`LLEN-1], FmtE, mvsgn);
-    mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{mvsgn}}, XE[`H_LEN-1:0]}, 
-                                {{`FLEN-`S_LEN{mvsgn}}, XE[`S_LEN-1:0]}, 
-                                {{`FLEN-`D_LEN{mvsgn}}, XE[`D_LEN-1:0]}, 
+  end else if(P.FPSIZES == 2) begin
+    mux2 #(1)     sgnmux (XE[P.LEN1-1], XE[P.FLEN-1],FmtE, mvsgn);
+    mux2 #(P.FLEN) sgnextmux ({{P.FLEN-P.LEN1{mvsgn}}, XE[P.LEN1-1:0]}, XE, FmtE, SgnExtXE);
+  end else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin
+    mux4 #(1)     sgnmux (XE[P.H_LEN-1], XE[P.S_LEN-1], XE[P.D_LEN-1], XE[P.LLEN-1], FmtE, mvsgn);
+    mux4 #(P.FLEN) fmulzeromux ({{P.FLEN-P.H_LEN{mvsgn}}, XE[P.H_LEN-1:0]}, 
+                                {{P.FLEN-P.S_LEN{mvsgn}}, XE[P.S_LEN-1:0]}, 
+                                {{P.FLEN-P.D_LEN{mvsgn}}, XE[P.D_LEN-1:0]}, 
                                XE, FmtE, SgnExtXE); 
  end

-  if (`FLEN>`XLEN)
-    assign IntSrcXE = SgnExtXE[`XLEN-1:0];
+  if (P.FLEN>P.XLEN)
+    assign IntSrcXE = SgnExtXE[P.XLEN-1:0];
  else 
-    assign IntSrcXE = {{`XLEN-`FLEN{mvsgn}}, SgnExtXE};
-  mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
+    assign IntSrcXE = {{P.XLEN-P.FLEN{mvsgn}}, SgnExtXE};
+  mux3 #(P.XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);

  // E/M pipe registers

  // Need to stall during divsqrt iterations to avoid capturing bad flags from stale forwarded sources
  assign StallUnpackedM = StallM | (FDivBusyE & ~IFDivStartE | FDivDoneE); 

-  flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
-  flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
-  flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
-  flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
-  flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
+  flopenrc #(P.NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
+  flopenrc #(P.NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
+  flopenrc #(P.FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
+  flopenrc #(P.XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
+  flopenrc #(P.FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
  flopenr #(13) EMFpReg5 (clk, reset, ~StallUnpackedM, 
    {XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
    {XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});     
  flopenrc #(1)  EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);      
-  flopenrc #(3*`NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
-  flopenrc #($clog2(3*`NF+5)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM,
+  flopenrc #(3*P.NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
+  flopenrc #($clog2(3*P.NF+5)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
    {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
    {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM});
-  flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
+  flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
    {CeE, CvtShiftAmtE, CvtResSubnormUfE, CsE, IntZeroE, CvtLzcInE},
    {CeM, CvtShiftAmtM, CvtResSubnormUfM, CsM, IntZeroM, CvtLzcInM});
-  flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM);
+  flopenrc #(P.FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM);

  //////////////////////////////////////////////////////////////////////////////////////////
  // Memory Stage: postprocessor and result muxes
  //////////////////////////////////////////////////////////////////////////////////////////

-  postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), 
+  postprocess #(P) postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), 
    .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
    .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), 
    .FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
@ -337,18 +335,18 @@ module fpu (

  // FPU flag selection - to privileged
  mux2  #(5)      FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
-  mux2  #(`FLEN)  FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);
+  mux2  #(P.FLEN)  FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);

  // M/W pipe registers
-  flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); 
-  flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); 
-  flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW); 
+  flopenrc #(P.FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); 
+  flopenrc #(P.XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); 
+  flopenrc #(P.XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW); 

  //////////////////////////////////////////////////////////////////////////////////////////
  // Writeback Stage: result mux
  //////////////////////////////////////////////////////////////////////////////////////////

  // select the result to be written to the FP register
-  mux2  #(`FLEN)  FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW);
+  mux2  #(P.FLEN)  FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW);

 endmodule // fpu
--- a/src/fpu/fregfile.sv
+++ b/src/fpu/fregfile.sv
@ -26,17 +26,15 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fregfile (
+module fregfile #(parameter FLEN) (
  input logic              clk, reset,
  input logic              we4,             // write enable
  input logic [4:0]        a1, a2, a3, a4,  // adresses
-  input logic [`FLEN-1:0]  wd4,             // write data
-  output logic [`FLEN-1:0] rd1, rd2, rd3    // read data
+  input logic [FLEN-1:0]  wd4,             // write data
+  output logic [FLEN-1:0] rd1, rd2, rd3    // read data
 );
   
-   logic [`FLEN-1:0] rf[31:0];
+   logic [FLEN-1:0] rf[31:0];
   integer i;
   
   // three ported register file
--- a/src/fpu/fsgninj.sv
+++ b/src/fpu/fsgninj.sv
@ -26,14 +26,12 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fsgninj (  
+module fsgninj import cvw::*;  #(parameter cvw_t P) (
  input  logic                Xs, Ys, // X and Y sign bits
-  input  logic [`FLEN-1:0]    X,      // X
-  input  logic [`FMTBITS-1:0] Fmt,    // format
+  input  logic [P.FLEN-1:0]    X,      // X
+  input  logic [P.FMTBITS-1:0] Fmt,    // format
  input  logic [1:0]          OpCtrl, // operation control
-  output logic [`FLEN-1:0]    SgnRes  // result
+  output logic [P.FLEN-1:0]    SgnRes  // result
 );

  logic ResSgn;  // result sign
@ -50,30 +48,30 @@ module fsgninj (
  //    - uses NaN-blocking format
  //        - if there are any unused bits the most significant bits are filled with 1s
  
-  if (`FPSIZES == 1)
-    assign SgnRes = {ResSgn, X[`FLEN-2:0]};
-  else if (`FPSIZES == 2)
-    assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
-  else if (`FPSIZES ==  3) begin
+  if (P.FPSIZES == 1)
+    assign SgnRes = {ResSgn, X[P.FLEN-2:0]};
+  else if (P.FPSIZES == 2)
+    assign SgnRes = {~Fmt|ResSgn, X[P.FLEN-2:P.LEN1], Fmt ? X[P.LEN1-1] : ResSgn, X[P.LEN1-2:0]};
+  else if (P.FPSIZES ==  3) begin
    logic [2:0] SgnBits;
    always_comb
      case (Fmt)
-        `FMT:    SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
-        `FMT1:   SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
-        `FMT2:   SgnBits = {2'b11, ResSgn};
+        P.FMT:    SgnBits = {ResSgn, X[P.LEN1-1], X[P.LEN2-1]};
+        P.FMT1:   SgnBits = {1'b1, ResSgn, X[P.LEN2-1]};
+        P.FMT2:   SgnBits = {2'b11, ResSgn};
        default: SgnBits = {3{1'bx}};
      endcase
-    assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
-  end else if (`FPSIZES == 4) begin
+    assign SgnRes = {SgnBits[2], X[P.FLEN-2:P.LEN1], SgnBits[1], X[P.LEN1-2:P.LEN2], SgnBits[0], X[P.LEN2-2:0]};
+  end else if (P.FPSIZES == 4) begin
    logic [3:0] SgnBits;
    always_comb
      case (Fmt)
-        `Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
-        `D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]};
-        `S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
-        `H_FMT: SgnBits = {3'b111, ResSgn};
+        P.Q_FMT: SgnBits = {ResSgn, X[P.D_LEN-1], X[P.S_LEN-1], X[P.H_LEN-1]};
+        P.D_FMT: SgnBits = {1'b1, ResSgn, X[P.S_LEN-1], X[P.H_LEN-1]};
+        P.S_FMT: SgnBits = {2'b11, ResSgn, X[P.H_LEN-1]};
+        P.H_FMT: SgnBits = {3'b111, ResSgn};
      endcase
-    assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
+    assign SgnRes = {SgnBits[3], X[P.Q_LEN-2:P.D_LEN], SgnBits[2], X[P.D_LEN-2:P.S_LEN], SgnBits[1], X[P.S_LEN-2:P.H_LEN], SgnBits[0], X[P.H_LEN-2:0]};
  end

 endmodule
--- a/src/fpu/postproc/cvtshiftcalc.sv
+++ b/src/fpu/postproc/cvtshiftcalc.sv
@ -26,22 +26,20 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module cvtshiftcalc(
+module cvtshiftcalc import cvw::*;  #(parameter cvw_t P) (
  input  logic                    XZero,              // is the input zero?
  input  logic                    ToInt,              // to integer conversion?
  input  logic                    IntToFp,            // interger to floating point conversion?
-  input  logic [`FMTBITS-1:0]     OutFmt,             // output format
-  input  logic [`NE:0]            CvtCe,              // the calculated expoent
-  input  logic [`NF:0]            Xm,                 // input mantissas
-  input  logic [`CVTLEN-1:0]      CvtLzcIn,           // input to the Leading Zero Counter (without msb)
+  input  logic [P.FMTBITS-1:0]     OutFmt,             // output format
+  input  logic [P.NE:0]            CvtCe,              // the calculated expoent
+  input  logic [P.NF:0]            Xm,                 // input mantissas
+  input  logic [P.CVTLEN-1:0]      CvtLzcIn,           // input to the Leading Zero Counter (without msb)
  input  logic                    CvtResSubnormUf,    // is the conversion result subnormal or underlows
  output logic                    CvtResUf,           // does the cvt result unerflow
-  output logic [`CVTLEN+`NF:0]    CvtShiftIn          // number to be shifted
+  output logic [P.CVTLEN+P.NF:0]    CvtShiftIn          // number to be shifted
 );

-  logic [$clog2(`NF):0]           ResNegNF;           // the result's fraction length negated (-NF)
+  logic [$clog2(P.NF):0]           ResNegNF;           // the result's fraction length negated (-NF)

  ///////////////////////////////////////////////////////////////////////////
  // shifter
@ -49,7 +47,7 @@ module cvtshiftcalc(

  // seclect the input to the shifter
  //      fp  -> int:
-  //          |  `XLEN  zeros |     mantissa      | 0's if nessisary |
+  //          |  P.XLEN  zeros |     mantissa      | 0's if necessary |
  //                          .
  //          Other problems:
  //              - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
@ -57,7 +55,7 @@ module cvtshiftcalc(
  //                  - ex: for the case 0010000.... (double)
  //      ??? -> fp:
  //          - if result is subnormal or underflowed then we want to shift right i.e. shift right then shift left:
-  //              |  `NF-1  zeros   |     mantissa      | 0's if nessisary | 
+  //              |  P.NF-1  zeros   |     mantissa      | 0's if necessary | 
  //              .
  //          - otherwise:
  //              |     LzcInM      | 0's if nessisary | 
@ -67,33 +65,33 @@ module cvtshiftcalc(
  //                                                        get rid of round bit if needed
  //                                                        |                    add sticky bit if needed
  //                                                        |                    |
-      if (ToInt)               CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}};
-      else if (CvtResSubnormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}};
-      else                     CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}};
+      if (ToInt)               CvtShiftIn = {{P.XLEN{1'b0}}, Xm[P.NF]&~CvtCe[P.NE], Xm[P.NF-1]|(CvtCe[P.NE]&Xm[P.NF]), Xm[P.NF-2:0], {P.CVTLEN-P.XLEN{1'b0}}};
+      else if (CvtResSubnormUf) CvtShiftIn = {{P.NF-1{1'b0}}, Xm, {P.CVTLEN-P.NF+1{1'b0}}};
+      else                     CvtShiftIn = {CvtLzcIn, {P.NF+1{1'b0}}};
  
  // choose the negative of the fraction size
-  if (`FPSIZES == 1) begin
-      assign ResNegNF = -($clog2(`NF)+1)'(`NF); 
+  if (P.FPSIZES == 1) begin
+      assign ResNegNF = -($clog2(P.NF)+1)'(P.NF); 

-  end else if (`FPSIZES == 2) begin
-      assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
+  end else if (P.FPSIZES == 2) begin
+      assign ResNegNF = OutFmt ? -($clog2(P.NF)+1)'(P.NF) : -($clog2(P.NF)+1)'(P.NF1);

-  end else if (`FPSIZES == 3) begin
+  end else if (P.FPSIZES == 3) begin
      always_comb
          case (OutFmt)
-              `FMT:  ResNegNF = -($clog2(`NF)+1)'(`NF);
-              `FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
-              `FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
+              P.FMT:  ResNegNF = -($clog2(P.NF)+1)'(P.NF);
+              P.FMT1: ResNegNF = -($clog2(P.NF)+1)'(P.NF1);
+              P.FMT2: ResNegNF = -($clog2(P.NF)+1)'(P.NF2);
              default: ResNegNF = 1'bx;
          endcase

-  end else if (`FPSIZES == 4) begin        
+  end else if (P.FPSIZES == 4) begin        
      always_comb
          case (OutFmt)
-              2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
-              2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
-              2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
-              2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
+              2'h3: ResNegNF = -($clog2(P.NF)+1)'(P.Q_NF);
+              2'h1: ResNegNF = -($clog2(P.NF)+1)'(P.D_NF);
+              2'h0: ResNegNF = -($clog2(P.NF)+1)'(P.S_NF);
+              2'h2: ResNegNF = -($clog2(P.NF)+1)'(P.H_NF);
          endcase
  end

@ -102,6 +100,6 @@ module cvtshiftcalc(
  // determine if the result underflows ??? -> fp
  //      - if the first 1 is shifted out of the result then the result underflows
  //      - can't underflow an integer to fp conversions
-  assign CvtResUf = ($signed(CvtCe) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp;
+  assign CvtResUf = ($signed(CvtCe) < $signed({{P.NE-$clog2(P.NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp;
  
-endmodule
+endmodule
--- a/src/fpu/postproc/divshiftcalc.sv
+++ b/src/fpu/postproc/divshiftcalc.sv
@ -26,24 +26,22 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////`include "wally-config.vh"

-`include "wally-config.vh"
-
-module divshiftcalc(
-  input  logic [`DIVb:0]              DivQm,              // divsqrt significand
-  input  logic [`NE+1:0]              DivQe,              // divsqrt exponent
-  output logic [`LOGNORMSHIFTSZ-1:0]  DivShiftAmt,        // divsqrt shift amount
-  output logic [`NORMSHIFTSZ-1:0]     DivShiftIn,         // divsqrt shift input
+module divshiftcalc import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.DIVb:0]              DivQm,              // divsqrt significand
+  input  logic [P.NE+1:0]              DivQe,              // divsqrt exponent
+  output logic [P.LOGNORMSHIFTSZ-1:0]  DivShiftAmt,        // divsqrt shift amount
+  output logic [P.NORMSHIFTSZ-1:0]     DivShiftIn,         // divsqrt shift input
  output logic                        DivResSubnorm,      // is the divsqrt result subnormal
  output logic                        DivSubnormShiftPos  // is the subnormal shift amount positive
 );

-  logic [`LOGNORMSHIFTSZ-1:0]         NormShift;          // normalized result shift amount
-  logic [`LOGNORMSHIFTSZ-1:0]         DivSubnormShiftAmt; // subnormal result shift amount (killed if negitive)
-  logic [`NE+1:0]                     DivSubnormShift;    // subnormal result shift amount
+  logic [P.LOGNORMSHIFTSZ-1:0]         NormShift;          // normalized result shift amount
+  logic [P.LOGNORMSHIFTSZ-1:0]         DivSubnormShiftAmt; // subnormal result shift amount (killed if negitive)
+  logic [P.NE+1:0]                     DivSubnormShift;    // subnormal result shift amount

  // is the result subnormal
  // if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes
-  assign DivResSubnorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);
+  assign DivResSubnorm = DivQe[P.NE+1]|(~|DivQe[P.NE+1:0]);

  // if the result is subnormal
  //  00000000x.xxxxxx...                     Exp = DivQe
@ -51,8 +49,8 @@ module divshiftcalc(
  //  .00xxxxxxxxxxxxx... << DivQe+NF+1  Exp = +1
  //  .0000xxxxxxxxxxx... >> 1                Exp = 1
  // Left shift amount  = DivQe+NF+1-1
-  assign DivSubnormShift = (`NE+2)'(`NF)+DivQe;
-  assign DivSubnormShiftPos = ~DivSubnormShift[`NE+1];
+  assign DivSubnormShift = (P.NE+2)'(P.NF)+DivQe;
+  assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1];

  // if the result is normalized
  //  00000000x.xxxxxx...                     Exp = DivQe
@ -62,13 +60,13 @@ module divshiftcalc(
  //  00000000xx.xxxxx... << 1?               Exp = DivQe-1 (determined after)
  // inital Left shift amount  = NF
  // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit
-  assign NormShift = (`LOGNORMSHIFTSZ)'(`NF);
+  assign NormShift = (P.LOGNORMSHIFTSZ)'(P.NF);

  // if the shift amount is negitive then don't shift (keep sticky bit)
  // need to multiply the early termination shift by LOGR*DIVCOPIES =  left shift of log2(LOGR*DIVCOPIES)
-  assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[`LOGNORMSHIFTSZ-1:0] : '0;
+  assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZ-1:0] : '0;
  assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;

  // pre-shift the divider result for normalization
-  assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb-1-`NF{1'b0}}};
+  assign DivShiftIn = {{P.NF{1'b0}}, DivQm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
 endmodule
--- a/src/fpu/postproc/flags.sv
+++ b/src/fpu/postproc/flags.sv
@ -25,18 +25,17 @@
 // either express or implied. See the License for the specific language governing permissions 
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
-`include "wally-config.vh"

-module flags(
+module flags import cvw::*;  #(parameter cvw_t P) (
  input  logic                Xs,                     // X sign
-  input  logic [`FMTBITS-1:0] OutFmt,                 // output format
+  input  logic [P.FMTBITS-1:0] OutFmt,                 // output format
  input  logic                InfIn,                  // is a Inf input being used
  input  logic                XInf, YInf, ZInf,       // inputs are infinity
  input  logic                NaNIn,                  // is a NaN input being used
  input  logic                XSNaN, YSNaN, ZSNaN,    // inputs are signaling NaNs
  input  logic                XZero, YZero,           // inputs are zero
-  input  logic [`NE+1:0]      FullRe,                 // Re with bits to determine sign and overflow
-  input  logic [`NE+1:0]      Me,                     // exponent of the normalized sum
+  input  logic [P.NE+1:0]      FullRe,                 // Re with bits to determine sign and overflow
+  input  logic [P.NE+1:0]      Me,                     // exponent of the normalized sum
  // rounding
  input  logic                Plus1,                  // do you add one for rounding
  input  logic                Round, Guard, Sticky,   // bits used to determine rounding
@ -47,7 +46,7 @@ module flags(
  input  logic                IntToFp,                // convert integer to floating point
  input  logic                Int64,                  // convert to 64 bit integer
  input  logic                Signed,                 // convert to a signed integer
-  input  logic [`NE:0]        CvtCe,                  // the calculated expoent - Cvt
+  input  logic [P.NE:0]        CvtCe,                  // the calculated expoent - Cvt
  input  logic [1:0]          CvtNegResMsbs,          // the negitive integer result's most significant bits
  // divsqrt
  input  logic                DivOp,                  // conversion opperation?
@ -92,33 +91,33 @@ module flags(
  //          - any of the bits after the most significan 1 is one
  //          - the most signifcant in 65 or 33 is still a one in the number and
  //            one of the later bits is one
-  if (`FPSIZES == 1) begin
-      assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
-      assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
+  if (P.FPSIZES == 1) begin
+      assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
+      assign ShiftGtIntSz = (|FullRe[P.NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));

-  end else if (`FPSIZES == 2) begin    
-      assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
+  end else if (P.FPSIZES == 2) begin    
+      assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);

-      assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
-  end else if (`FPSIZES == 3) begin
+      assign ShiftGtIntSz = (|FullRe[P.NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
+  end else if (P.FPSIZES == 3) begin
      always_comb
          case (OutFmt)
-              `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
-              `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
-              `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
+              P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
+              P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
+              P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
              default: ResExpGteMax = 1'bx;
          endcase
-          assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
+          assign ShiftGtIntSz = (|FullRe[P.NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));

-  end else if (`FPSIZES == 4) begin        
+  end else if (P.FPSIZES == 4) begin        
      always_comb
          case (OutFmt)
-              `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
-              `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
-              `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
-              `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
+              P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
+              P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
+              P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
+              P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
          endcase
-          assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
+          assign ShiftGtIntSz = (|FullRe[P.Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
  end


@ -127,7 +126,7 @@ module flags(
  //                 |           and the exponent isn't negitive
  //                 |           |                   if the input isnt infinity or NaN
  //                 |           |                   |            
-  assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
+  assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero);

  ///////////////////////////////////////////////////////////////////////////////
  // Underflow
@ -141,7 +140,7 @@ module flags(
  //                  |                    |                    |                                      |                     and if the result is not exact
  //                  |                    |                    |                                      |                     |               and if the input isnt infinity or NaN
  //                  |                    |                    |                                      |                     |               |
-  assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
+  assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);


  ///////////////////////////////////////////////////////////////////////////////
@ -156,7 +155,7 @@ module flags(
  //                  if the res is too small to be represented and not 0
  //                  |                                     and if the res is not invalid (outside the integer bounds)
  //                  |                                     |
-  assign IntInexact = ((CvtCe[`NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid;
+  assign IntInexact = ((CvtCe[P.NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid;

  // select the inexact flag to output
  assign Inexact = ToInt ? IntInexact : FpInexact;
@ -178,7 +177,7 @@ module flags(
  //                  |           |                                  |                    |               or the res rounds up out of bounds
  //                  |           |                                  |                    |                       and the res didn't underflow
  //                  |           |                                  |                    |                       |
-  assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
+  assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[P.NE+1])|((Xs&~Signed)&(~((CvtCe[P.NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
  //                                                                                                     |
  //                                                                                                     or when the positive res rounds up out of range
  
--- a/src/fpu/postproc/fmashiftcalc.sv
+++ b/src/fpu/postproc/fmashiftcalc.sv
@ -26,21 +26,19 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module fmashiftcalc(
-  input  logic [`FMTBITS-1:0]         Fmt,            // precision 1 = double 0 = single
-  input  logic [`NE+1:0]              FmaSe,          // sum's exponent
-  input  logic [3*`NF+3:0]            FmaSm,          // the positive sum
-  input  logic [$clog2(3*`NF+5)-1:0]  FmaSCnt,        // normalization shift count
-  output logic [`NE+1:0]              NormSumExp,     // exponent of the normalized sum not taking into account Subnormal or zero results
+module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.FMTBITS-1:0]         Fmt,            // precision 1 = double 0 = single
+  input  logic [P.NE+1:0]              FmaSe,          // sum's exponent
+  input  logic [3*P.NF+3:0]            FmaSm,          // the positive sum
+  input  logic [$clog2(3*P.NF+5)-1:0]  FmaSCnt,        // normalization shift count
+  output logic [P.NE+1:0]              NormSumExp,     // exponent of the normalized sum not taking into account Subnormal or zero results
  output logic                        FmaSZero,       // is the result subnormal - calculated before LZA corection
  output logic                        FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
-  output logic [$clog2(3*`NF+5)-1:0]  FmaShiftAmt,    // normalization shift count
-  output logic [3*`NF+5:0]            FmaShiftIn      // is the sum zero
+  output logic [$clog2(3*P.NF+5)-1:0]  FmaShiftAmt,    // normalization shift amount
+  output logic [3*P.NF+5:0]            FmaShiftIn      // input to the normalization shifter (the sum with two leading zeros)
 );
-  logic [`NE+1:0] PreNormSumExp;  // the exponent of the normalized sum with the `FLEN bias
-  logic [`NE+1:0] BiasCorr;       // correction for bias
+  logic [P.NE+1:0] PreNormSumExp;  // the exponent of the normalized sum with the P.FLEN bias
+  logic [P.NE+1:0] BiasCorr;       // correction for bias

  ///////////////////////////////////////////////////////////////////////////////
  // Normalization
@ -50,75 +48,75 @@ module fmashiftcalc(
  assign FmaSZero = ~(|FmaSm);

  // calculate the sum's exponent
-  assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+5)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3);
+  assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+5)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+3);

  //convert the sum's exponent into the proper percision
-  if (`FPSIZES == 1) begin
+  if (P.FPSIZES == 1) begin
    assign NormSumExp = PreNormSumExp;
-  end else if (`FPSIZES == 2) begin
-    assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS);
+  end else if (P.FPSIZES == 2) begin
+    assign BiasCorr = Fmt ? (P.NE+2)'(0) : (P.NE+2)'(P.BIAS1-P.BIAS);
    assign NormSumExp = PreNormSumExp+BiasCorr;
-  end else if (`FPSIZES == 3) begin
+  end else if (P.FPSIZES == 3) begin
    always_comb begin
        case (Fmt)
-            `FMT: BiasCorr =  '0;
-            `FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS);
-            `FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS);
+            P.FMT: BiasCorr =  '0;
+            P.FMT1: BiasCorr = (P.NE+2)'(P.BIAS1-P.BIAS);
+            P.FMT2: BiasCorr = (P.NE+2)'(P.BIAS2-P.BIAS);
            default: BiasCorr = 'x;
        endcase
    end
    assign NormSumExp = PreNormSumExp+BiasCorr;
-  end else if (`FPSIZES == 4) begin
+  end else if (P.FPSIZES == 4) begin
    always_comb begin
        case (Fmt)
            2'h3: BiasCorr = '0;
-            2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS);
-            2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS);
-            2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS);
+            2'h1: BiasCorr = (P.NE+2)'(P.D_BIAS-P.Q_BIAS);
+            2'h0: BiasCorr = (P.NE+2)'(P.S_BIAS-P.Q_BIAS);
+            2'h2: BiasCorr = (P.NE+2)'(P.H_BIAS-P.Q_BIAS);
        endcase
    end
    assign NormSumExp = PreNormSumExp+BiasCorr;
  end
  
  // determine if the result is subnormal: (NormSumExp <= 0) & (NormSumExp >= -FracLen) & ~FmaSZero
-  if (`FPSIZES == 1) begin
+  if (P.FPSIZES == 1) begin
    logic Sum0LEZ, Sum0GEFL;
-    assign Sum0LEZ  = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
-    assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
+    assign Sum0LEZ  = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
+    assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2));
    assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
-  end else if (`FPSIZES == 2) begin
+  end else if (P.FPSIZES == 2) begin
    logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
-    assign Sum0LEZ  = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
-    assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
-    assign Sum1LEZ  = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
-    assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
+    assign Sum0LEZ  = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
+    assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2));
+    assign Sum1LEZ  = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS1));
+    assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF1-2+P.BIAS-P.BIAS1)) | ~|PreNormSumExp;
    assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
-  end else if (`FPSIZES == 3) begin
+  end else if (P.FPSIZES == 3) begin
    logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
-    assign Sum0LEZ  = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
-    assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
-    assign Sum1LEZ  = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
-    assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
-    assign Sum2LEZ  = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2));
-    assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp;
+    assign Sum0LEZ  = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
+    assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2));
+    assign Sum1LEZ  = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS1));
+    assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF1-2+P.BIAS-P.BIAS1)) | ~|PreNormSumExp;
+    assign Sum2LEZ  = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS2));
+    assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF2-2+P.BIAS-P.BIAS2)) | ~|PreNormSumExp;
    always_comb begin
      case (Fmt)
-        `FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
-        `FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
-        `FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
+        P.FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
+        P.FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
+        P.FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
        default: FmaPreResultSubnorm = 1'bx;
      endcase
    end
-  end else if (`FPSIZES == 4) begin
+  end else if (P.FPSIZES == 4) begin
    logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
-    assign Sum0LEZ  = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
-    assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
-    assign Sum1LEZ  = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS));
-    assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp;
-    assign Sum2LEZ  = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS));
-    assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp;
-    assign Sum3LEZ  = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS));
-    assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp;
+    assign Sum0LEZ  = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
+    assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2));
+    assign Sum1LEZ  = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.D_BIAS));
+    assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.D_NF-2+P.BIAS-P.D_BIAS)) | ~|PreNormSumExp;
+    assign Sum2LEZ  = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.S_BIAS));
+    assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.S_NF-2+P.BIAS-P.S_BIAS)) | ~|PreNormSumExp;
+    assign Sum3LEZ  = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.H_BIAS));
+    assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.H_NF-2+P.BIAS-P.H_BIAS)) | ~|PreNormSumExp;
    always_comb begin
      case (Fmt)
        2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
@ -132,6 +130,6 @@ module fmashiftcalc(
  // set and calculate the shift input and amount
  //  - shift once if killing a product and the result is subnormal
  assign FmaShiftIn = {2'b0, FmaSm};
-  if (`FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2): FmaSCnt+1;
-  else               assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2)+BiasCorr[$clog2(3*`NF+5)-1:0]: FmaSCnt+1;
+  if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+2): FmaSCnt+1;
+  else               assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+2)+BiasCorr[$clog2(3*P.NF+5)-1:0]: FmaSCnt+1;
 endmodule
--- a/src/fpu/postproc/negateintres.sv
+++ b/src/fpu/postproc/negateintres.sv
@ -25,26 +25,25 @@
 // either express or implied. See the License for the specific language governing permissions 
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
-`include "wally-config.vh"

-module negateintres(
+module negateintres import cvw::*;  #(parameter cvw_t P) (
  input  logic                    Signed,         // is the integer input signed
  input  logic                    Int64,          // is the integer input 64-bits
  input  logic                    Plus1,          // should one be added for rounding?
  input  logic                    Xs,             // X sign
-  input  logic [`NORMSHIFTSZ-1:0] Shifted,        // output from normalization shifter
+  input  logic [P.NORMSHIFTSZ-1:0] Shifted,        // output from normalization shifter
  output logic [1:0]              CvtNegResMsbs,  // most signigficant bits of possibly negated result
-  output logic [`XLEN+1:0]        CvtNegRes       // possibly negated integer result
+  output logic [P.XLEN+1:0]        CvtNegRes       // possibly negated integer result
 );

-  logic [`XLEN+1:0]               CvtPreRes;      // integer result with rounding
+  logic [P.XLEN+1:0]               CvtPreRes;      // integer result with rounding
  logic [2:0]                     CvtNegResMsbs3; // first three msbs of possibly negated result
    
  // round and negate the positive res if needed
-  assign CvtPreRes =  {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
-  mux2 #(`XLEN+2) resmux(CvtPreRes, -CvtPreRes, Xs, CvtNegRes);
+  assign CvtPreRes =  {2'b0, Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.XLEN]}+{{P.XLEN+1{1'b0}}, Plus1};
+  mux2 #(P.XLEN+2) resmux(CvtPreRes, -CvtPreRes, Xs, CvtNegRes);
    
  // select 2 most significant bits
-  mux2 #(3) msb3mux(CvtNegRes[33:31], CvtNegRes[`XLEN+1:`XLEN-1], Int64, CvtNegResMsbs3);
+  mux2 #(3) msb3mux(CvtNegRes[33:31], CvtNegRes[P.XLEN+1:P.XLEN-1], Int64, CvtNegResMsbs3);
  mux2 #(2) msb2mux(CvtNegResMsbs3[2:1], CvtNegResMsbs3[1:0], Signed, CvtNegResMsbs);
-endmodule
+endmodule
--- a/src/fpu/postproc/normshift.sv
+++ b/src/fpu/postproc/normshift.sv
@ -25,8 +25,6 @@
 // either express or implied. See the License for the specific language governing permissions 
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
-`include "wally-config.vh"
-

    // convert shift
    //      fp -> int: |  `XLEN  zeros |     Mantissa      | 0's if nessisary | << CalcExp
@ -72,11 +70,11 @@
    //      | Nf 0's |      Qm       | << calculated shift amount
    //        .

-module normshift(
-  input  logic [`LOGNORMSHIFTSZ-1:0]  ShiftAmt,   // shift amount
-  input  logic [`NORMSHIFTSZ-1:0]     ShiftIn,    // number to be shifted
-  output logic [`NORMSHIFTSZ-1:0]     Shifted     // shifted result
+module normshift import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.LOGNORMSHIFTSZ-1:0]  ShiftAmt,   // shift amount
+  input  logic [P.NORMSHIFTSZ-1:0]     ShiftIn,    // number to be shifted
+  output logic [P.NORMSHIFTSZ-1:0]     Shifted     // shifted result
 );
   
  assign Shifted = ShiftIn << ShiftAmt;
-endmodule
+endmodule
--- a/src/fpu/postproc/postprocess.sv
+++ b/src/fpu/postproc/postprocess.sv
@ -26,14 +26,12 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module postprocess (
+module postprocess import cvw::*;  #(parameter cvw_t P) (
  // general signals
  input logic                             Xs, Ys,     // input signs
-  input logic  [`NF:0]                    Xm, Ym, Zm, // input mantissas
+  input logic  [P.NF:0]                    Xm, Ym, Zm, // input mantissas
  input logic  [2:0]                      Frm,        // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-  input logic  [`FMTBITS-1:0]             Fmt,        // precision 1 = double 0 = single
+  input logic  [P.FMTBITS-1:0]             Fmt,        // precision 1 = double 0 = single
  input logic  [2:0]                      OpCtrl,     // choose which opperation (look below for values)
  input logic                             XZero, YZero,        // inputs are zero
  input logic                             XInf, YInf, ZInf,    // inputs are infinity
@ -44,63 +42,63 @@ module postprocess (
  input logic                             FmaAs,      // the modified Z sign - depends on instruction
  input logic                             FmaPs,      // the product's sign
  input logic                             FmaSs,      // Sum sign
-  input logic  [`NE+1:0]                  FmaSe,      // the sum's exponent
-  input logic  [3*`NF+3:0]                FmaSm,      // the positive sum
+  input logic  [P.NE+1:0]                  FmaSe,      // the sum's exponent
+  input logic  [3*P.NF+3:0]                FmaSm,      // the positive sum
  input logic                             FmaASticky, // sticky bit that is calculated during alignment
-  input logic  [$clog2(3*`NF+5)-1:0]      FmaSCnt,    // the normalization shift count
+  input logic  [$clog2(3*P.NF+5)-1:0]      FmaSCnt,    // the normalization shift count
  //divide signals
  input logic                             DivSticky,  // divider sticky bit
-  input logic  [`NE+1:0]                  DivQe,      // divsqrt exponent
-  input logic  [`DIVb:0]                  DivQm,      // divsqrt significand
+  input logic  [P.NE+1:0]                  DivQe,      // divsqrt exponent
+  input logic  [P.DIVb:0]                  DivQm,      // divsqrt significand
  // conversion signals
  input logic                             CvtCs,      // the result's sign
-  input logic  [`NE:0]                    CvtCe,      // the calculated expoent
+  input logic  [P.NE:0]                    CvtCe,      // the calculated exponent
  input logic                             CvtResSubnormUf, // the convert result is subnormal or underflows
-  input logic  [`LOGCVTLEN-1:0]           CvtShiftAmt,// how much to shift by
+  input logic  [P.LOGCVTLEN-1:0]           CvtShiftAmt,// how much to shift by
  input logic                             ToInt,      // is fp->int (since it's writting to the integer register)
-  input logic  [`CVTLEN-1:0]              CvtLzcIn,   // input to the Leading Zero Counter (without msb)
+  input logic  [P.CVTLEN-1:0]              CvtLzcIn,   // input to the Leading Zero Counter (without msb)
  input logic                             IntZero,    // is the integer input zero
  // final results
-  output logic [`FLEN-1:0]                PostProcRes,// postprocessor final result
+  output logic [P.FLEN-1:0]                PostProcRes,// postprocessor final result
  output logic [4:0]                      PostProcFlg,// postprocesser flags
-  output logic [`XLEN-1:0]                FCvtIntRes  // the integer conversion result
+  output logic [P.XLEN-1:0]                FCvtIntRes  // the integer conversion result
  );
  
  // general signals
  logic                       Rs;         // result sign
-  logic [`NF-1:0]             Rf;         // Result fraction
-  logic [`NE-1:0]             Re;         // Result exponent
+  logic [P.NF-1:0]             Rf;         // Result fraction
+  logic [P.NE-1:0]             Re;         // Result exponent
  logic                       Ms;         // norMalized sign
-  logic [`CORRSHIFTSZ-1:0]    Mf;         // norMalized fraction
-  logic [`NE+1:0]             Me;         // normalized exponent
-  logic [`NE+1:0]             FullRe;     // Re with bits to determine sign and overflow
+  logic [P.CORRSHIFTSZ-1:0]    Mf;         // norMalized fraction
+  logic [P.NE+1:0]             Me;         // normalized exponent
+  logic [P.NE+1:0]             FullRe;     // Re with bits to determine sign and overflow
  logic                       UfPlus1;    // do you add one (for determining underflow flag)
-  logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt;   // normalization shift amount
-  logic [`NORMSHIFTSZ-1:0]    ShiftIn;    // input to normalization shift
-  logic [`NORMSHIFTSZ-1:0]    Shifted;    // the ouput of the normalized shifter (before shift correction)
+  logic [P.LOGNORMSHIFTSZ-1:0] ShiftAmt;   // normalization shift amount
+  logic [P.NORMSHIFTSZ-1:0]    ShiftIn;    // input to normalization shift
+  logic [P.NORMSHIFTSZ-1:0]    Shifted;    // the output of the normalized shifter (before shift correction)
  logic                       Plus1;      // add one to the final result?
  logic                       Overflow;   // overflow flag used to select results
  logic                       Invalid;    // invalid flag used to select results
  logic                       Guard, Round, Sticky; // bits needed to determine rounding
-  logic [`FMTBITS-1:0]        OutFmt;     // output format
+  logic [P.FMTBITS-1:0]        OutFmt;     // output format
  // fma signals
-  logic [`NE+1:0]             FmaMe;      // exponent of the normalized sum
+  logic [P.NE+1:0]             FmaMe;      // exponent of the normalized sum
  logic                       FmaSZero;   // is the sum zero
-  logic [3*`NF+5:0]           FmaShiftIn; // fma shift input
-  logic [`NE+1:0]             NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results
+  logic [3*P.NF+5:0]           FmaShiftIn; // fma shift input
+  logic [P.NE+1:0]             NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results
  logic                       FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection
-  logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma
+  logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma
  // division singals
-  logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt;        // divsqrt shif amount
-  logic [`NORMSHIFTSZ-1:0]    DivShiftIn;         // divsqrt shift input
-  logic [`NE+1:0]             Qe;                 // divsqrt corrected exponent after corretion shift
+  logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt;        // divsqrt shift amount
+  logic [P.NORMSHIFTSZ-1:0]    DivShiftIn;         // divsqrt shift input
+  logic [P.NE+1:0]             Qe;                 // divsqrt corrected exponent after correction shift
  logic                       DivByZero;          // divide by zero flag
  logic                       DivResSubnorm;      // is the divsqrt result subnormal
  logic                       DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed)
  // conversion signals
-  logic [`CVTLEN+`NF:0]       CvtShiftIn;         // number to be shifted for converter
+  logic [P.CVTLEN+P.NF:0]       CvtShiftIn;         // number to be shifted for converter
  logic [1:0]                 CvtNegResMsbs;      // most significant bits of possibly negated int result
-  logic [`XLEN+1:0]           CvtNegRes;          // possibly negated integer result
+  logic [P.XLEN+1:0]           CvtNegRes;          // possibly negated integer result
  logic                       CvtResUf;           // did the convert result underflow
  logic                       IntInvalid;         // invalid integer flag
  // readability signals
@ -132,9 +130,9 @@ module postprocess (
  // choose the ouptut format depending on the opperation
  //      - fp -> fp: OpCtrl contains the percision of the output
  //      - otherwise: Fmt contains the percision of the output
-  if (`FPSIZES == 2) 
-      assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT); 
-  else if (`FPSIZES == 3 | `FPSIZES == 4) 
+  if (P.FPSIZES == 2) 
+      assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == P.FMT); 
+  else if (P.FPSIZES == 3 | P.FPSIZES == 4) 
      assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0]; 

  ///////////////////////////////////////////////////////////////////////////////
@ -142,40 +140,40 @@ module postprocess (
  ///////////////////////////////////////////////////////////////////////////////

  // final claulations before shifting
-  cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,  
+  cvtshiftcalc #(P) cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,  
      .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);

-  fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
+  fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
      .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);

-  divshiftcalc divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
+  divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);

  // select which unit's output to shift
  always_comb
    case(PostProcSel)
      2'b10: begin // fma
-        ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt};
-        ShiftIn =  {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+6){1'b0}}};
+        ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(3*P.NF+5){1'b0}}, FmaShiftAmt};
+        ShiftIn =  {FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+6){1'b0}}};
      end
      2'b00: begin // cvt
-        ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt};
-        ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
+        ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt};
+        ShiftIn =  {CvtShiftIn, {P.NORMSHIFTSZ-P.CVTLEN-P.NF-1{1'b0}}};
      end
      2'b01: begin //divsqrt
        ShiftAmt = DivShiftAmt;
        ShiftIn =  DivShiftIn;
      end
      default: begin 
-        ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}}; 
-        ShiftIn = {`NORMSHIFTSZ{1'bx}}; 
+        ShiftAmt = {P.LOGNORMSHIFTSZ{1'bx}}; 
+        ShiftIn = {P.NORMSHIFTSZ{1'bx}}; 
      end
    endcase
  
  // main normalization shift
-  normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
+  normshift #(P) normshift (.ShiftIn, .ShiftAmt, .Shifted);

  // correct for LZA/divsqrt error
-  shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
+  shiftcorrection #(P) shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
      .DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf);

  ///////////////////////////////////////////////////////////////////////////////
@ -191,7 +189,7 @@ module postprocess (
  // calulate result sign used in rounding unit
  roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);

-  round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
+  round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
      .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt,  .CvtResUf,
      .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);

@ -206,7 +204,7 @@ module postprocess (
  // Flags
  ///////////////////////////////////////////////////////////////////////////////

-  flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, 
+  flags #(P) flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, 
              .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
              .NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero,
              .Guard, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
@ -216,9 +214,9 @@ module postprocess (
  // Select the result
  ///////////////////////////////////////////////////////////////////////////////

-  negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
+  negateintres #(P) negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);

-  specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
+  specialcase #(P) specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
      .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, 
      .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
      .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes);
--- a/src/fpu/postproc/resultsign.sv
+++ b/src/fpu/postproc/resultsign.sv
@ -26,8 +26,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module resultsign(
  input  logic [2:0]  Frm,        // rounding mode
  input  logic        FmaOp,      // is the operation an Fma
@ -77,4 +75,4 @@ module resultsign(
    else if(FmaSZero&FmaOp) Rs = Zeros;
    else                    Rs = Ms;

-endmodule
+endmodule
--- a/src/fpu/postproc/round.sv
+++ b/src/fpu/postproc/round.sv
@ -26,42 +26,32 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-// what position is XLEN in?
-//  options: 
-//     1: XLEN > NF   > NF1
-//     2: NF   > XLEN > NF1
-//     3: NF   > NF1  > XLEN
-//  single and double will always be smaller than XLEN
-`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
-
-module round(
-  input  logic [`FMTBITS-1:0]     OutFmt,             // output format
+module round import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.FMTBITS-1:0]     OutFmt,             // output format
  input  logic [2:0]              Frm,                // rounding mode
  input  logic [1:0]              PostProcSel,        // select the postprocessor output
  input  logic                    Ms,                 // normalized sign
-  input  logic [`CORRSHIFTSZ-1:0] Mf,                 // normalized fraction
+  input  logic [P.CORRSHIFTSZ-1:0] Mf,                 // normalized fraction
  // fma
  input  logic                    FmaOp,              // is an fma opperation being done?
-  input  logic [`NE+1:0]          FmaMe,              // exponent of the normalized sum for fma
+  input  logic [P.NE+1:0]          FmaMe,              // exponent of the normalized sum for fma
  input  logic                    FmaASticky,         // addend's sticky bit
  // divsqrt
  input  logic                    DivOp,              // is a division opperation being done
  input  logic                    DivSticky,          // divsqrt sticky bit
-  input  logic [`NE+1:0]          Qe,                 // the divsqrt calculated expoent
+  input  logic [P.NE+1:0]          Qe,                 // the divsqrt calculated expoent
  // cvt
  input  logic                    CvtOp,              // is a convert opperation being done
  input  logic                    ToInt,              // is the cvt op a cvt to integer
  input  logic                    CvtResSubnormUf,    // is the cvt result subnormal or underflow
  input  logic                    CvtResUf,           // does the cvt result underflow
-  input  logic [`NE:0]            CvtCe,              // the cvt calculated expoent
+  input  logic [P.NE:0]            CvtCe,              // the cvt calculated exponent
  // outputs
-  output logic [`NE+1:0]          Me,                 // normalied fraction
+  output logic [P.NE+1:0]          Me,                 // normalized exponent
  output logic                    UfPlus1,            // do you add one to the result if given an unbounded exponent
-  output logic [`NE+1:0]          FullRe,             // Re with bits to determine sign and overflow
-  output logic [`NE-1:0]          Re,                 // Result exponent
-  output logic [`NF-1:0]          Rf,                 // Result fractionNormS
+  output logic [P.NE+1:0]          FullRe,             // Re with bits to determine sign and overflow
+  output logic [P.NE-1:0]          Re,                 // Result exponent
+  output logic [P.NF-1:0]          Rf,                 // Result fraction
  output logic                    Sticky,             // sticky bit
  output logic                    Plus1,              // do you add one to the final result
  output logic                    Round, Guard        // bits needed to calculate rounding
@ -69,7 +59,7 @@ module round(

  logic           UfCalcPlus1;        // calculated plus one for unbounded exponent
  logic           NormSticky;         // normalized sum's sticky bit
-  logic [`NF-1:0] RoundFrac;          // rounded fraction
+  logic [P.NF-1:0] RoundFrac;          // rounded fraction
  logic           FpRes;              // is the result a floating point
  logic           IntRes;             // is the result an integer
  logic           FpGuard, FpRound;   // floating point round/guard bits
@ -77,8 +67,17 @@ module round(
  logic           LsbRes;             // lsb of result
  logic           CalcPlus1;          // calculated plus1
  logic           FpPlus1;            // do you add one to the fp result 
-  logic [`FLEN:0] RoundAdd;           // how much to add to the result
+  logic [P.FLEN:0] RoundAdd;           // how much to add to the result

+// what position is XLEN in?
+//  options: 
+//     1: XLEN > NF   > NF1
+//     2: NF   > XLEN > NF1
+//     3: NF   > NF1  > XLEN
+//  single and double will always be smaller than XLEN
+//`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
+  localparam XLENPOS = P.XLEN > P.NF ? 1 : P.XLEN > P.NF1 ? 2 : 3;
+  
  ///////////////////////////////////////////////////////////////////////////////
  // Rounding
  ///////////////////////////////////////////////////////////////////////////////
@ -115,68 +114,68 @@ module round(
  assign FpRes = ~IntRes;

  // sticky bit calculation
-  if (`FPSIZES == 1) begin
+  if (P.FPSIZES == 1) begin

      //     1: XLEN > NF
      //      |         XLEN          |
      //      |    NF     |1|1|
      //                     ^    ^ if floating point result
      //                     ^ if not an FMA result
-      if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
+      if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
      //     2: NF > XLEN
-      if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&IntRes) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);

-  end else if (`FPSIZES == 2) begin
+  end else if (P.FPSIZES == 2) begin
      // XLEN is either 64 or 32
      // so half and single are always smaller then XLEN

      // 1: XLEN > NF   > NF1
-      if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
+      if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~OutFmt) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
      // 2: NF   > XLEN > NF1
-      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | 
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) | 
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
      // 3: NF   > NF1  > XLEN
-      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
-                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&IntRes) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);

-  end else if (`FPSIZES == 3) begin
+  end else if (P.FPSIZES == 3) begin
      // 1: XLEN > NF   > NF1
-      if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
+      if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
      // 2: NF   > XLEN > NF1
-      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | 
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) | 
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
      // 3: NF   > NF1  > XLEN
-      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT1)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT1)|IntRes)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);

-  end else if (`FPSIZES == 4) begin
+  end else if (P.FPSIZES == 4) begin
      // Quad precision will always be greater than XLEN
      // 2: NF   > XLEN > NF1
-      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
-                                                (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
-                                                (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | 
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
-                                                (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
+      if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | 
+                                                (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) | 
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
      // 3: NF   > NF1  > XLEN
      // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
-      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
-                                                (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
-                                                (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
-                                                (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
+      if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);

  end
  
@ -184,40 +183,40 @@ module round(

  // only add the Addend sticky if doing an FMA opperation
  //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
-  assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivSticky&DivOp;
+  assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[P.NE+1]&FmaOp | DivSticky&DivOp;
  



  // determine round and LSB of the rounded value
  //      - underflow round bit is used to determint the underflow flag
-  if (`FPSIZES == 1) begin
-      assign FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
-      assign FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
-      assign FpRound = Mf[`CORRSHIFTSZ-`NF-2];
+  if (P.FPSIZES == 1) begin
+      assign FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
+      assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
+      assign FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];

-  end else if (`FPSIZES == 2) begin
-      assign FpGuard = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
-      assign FpLsbRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
-      assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
+  end else if (P.FPSIZES == 2) begin
+      assign FpGuard = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1];
+      assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1];
+      assign FpRound = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2];

-  end else if (`FPSIZES == 3) begin
+  end else if (P.FPSIZES == 3) begin
      always_comb
          case (OutFmt)
-              `FMT: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
-                  FpRound = Mf[`CORRSHIFTSZ-`NF-2];
+              P.FMT: begin
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
              end
-              `FMT1: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`NF1-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`NF1];
-                  FpRound = Mf[`CORRSHIFTSZ-`NF1-2];
+              P.FMT1: begin
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.NF1-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.NF1-2];
              end
-              `FMT2: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`NF2-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`NF2];
-                  FpRound = Mf[`CORRSHIFTSZ-`NF2-2];
+              P.FMT2: begin
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.NF2-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.NF2-2];
              end
              default: begin
                  FpGuard = 1'bx;
@ -225,35 +224,35 @@ module round(
                  FpRound = 1'bx;
              end
          endcase
-  end else if (`FPSIZES == 4) begin
+  end else if (P.FPSIZES == 4) begin
      always_comb
          case (OutFmt)
              2'h3: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`Q_NF-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`Q_NF];
-                  FpRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.Q_NF-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.Q_NF-2];
              end
              2'h1: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`D_NF-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`D_NF];
-                  FpRound = Mf[`CORRSHIFTSZ-`D_NF-2];
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.D_NF-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.D_NF-2];
              end
              2'h0: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`S_NF-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`S_NF];
-                  FpRound = Mf[`CORRSHIFTSZ-`S_NF-2];
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.S_NF-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.S_NF-2];
              end
              2'h2: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`H_NF-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`H_NF];
-                  FpRound = Mf[`CORRSHIFTSZ-`H_NF-2];
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.H_NF-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.H_NF-2];
              end
          endcase
  end

-  assign Guard = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpGuard;
-  assign LsbRes = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLsbRes;
-  assign Round = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpRound;
+  assign Guard = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN-1] : FpGuard;
+  assign LsbRes = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN] : FpLsbRes;
+  assign Round = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN-2] : FpRound;


  always_comb begin
@ -287,26 +286,26 @@ module round(


  // place Plus1 into the proper position for the format
-  if (`FPSIZES == 1) begin
-      assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
+  if (P.FPSIZES == 1) begin
+      assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};

-  end else if (`FPSIZES == 2) begin
+  end else if (P.FPSIZES == 2) begin
      // \/FLEN+1
      //  | NE+2 |        NF      |
      //  '-NE+2-^----NF1----^
-      // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
-      assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
+      // P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
+      assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};

-  end else if (`FPSIZES == 3) begin
-      assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
+  end else if (P.FPSIZES == 3) begin
+      assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};

-  end else if (`FPSIZES == 4)      
-      assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
+  end else if (P.FPSIZES == 4)      
+      assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};



  // trim unneeded bits from fraction
-  assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
+  assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF];
  


@ -314,7 +313,7 @@ module round(
  always_comb
      case(PostProcSel)
          2'b10: Me = FmaMe; // fma
-          2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
+          2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
          // 2'b01: Me = DivDone ? Qe : '0; // divide
          2'b01: Me = Qe; // divide
          default: Me = '0; 
@ -325,7 +324,7 @@ module round(
  // round the result
  //      - if the fraction overflows one should be added to the exponent
  assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
-  assign Re = FullRe[`NE-1:0];
+  assign Re = FullRe[P.NE-1:0];


 endmodule
--- a/src/fpu/postproc/roundsign.sv
+++ b/src/fpu/postproc/roundsign.sv
@ -25,7 +25,6 @@
 // either express or implied. See the License for the specific language governing permissions 
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
-`include "wally-config.vh"

 module roundsign(
  input logic         Xs,     // x sign
@ -47,4 +46,4 @@ module roundsign(
  // Select sign for rounding calulation
  assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);

-endmodule
+endmodule
--- a/src/fpu/postproc/shiftcorrection.sv
+++ b/src/fpu/postproc/shiftcorrection.sv
@ -26,53 +26,51 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module shiftcorrection(
-  input logic  [`NORMSHIFTSZ-1:0] Shifted,                // the shifted sum before LZA correction
+module shiftcorrection import cvw::*;  #(parameter cvw_t P) (
+  input logic  [P.NORMSHIFTSZ-1:0] Shifted,                // the shifted sum before LZA correction
  // divsqrt
  input logic                     DivOp,                  // is it a divsqrt opperation
  input logic                     DivResSubnorm,          // is the divsqrt result subnormal
-  input logic  [`NE+1:0]          DivQe,                  // the divsqrt result's exponent
+  input logic  [P.NE+1:0]          DivQe,                  // the divsqrt result's exponent
  input logic                     DivSubnormShiftPos,     // is the subnorm divider shift amount positive (ie not underflowed)
  //fma
  input logic                     FmaOp,                  // is it an fma opperation
-  input logic  [`NE+1:0]          NormSumExp,             // exponent of the normalized sum not taking into account Subnormal or zero results
+  input logic  [P.NE+1:0]          NormSumExp,             // exponent of the normalized sum not taking into account Subnormal or zero results
  input logic                     FmaPreResultSubnorm,    // is the result subnormal - calculated before LZA corection
  input logic                     FmaSZero,
  // output
-  output logic [`NE+1:0]          FmaMe,                  // exponent of the normalized sum
-  output logic [`CORRSHIFTSZ-1:0] Mf,                     // the shifted sum before LZA correction
-  output logic [`NE+1:0]          Qe                      // corrected exponent for divider
+  output logic [P.NE+1:0]          FmaMe,                  // exponent of the normalized sum
+  output logic [P.CORRSHIFTSZ-1:0] Mf,                     // the shifted result after correction (LZA for fma, one-bit shift for divsqrt)
+  output logic [P.NE+1:0]          Qe                      // corrected exponent for divider
 );

-  logic [3*`NF+3:0]           CorrSumShifted;             // the shifted sum after LZA correction
-  logic [`CORRSHIFTSZ-1:0]    CorrQm0, CorrQm1;           // portions of Shifted to select for CorrQmShifted
-  logic [`CORRSHIFTSZ-1:0]    CorrQmShifted;              // the shifted divsqrt result after one bit shift
+  logic [3*P.NF+3:0]           CorrSumShifted;             // the shifted sum after LZA correction
+  logic [P.CORRSHIFTSZ-1:0]    CorrQm0, CorrQm1;           // portions of Shifted to select for CorrQmShifted
+  logic [P.CORRSHIFTSZ-1:0]    CorrQmShifted;              // the shifted divsqrt result after one bit shift
  logic                       ResSubnorm;                 // is the result Subnormal
  logic                       LZAPlus1;                   // add one or two to the sum's exponent due to LZA correction
  logic                       LeftShiftQm;                // should the divsqrt result be shifted one to the left

  // LZA correction
-  assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1];
+  assign LZAPlus1 = Shifted[P.NORMSHIFTSZ-1];

  // correct the shifting error caused by the LZA
  //  - the only possible mantissa for a plus two is all zeroes 
  //      - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
-  mux2 #(`NORMSHIFTSZ-2) lzacorrmux(Shifted[`NORMSHIFTSZ-3:0], Shifted[`NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);
+  mux2 #(P.NORMSHIFTSZ-2) lzacorrmux(Shifted[P.NORMSHIFTSZ-3:0], Shifted[P.NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);

  // correct the shifting of the divsqrt caused by producing a result in (2, .5] range
  //    condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm)
  assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
-  assign CorrQm0 = Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
-  assign CorrQm1 = Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1];
-  mux2 #(`CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
+  assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2];
+  assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1];
+  mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
  
  // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
  always_comb
-    if(FmaOp)                       Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}};
+    if(FmaOp)                       Mf = {CorrSumShifted, {P.CORRSHIFTSZ-(3*P.NF+4){1'b0}}};
    else if (DivOp&~DivResSubnorm)  Mf = CorrQmShifted;
-    else                            Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+    else                            Mf = Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.CORRSHIFTSZ];
    
  // Determine sum's exponent
  //  main exponent issues: 
@ -82,12 +80,12 @@ module shiftcorrection(
  //      - if the result was calulated to be subnorm but it's norm and the LZA was off by 2
  //                          if plus1                    If plus2                               kill if the result Zero or actually subnormal
  //                          |                           |                                      |
-  assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, FmaPreResultSubnorm}) & {`NE+2{~(FmaSZero|ResSubnorm)}};
+  assign FmaMe = (NormSumExp+{{P.NE+1{1'b0}}, LZAPlus1} +{{P.NE+1{1'b0}}, FmaPreResultSubnorm}) & {P.NE+2{~(FmaSZero|ResSubnorm)}};
  
  // recalculate if the result is subnormal after LZA correction
-  assign ResSubnorm = FmaPreResultSubnorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1];
+  assign ResSubnorm = FmaPreResultSubnorm&~Shifted[P.NORMSHIFTSZ-2]&~Shifted[P.NORMSHIFTSZ-1];

  // the quotent is in the range [.5,2) if there is no early termination
  // if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift
-  assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(`NE+1)'(0), ~LZAPlus1};
-endmodule
+  assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1};
+endmodule
--- a/src/fpu/postproc/specialcase.sv
+++ b/src/fpu/postproc/specialcase.sv
@ -26,14 +26,12 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module specialcase(
+module specialcase import cvw::*;  #(parameter cvw_t P) (
  input  logic                Xs,         // X sign
-  input  logic [`NF:0]        Xm, Ym, Zm, // input significand's
+  input  logic [P.NF:0]        Xm, Ym, Zm, // input significand's
  input  logic                XNaN, YNaN, ZNaN, // are the inputs NaN
  input  logic [2:0]          Frm,        // rounding mode
-  input  logic [`FMTBITS-1:0] OutFmt,     // output format
+  input  logic [P.FMTBITS-1:0] OutFmt,     // output format
  input  logic                InfIn,      // are any inputs infinity
  input  logic                NaNIn,      // are any input NaNs
  input  logic                XInf, YInf, // are X or Y inifnity
@ -41,9 +39,9 @@ module specialcase(
  input  logic                Plus1,      // do you add one for rounding
  input  logic                Rs,         // the result's sign
  input  logic                Invalid, Overflow,  // flags to choose the result
-  input  logic [`NE-1:0]      Re,         // Result exponent
-  input  logic [`NE+1:0]      FullRe,     // Result full exponent
-  input  logic [`NF-1:0]      Rf,         // Result fraction
+  input  logic [P.NE-1:0]      Re,         // Result exponent
+  input  logic [P.NE+1:0]      FullRe,     // Result full exponent
+  input  logic [P.NF-1:0]      Rf,         // Result fraction
  // fma
  input  logic                FmaOp,      // is it a fma opperation
  // divsqrt
@ -55,23 +53,23 @@ module specialcase(
  input  logic                IntToFp,    // is cvt int -> fp opperation
  input  logic                Int64,      // is the integer 64 bits
  input  logic                Signed,     // is the integer signed
-  input  logic [`NE:0]        CvtCe,      // the calculated expoent for cvt
+  input  logic [P.NE:0]        CvtCe,      // the calculated exponent for cvt
  input  logic                IntInvalid, // integer invalid flag to choose the result
  input  logic                CvtResUf,   // does the convert result underflow
-  input  logic [`XLEN+1:0]    CvtNegRes,  // the possibly negated of the integer result
+  input  logic [P.XLEN+1:0]    CvtNegRes,  // the possibly negated integer result
  // outputs
-  output logic [`FLEN-1:0]    PostProcRes,// final result
-  output logic [`XLEN-1:0]    FCvtIntRes  // final integer result
+  output logic [P.FLEN-1:0]    PostProcRes,// final result
+  output logic [P.XLEN-1:0]    FCvtIntRes  // final integer result
 );

-  logic [`FLEN-1:0]   XNaNRes;    // X is NaN result
-  logic [`FLEN-1:0]   YNaNRes;    // Y is NaN result
-  logic [`FLEN-1:0]   ZNaNRes;    // Z is NaN result
-  logic [`FLEN-1:0]   InvalidRes; // Invalid result result
-  logic [`FLEN-1:0]   UfRes;      // underflowed result result
-  logic [`FLEN-1:0]   OfRes;      // overflowed result result
-  logic [`FLEN-1:0]   NormRes;    // normal result
-  logic [`XLEN-1:0]   OfIntRes;   // the overflow result for integer output
+  logic [P.FLEN-1:0]   XNaNRes;    // X is NaN result
+  logic [P.FLEN-1:0]   YNaNRes;    // Y is NaN result
+  logic [P.FLEN-1:0]   ZNaNRes;    // Z is NaN result
+  logic [P.FLEN-1:0]   InvalidRes; // Invalid result
+  logic [P.FLEN-1:0]   UfRes;      // underflowed result
+  logic [P.FLEN-1:0]   OfRes;      // overflowed result
+  logic [P.FLEN-1:0]   NormRes;    // normal result
+  logic [P.XLEN-1:0]   OfIntRes;   // the overflow result for integer output
  logic               OfResMax;   // does the of result output maximum norm fp number
  logic               KillRes;    // kill the result for underflow
  logic               SelOfRes;   // should the overflow result be selected
@ -82,158 +80,158 @@ module specialcase(
  assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));

  // select correct outputs for special cases
-  if (`FPSIZES == 1) begin
+  if (P.FPSIZES == 1) begin
      //NaN res selection depending on standard
-      if(`IEEE754) begin
-          assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-          assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-          assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
-          assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+      if(P.IEEE754) begin
+          assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+          assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+          assign ZNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]};
+          assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
      end else begin
-          assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+          assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
      end

-      assign OfRes =  OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
-      assign UfRes = {Rs, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
+      assign OfRes =  OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+      assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
      assign NormRes = {Rs, Re, Rf};

-  end else if (`FPSIZES == 2) begin
-      if(`IEEE754) begin
-          assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
-          assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
-          assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
-          assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+  end else if (P.FPSIZES == 2) begin
+      if(P.IEEE754) begin
+          assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+          assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+          assign ZNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.NF1]};
+          assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
      end else begin 
-          assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+          assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
      end

      always_comb
          if(OutFmt)
-              if(OfResMax)    OfRes = {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}};
-              else            OfRes = {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
+              if(OfResMax)    OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
+              else            OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
          else
-              if(OfResMax)    OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}};
-              else            OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
-      assign UfRes = OutFmt ? {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-      assign NormRes = OutFmt ? {Rs, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
+              if(OfResMax)    OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
+              else            OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+      assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+      assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};

-  end else if (`FPSIZES == 3) begin
+  end else if (P.FPSIZES == 3) begin
      always_comb
          case (OutFmt)
-              `FMT: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-                      YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-                      ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
-                      InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+              P.FMT: begin  
+                  if(P.IEEE754) begin
+                      XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+                      YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+                      ZNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]};
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
                  end else begin 
-                      InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
                  end
                  
-                  OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
-                  UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+                  UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
                  NormRes = {Rs, Re, Rf};
              end
-              `FMT1: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
-                      YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
-                      ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
-                      InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+              P.FMT1: begin  
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+                      YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+                      ZNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.NF1]};
+                      InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
                  end else begin 
-                      InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                      InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
                  end
-                  OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
-                  UfRes = {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                  NormRes = {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
+                  OfRes = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+                  UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
              end
-              `FMT2: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
-                      YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
-                      ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]};
-                      InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+              P.FMT2: begin  
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
+                      YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
+                      ZNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.NF2]};
+                      InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
                  end else begin 
-                      InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                      InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
                  end
                  
-                  OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2{1'b1}}, (`NF2)'(0)};
-                  UfRes = {{`FLEN-`LEN2{1'b1}}, Rs, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                  NormRes = {{`FLEN-`LEN2{1'b1}}, Rs, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]};
+                  OfRes = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
+                  UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
              end
              default: begin
-                  if(`IEEE754) begin
-                      XNaNRes = (`FLEN)'(0);
-                      YNaNRes = (`FLEN)'(0);
-                      ZNaNRes = (`FLEN)'(0);
-                      InvalidRes = (`FLEN)'(0);
+                  if(P.IEEE754) begin
+                      XNaNRes = (P.FLEN)'(0);
+                      YNaNRes = (P.FLEN)'(0);
+                      ZNaNRes = (P.FLEN)'(0);
+                      InvalidRes = (P.FLEN)'(0);
                  end else begin 
-                      InvalidRes = (`FLEN)'(0);
+                      InvalidRes = (P.FLEN)'(0);
                  end
-                  OfRes = (`FLEN)'(0);
-                  UfRes = (`FLEN)'(0);
-                  NormRes = (`FLEN)'(0);
+                  OfRes = (P.FLEN)'(0);
+                  UfRes = (P.FLEN)'(0);
+                  NormRes = (P.FLEN)'(0);
              end
          endcase

-  end else if (`FPSIZES == 4) begin 
+  end else if (P.FPSIZES == 4) begin 
      always_comb
          case (OutFmt)
              2'h3: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-                      YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-                      ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
-                      InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                  if(P.IEEE754) begin
+                      XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+                      YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+                      ZNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]};
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
                  end else begin 
-                      InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
                  end
                  
-                  OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
-                  UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+                  UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
                  NormRes = {Rs, Re, Rf};
              end
              2'h1: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
-                      YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
-                      ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]};
-                      InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
+                      YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
+                      ZNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.D_NF]};
+                      InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
                  end else begin 
-                      InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                      InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
                  end
-                  OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                  UfRes = {{`FLEN-`D_LEN{1'b1}}, Rs, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                  NormRes = {{`FLEN-`D_LEN{1'b1}}, Rs, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]};
+                  OfRes = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
+                  UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
              end
              2'h0: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
-                      YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
-                      ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]};
-                      InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
+                      YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
+                      ZNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.S_NF]};
+                      InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
                  end else begin 
-                      InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                      InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
                  end
                  
-                  OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                  UfRes = {{`FLEN-`S_LEN{1'b1}}, Rs, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                  NormRes = {{`FLEN-`S_LEN{1'b1}}, Rs, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]};
+                  OfRes = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
+                  UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
              end
              2'h2: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
-                      YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
-                      ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]};
-                      InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
+                      YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
+                      ZNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.H_NF]};
+                      InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
                  end else begin 
-                      InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                      InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
                  end
                  
-                  OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE{1'b1}}, (`H_NF)'(0)};      
+                  OfRes = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};      
                // zero is exact if dividing by infinity so don't add 1
-                  UfRes = {{`FLEN-`H_LEN{1'b1}}, Rs, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                  NormRes = {{`FLEN-`H_LEN{1'b1}}, Rs, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]};
+                  UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
              end
          endcase
  end
@ -242,13 +240,13 @@ module specialcase(
  //      - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
  //      - dont set to zero if fp input is zero but not using the fp input
  //      - dont set to zero if int input is zero but not using the int input
-  assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
+  assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
  
  // calculate if the overflow result should be selected
  assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
  
  // output infinity with result sign if divide by zero
-  if(`IEEE754)
+  if(P.IEEE754)
    always_comb
      if(XNaN&~(IntToFp&CvtOp))   PostProcRes = XNaNRes;
      else if(YNaN&~CvtOp)        PostProcRes = YNaNRes;
@ -283,14 +281,14 @@ module specialcase(
  always_comb
    if(Signed)
      if(Xs&~NaNIn) // signed negitive
-        if(Int64)   OfIntRes = {1'b1, {`XLEN-1{1'b0}}};
-        else        OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
+        if(Int64)   OfIntRes = {1'b1, {P.XLEN-1{1'b0}}};
+        else        OfIntRes = {{P.XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
      else          // signed positive
-        if(Int64)   OfIntRes = {1'b0, {`XLEN-1{1'b1}}};
-        else        OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
+        if(Int64)   OfIntRes = {1'b0, {P.XLEN-1{1'b1}}};
+        else        OfIntRes = {{P.XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
    else
-      if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive
-      else          OfIntRes = {`XLEN{1'b1}}; // unsigned positive
+      if(Xs&~NaNIn) OfIntRes = {P.XLEN{1'b0}}; // unsigned negative
+      else          OfIntRes = {P.XLEN{1'b1}}; // unsigned positive


  // select the integer output
@ -301,9 +299,9 @@ module specialcase(
  //      - otherwise output the normal res (trmined and sign extended if nessisary)
  always_comb
    if(IntInvalid)          FCvtIntRes = OfIntRes;
-    else if(CvtCe[`NE]) 
-      if(Xs&Signed&Plus1)   FCvtIntRes = {{`XLEN{1'b1}}};
-      else                  FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1};
-    else if(Int64)          FCvtIntRes = CvtNegRes[`XLEN-1:0];
-    else                    FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
-endmodule
+    else if(CvtCe[P.NE]) 
+      if(Xs&Signed&Plus1)   FCvtIntRes = {{P.XLEN{1'b1}}};
+      else                  FCvtIntRes = {{P.XLEN-1{1'b0}}, Plus1};
+    else if(Int64)          FCvtIntRes = CvtNegRes[P.XLEN-1:0];
+    else                    FCvtIntRes = {{P.XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
+endmodule
--- a/src/fpu/unpack.sv
+++ b/src/fpu/unpack.sv
@ -25,41 +25,40 @@
 // either express or implied. See the License for the specific language governing permissions 
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
-`include "wally-config.vh"

-module unpack ( 
-  input  logic [`FLEN-1:0]        X, Y, Z,              // inputs from register file
-  input  logic [`FMTBITS-1:0]     Fmt,                  // format signal 00 - single 01 - double 11 - quad 10 - half
+module unpack import cvw::*;  #(parameter cvw_t P) ( // unpacks X, Y, Z via three unpackinput instances; P (cvw_t) supplies the FLEN/FMTBITS/NE/NF widths used by the ports
+  input  logic [P.FLEN-1:0]       X, Y, Z,              // inputs from register file
+  input  logic [P.FMTBITS-1:0]    Fmt,                  // format signal 00 - single 01 - double 11 - quad 10 - half
  input  logic                    XEn, YEn, ZEn,        // input enables
  output logic                    Xs, Ys, Zs,           // sign bits of XYZ
-  output logic [`NE-1:0]          Xe, Ye, Ze,           // exponents of XYZ (converted to largest supported precision)
-  output logic [`NF:0]            Xm, Ym, Zm,           // mantissas of XYZ (converted to largest supported precision)
+  output logic [P.NE-1:0]         Xe, Ye, Ze,           // exponents of XYZ (converted to largest supported precision)
+  output logic [P.NF:0]           Xm, Ym, Zm,           // mantissas of XYZ (converted to largest supported precision)
  output logic                    XNaN, YNaN, ZNaN,     // is XYZ a NaN
  output logic                    XSNaN, YSNaN, ZSNaN,  // is XYZ a signaling NaN
  output logic                    XSubnorm,             // is X subnormal
  output logic                    XZero, YZero, ZZero,  // is XYZ zero
  output logic                    XInf, YInf, ZInf,     // is XYZ infinity
  output logic                    XExpMax,              // does X have the maximum exponent (NaN or Inf)
-  output logic [`FLEN-1:0]        XPostBox              // X after being properly NaN-boxed
+  output logic [P.FLEN-1:0]       XPostBox              // X after being properly NaN-boxed
 );

  logic XExpNonZero, YExpNonZero, ZExpNonZero;          // is the exponent of XYZ non-zero
  logic XFracZero, YFracZero, ZFracZero;                // is the fraction zero
  logic YExpMax, ZExpMax;                               // is the exponent all 1s
  
-  unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn),
+  unpackinput #(P) unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), // X operand: the only instance whose Subnorm and PostBox outputs are connected
                          .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
                          .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero), 
                          .Subnorm(XSubnorm), .PostBox(XPostBox));

-  unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn),
+  unpackinput #(P) unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn), // Y operand: Subnorm and PostBox outputs are left unconnected
                          .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
                          .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero), 
                          .Subnorm(), .PostBox());

-  unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn),
+  unpackinput #(P) unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn), // Z operand: Subnorm and PostBox outputs are left unconnected
                          .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
                          .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero), 
                          .Subnorm(), .PostBox());
 
- endmodule
+ endmodule
--- a/src/fpu/unpackinput.sv
+++ b/src/fpu/unpackinput.sv
@ -25,15 +25,14 @@
 // either express or implied. See the License for the specific language governing permissions 
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
-`include "wally-config.vh"

-module unpackinput ( 
-  input  logic [`FLEN-1:0]        In,         // inputs from register file
+module unpackinput import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.FLEN-1:0]        In,         // inputs from register file
  input  logic                    En,         // enable the input
-  input  logic [`FMTBITS-1:0]     Fmt,        // format signal 00 - single 01 - double 11 - quad 10 - half
+  input  logic [P.FMTBITS-1:0]     Fmt,        // format signal 00 - single 01 - double 11 - quad 10 - half
  output logic                    Sgn,        // sign bits of the number 
-  output logic [`NE-1:0]          Exp,        // exponent of the number  (converted to largest supported precision)
-  output logic [`NF:0]            Man,        // mantissa of the number  (converted to largest supported precision)
+  output logic [P.NE-1:0]          Exp,        // exponent of the number  (converted to largest supported precision)
+  output logic [P.NF:0]            Man,        // mantissa of the number  (converted to largest supported precision)
  output logic                    NaN,        // is the number a NaN
  output logic                    SNaN,       // is the number a signaling NaN
  output logic                    Zero,       // is the number zero
@ -42,29 +41,29 @@ module unpackinput (
  output logic                    FracZero,   // is the fraction zero
  output logic                    ExpMax,     // does In have the maximum exponent (NaN or Inf)
  output logic                    Subnorm,    // is the number subnormal
-  output logic [`FLEN-1:0]        PostBox     // Number reboxed correctly as a NaN
+  output logic [P.FLEN-1:0]        PostBox     // Number reboxed correctly as a NaN
 );

-  logic [`NF-1:0] Frac;       // Fraction of XYZ
+  logic [P.NF-1:0] Frac;       // Fraction of XYZ
  logic           BadNaNBox;  // incorrectly NaN Boxed

-  if (`FPSIZES == 1) begin        // if there is only one floating point format supported
+  if (P.FPSIZES == 1) begin        // if there is only one floating point format supported
      assign BadNaNBox = 0;
-      assign Sgn = In[`FLEN-1];  // sign bit
-      assign Frac = In[`NF-1:0];  // fraction (no assumed 1)
-      assign ExpNonZero = |In[`FLEN-2:`NF];  // is the exponent non-zero
-      assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};  // exponent.  subnormal numbers have effective biased exponent of 1
-      assign ExpMax = &In[`FLEN-2:`NF];  // is the exponent all 1's
+      assign Sgn = In[P.FLEN-1];  // sign bit
+      assign Frac = In[P.NF-1:0];  // fraction (no assumed 1)
+      assign ExpNonZero = |In[P.FLEN-2:P.NF];  // is the exponent non-zero
+      assign Exp = {In[P.FLEN-2:P.NF+1], In[P.NF]|~ExpNonZero};  // exponent.  subnormal numbers have effective biased exponent of 1
+      assign ExpMax = &In[P.FLEN-2:P.NF];  // is the exponent all 1's
      assign PostBox = In;
  
-  end else if (`FPSIZES == 2) begin   // if there are 2 floating point formats supported
+  end else if (P.FPSIZES == 2) begin   // if there are 2 floating point formats supported
      // largest format | smaller format
      //----------------------------------
-      //      `FLEN     |     `LEN1       length of floating point number
-      //      `NE       |     `NE1        length of exponent
-      //      `NF       |     `NF1        length of fraction
-      //      `BIAS     |     `BIAS1      exponent's bias value
-      //      `FMT      |     `FMT1       precision's format value - Q=11 D=01 Sticky=00 H=10
+      //      P.FLEN     |     P.LEN1       length of floating point number
+      //      P.NE       |     P.NE1        length of exponent
+      //      P.NF       |     P.NF1        length of fraction
+      //      P.BIAS     |     P.BIAS1      exponent's bias value
+      //      P.FMT      |     P.FMT1       precision's format value - Q=11 D=01 S=00 H=10

      // Possible combinantions specified by spec:
      //      double and single
@ -76,22 +75,22 @@ module unpackinput (
      //      quad   and half
      //      double and half

-      assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
+      assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing
      always_comb
        if (BadNaNBox) begin
-//          PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, In[`LEN1-`NE1-3:0]};
-          PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, {(`LEN1-`NE1-2){1'b0}}};
+//          PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
+          PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
        end else 
          PostBox = In;

      // choose sign bit depending on format - 1=larger precsion 0=smaller precision
-      assign Sgn = Fmt ? In[`FLEN-1] : (BadNaNBox ? 0 : In[`LEN1-1]); // improperly boxed NaNs are treated as positive
+      assign Sgn = Fmt ? In[P.FLEN-1] : (BadNaNBox ? 0 : In[P.LEN1-1]); // improperly boxed NaNs are treated as positive

      // extract the fraction, add trailing zeroes to the mantissa if nessisary
-      assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
+      assign Frac = Fmt ? In[P.NF-1:0] : {In[P.NF1-1:0], (P.NF-P.NF1)'(0)};

      // is the exponent non-zero
-      assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; 
+      assign ExpNonZero = Fmt ? |In[P.FLEN-2:P.NF] : |In[P.LEN1-2:P.NF1]; 

      // example double to single conversion:
      // 1023 = 0011 1111 1111
@ -103,21 +102,21 @@ module unpackinput (

      // extract the exponent, converting the smaller exponent into the larger precision if nessisary
      //      - if the original precision had a Subnormal number convert the exponent value 1
-      assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
+      assign Exp = Fmt ? {In[P.FLEN-2:P.NF+1], In[P.NF]|~ExpNonZero} : {In[P.LEN1-2], {P.NE-P.NE1{~In[P.LEN1-2]}}, In[P.LEN1-3:P.NF1+1], In[P.NF1]|~ExpNonZero}; 

      // is the exponent all 1's
-      assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
+      assign ExpMax = Fmt ? &In[P.FLEN-2:P.NF] : &In[P.LEN1-2:P.NF1];
  

-  end else if (`FPSIZES == 3) begin       // three floating point precsions supported
+  end else if (P.FPSIZES == 3) begin       // three floating point precisions supported

      // largest format | larger format  | smallest format
      //---------------------------------------------------
-      //      `FLEN     |     `LEN1      |    `LEN2       length of floating point number
-      //      `NE       |     `NE1       |    `NE2        length of exponent
-      //      `NF       |     `NF1       |    `NF2        length of fraction
-      //      `BIAS     |     `BIAS1     |    `BIAS2      exponent's bias value
-      //      `FMT      |     `FMT1      |    `FMT2       precision's format value - Q=11 D=01 Sticky=00 H=10
+      //      P.FLEN     |     P.LEN1      |    P.LEN2       length of floating point number
+      //      P.NE       |     P.NE1       |    P.NE2        length of exponent
+      //      P.NF       |     P.NF1       |    P.NF2        length of fraction
+      //      P.BIAS     |     P.BIAS1     |    P.BIAS2      exponent's bias value
+      //      P.FMT      |     P.FMT1      |    P.FMT2       precision's format value - Q=11 D=01 S=00 H=10

      // Possible combinantions specified by spec:
      //      quad   and double and single
@ -130,20 +129,20 @@ module unpackinput (
      // Check NaN boxing
      always_comb
          case (Fmt)
-              `FMT:  BadNaNBox = 0;
-              `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
-              `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
+              P.FMT:  BadNaNBox = 0;
+              P.FMT1: BadNaNBox = ~&In[P.FLEN-1:P.LEN1];
+              P.FMT2: BadNaNBox = ~&In[P.FLEN-1:P.LEN2];
              default: BadNaNBox = 1'bx;
          endcase

      always_comb
        if (BadNaNBox) begin
          case (Fmt)
-            `FMT: PostBox = In;
-//            `FMT1: PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, In[`LEN1-`NE1-3:0]};
-//            `FMT2: PostBox = {{(`FLEN-`LEN2){1'b1}}, 1'b1, {(`NE2+1){1'b1}}, In[`LEN2-`NE2-3:0]};
-            `FMT1: PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, {(`LEN1-`NE1-2){1'b0}}};
-            `FMT2: PostBox = {{(`FLEN-`LEN2){1'b1}}, 1'b1, {(`NE2+1){1'b1}}, {(`LEN2-`NE2-2){1'b0}}};
+            P.FMT: PostBox = In;
+//            P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
+//            P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, In[P.LEN2-P.NE2-3:0]};
+            P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
+            P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}};
            default: PostBox = 'x;
          endcase
        end else 
@ -154,27 +153,27 @@ module unpackinput (
        if (BadNaNBox) Sgn = 0; // improperly boxed NaNs are treated as positive
        else
          case (Fmt)
-              `FMT:  Sgn = In[`FLEN-1];
-              `FMT1: Sgn = In[`LEN1-1];
-              `FMT2: Sgn = In[`LEN2-1];
+              P.FMT:  Sgn = In[P.FLEN-1];
+              P.FMT1: Sgn = In[P.LEN1-1];
+              P.FMT2: Sgn = In[P.LEN2-1];
              default: Sgn = 1'bx;
          endcase

       // extract the fraction
      always_comb
          case (Fmt)
-              `FMT: Frac = In[`NF-1:0];
-              `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
-              `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
-              default: Frac = {`NF{1'bx}};
+              P.FMT: Frac = In[P.NF-1:0];
+              P.FMT1: Frac = {In[P.NF1-1:0], (P.NF-P.NF1)'(0)};
+              P.FMT2: Frac = {In[P.NF2-1:0], (P.NF-P.NF2)'(0)};
+              default: Frac = {P.NF{1'bx}};
          endcase

      // is the exponent non-zero
      always_comb
          case (Fmt)
-              `FMT:  ExpNonZero = |In[`FLEN-2:`NF];     // if input is largest precision (`FLEN - ie quad or double)
-              `FMT1: ExpNonZero = |In[`LEN1-2:`NF1];  // if input is larger precsion (`LEN1 - double or single)
-              `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
+              P.FMT:  ExpNonZero = |In[P.FLEN-2:P.NF];     // if input is largest precision (P.FLEN - ie quad or double)
+              P.FMT1: ExpNonZero = |In[P.LEN1-2:P.NF1];  // if input is larger precision (P.LEN1 - double or single)
+              P.FMT2: ExpNonZero = |In[P.LEN2-2:P.NF2]; // if input is smallest precision (P.LEN2 - single or half)
              default: ExpNonZero = 1'bx; 
          endcase
          
@ -189,50 +188,50 @@ module unpackinput (
      // convert the larger precision's exponent to use the largest precision's bias
      always_comb 
          case (Fmt)
-              `FMT:  Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
-              `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
-              `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; 
-              default: Exp = {`NE{1'bx}};
+              P.FMT:  Exp = {In[P.FLEN-2:P.NF+1], In[P.NF]|~ExpNonZero};
+              P.FMT1: Exp = {In[P.LEN1-2], {P.NE-P.NE1{~In[P.LEN1-2]}}, In[P.LEN1-3:P.NF1+1], In[P.NF1]|~ExpNonZero}; 
+              P.FMT2: Exp = {In[P.LEN2-2], {P.NE-P.NE2{~In[P.LEN2-2]}}, In[P.LEN2-3:P.NF2+1], In[P.NF2]|~ExpNonZero}; 
+              default: Exp = {P.NE{1'bx}};
          endcase

      // is the exponent all 1's
      always_comb
          case (Fmt)
-              `FMT:  ExpMax = &In[`FLEN-2:`NF];
-              `FMT1: ExpMax = &In[`LEN1-2:`NF1];
-              `FMT2: ExpMax = &In[`LEN2-2:`NF2];
+              P.FMT:  ExpMax = &In[P.FLEN-2:P.NF];
+              P.FMT1: ExpMax = &In[P.LEN1-2:P.NF1];
+              P.FMT2: ExpMax = &In[P.LEN2-2:P.NF2];
              default: ExpMax = 1'bx;
          endcase

-  end else if (`FPSIZES == 4) begin      // if all precsisons are supported - quad, double, single, and half
+  end else if (P.FPSIZES == 4) begin      // if all precisions are supported - quad, double, single, and half
  
      //    quad   |  double  |  single  |  half    
      //-------------------------------------------------------------------
-      //   `Q_LEN  |  `D_LEN  |  `S_LEN  |  `H_LEN     length of floating point number
-      //   `Q_NE   |  `D_NE   |  `S_NE   |  `H_NE      length of exponent
-      //   `Q_NF   |  `D_NF   |  `S_NF   |  `H_NF      length of fraction
-      //   `Q_BIAS |  `D_BIAS |  `S_BIAS |  `H_BIAS    exponent's bias value
-      //   `Q_FMT  |  `D_FMT  |  `S_FMT  |  `H_FMT     precision's format value - Q=11 D=01 Sticky=00 H=10
+      //   P.Q_LEN  |  P.D_LEN  |  P.S_LEN  |  P.H_LEN     length of floating point number
+      //   P.Q_NE   |  P.D_NE   |  P.S_NE   |  P.H_NE      length of exponent
+      //   P.Q_NF   |  P.D_NF   |  P.S_NF   |  P.H_NF      length of fraction
+      //   P.Q_BIAS |  P.D_BIAS |  P.S_BIAS |  P.H_BIAS    exponent's bias value
+      //   P.Q_FMT  |  P.D_FMT  |  P.S_FMT  |  P.H_FMT     precision's format value - Q=11 D=01 S=00 H=10

      // Check NaN boxing
      always_comb
          case (Fmt)
              2'b11: BadNaNBox = 0;
-              2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
-              2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
-              2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN];
+              2'b01: BadNaNBox = ~&In[P.Q_LEN-1:P.D_LEN];
+              2'b00: BadNaNBox = ~&In[P.Q_LEN-1:P.S_LEN];
+              2'b10: BadNaNBox = ~&In[P.Q_LEN-1:P.H_LEN];
          endcase

      always_comb
        if (BadNaNBox) begin
          case (Fmt)
            2'b11: PostBox = In;
-//            2'b01: PostBox = {{(`Q_LEN-`D_LEN){1'b1}}, 1'b1, {(`D_NE+1){1'b1}}, In[`D_LEN-`D_NE-3:0]};
-//            2'b00: PostBox = {{(`Q_LEN-`S_LEN){1'b1}}, 1'b1, {(`S_NE+1){1'b1}}, In[`S_LEN-`S_NE-3:0]};
-//            2'b10: PostBox = {{(`Q_LEN-`H_LEN){1'b1}}, 1'b1, {(`H_NE+1){1'b1}}, In[`H_LEN-`H_NE-3:0]};
-            2'b01: PostBox = {{(`Q_LEN-`D_LEN){1'b1}}, 1'b1, {(`D_NE+1){1'b1}}, {(`D_LEN-`D_NE-2){1'b0}}};
-            2'b00: PostBox = {{(`Q_LEN-`S_LEN){1'b1}}, 1'b1, {(`S_NE+1){1'b1}}, {(`S_LEN-`S_NE-2){1'b0}}};
-            2'b10: PostBox = {{(`Q_LEN-`H_LEN){1'b1}}, 1'b1, {(`H_NE+1){1'b1}}, {(`H_LEN-`H_NE-2){1'b0}}};
+//            2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, In[P.D_LEN-P.D_NE-3:0]};
+//            2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, In[P.S_LEN-P.S_NE-3:0]};
+//            2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, In[P.H_LEN-P.H_NE-3:0]};
+            2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}};
+            2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}};
+            2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}};
          endcase
        end else 
          PostBox = In;
@ -242,29 +241,29 @@ module unpackinput (
        if (BadNaNBox) Sgn = 0; // improperly boxed NaNs are treated as positive
        else
          case (Fmt)
-              2'b11: Sgn = In[`Q_LEN-1];
-              2'b01: Sgn = In[`D_LEN-1];
-              2'b00: Sgn = In[`S_LEN-1];
-              2'b10: Sgn = In[`H_LEN-1];
+              2'b11: Sgn = In[P.Q_LEN-1];
+              2'b01: Sgn = In[P.D_LEN-1];
+              2'b00: Sgn = In[P.S_LEN-1];
+              2'b10: Sgn = In[P.H_LEN-1];
          endcase
          

      // extract the fraction
      always_comb
          case (Fmt)
-              2'b11: Frac = In[`Q_NF-1:0];
-              2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
-              2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
-              2'b10: Frac = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
+              2'b11: Frac = In[P.Q_NF-1:0];
+              2'b01: Frac = {In[P.D_NF-1:0], (P.Q_NF-P.D_NF)'(0)};
+              2'b00: Frac = {In[P.S_NF-1:0], (P.Q_NF-P.S_NF)'(0)};
+              2'b10: Frac = {In[P.H_NF-1:0], (P.Q_NF-P.H_NF)'(0)};
          endcase

      // is the exponent non-zero
      always_comb
          case (Fmt)
-              2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
-              2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
-              2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; 
-              2'b10: ExpNonZero = |In[`H_LEN-2:`H_NF]; 
+              2'b11: ExpNonZero = |In[P.Q_LEN-2:P.Q_NF];
+              2'b01: ExpNonZero = |In[P.D_LEN-2:P.D_NF];
+              2'b00: ExpNonZero = |In[P.S_LEN-2:P.S_NF]; 
+              2'b10: ExpNonZero = |In[P.H_LEN-2:P.H_NF]; 
          endcase


@ -280,20 +279,20 @@ module unpackinput (
      // 1 is added to the exponent if the input is zero or subnormal
      always_comb
          case (Fmt)
-              2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
-              2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
-              2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
-              2'b10: Exp = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero}; 
+              2'b11: Exp = {In[P.Q_LEN-2:P.Q_NF+1], In[P.Q_NF]|~ExpNonZero};
+              2'b01: Exp = {In[P.D_LEN-2], {P.Q_NE-P.D_NE{~In[P.D_LEN-2]}}, In[P.D_LEN-3:P.D_NF+1], In[P.D_NF]|~ExpNonZero};
+              2'b00: Exp = {In[P.S_LEN-2], {P.Q_NE-P.S_NE{~In[P.S_LEN-2]}}, In[P.S_LEN-3:P.S_NF+1], In[P.S_NF]|~ExpNonZero};
+              2'b10: Exp = {In[P.H_LEN-2], {P.Q_NE-P.H_NE{~In[P.H_LEN-2]}}, In[P.H_LEN-3:P.H_NF+1], In[P.H_NF]|~ExpNonZero}; 
          endcase


      // is the exponent all 1's
      always_comb 
          case (Fmt)
-              2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
-              2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
-              2'b00: ExpMax = &In[`S_LEN-2:`S_NF];
-              2'b10: ExpMax = &In[`H_LEN-2:`H_NF];
+              2'b11: ExpMax = &In[P.Q_LEN-2:P.Q_NF];
+              2'b01: ExpMax = &In[P.D_LEN-2:P.D_NF];
+              2'b00: ExpMax = &In[P.S_LEN-2:P.S_NF];
+              2'b10: ExpMax = &In[P.H_LEN-2:P.H_NF];
          endcase

  end
@ -302,9 +301,9 @@ module unpackinput (
  assign FracZero = ~|Frac & ~BadNaNBox; // is the fraction zero?
  assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if Subnormal or zero) to create the significand
  assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN?
-  assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN?
+  assign SNaN = NaN&~Frac[P.NF-1]&~BadNaNBox; // is the input a signaling NaN?
  assign Inf = ExpMax & FracZero & En; // is the input infinity?
  assign Zero = ~ExpNonZero & FracZero; // is the input zero?
  assign Subnorm = ~ExpNonZero & ~FracZero & ~BadNaNBox; // is the input subnormal

-endmodule
+endmodule
--- a/src/hazard/hazard.sv
+++ b/src/hazard/hazard.sv
@ -26,8 +26,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module hazard (
  // Detect hazards
  input  logic  BPWrongE, CSRWriteFenceM, RetM, TrapM,   
--- a/src/ieu/bmu/bmuctrl.sv
+++ b/src/ieu/bmu/bmuctrl.sv
@ -27,9 +27,7 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module bmuctrl(
+module bmuctrl import cvw::*;  #(parameter cvw_t P) (
  input  logic        clk, reset,
  // Decode stage control signals
  input  logic        StallD, FlushD,          // Stall, flush Decode stage
@ -76,13 +74,13 @@ module bmuctrl(
  always_comb begin
    // BALUSelect_BSelect_ZBBSelect_BRegWrite_BALUSrcB_BW64_BALUOp_BSubArithD_RotateD_MaskD_PreShiftD_IllegalBitmanipInstrD
    BMUControlsD = `BMUCTRLW'b000_00_000_0_0_0_0_0_0_0_0_1;  // default: Illegal bmu instruction;
-    if (`ZBA_SUPPORTED) begin
+    if (P.ZBA_SUPPORTED) begin
      casez({OpD, Funct7D, Funct3D})
        17'b0110011_0010000_010: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0;  // sh1add
        17'b0110011_0010000_100: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0;  // sh2add
        17'b0110011_0010000_110: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0;  // sh3add
      endcase
-      if (`XLEN==64)
+      if (P.XLEN==64)
        casez({OpD, Funct7D, Funct3D})
          17'b0111011_0010000_010: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0;  // sh1add.uw
          17'b0111011_0010000_100: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0;  // sh2add.uw
@ -91,7 +89,7 @@ module bmuctrl(
          17'b0011011_000010?_001: BMUControlsD = `BMUCTRLW'b001_01_000_1_1_1_1_0_0_0_0_0;  // slli.uw
        endcase
    end
-    if (`ZBB_SUPPORTED) begin
+    if (P.ZBB_SUPPORTED) begin
      casez({OpD, Funct7D, Funct3D})
        17'b0110011_0110000_001: BMUControlsD = `BMUCTRLW'b001_01_111_1_0_0_1_0_1_0_0_0;  // rol
        17'b0110011_0110000_101: BMUControlsD = `BMUCTRLW'b001_01_111_1_0_0_1_0_1_0_0_0;  // ror
@ -100,13 +98,13 @@ module bmuctrl(
                                else if ((Rs2D[4:2]==3'b000) & ~(Rs2D[1] & Rs2D[0]))
                                  BMUControlsD = `BMUCTRLW'b000_10_000_1_1_0_1_0_0_0_0_0;  // count instruction
 //        // coverage off: This case can't occur in RV64
-//        17'b0110011_0000100_100: if (`XLEN == 32)
+//        17'b0110011_0000100_100: if (P.XLEN == 32)
 //                                  BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0;  // zexth (rv32)
 //        // coverage on
        17'b0110011_0100000_111: BMUControlsD = `BMUCTRLW'b111_01_111_1_0_0_1_1_0_0_0_0;  // andn
        17'b0110011_0100000_110: BMUControlsD = `BMUCTRLW'b110_01_111_1_0_0_1_1_0_0_0_0;  // orn
        17'b0110011_0100000_100: BMUControlsD = `BMUCTRLW'b100_01_111_1_0_0_1_1_0_0_0_0;  // xnor
-        17'b0010011_011010?_101: if ((`XLEN == 32 ^ Funct7D[0]) & (Rs2D == 5'b11000))
+        17'b0010011_011010?_101: if ((P.XLEN == 32 ^ Funct7D[0]) & (Rs2D == 5'b11000))
                                  BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0;  // rev8
        17'b0010011_0010100_101: if (Rs2D[4:0] == 5'b00111)
                                  BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0;  // orc.b
@ -115,12 +113,12 @@ module bmuctrl(
        17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0;  // min
        17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0;  // minu
      endcase
-      if (`XLEN==32)
+      if (P.XLEN==32)
        casez({OpD, Funct7D, Funct3D})
          17'b0110011_0000100_100: BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0;  // zexth (rv32)
          17'b0010011_0110000_101: BMUControlsD = `BMUCTRLW'b001_00_111_1_1_0_1_0_1_0_0_0;  // rori (rv32)                          
        endcase
-      else if (`XLEN==64)
+      else if (P.XLEN==64)
        casez({OpD, Funct7D, Funct3D})
          17'b0111011_0000100_100: BMUControlsD = `BMUCTRLW'b000_10_001_1_0_0_1_0_0_0_0_0;  // zexth (rv64)
          17'b0111011_0110000_001: BMUControlsD = `BMUCTRLW'b001_00_111_1_0_1_1_0_1_0_0_0;  // rolw
@ -131,25 +129,25 @@ module bmuctrl(
                                    BMUControlsD = `BMUCTRLW'b000_10_000_1_1_1_1_0_0_0_0_0;  // count word instruction
        endcase
    end
-    if (`ZBC_SUPPORTED)
+    if (P.ZBC_SUPPORTED)
      casez({OpD, Funct7D, Funct3D})
        17'b0110011_0000101_0??: BMUControlsD = `BMUCTRLW'b000_11_000_1_0_0_1_0_0_0_0_0;  // ZBC instruction
      endcase
-    if (`ZBS_SUPPORTED) begin // ZBS
+    if (P.ZBS_SUPPORTED) begin // ZBS
      casez({OpD, Funct7D, Funct3D})
        17'b0110011_0100100_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_0_0_1_1_0_1_0_0;  // bclr
        17'b0110011_0100100_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_0_0_1_1_0_1_0_0;  // bext
        17'b0110011_0110100_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_0_0_1_0_0_1_0_0;  // binv
        17'b0110011_0010100_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_0_0_1_0_0_1_0_0;  // bset
      endcase
-      if (`XLEN==32) // ZBS 64-bit
+      if (P.XLEN==32) // ZBS 32-bit
        casez({OpD, Funct7D, Funct3D})
          17'b0010011_0100100_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_1_0_1_1_0_1_0_0;  // bclri
          17'b0010011_0100100_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_1_0_1_1_0_1_0_0;  // bexti
          17'b0010011_0110100_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_1_0_1_0_0_1_0_0;  // binvi
          17'b0010011_0010100_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_1_0_1_0_0_1_0_0;  // bseti
        endcase
-      else if (`XLEN==64) // ZBS 64-bit
+      else if (P.XLEN==64) // ZBS 64-bit
        casez({OpD, Funct7D, Funct3D})
          17'b0010011_010010?_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_1_0_1_1_0_1_0_0;  // bclri (rv64)
          17'b0010011_010010?_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_1_0_1_1_0_1_0_0;  // bexti (rv64)
@ -157,7 +155,7 @@ module bmuctrl(
          17'b0010011_001010?_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_1_0_1_0_0_1_0_0;  // bseti (rv64)
        endcase
    end
-    if (`ZBB_SUPPORTED | `ZBS_SUPPORTED) // rv32i/64i shift instructions need BMU ALUSelect when BMU shifter is used
+    if (P.ZBB_SUPPORTED | P.ZBS_SUPPORTED) // rv32i/64i shift instructions need BMU ALUSelect when BMU shifter is used
      casez({OpD, Funct7D, Funct3D})
        17'b0110011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_0_0_1_0_0_0_0_0;  // sra, srl, sll
        17'b0010011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_1_0_1_0_0_0_0_0;  // srai, srli, slli
@ -176,5 +174,5 @@ module bmuctrl(
  assign ALUSelectD = BALUOpD ? BALUSelectD : (ALUOpD ? Funct3D : 3'b000);

  // BMU Execute stage pipieline control register
-  flopenrc#(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE});
+  flopenrc #(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE});
 endmodule
--- a/src/ieu/controller.sv
+++ b/src/ieu/controller.sv
@ -27,10 +27,7 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-
-module controller(
+module controller import cvw::*;  #(parameter cvw_t P) (
  input  logic        clk, reset,
  // Decode stage control signals
  input  logic        StallD, FlushD,          // Stall, flush Decode stage
@ -142,30 +139,30 @@ module controller(
  // Be rigorous about detecting illegal instructions if CSRs or bit manipulation is supported
  // otherwise be cheap

-  if (`ZICSR_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED | `ZBC_SUPPORTED | `ZBS_SUPPORTED) begin:legalcheck // Exact integer decoding
+  if (P.ZICSR_SUPPORTED | P.ZBA_SUPPORTED | P.ZBB_SUPPORTED | P.ZBC_SUPPORTED | P.ZBS_SUPPORTED) begin:legalcheck // Exact integer decoding
    logic Funct7ZeroD, Funct7b5D, IShiftD, INoShiftD;
    logic Funct7ShiftZeroD, Funct7Shiftb5D;

    assign Funct7ZeroD      = (Funct7D == 7'b0000000); // most R-type instructions
    assign Funct7b5D        = (Funct7D == 7'b0100000); // srai, sub
-    assign Funct7ShiftZeroD = (`XLEN==64) ? (Funct7D[6:1] == 6'b000000) : Funct7ZeroD;
-    assign Funct7Shiftb5D   = (`XLEN==64) ? (Funct7D[6:1] == 6'b010000) : Funct7b5D;
+    assign Funct7ShiftZeroD = (P.XLEN==64) ? (Funct7D[6:1] == 6'b000000) : Funct7ZeroD;
+    assign Funct7Shiftb5D   = (P.XLEN==64) ? (Funct7D[6:1] == 6'b010000) : Funct7b5D;
    assign IShiftD          = (Funct3D == 3'b001 & Funct7ShiftZeroD) | (Funct3D == 3'b101 & (Funct7ShiftZeroD | Funct7Shiftb5D)); // slli, srli, srai, or w forms
    assign INoShiftD        = ((Funct3D != 3'b001) & (Funct3D != 3'b101));
    assign IFunctD          = IShiftD | INoShiftD;
    assign RFunctD          = ((Funct3D == 3'b000 | Funct3D == 3'b101) & Funct7b5D) | Funct7ZeroD;
-    assign MFunctD          = (Funct7D == 7'b0000001) & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
+    assign MFunctD          = (Funct7D == 7'b0000001) & (P.M_SUPPORTED | (P.ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
    assign LFunctD          = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 | Funct3D == 3'b100 | Funct3D == 3'b101 | 
-                              ((`XLEN == 64) & (Funct3D == 3'b011 | Funct3D == 3'b110));
+                              ((P.XLEN == 64) & (Funct3D == 3'b011 | Funct3D == 3'b110));
    assign SFunctD          = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 | 
-                              ((`XLEN == 64) & (Funct3D == 3'b011));
+                              ((P.XLEN == 64) & (Funct3D == 3'b011));
    assign BFunctD          = (Funct3D[2:1] != 2'b01); // legal branches
    assign JFunctD          = (Funct3D == 3'b000);
    assign IWValidFunct3D   = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b101;
  end else begin:legalcheck2
    assign IFunctD = 1; // Don't bother to separate out shift decoding
    assign RFunctD = ~Funct7D[0]; // Not a multiply
-    assign MFunctD = Funct7D[0] & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
+    assign MFunctD = Funct7D[0] & (P.M_SUPPORTED | (P.ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
    assign LFunctD = 1; // don't bother to check Funct3 for loads
    assign SFunctD = 1; // don't bother to check Funct3 for stores
    assign BFunctD = 1; // don't bother to check Funct3 for branches
@ -182,19 +179,19 @@ module controller(
     7'b0000011: if (LFunctD) 
                      ControlsD = `CTRLW'b1_000_01_10_001_0_0_0_0_0_0_0_0_0_00_0; // loads
      7'b0000111:     ControlsD = `CTRLW'b0_000_01_10_001_0_0_0_0_0_0_0_0_0_00_1; // flw - only legal if FP supported
-      7'b0001111: if (`ZIFENCEI_SUPPORTED)
+      7'b0001111: if (P.ZIFENCEI_SUPPORTED)
                      ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_1_0_00_0; // fence
                  else
                      ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_0; // fence treated as nop
      7'b0010011: if (IFunctD)    
                      ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_0_0_0_0_0_00_0; // I-type ALU
      7'b0010111:     ControlsD = `CTRLW'b1_100_11_00_000_0_0_0_0_0_0_0_0_0_00_0; // auipc
-      7'b0011011: if (IFunctD & IWValidFunct3D & `XLEN == 64)
+      7'b0011011: if (IFunctD & IWValidFunct3D & P.XLEN == 64)
                      ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_1_0_0_0_0_00_0; // IW-type ALU for RV64i
      7'b0100011: if (SFunctD) 
                      ControlsD = `CTRLW'b0_001_01_01_000_0_0_0_0_0_0_0_0_0_00_0; // stores
      7'b0100111:     ControlsD = `CTRLW'b0_001_01_01_000_0_0_0_0_0_0_0_0_0_00_1; // fsw - only legal if FP supported
-      7'b0101111: if (`A_SUPPORTED) begin
+      7'b0101111: if (P.A_SUPPORTED) begin
                    if (InstrD[31:27] == 5'b00010)
                      ControlsD = `CTRLW'b1_000_00_10_001_0_0_0_0_0_0_0_0_0_01_0; // lr
                    else if (InstrD[31:27] == 5'b00011)
@ -207,16 +204,16 @@ module controller(
                  else if (MFunctD)
                      ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_0_0_0_0_1_00_0; // Multiply/divide
      7'b0110111:     ControlsD = `CTRLW'b1_100_01_00_000_0_0_0_1_0_0_0_0_0_00_0; // lui
-      7'b0111011: if (RFunctD & (`XLEN == 64))
+      7'b0111011: if (RFunctD & (P.XLEN == 64))
                      ControlsD = `CTRLW'b1_000_00_00_000_0_1_0_0_1_0_0_0_0_00_0; // R-type W instructions for RV64i
-                  else if (MFunctD & (`XLEN == 64))
+                  else if (MFunctD & (P.XLEN == 64))
                      ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_1_0_0_0_1_00_0; // W-type Multiply/Divide
      7'b1100011: if (BFunctD)   
                      ControlsD = `CTRLW'b0_010_11_00_000_1_0_0_0_0_0_0_0_0_00_0; // branches
      7'b1100111: if (JFunctD)
                      ControlsD = `CTRLW'b1_000_01_00_000_0_0_1_1_0_0_0_0_0_00_0; // jalr
      7'b1101111:     ControlsD = `CTRLW'b1_011_11_00_000_0_0_1_1_0_0_0_0_0_00_0; // jal
-      7'b1110011: if (`ZICSR_SUPPORTED) begin
+      7'b1110011: if (P.ZICSR_SUPPORTED) begin
                   if (Funct3D == 3'b000)
                      ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_1_0_0_00_0; // privileged; decoded further in priveleged modules
                   else
@ -229,7 +226,7 @@ module controller(
  // Unswizzle control bits
  // Squash control signals if coming from an illegal compressed instruction
  // On RV32E, can't write to upper 16 registers.  Checking reads to upper 16 is more costly so disregard them.
-  assign IllegalERegAdrD = `E_SUPPORTED & `ZICSR_SUPPORTED & ControlsD[`CTRLW-1] & InstrD[11]; 
+  assign IllegalERegAdrD = P.E_SUPPORTED & P.ZICSR_SUPPORTED & ControlsD[`CTRLW-1] & InstrD[11]; 
  //assign IllegalBaseInstrD = 1'b0;
  assign {BaseRegWriteD, ImmSrcD, ALUSrcAD, BaseALUSrcBD, MemRWD,
          ResultSrcD, BranchD, ALUOpD, JumpD, ALUResultSrcD, BaseW64D, CSRReadD, 
@ -247,17 +244,17 @@ module controller(
  assign BaseSubArithD = ALUOpD & (subD | sraD | sltD | sltuD);

  // bit manipulation Configuration Block
-  if (`ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED | `ZBC_SUPPORTED) begin: bitmanipi //change the conditional expression to OR any Z supported flags
+  if (P.ZBS_SUPPORTED | P.ZBA_SUPPORTED | P.ZBB_SUPPORTED | P.ZBC_SUPPORTED) begin: bitmanipi //change the conditional expression to OR any Z supported flags
    logic IllegalBitmanipInstrD;          // Unrecognized B instruction
    logic BRegWriteD;                     // Indicates if it is a R type BMU instruction in decode stage
    logic BW64D;                          // Indicates if it is a W type BMU instruction in decode stage
    logic BSubArithD;                     // TRUE for BMU ext, clr, andn, orn, xnor
    logic BALUSrcBD;                      // BMU alu src select signal

-    bmuctrl bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD, 
+    bmuctrl #(P) bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD, 
      .BRegWriteD, .BALUSrcBD, .BW64D, .BSubArithD, .IllegalBitmanipInstrD, .StallE, .FlushE, 
      .ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BALUControlE);
-    if (`ZBA_SUPPORTED) begin
+    if (P.ZBA_SUPPORTED) begin
      // ALU Decoding is more comprehensive when ZBA is supported. slt and slti conflicts with sh1add, sh1add.uw
      assign sltD = (Funct3D == 3'b010 & (~(Funct7D[4]) | ~OpD[5])) ;
    end else assign sltD = (Funct3D == 3'b010);
@ -290,7 +287,7 @@ module controller(
  // Fences
  // Ordinary fence is presently a nop
  // fence.i flushes the D$ and invalidates the I$ if Zifencei is supported and I$ is implemented
-  if (`ZIFENCEI_SUPPORTED & `ICACHE_SUPPORTED) begin:fencei
+  if (P.ZIFENCEI_SUPPORTED & P.ICACHE_SUPPORTED) begin:fencei
    logic FenceID;
    assign FenceID = FenceXD & (Funct3D == 3'b001); // is it a FENCE.I instruction?
    assign InvalidateICacheD = FenceID;
@ -338,5 +335,5 @@ module controller(

  // the synchronous DTIM cannot read immediately after write
  // a cache cannot read or write immediately after a write
-  assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & `DCACHE_SUPPORTED)) | (|AtomicD));
+  assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED)) | (|AtomicD));
 endmodule
--- a/src/ieu/datapath.sv
+++ b/src/ieu/datapath.sv
@ -27,16 +27,14 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module datapath (
+module datapath import cvw::*;  #(parameter cvw_t P) (
  input  logic             clk, reset,
  // Decode stage signals
  input  logic [2:0]       ImmSrcD,                 // Selects type of immediate extension
  input  logic [31:0]      InstrD,                  // Instruction in Decode stage
  // Execute stage signals
-  input  logic [`XLEN-1:0] PCE,                     // PC in Execute stage  
-  input  logic [`XLEN-1:0] PCLinkE,                 // PC + 4 (of instruction in Execute stage)
+  input  logic [P.XLEN-1:0] PCE,                     // PC in Execute stage  
+  input  logic [P.XLEN-1:0] PCLinkE,                 // PC + 4 (of instruction in Execute stage)
  input  logic [2:0]       Funct3E,                 // Funct3 field of instruction in Execute stage
  input  logic             StallE, FlushE,          // Stall, flush Execute stage
  input  logic [1:0]       ForwardAE, ForwardBE,    // Forward ALU operands from later stages
@ -51,24 +49,24 @@ module datapath (
  input  logic [2:0]       ZBBSelectE,              // ZBB mux select signal
  input  logic [2:0]       BALUControlE,            // ALU Control signals for B instructions in Execute Stage
  output logic [1:0]       FlagsE,                  // Comparison flags ({eq, lt})
-  output logic [`XLEN-1:0] IEUAdrE,                 // Address computed by ALU
-  output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B
+  output logic [P.XLEN-1:0] IEUAdrE,                 // Address computed by ALU
+  output logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B
  // Memory stage signals
  input  logic             StallM, FlushM,          // Stall, flush Memory stage
  input  logic             FWriteIntM, FCvtIntW,    // FPU writes integer register file, FPU converts float to int
-  input  logic [`XLEN-1:0] FIntResM,                // FPU integer result
-  output logic [`XLEN-1:0] SrcAM,                   // ALU's Source A in Memory stage to privilege unit for CSR writes
-  output logic [`XLEN-1:0] WriteDataM,              // Write data in Memory stage
+  input  logic [P.XLEN-1:0] FIntResM,                // FPU integer result
+  output logic [P.XLEN-1:0] SrcAM,                   // ALU's Source A in Memory stage to privilege unit for CSR writes
+  output logic [P.XLEN-1:0] WriteDataM,              // Write data in Memory stage
  // Writeback stage signals
  input  logic             StallW, FlushW,          // Stall, flush Writeback stage
  input  logic             RegWriteW, IntDivW,      // Write register file, integer divide instruction
  input  logic             SquashSCW,               // Squash a store conditional when a conflict arose
  input  logic [2:0]       ResultSrcW,              // Select source of result to write back to register file
-  input  logic [`XLEN-1:0] FCvtIntResW,             // FPU convert fp to integer result
-  input  logic [`XLEN-1:0] ReadDataW,               // Read data from LSU
-  input  logic [`XLEN-1:0] CSRReadValW,             // CSR read result
-  input  logic [`XLEN-1:0] MDUResultW,              // MDU (Multiply/divide unit) result
-  input  logic [`XLEN-1:0] FIntDivResultW,          // FPU's integer divide result
+  input  logic [P.XLEN-1:0] FCvtIntResW,             // FPU convert fp to integer result
+  input  logic [P.XLEN-1:0] ReadDataW,               // Read data from LSU
+  input  logic [P.XLEN-1:0] CSRReadValW,             // CSR read result
+  input  logic [P.XLEN-1:0] MDUResultW,              // MDU (Multiply/divide unit) result
+  input  logic [P.XLEN-1:0] FIntDivResultW,          // FPU's integer divide result
   // Hazard Unit signals 
  output logic [4:0]       Rs1D, Rs2D, Rs1E, Rs2E,  // Register sources to read in Decode or Execute stage
  output logic [4:0]       RdE, RdM, RdW            // Register destinations in Execute, Memory, or Writeback stage
@ -76,64 +74,64 @@ module datapath (

  // Fetch stage signals
  // Decode stage signals
-  logic [`XLEN-1:0] R1D, R2D;                       // Read data from Rs1 (RD1), Rs2 (RD2)
-  logic [`XLEN-1:0] ImmExtD;                        // Extended immediate in Decode stage
+  logic [P.XLEN-1:0] R1D, R2D;                       // Read data from Rs1 (RD1), Rs2 (RD2)
+  logic [P.XLEN-1:0] ImmExtD;                        // Extended immediate in Decode stage
  logic [4:0]       RdD;                            // Destination register in Decode stage
  // Execute stage signals
-  logic [`XLEN-1:0] R1E, R2E;                       // Source operands read from register file
-  logic [`XLEN-1:0] ImmExtE;                        // Extended immediate in Execute stage 
-  logic [`XLEN-1:0] SrcAE, SrcBE;                   // ALU operands
-  logic [`XLEN-1:0] ALUResultE, AltResultE, IEUResultE; // ALU result, Alternative result (ImmExtE or PC+4), result of execution stage
+  logic [P.XLEN-1:0] R1E, R2E;                       // Source operands read from register file
+  logic [P.XLEN-1:0] ImmExtE;                        // Extended immediate in Execute stage 
+  logic [P.XLEN-1:0] SrcAE, SrcBE;                   // ALU operands
+  logic [P.XLEN-1:0] ALUResultE, AltResultE, IEUResultE; // ALU result, Alternative result (ImmExtE or PC+4), result of execution stage
  // Memory stage signals
-  logic [`XLEN-1:0] IEUResultM;                     // Result from execution stage
-  logic [`XLEN-1:0] IFResultM;                      // Result from either IEU or single-cycle FPU op writing an integer register
+  logic [P.XLEN-1:0] IEUResultM;                     // Result from execution stage
+  logic [P.XLEN-1:0] IFResultM;                      // Result from either IEU or single-cycle FPU op writing an integer register
  // Writeback stage signals
-  logic [`XLEN-1:0] SCResultW;                      // Store Conditional result
-  logic [`XLEN-1:0] ResultW;                        // Result to write to register file
-  logic [`XLEN-1:0] IFResultW;                      // Result from either IEU or single-cycle FPU op writing an integer register
-  logic [`XLEN-1:0] IFCvtResultW;                   // Result from IEU, signle-cycle FPU op, or 2-cycle FCVT float to int 
-  logic [`XLEN-1:0] MulDivResultW;                  // Multiply always comes from MDU.  Divide could come from MDU or FPU (when using fdivsqrt for integer division)
+  logic [P.XLEN-1:0] SCResultW;                      // Store Conditional result
+  logic [P.XLEN-1:0] ResultW;                        // Result to write to register file
+  logic [P.XLEN-1:0] IFResultW;                      // Result from either IEU or single-cycle FPU op writing an integer register
+  logic [P.XLEN-1:0] IFCvtResultW;                   // Result from IEU, single-cycle FPU op, or 2-cycle FCVT float to int
+  logic [P.XLEN-1:0] MulDivResultW;                  // Multiply always comes from MDU.  Divide could come from MDU or FPU (when using fdivsqrt for integer division)

  // Decode stage
  assign Rs1D      = InstrD[19:15];
  assign Rs2D      = InstrD[24:20];
  assign RdD       = InstrD[11:7];
-  regfile regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, R1D, R2D);
-  extend ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ImmExtD);
+  regfile #(P.XLEN, P.E_SUPPORTED) regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, R1D, R2D);
+  extend  #(P.XLEN, P.A_SUPPORTED) ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ImmExtD);
 
  // Execute stage pipeline register and logic
-  flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E);
-  flopenrc #(`XLEN) RD2EReg(clk, reset, FlushE, ~StallE, R2D, R2E);
-  flopenrc #(`XLEN) ImmExtEReg(clk, reset, FlushE, ~StallE, ImmExtD, ImmExtE);
+  flopenrc #(P.XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E);
+  flopenrc #(P.XLEN) RD2EReg(clk, reset, FlushE, ~StallE, R2D, R2E);
+  flopenrc #(P.XLEN) ImmExtEReg(clk, reset, FlushE, ~StallE, ImmExtD, ImmExtE);
  flopenrc #(5)     Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E);
  flopenrc #(5)     Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E);
  flopenrc #(5)     RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE);
  
-  mux3  #(`XLEN)  faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE);
-  mux3  #(`XLEN)  fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE);
-  comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE);
-  mux2  #(`XLEN)  srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE);
-  mux2  #(`XLEN)  srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE);
-  alu   #(`XLEN)  alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE);
-  mux2 #(`XLEN)   altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE);
-  mux2 #(`XLEN)   ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE);
+  mux3  #(P.XLEN)  faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE);
+  mux3  #(P.XLEN)  fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE);
+  comparator #(P.XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE);
+  mux2  #(P.XLEN)  srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE);
+  mux2  #(P.XLEN)  srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE);
+  alu   #(P.XLEN)  alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE);
+  mux2  #(P.XLEN)  altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE);
+  mux2  #(P.XLEN)  ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE);

  // Memory stage pipeline register
-  flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM);
-  flopenrc #(`XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM);
-  flopenrc #(5)     RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM);  
-  flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM); 
+  flopenrc #(P.XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM);
+  flopenrc #(P.XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM);
+  flopenrc #(5)      RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM);  
+  flopenrc #(P.XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM); 
  
  // Writeback stage pipeline register and logic
-  flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
-  flopenrc #(5)     RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
+  flopenrc #(P.XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
+  flopenrc #(5)      RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);

  // floating point inputs: FIntResM comes from fclass, fcmp, fmv; FCvtIntResW comes from fcvt
-  if (`F_SUPPORTED) begin:fpmux
-    mux2  #(`XLEN)  resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
-    mux2  #(`XLEN)  cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
-    if (`IDIV_ON_FPU) begin
-      mux2  #(`XLEN)  divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW);
+  if (P.F_SUPPORTED) begin:fpmux
+    mux2  #(P.XLEN)  resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
+    mux2  #(P.XLEN)  cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
+    if (P.IDIV_ON_FPU) begin
+      mux2  #(P.XLEN)  divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW);
    end else begin 
      assign MulDivResultW = MDUResultW;
    end
@ -142,9 +140,9 @@ module datapath (
    assign IFCvtResultW = IFResultW;
    assign MulDivResultW = MDUResultW;
  end
-  mux5  #(`XLEN)  resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); 
+  mux5  #(P.XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); 
 
  // handle Store Conditional result if atomic extension supported
-  if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW};
+  if (P.A_SUPPORTED) assign SCResultW = {{(P.XLEN-1){1'b0}}, SquashSCW};
  else              assign SCResultW = 0;
-endmodule
+endmodule
--- a/src/ieu/extend.sv
+++ b/src/ieu/extend.sv
@ -27,29 +27,27 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module extend (
+module extend #(parameter XLEN, A_SUPPORTED) (
  input  logic [31:7]       InstrD,      // All instruction bits except opcode (lower 7 bits)
  input  logic [2:0]        ImmSrcD,     // Select what kind of extension to perform
-  output logic [`XLEN-1:0 ] ImmExtD);    // Extended immediate
+  output logic [XLEN-1:0 ] ImmExtD);    // Extended immediate

-  localparam [`XLEN-1:0] undefined = {(`XLEN){1'bx}}; // could change to 0 after debug
+  localparam [XLEN-1:0] undefined = {(XLEN){1'bx}}; // could change to 0 after debug
 
  always_comb
    case(ImmSrcD) 
      // I-type 
-      3'b000:   ImmExtD = {{(`XLEN-12){InstrD[31]}}, InstrD[31:20]};  
+      3'b000:   ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[31:20]};  
      // S-type (stores)
-      3'b001:   ImmExtD = {{(`XLEN-12){InstrD[31]}}, InstrD[31:25], InstrD[11:7]}; 
+      3'b001:   ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[31:25], InstrD[11:7]}; 
      // B-type (branches)
-      3'b010:   ImmExtD = {{(`XLEN-12){InstrD[31]}}, InstrD[7], InstrD[30:25], InstrD[11:8], 1'b0}; 
+      3'b010:   ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[7], InstrD[30:25], InstrD[11:8], 1'b0}; 
      // J-type (jal)
-      3'b011:   ImmExtD = {{(`XLEN-20){InstrD[31]}}, InstrD[19:12], InstrD[20], InstrD[30:21], 1'b0}; 
+      3'b011:   ImmExtD = {{(XLEN-20){InstrD[31]}}, InstrD[19:12], InstrD[20], InstrD[30:21], 1'b0}; 
      // U-type (lui, auipc)
-      3'b100:  ImmExtD = {{(`XLEN-31){InstrD[31]}}, InstrD[30:12], 12'b0}; 
+      3'b100:  ImmExtD = {{(XLEN-31){InstrD[31]}}, InstrD[30:12], 12'b0}; 
      // Store Conditional: zero offset
-      3'b101:  if (`A_SUPPORTED) ImmExtD = 0;
+      3'b101:  if (A_SUPPORTED) ImmExtD = 0;
               else              ImmExtD = undefined;
      default: ImmExtD = undefined; // undefined
    endcase  
--- a/src/ieu/forward.sv
+++ b/src/ieu/forward.sv
@ -27,8 +27,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module forward(
  // Detect hazards
  input  logic [4:0]  Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, // Source and destination registers
--- a/src/ieu/ieu.sv
+++ b/src/ieu/ieu.sv
@ -26,45 +26,44 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"

-module ieu (
+module ieu import cvw::*;  #(parameter cvw_t P) (
  input  logic              clk, reset,
  // Decode stage signals
  input  logic [31:0]       InstrD,                          // Instruction
  input  logic              IllegalIEUFPUInstrD,             // Illegal instruction
  output logic              IllegalBaseInstrD,               // Illegal I-type instruction, or illegal RV32 access to upper 16 registers
  // Execute stage signals
-  input  logic [`XLEN-1:0]  PCE,                             // PC
-  input  logic [`XLEN-1:0]  PCLinkE,                         // PC + 4
+  input  logic [P.XLEN-1:0]  PCE,                             // PC
+  input  logic [P.XLEN-1:0]  PCLinkE,                         // PC + 4
  output logic              PCSrcE,                          // Select next PC (between PC+4 and IEUAdrE)
  input  logic              FWriteIntE, FCvtIntE,            // FPU writes to integer register file, FPU converts float to int
-  output logic [`XLEN-1:0]  IEUAdrE,                         // Memory address
+  output logic [P.XLEN-1:0]  IEUAdrE,                         // Memory address
  output logic              IntDivE, W64E,                   // Integer divide, RV64 W-type instruction 
  output logic [2:0]        Funct3E,                         // Funct3 instruction field
-  output logic [`XLEN-1:0]  ForwardedSrcAE, ForwardedSrcBE,  // ALU src inputs before the mux choosing between them and PCE to put in srcA/B
+  output logic [P.XLEN-1:0]  ForwardedSrcAE, ForwardedSrcBE,  // ALU src inputs before the mux choosing between them and PCE to put in srcA/B
  output logic [4:0]        RdE,                             // Destination register
  // Memory stage signals
  input  logic              SquashSCW,                       // Squash store conditional, from LSU
  output logic [1:0]        MemRWM,                          // Read/write control goes to LSU
  output logic [1:0]        AtomicM,                         // Atomic control goes to LSU
-  output logic [`XLEN-1:0]  WriteDataM,                      // Write data to LSU
+  output logic [P.XLEN-1:0]  WriteDataM,                      // Write data to LSU
  output logic [2:0]        Funct3M,                         // Funct3 (size and signedness) to LSU
-  output logic [`XLEN-1:0]  SrcAM,                           // ALU SrcA to Privileged unit and FPU
+  output logic [P.XLEN-1:0]  SrcAM,                           // ALU SrcA to Privileged unit and FPU
  output logic [4:0]        RdM,                             // Destination register
-  input  logic [`XLEN-1:0]  FIntResM,                        // Integer result from FPU (fmv, fclass, fcmp)
+  input  logic [P.XLEN-1:0]  FIntResM,                        // Integer result from FPU (fmv, fclass, fcmp)
  output logic              InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$
  output logic              InstrValidD, InstrValidE, InstrValidM,// Instruction is valid
  output logic              BranchD, BranchE,
  output logic              JumpD, JumpE,
  // Writeback stage signals
-  input  logic [`XLEN-1:0]  FIntDivResultW,                  // Integer divide result from FPU fdivsqrt)
-  input  logic [`XLEN-1:0]  CSRReadValW,                     // CSR read value, 
-  input  logic [`XLEN-1:0]  MDUResultW,                      // multiply/divide unit result
-  input  logic [`XLEN-1:0]  FCvtIntResW,                     // FPU's float to int conversion result
+  input  logic [P.XLEN-1:0]  FIntDivResultW,                  // Integer divide result from FPU (fdivsqrt)
+  input  logic [P.XLEN-1:0]  CSRReadValW,                     // CSR read value, 
+  input  logic [P.XLEN-1:0]  MDUResultW,                      // multiply/divide unit result
+  input  logic [P.XLEN-1:0]  FCvtIntResW,                     // FPU's float to int conversion result
  input  logic              FCvtIntW,                        // FPU converts float to int
  output logic [4:0]        RdW,                             // Destination register
-  input  logic [`XLEN-1:0]  ReadDataW,                       // LSU's read data
+  input  logic [P.XLEN-1:0]  ReadDataW,                       // LSU's read data
  // Hazard unit signals
  input  logic              StallD, StallE, StallM, StallW,  // Stall signals from hazard unit
  input  logic              FlushD, FlushE, FlushM, FlushW,  // Flush signals
@ -96,7 +95,7 @@ module ieu (
  logic       BranchSignedE;                                 // Branch does signed comparison on operands
  logic       MDUE;                                          // Multiply/divide instruction
           
-controller c(
+  controller #(P) c(
    .clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD,
    .IllegalIEUFPUInstrD, .IllegalBaseInstrD, .StallE, .FlushE, .FlagsE, .FWriteIntE,
    .PCSrcE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .MemReadE, .CSRReadE, 
@ -105,7 +104,7 @@ controller c(
    .RegWriteM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM,
    .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD);

-  datapath   dp(
+  datapath #(P) dp(
    .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, .W64E, .SubArithE,
    .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .JumpE, .BranchSignedE, 
    .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, .BSelectE, .ZBBSelectE, .BALUControlE,
--- a/src/ieu/regfile.sv
+++ b/src/ieu/regfile.sv
@ -27,18 +27,16 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module regfile (
+module regfile #(parameter XLEN, E_SUPPORTED) (
  input  logic             clk, reset,
  input  logic             we3,                 // Write enable
  input  logic [4:0]       a1, a2, a3,          // Source registers to read (a1, a2), destination register to write (a3)
-  input  logic [`XLEN-1:0] wd3,                 // Write data for port 3
-  output logic [`XLEN-1:0] rd1, rd2);           // Read data for ports 1, 2
+  input  logic [XLEN-1:0] wd3,                 // Write data for port 3
+  output logic [XLEN-1:0] rd1, rd2);           // Read data for ports 1, 2

-  localparam NUMREGS = `E_SUPPORTED ? 16 : 32;  // only 16 registers in E mode
+  localparam NUMREGS = E_SUPPORTED ? 16 : 32;  // only 16 registers in E mode

-  logic [`XLEN-1:0] rf[NUMREGS-1:1];
+  logic [XLEN-1:0] rf[NUMREGS-1:1];
  integer i;

  // Three ported register file
--- a/src/ifu/bpred/RASPredictor.sv
+++ b/src/ifu/bpred/RASPredictor.sv
@ -27,9 +27,7 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module RASPredictor #(parameter int StackSize = 16 )(
+module RASPredictor import cvw::*;  #(parameter cvw_t P, parameter int StackSize = 16 )( // separate 'parameter int' so StackSize stays an integer instead of inheriting type cvw_t from P
  input  logic             clk,
  input  logic             reset, 
  input  logic             StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM,
@ -37,15 +35,15 @@ module RASPredictor #(parameter int StackSize = 16 )(
  input  logic             ReturnD,
  input  logic             ReturnE, CallE,                  // Instr class
  input  logic             BPReturnF,
-  input  logic [`XLEN-1:0] PCLinkE,                                   // PC of instruction after a call
-  output logic [`XLEN-1:0] RASPCF                                     // Top of the stack
+  input  logic [P.XLEN-1:0] PCLinkE,                                   // PC of instruction after a call
+  output logic [P.XLEN-1:0] RASPCF                                     // Top of the stack
   );

  logic                     CounterEn;
  localparam Depth = $clog2(StackSize);

  logic [Depth-1:0]         NextPtr, Ptr, P1, M1, IncDecPtr;
-  logic [StackSize-1:0]     [`XLEN-1:0] memory;
+  logic [StackSize-1:0]     [P.XLEN-1:0] memory;
  integer        index;

  logic      PopF;
@ -85,7 +83,7 @@ module RASPredictor #(parameter int StackSize = 16 )(
  always_ff @ (posedge clk) begin
    if(reset) begin
      for(index=0; index<StackSize; index++)
-    memory[index] <= {`XLEN{1'b0}};
+    memory[index] <= {P.XLEN{1'b0}};
    end else if(PushE) begin
      memory[NextPtr] <= #1 PCLinkE;
    end
--- a/src/ifu/bpred/bpred.sv
+++ b/src/ifu/bpred/bpred.sv
@ -26,27 +26,25 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 `define INSTR_CLASS_PRED 1

-module bpred (
+module bpred import cvw::*;  #(parameter cvw_t P) (
  input  logic             clk, reset,
  input  logic             StallF, StallD, StallE, StallM, StallW,
  input  logic             FlushD, FlushE, FlushM, FlushW,
  // Fetch stage
  // the prediction
  input  logic [31:0]      InstrD,                    // Decompressed decode stage instruction. Used to decode instruction class
-  input  logic [`XLEN-1:0] PCNextF,                   // Next Fetch Address
-  input  logic [`XLEN-1:0] PCPlus2or4F,               // PCF+2/4
-  output logic [`XLEN-1:0] PC1NextF,                  // Branch Predictor predicted or corrected fetch address on miss prediction
-  output logic [`XLEN-1:0] NextValidPCE,              // Address of next valid instruction after the instruction in the Memory stage
+  input  logic [P.XLEN-1:0] PCNextF,                   // Next Fetch Address
+  input  logic [P.XLEN-1:0] PCPlus2or4F,               // PCF+2/4
+  output logic [P.XLEN-1:0] PC1NextF,                  // Branch Predictor predicted or corrected fetch address on miss prediction
+  output logic [P.XLEN-1:0] NextValidPCE,              // Address of next valid instruction after the instruction in the Memory stage

  // Update Predictor
-  input  logic [`XLEN-1:0] PCF,                       // Fetch stage instruction address
-  input  logic [`XLEN-1:0] PCD,                       // Decode stage instruction address. Also the address the branch predictor took
-  input  logic [`XLEN-1:0] PCE,                       // Execution stage instruction address
-  input  logic [`XLEN-1:0] PCM,                       // Memory stage instruction address
+  input  logic [P.XLEN-1:0] PCF,                       // Fetch stage instruction address
+  input  logic [P.XLEN-1:0] PCD,                       // Decode stage instruction address. Also the address the branch predictor took
+  input  logic [P.XLEN-1:0] PCE,                       // Execution stage instruction address
+  input  logic [P.XLEN-1:0] PCM,                       // Memory stage instruction address

  input  logic [31:0]      PostSpillInstrRawF,        // Instruction

@ -55,9 +53,9 @@ module bpred (
  input  logic             BranchD, BranchE,
  input  logic             JumpD, JumpE,
  input  logic             PCSrcE,                    // Executation stage branch is taken
-  input  logic [`XLEN-1:0] IEUAdrE,                   // The branch/jump target address
-  input  logic [`XLEN-1:0] IEUAdrM,                   // The branch/jump target address
-  input  logic [`XLEN-1:0] PCLinkE,                   // The address following the branch instruction. (AKA Fall through address)
+  input  logic [P.XLEN-1:0] IEUAdrE,                   // The branch/jump target address
+  input  logic [P.XLEN-1:0] IEUAdrM,                   // The branch/jump target address
+  input  logic [P.XLEN-1:0] PCLinkE,                   // The address following the branch instruction. (AKA Fall through address)
  output logic [3:0]       InstrClassM,               // The valid instruction class. 1-hot encoded as call, return, jr (not return), j, br

  // Report branch prediction status
@ -71,21 +69,21 @@ module bpred (

  logic [1:0]              BPDirPredF;

-  logic [`XLEN-1:0]        BPBTAF, RASPCF;
+  logic [P.XLEN-1:0]        BPBTAF, RASPCF;
  logic                    BPPCWrongE;
  logic                    IClassWrongE;
  logic                    BPDirPredWrongE;
  
  logic                    BPPCSrcF;
-  logic [`XLEN-1:0]        BPPCF;
-  logic [`XLEN-1:0]        PC0NextF;
-  logic [`XLEN-1:0]        PCCorrectE;
+  logic [P.XLEN-1:0]        BPPCF;
+  logic [P.XLEN-1:0]        PC0NextF;
+  logic [P.XLEN-1:0]        PCCorrectE;
  logic [3:0]              WrongPredInstrClassD;

  logic                    BTBTargetWrongE;
  logic                    RASTargetWrongE;

-  logic [`XLEN-1:0]        BPBTAD;
+  logic [P.XLEN-1:0]        BPBTAD;

  logic                    BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF;
  logic                    BPBranchF, BPJumpF, BPReturnF, BPCallF;
@ -95,57 +93,58 @@ module bpred (
  logic                    BranchM, JumpM, ReturnM, CallM;
  logic                    BranchW, JumpW, ReturnW, CallW;
  logic                    BPReturnWrongD;
-  logic [`XLEN-1:0]        BPBTAE;
+  logic [P.XLEN-1:0]        BPBTAE;
  
  // Part 1 branch direction prediction
-  // look into the 2 port Sram model. something is wrong. 
-  if (`BPRED_TYPE == "BP_TWOBIT") begin:Predictor
-    twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, 
+  if (P.BPRED_TYPE == BP_TWOBIT) begin:Predictor
+    twoBitPredictor #(P.XLEN, P.BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, 
      .FlushD, .FlushE, .FlushM, .FlushW,
      .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
      .BranchE, .BranchM, .PCSrcE);

-  end else if (`BPRED_TYPE == "BP_GSHARE") begin:Predictor
-    gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
+  end else if (P.BPRED_TYPE == BP_GSHARE) begin:Predictor
+    gshare #(P.XLEN, P.BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
      .PCNextF, .PCF, .PCD, .PCE, .PCM, .BPDirPredF, .BPDirPredWrongE,
      .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, 
      .PCSrcE);

-  end else if (`BPRED_TYPE == "BP_GLOBAL") begin:Predictor
-    gshare #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
+  end else if (P.BPRED_TYPE == BP_GLOBAL) begin:Predictor
+    gshare #(P.XLEN, P.BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
      .PCNextF, .PCF, .PCD, .PCE, .PCM, .BPDirPredF, .BPDirPredWrongE,
      .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW,
      .PCSrcE);

-  end else if (`BPRED_TYPE == "BP_GSHARE_BASIC") begin:Predictor
-    gsharebasic #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
+  end else if (P.BPRED_TYPE == BP_GSHARE_BASIC) begin:Predictor
+    gsharebasic #(P.XLEN, P.BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
      .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
      .BranchE, .BranchM, .PCSrcE);

-  end else if (`BPRED_TYPE == "BP_GLOBAL_BASIC") begin:Predictor
-    gsharebasic #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
+  end else if (P.BPRED_TYPE == BP_GLOBAL_BASIC) begin:Predictor
+    gsharebasic #(P.XLEN, P.BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
      .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
      .BranchE, .BranchM, .PCSrcE);
  
-  end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor
-    // *** Fix me
-/* -----\/----- EXCLUDED -----\/-----
-    localHistoryPredictor DirPredictor(.clk,
-      .reset, .StallF, .StallE,
-      .LookUpPC(PCNextF),
-      .Prediction(BPDirPredF),
-      // update
-      .UpdatePC(PCE),
-      .UpdateEN(InstrClassE[0] & ~StallE),
-      .PCSrcE,
-      .UpdatePrediction(InstrClassE[0]));
- -----/\----- EXCLUDED -----/\----- */
+  end else if (P.BPRED_TYPE == BP_LOCAL_BASIC) begin:Predictor
+    localbpbasic #(P.XLEN, P.BPRED_NUM_LHR, P.BPRED_SIZE) DirPredictor(.clk, .reset, 
+      .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
+      .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
+      .BranchE, .BranchM, .PCSrcE);
+  end else if (P.BPRED_TYPE == BP_LOCAL_AHEAD) begin:Predictor
+    localaheadbp #(P.XLEN, P.BPRED_NUM_LHR, P.BPRED_SIZE) DirPredictor(.clk, .reset, 
+      .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
+      .PCNextF, .PCM, .BPDirPredD(BPDirPredF), .BPDirPredWrongE,
+      .BranchE, .BranchM, .PCSrcE);
+  end else if (P.BPRED_TYPE == BP_LOCAL_REPAIR) begin:Predictor
+    localrepairbp #(P.XLEN, P.BPRED_NUM_LHR, P.BPRED_SIZE) DirPredictor(.clk, .reset, 
+      .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
+      .PCNextF, .PCE, .PCM, .BPDirPredD(BPDirPredF), .BPDirPredWrongE,
+      .BranchD, .BranchE, .BranchM, .PCSrcE);
  end 

  // Part 2 Branch target address prediction
  // BTB contains target address for all CFI

-  btb #(`BTB_SIZE) 
+  btb #(P, P.BTB_SIZE) 
    TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
      .PCNextF, .PCF, .PCD, .PCE, .PCM,
      .BPBTAF, .BPBTAD, .BPBTAE,
@ -157,13 +156,13 @@ module bpred (
      .InstrClassM({CallM, ReturnM, JumpM, BranchM}),
      .InstrClassW({CallW, ReturnW, JumpW, BranchW}));

-  icpred #(`INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
+  icpred #(P, `INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
    .PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW,
    .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF,
    .BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .IClassWrongM, .IClassWrongE, .BPReturnWrongD);

  // Part 3 RAS
-  RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
+  RASPredictor #(P) RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
    .BPReturnF, .ReturnD, .ReturnE, .CallE,
    .BPReturnWrongD, .RASPCF, .PCLinkE);

@ -179,21 +178,21 @@ module bpred (
  
  // Output the predicted PC or corrected PC on miss-predict.
  assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF;
-  mux2 #(`XLEN) pcmuxbp(BPBTAF, RASPCF, BPReturnF, BPPCF);
+  mux2 #(P.XLEN) pcmuxbp(BPBTAF, RASPCF, BPReturnF, BPPCF);
  // Selects the BP or PC+2/4.
-  mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PC0NextF);
+  mux2 #(P.XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PC0NextF);
  // If the prediction is wrong select the correct address.
-  mux2 #(`XLEN) pcmux1(PC0NextF, PCCorrectE, BPWrongE, PC1NextF);  
+  mux2 #(P.XLEN) pcmux1(PC0NextF, PCCorrectE, BPWrongE, PC1NextF);  
  // Correct branch/jump target.
-  mux2 #(`XLEN) pccorrectemux(PCLinkE, IEUAdrE, PCSrcE, PCCorrectE);
+  mux2 #(P.XLEN) pccorrectemux(PCLinkE, IEUAdrE, PCSrcE, PCCorrectE);
  
  // If the fence/csrw was predicted as a taken branch then we select PCF, rather than PCE.
  // Effectively this is PCM+4 or the non-existant PCLinkM
-  if(`INSTR_CLASS_PRED) mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPWrongM, NextValidPCE);
+  if(`INSTR_CLASS_PRED) mux2 #(P.XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPWrongM, NextValidPCE);
  else  assign NextValidPCE = PCE;

-  if(`ZICOUNTERS_SUPPORTED) begin
-    logic [`XLEN-1:0]       RASPCD, RASPCE;
+  if(P.ZICOUNTERS_SUPPORTED) begin
+    logic [P.XLEN-1:0]       RASPCD, RASPCE;
    logic                   BTAWrongE, RASPredPCWrongE;  
    // performance counters
    // 1. class         (class wrong / minstret) (IClassWrongM / csr)                    // Correct now
@ -209,8 +208,8 @@ module bpred (
    assign BTAWrongE = (BPBTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE;
    assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE;

-    flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
-    flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
+    flopenrc #(P.XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
+    flopenrc #(P.XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
    flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, 
      {BPDirPredWrongE, BTAWrongE, RASPredPCWrongE},
      {BPDirPredWrongM, BTAWrongM, RASPredPCWrongM});
--- a/src/ifu/bpred/btb.sv
+++ b/src/ifu/bpred/btb.sv
@ -28,22 +28,20 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module btb #(parameter Depth = 10 ) (
+module btb import cvw::*;  #(parameter cvw_t P, Depth = 10 ) (
  input  logic             clk,
  input  logic             reset,
  input  logic             StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW,
-  input  logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, // PC at various stages
-  output logic [`XLEN-1:0] BPBTAF,                      // BTB's guess at PC
-  output logic [`XLEN-1:0] BPBTAD,
-  output logic [`XLEN-1:0] BPBTAE,
+  input  logic [P.XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, // PC at various stages
+  output logic [P.XLEN-1:0] BPBTAF,                      // BTB's guess at PC
+  output logic [P.XLEN-1:0] BPBTAD,
+  output logic [P.XLEN-1:0] BPBTAE,
  output logic [3:0]       BTBIClassF,                  // BTB's guess at instruction class
  // update
  input  logic             IClassWrongM,                // BTB's instruction class guess was wrong
  input  logic             IClassWrongE,
-  input  logic [`XLEN-1:0] IEUAdrE,                     // Branch/jump target address to insert into btb
-  input  logic [`XLEN-1:0] IEUAdrM,                     // Branch/jump target address to insert into btb
+  input  logic [P.XLEN-1:0] IEUAdrE,                     // Branch/jump target address to insert into btb
+  input  logic [P.XLEN-1:0] IEUAdrM,                     // Branch/jump target address to insert into btb
  input  logic [3:0]       InstrClassD,                 // Instruction class to insert into btb
  input  logic [3:0]       InstrClassE,                 // Instruction class to insert into btb
  input  logic [3:0]       InstrClassM,                 // Instruction class to insert into btb
@ -51,12 +49,12 @@ module btb #(parameter Depth = 10 ) (
 );

  logic [Depth-1:0]        PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex;
-  logic [`XLEN-1:0]        ResetPC;
+  logic [P.XLEN-1:0]        ResetPC;
  logic                    MatchD, MatchE, MatchM, MatchW, MatchX;
-  logic [`XLEN+3:0]        ForwardBTBPrediction, ForwardBTBPredictionF;
-  logic [`XLEN+3:0]        TableBTBPredF;
-  logic [`XLEN-1:0]        IEUAdrW;
-  logic [`XLEN-1:0]        PCW;
+  logic [P.XLEN+3:0]        ForwardBTBPrediction, ForwardBTBPredictionF;
+  logic [P.XLEN+3:0]        TableBTBPredF;
+  logic [P.XLEN-1:0]        IEUAdrW;
+  logic [P.XLEN-1:0]        PCW;
  logic                    BTBWrongE, BPBTAWrongE;
  logic                    BTBWrongM, BPBTAWrongM;
  
@ -75,7 +73,7 @@ module btb #(parameter Depth = 10 ) (
  // during reset.  The BTB must produce a non X PC1NextF to allow the simulation to run.
  // While the mux could be included in IFU it is not necessary for the IROM/I$/bus.
  // For now it is optimal to leave it here.
-  assign ResetPC = `RESET_VECTOR;
+  assign ResetPC = P.RESET_VECTOR[P.XLEN-1:0];
  assign PCNextFIndex = reset ? ResetPC[Depth+1:2] : {PCNextF[Depth+1] ^ PCNextF[1], PCNextF[Depth:2]}; 

  assign MatchD = PCFIndex == PCDIndex;
@ -93,22 +91,22 @@ module btb #(parameter Depth = 10 ) (


  // An optimization may be using a PC relative address.
-  ram2p1r1wbe #(2**Depth, `XLEN+4) memory(
+  ram2p1r1wbe #(2**Depth, P.XLEN+4) memory(
    .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF),
     .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1));

-  flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BPBTAF, BPBTAD);
+  flopenrc #(P.XLEN) BTBD(clk, reset, FlushD, ~StallD, BPBTAF, BPBTAD);

  // BPBTAE is not strickly necessary.  However it is used by two parts of wally.
  // 1. It gates updates to the BTB when the prediction does not change.  This save power.
  // 2. BPBTAWrongE is used by the performance counters to track when the BTB's BPBTA or instruction class is wrong.
-  flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BPBTAD, BPBTAE);
+  flopenrc #(P.XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BPBTAD, BPBTAE);
  assign BPBTAWrongE = (BPBTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]);

  flopenrc #(1) BPBTAWrongMReg(clk, reset, FlushM, ~StallM, BPBTAWrongE, BPBTAWrongM);  
  assign BTBWrongM = BPBTAWrongM | IClassWrongM;
  
-  flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
-  flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);
+  flopenr #(P.XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
+  flopenr #(P.XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);

 endmodule
--- a/src/ifu/bpred/gshare.sv
+++ b/src/ifu/bpred/gshare.sv
@ -27,9 +27,9 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"

-module gshare #(parameter k = 10,
+module gshare #(parameter XLEN, 
+                parameter k = 10,
                parameter integer TYPE = 1) (
  input logic             clk,
  input logic             reset,
@ -38,7 +38,7 @@ module gshare #(parameter k = 10,
  output logic [1:0]      BPDirPredF, 
  output logic            BPDirPredWrongE,
  // update
-  input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,
+  input logic [XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,
  input logic             BPBranchF, BranchD, BranchE, BranchM, BranchW, PCSrcE
 );

--- a/src/ifu/bpred/gsharebasic.sv
+++ b/src/ifu/bpred/gsharebasic.sv
@ -27,9 +27,8 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module gsharebasic #(parameter k = 10,
+module gsharebasic #(parameter XLEN,
+                     parameter k = 10,
                     parameter TYPE = 1) (
  input logic             clk,
  input logic             reset,
@ -38,7 +37,7 @@ module gsharebasic #(parameter k = 10,
  output logic [1:0]      BPDirPredF, 
  output logic            BPDirPredWrongE,
  // update
-  input logic [`XLEN-1:0] PCNextF, PCM,
+  input logic [XLEN-1:0] PCNextF, PCM,
  input logic             BranchE, BranchM, PCSrcE
 );

--- a/src/ifu/bpred/icpred.sv
+++ b/src/ifu/bpred/icpred.sv
@ -26,10 +26,7 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-
-module icpred #(parameter INSTR_CLASS_PRED = 1)(
+module icpred import cvw::*;  #(parameter cvw_t P, INSTR_CLASS_PRED = 1)(
  input  logic             clk, reset,
  input  logic             StallF, StallD, StallE, StallM, StallW,
  input  logic             FlushD, FlushE, FlushM, FlushW,
@ -56,10 +53,10 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)(
    logic     ccall, cj, cjr, ccallr, CJumpF, CBranchF;
    logic     NCJumpF, NCBranchF;

-    if(`C_SUPPORTED) begin
+    if(P.C_SUPPORTED) begin
      logic [4:0] CompressedOpcF;
      assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]};
-      assign ccall = CompressedOpcF == 5'h09 & `XLEN == 32;
+      assign ccall = CompressedOpcF == 5'h09 & P.XLEN == 32;
      assign cj = CompressedOpcF == 5'h0d;
      assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
      assign ccallr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
@ -72,13 +69,13 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)(
    assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F;
    assign NCBranchF = PostSpillInstrRawF[6:0] == 7'h63;
    
-    assign BPBranchF = NCBranchF | (`C_SUPPORTED & CBranchF);
-    assign BPJumpF = NCJumpF | (`C_SUPPORTED & (CJumpF));
+    assign BPBranchF = NCBranchF | (P.C_SUPPORTED & CBranchF);
+    assign BPJumpF = NCJumpF | (P.C_SUPPORTED & (CJumpF));
    assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // returnurn must returnurn to ra or r5
-        (`C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
+        (P.C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
    
    assign BPCallF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // call(r) must link to ra or x5
-        (`C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
+        (P.C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));

  end else begin
    // This section connects the BTB's instruction class prediction.
--- a/src/ifu/bpred/localHistoryPredictor.sv
+++ b/src/ifu/bpred/localHistoryPredictor.sv
@ -1,130 +0,0 @@
-///////////////////////////////////////////
-// locallHistoryPredictor.sv
-//
-// Written: Shreya Sanghai
-// Email: ssanghai@hmc.edu
-// Created: March 16, 2021
-// Modified: 
-//
-// Purpose: Global History Branch predictor with parameterized global history register
-// 
-// A component of the CORE-V-WALLY configurable RISC-V project.
-// 
-// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
-//
-// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
-//
-// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
-// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
-// may obtain a copy of the License at
-//
-// https://solderpad.org/licenses/SHL-2.1/
-//
-// Unless required by applicable law or agreed to in writing, any work distributed under the 
-// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
-// either express or implied. See the License for the specific language governing permissions 
-// and limitations under the License.
-////////////////////////////////////////////////////////////////////////////////////////////////
-
-`include "wally-config.vh"
-
-module localHistoryPredictor #(parameter m = 6,    // 2^m = number of local history branches
-                                         k = 10) ( // number of past branches stored
-  input  logic             clk,
-  input  logic             reset,
-  input  logic             StallF,  StallE,
-  input  logic [`XLEN-1:0] LookUpPC,
-  output logic [1:0]       Prediction,
-  // update
-  input logic [`XLEN-1:0]  UpdatePC,
-  input logic              UpdateEN, PCSrcE, 
-  input logic [1:0]        UpdatePrediction
-);
-
-  logic [2**m-1:0][k-1:0]  LHRNextF;
-  logic [k-1:0]            LHRF, ForwardLHRNext, LHRFNext;
-  logic [m-1:0]            LookUpPCIndex, UpdatePCIndex;
-  logic [1:0]              PredictionMemory;
-  logic                    DoForwarding, DoForwardingF, DoForwardingPHT, DoForwardingPHTF;
-  logic [1:0]              UpdatePredictionF;
-
-  assign LHRFNext = {PCSrcE, LHRF[k-1:1]}; 
-  assign UpdatePCIndex = {UpdatePC[m+1] ^ UpdatePC[1], UpdatePC[m:2]};
-  assign LookUpPCIndex = {LookUpPC[m+1] ^ LookUpPC[1], LookUpPC[m:2]};  
-
-  // INCASE we do ahead pipelining
-  //    ram2p1r1wb #(m,k) LHR(.clk(clk)),
-  //                 .reset(reset),
-  //                 .RA1(LookUpPCIndex), // need hashing function to get correct PC address 
-  //                 .RD1(LHRF),
-  //                 .REN1(~StallF),
-  //                 .WA1(UpdatePCIndex),
-  //                 .WD1(LHRENExt),
-  //                 .WEN1(UpdateEN),
-  //                 .BitWEN1(2'b11));  
-
-  genvar      index;
-  for (index = 0; index < 2**m; index = index +1) begin:localhist
-    flopenr #(k) LocalHistoryRegister(.clk, .reset, .en(UpdateEN & (index == UpdatePCIndex)),
-                                      .d(LHRFNext), .q(LHRNextF[index]));
-  end 
-
-  // need to forward when updating to the same address as reading.
-  // first we compare to see if the update and lookup addreses are the same
-  assign DoForwarding = LookUpPCIndex == UpdatePCIndex;
-  assign ForwardLHRNext = DoForwarding ? LHRFNext :LHRNextF[LookUpPCIndex]; 
-
-  // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT 
-  // LHR referes to the address that the past k branches points to in the prediction stage 
-  // LHRE refers to the address that the past k branches points to in the exectution stage
-  ram2p1r1wb #(k, 2) PHT(.clk(clk), 
-    .reset(reset),
-    .ra1(ForwardLHRNext),
-    .rd1(PredictionMemory),
-    .ren1(~StallF),
-    .wa2(LHRFNext),
-    .wd2(UpdatePrediction),
-    .wen2(UpdateEN),
-    .bwe2(2'b11));
-
-
-  
-  assign DoForwardingPHT = LHRFNext == ForwardLHRNext; 
-
-  // register the update value and the forwarding signal into the Fetch stage
-  // TODO: add stall logic ***
-  flopr #(1) DoForwardingReg(.clk(clk),
-        .reset(reset),
-        .d(DoForwardingPHT),
-        .q(DoForwardingPHTF));
-  
-  flopr #(2) UpdatePredictionReg(.clk(clk),
-     .reset(reset),
-     .d(UpdatePrediction),
-     .q(UpdatePredictionF));
-
-  assign Prediction = DoForwardingPHTF ? UpdatePredictionF : PredictionMemory;
-  
-  //pipeline for LHR
-  flopenrc #(k) LHRFReg(.clk(clk),
-   .reset(reset),
-   .en(~StallF),
-   .clear(1'b0),
-   .d(ForwardLHRNext),
-   .q(LHRF));
-  /*
-   flopenrc #(k) LHRDReg(.clk(clk),
-   .reset(reset),
-   .en(~StallD),
-   .clear(FlushD),
-   .d(LHRF),
-   .q(LHRD));
-   
-   flopenrc #(k) LHREReg(.clk(clk),
-   .reset(reset),
-   .en(~StallE),
-   .clear(FlushE),
-   .d(LHRD),
-   .q(LHRE));
-   */
-endmodule
--- a/src/ifu/bpred/localaheadbp.sv
+++ b/src/ifu/bpred/localaheadbp.sv
@ -0,0 +1,114 @@
+///////////////////////////////////////////
+// localaheadbp
+//
+// Written: Ross Thompson
+// Email: ross1728@gmail.com
+// Created: 16 March 2021
+//
+// Purpose: local history branch predictor with ahead pipelining and SRAM memories.
+// 
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module localaheadbp #(parameter XLEN, // width in bits of the PC inputs PCNextF and PCM
+                      parameter m = 6, // 2^m = number of local history entries held in the BHT
+                      parameter k = 10) ( // number of past branch outcomes kept per entry; the PHT has 2^k entries
+  input logic             clk,
+  input logic             reset,
+  input logic             StallF, StallD, StallE, StallM, StallW,
+  input logic             FlushD, FlushE, FlushM, FlushW,
+  output logic [1:0]      BPDirPredD, // 2-bit direction prediction read out of the PHT
+  output logic            BPDirPredWrongE, // high when BranchE is set and PCSrcE differs from BPDirPredM[1]
+  // update
+  input logic [XLEN-1:0] PCNextF, PCM,
+  input logic             BranchE, BranchM, PCSrcE
+);
+  // Two-level local predictor: the BHT (indexed by a hash of PCNextF) supplies LHRF, and LHRF addresses the PHT.
+  logic [k-1:0]           IndexNextF, IndexM; // IndexNextF is neither driven nor read by any active statement
+  //logic [1:0]             BPDirPredD, BPDirPredE;
+  logic [1:0]             BPDirPredE;
+  logic [1:0]             BPDirPredM;
+  logic [1:0]             NewBPDirPredE, NewBPDirPredM, NewBPDirPredW; // NewBPDirPredE appears only in commented-out code
+
+  logic [k-1:0]           LHRF, LHRD, LHRE, LHRM, LHRW, LHRNextF; // LHRNextF appears only in commented-out code
+  logic [k-1:0]           LHRNextW;
+  logic                   PCSrcM;
+  logic [2**m-1:0][k-1:0] LHRArray; // not referenced anywhere else in this module
+  logic [m-1:0]           IndexLHRNextF, IndexLHRM;
+  logic [XLEN-1:0]       PCW;
+  
+  
+  logic                    UpdateM; // appears only in commented-out code
+
+  //assign IndexNextF = LHR;
+  assign IndexM = LHRW; // PHT write address is the W-stage copy of the history
+  // Pattern history table: 2^k entries of 2 bits, read at LHRF, written at IndexM when BranchM is set.
+  ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
+    .ce1(~StallD), .ce2(~StallW & ~FlushW),
+    .ra1(LHRF),
+    .rd1(BPDirPredD),
+    .wa2(IndexM),
+    .wd2(NewBPDirPredW),
+    .we2(BranchM),
+    .bwe2(1'b1));
+
+  //flopenrc #(2) PredictionRegD(clk, reset,  FlushD, ~StallD, BPDirPredF, BPDirPredD);
+  flopenrc #(2) PredictionRegE(clk, reset,  FlushE, ~StallE, BPDirPredD, BPDirPredE);
+  flopenrc #(2) PredictionRegM(clk, reset,  FlushM, ~StallM, BPDirPredE, BPDirPredM);
+  // NOTE(review): the instance below is named ...E and takes PCSrcE, but its old state is BPDirPredM - confirm stage alignment.
+  satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredM), .NewState(NewBPDirPredM));
+  //flopenrc #(2) NewPredictionRegM(clk, reset,  FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM);
+  flopenrc #(2) NewPredictionRegW(clk, reset,  FlushW, ~StallW, NewBPDirPredM, NewBPDirPredW);
+  // != binds tighter than &, so this is (PCSrcE != BPDirPredM[1]) & BranchE.
+  assign BPDirPredWrongE = PCSrcE != BPDirPredM[1] & BranchE;
+
+  // This is the main difference between global and local history basic implementations. In global, 
+  // the GHR wraps back into itself directly without
+  // being pipelined, i.e. the GHR is not read in F and then pipelined to M where it is updated.  Instead the
+  // GHR is both read and updated in M.  The GHR is still pipelined so that the PHT is updated with the correct
+  // GHR.  Local history in contrast must pipeline the specific history register read during F and then update
+  // that same one later (here from LHRW/PCW).  This implementation does not forward if a branch matches in the D, E, or M stages.
+  assign LHRNextW = BranchM ? {PCSrcM, LHRW[k-1:1]} : LHRW; // on a branch, shift PCSrcM in at the MSB and drop bit 0
+
+  // this is local history
+  //genvar      index;
+  //assign UpdateM = BranchM & ~StallW & ~FlushW;
+  assign IndexLHRM = {PCW[m+1] ^ PCW[1], PCW[m:2]}; // m-bit BHT write index hashed from PCW (despite the M suffix)
+  assign IndexLHRNextF = {PCNextF[m+1] ^ PCNextF[1], PCNextF[m:2]}; // same hash applied to PCNextF for the BHT read index
+  // NOTE(review): both RAM writes are enabled by BranchM while their address/data come from W-stage values - confirm intended.
+  ram2p1r1wbe #(2**m, k) BHT(.clk(clk),
+    .ce1(~StallF), .ce2(~StallW & ~FlushW),
+    .ra1(IndexLHRNextF),
+    .rd1(LHRF),
+    .wa2(IndexLHRM),
+    .wd2(LHRNextW),
+    .we2(BranchM),
+    .bwe2('1));  
+
+  flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
+    
+  //flopenrc #(k) LHRFReg(clk, reset, FlushD, ~StallF, LHRNextF, LHRF);
+  //assign LHRF = LHRNextF;
+  flopenrc #(k) LHRDReg(clk, reset, FlushD, ~StallD, LHRF, LHRD); // carry the history that was read down the pipeline to W
+  flopenrc #(k) LHREReg(clk, reset, FlushE, ~StallE, LHRD, LHRE);
+  flopenrc #(k) LHRMReg(clk, reset, FlushM, ~StallM, LHRE, LHRM);
+  flopenrc #(k) LHRWReg(clk, reset, FlushW, ~StallW, LHRM, LHRW);
+
+  flopenr #(XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); // W-stage copy of PCM, used to form IndexLHRM
+
+endmodule
--- a/src/ifu/bpred/localbpbasic.sv
+++ b/src/ifu/bpred/localbpbasic.sv
@ -0,0 +1,105 @@
+///////////////////////////////////////////
+// localbpbasic
+//
+// Written: Ross Thompson
+// Email: ross1728@gmail.com
+// Created: 16 March 2021
+//
+// Purpose: Local history branch predictor. Basic implementation without any repair and flop memories.
+
+// 
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module localbpbasic #(parameter XLEN, // width in bits of the PC inputs PCNextF and PCM
+                      parameter m = 6, // 2^m = number of local history registers in LHRArray
+                      parameter k = 10) ( // number of past branch outcomes kept per register; the PHT has 2^k entries
+  input logic             clk,
+  input logic             reset,
+  input logic             StallF, StallD, StallE, StallM, StallW,
+  input logic             FlushD, FlushE, FlushM, FlushW,
+  output logic [1:0]      BPDirPredF, // 2-bit direction prediction read out of the PHT
+  output logic            BPDirPredWrongE, // high when BranchE is set and PCSrcE differs from BPDirPredE[1]
+  // update
+  input logic [XLEN-1:0] PCNextF, PCM,
+  input logic             BranchE, BranchM, PCSrcE
+);
+  // Two-level local predictor: a flop array of histories selected by hashed PCNextF addresses the PHT.
+  logic [k-1:0]           IndexNextF, IndexM;
+  logic [1:0]             BPDirPredD, BPDirPredE;
+  logic [1:0]             NewBPDirPredE, NewBPDirPredM;
+
+  logic [k-1:0]           LHRF, LHRD, LHRE, LHRM, LHR;
+  logic [k-1:0]           LHRNextW;
+  logic                   PCSrcM;
+  logic [2**m-1:0][k-1:0]  LHRArray; // 2^m history registers, each k bits wide
+  logic [m-1:0]            IndexLHRNextF, IndexLHRM;
+  
+  logic                    UpdateM;
+
+  assign IndexNextF = LHR; // PHT read address is the history selected for PCNextF
+  assign IndexM = LHRM; // PHT write address is the M-stage copy of that history
+  // Pattern history table: 2^k entries of 2 bits, written with NewBPDirPredM when BranchM is set.
+  ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
+    .ce1(~StallF), .ce2(~StallW & ~FlushW),
+    .ra1(IndexNextF),
+    .rd1(BPDirPredF),
+    .wa2(IndexM),
+    .wd2(NewBPDirPredM),
+    .we2(BranchM),
+    .bwe2(1'b1));
+
+  flopenrc #(2) PredictionRegD(clk, reset,  FlushD, ~StallD, BPDirPredF, BPDirPredD);
+  flopenrc #(2) PredictionRegE(clk, reset,  FlushE, ~StallE, BPDirPredD, BPDirPredE);
+  // Compute the next 2-bit state from the E-stage prediction and the resolved direction PCSrcE.
+  satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE));
+  flopenrc #(2) NewPredictionRegM(clk, reset,  FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM);
+  // != binds tighter than &, so this is (PCSrcE != BPDirPredE[1]) & BranchE.
+  assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE;
+
+  // This is the main difference between global and local history basic implementations. In global, 
+  // the GHR wraps back into itself directly without
+  // being pipelined, i.e. the GHR is not read in F and then pipelined to M where it is updated.  Instead the
+  // GHR is both read and updated in M.  The GHR is still pipelined so that the PHT is updated with the correct
+  // GHR.  Local history in contrast must pipeline the specific history register read during F and then update
+  // that same one in M.  This implementation does not forward if a branch matches in the D, E, or M stages.
+  assign LHRNextW = BranchM ? {PCSrcM, LHRM[k-1:1]} : LHRM; // on a branch, shift PCSrcM in at the MSB and drop bit 0
+
+  // this is local history
+  genvar      index;
+  assign UpdateM = BranchM & ~StallW & ~FlushW; // write enable shared by every history register
+  assign IndexLHRM = {PCM[m+1] ^ PCM[1], PCM[m:2]}; // m-bit write index hashed from PCM
+  for (index = 0; index < 2**m; index = index +1) begin:localhist
+    flopenr #(k) LocalHistoryRegister(.clk, .reset, .en(UpdateM & (index == IndexLHRM)), // only the register matching IndexLHRM loads
+                                      .d(LHRNextW), .q(LHRArray[index]));
+  end
+  assign IndexLHRNextF = {PCNextF[m+1] ^ PCNextF[1], PCNextF[m:2]}; // same hash applied to PCNextF for the read index
+  assign LHR = LHRArray[IndexLHRNextF]; // combinational read of the selected history
+
+  // this is global history
+  //flopenr #(k) LHRReg(clk, reset, ~StallM & ~FlushM & BranchM, LHRNextW, LHR);
+
+  flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
+  // NOTE(review): LHRFReg pairs FlushD with ~StallF, unlike the later stages which use matching letters - confirm intended.
+  flopenrc #(k) LHRFReg(clk, reset, FlushD, ~StallF, LHR, LHRF);
+  flopenrc #(k) LHRDReg(clk, reset, FlushD, ~StallD, LHRF, LHRD);
+  flopenrc #(k) LHREReg(clk, reset, FlushE, ~StallE, LHRD, LHRE);
+  flopenrc #(k) LHRMReg(clk, reset, FlushM, ~StallM, LHRE, LHRM);
+
+
+endmodule
--- a/src/ifu/bpred/localrepairbp.sv
+++ b/src/ifu/bpred/localrepairbp.sv
@ -0,0 +1,135 @@
+///////////////////////////////////////////
+// localrepairbp
+//
+// Written: Ross Thompson
+// Email: ross1728@gmail.com
+// Created: 15 April 2023
+//
+// Purpose: Local history branch predictor with speculation and repair using CBH.
+// 
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module localrepairbp #(parameter XLEN, // width in bits of the PC inputs PCNextF, PCE and PCM
+                       parameter m = 6, // 2^m = number of local history entries in each of BHT and SHB
+                       parameter k = 10) ( // number of past branch outcomes kept per entry; the PHT has 2^k entries
+  input logic             clk,
+  input logic             reset,
+  input logic             StallF, StallD, StallE, StallM, StallW,
+  input logic             FlushD, FlushE, FlushM, FlushW,
+  output logic [1:0]      BPDirPredD, // 2-bit direction prediction read out of the PHT
+  output logic            BPDirPredWrongE, // high when BranchE is set and PCSrcE differs from BPDirPredM[1]
+  // update
+  input logic [XLEN-1:0] PCNextF, PCE, PCM,
+  input logic             BranchD, BranchE, BranchM, PCSrcE
+);
+  // Local predictor with two history RAMs (BHT and SHB); a per-entry flag chooses which one feeds the PHT address LHRF.
+  //logic [1:0]             BPDirPredD, BPDirPredE;
+  logic [1:0]             BPDirPredE;
+  logic [1:0]             BPDirPredM;
+  logic [1:0]             NewBPDirPredE, NewBPDirPredM, NewBPDirPredW; // NewBPDirPredE appears only in commented-out code
+
+  logic [k-1:0]           LHRF, LHRD, LHRE, LHRM, LHRW, LHRNextF; // LHRNextF appears only in commented-out code
+  logic [k-1:0]           LHRNextW;
+  logic                   PCSrcM;
+  logic [2**m-1:0][k-1:0] LHRArray; // not referenced anywhere else in this module
+  logic [m-1:0]           IndexLHRNextF, IndexLHRM;
+  logic [XLEN-1:0]       PCW;
+
+  logic [k-1:0]           LHRCommittedF, LHRSpeculativeF; // read data of BHT and SHB respectively
+  logic [m-1:0]           IndexLHRD;
+  logic [k-1:0]           LHRNextE;
+  logic                   SpeculativeFlushedF;
+  
+  // Pattern history table: 2^k entries of 2 bits, read at LHRF, written at LHRW when BranchM is set.
+  ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
+    .ce1(~StallD), .ce2(~StallW & ~FlushW),
+    .ra1(LHRF),
+    .rd1(BPDirPredD),
+    .wa2(LHRW),
+    .wd2(NewBPDirPredW),
+    .we2(BranchM),
+    .bwe2(1'b1));
+
+  //flopenrc #(2) PredictionRegD(clk, reset,  FlushD, ~StallD, BPDirPredF, BPDirPredD);
+  flopenrc #(2) PredictionRegE(clk, reset,  FlushE, ~StallE, BPDirPredD, BPDirPredE);
+  flopenrc #(2) PredictionRegM(clk, reset,  FlushM, ~StallM, BPDirPredE, BPDirPredM);
+  // NOTE(review): the instance below is named ...E and takes PCSrcE, but its old state is BPDirPredM - confirm stage alignment.
+  satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredM), .NewState(NewBPDirPredM));
+  //flopenrc #(2) NewPredictionRegM(clk, reset,  FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM);
+  flopenrc #(2) NewPredictionRegW(clk, reset,  FlushW, ~StallW, NewBPDirPredM, NewBPDirPredW);
+  // != binds tighter than &, so this is (PCSrcE != BPDirPredM[1]) & BranchE.
+  assign BPDirPredWrongE = PCSrcE != BPDirPredM[1] & BranchE;
+
+  // This is the main difference between global and local history basic implementations. In global, 
+  // the GHR wraps back into itself directly without
+  // being pipelined, i.e. the GHR is not read in F and then pipelined to M where it is updated.  Instead the
+  // GHR is both read and updated in M.  The GHR is still pipelined so that the PHT is updated with the correct
+  // GHR.  Local history in contrast must pipeline the specific history register read during F and then update
+  // that same one later (here from LHRW/PCW).  This implementation does not forward if a branch matches in the D, E, or M stages.
+  assign LHRNextW = BranchM ? {PCSrcM, LHRW[k-1:1]} : LHRW; // on a branch, shift PCSrcM in at the MSB and drop bit 0
+
+  // this is local history
+  assign IndexLHRM = {PCW[m+1] ^ PCW[1], PCW[m:2]}; // m-bit BHT write index hashed from PCW (despite the M suffix)
+  assign IndexLHRNextF = {PCNextF[m+1] ^ PCNextF[1], PCNextF[m:2]}; // same hash applied to PCNextF; read index for BHT, SHB and FlushedBits
+  // BHT holds the history written from the W-stage values (read out as LHRCommittedF).
+  ram2p1r1wbe #(2**m, k) BHT(.clk(clk),
+    .ce1(~StallF), .ce2(~StallW & ~FlushW),
+    .ra1(IndexLHRNextF),
+    .rd1(LHRCommittedF),
+    .wa2(IndexLHRM),
+    .wd2(LHRNextW),
+    .we2(BranchM),
+    .bwe2('1));
+  // NOTE(review): IndexLHRD is hashed from PCE, and LHRNextE mixes BranchD/BPDirPredD with LHRE - confirm stage alignment.
+  assign IndexLHRD = {PCE[m+1] ^ PCE[1], PCE[m:2]};
+  assign LHRNextE = BranchD ? {BPDirPredD[1], LHRE[k-1:1]} : LHRE; // on BranchD, shift the predicted direction bit in at the MSB
+  // *** replace with a small CAM
+  ram2p1r1wbe #(2**m, k) SHB(.clk(clk),
+    .ce1(~StallF), .ce2(~StallE & ~FlushE),
+    .ra1(IndexLHRNextF),
+    .rd1(LHRSpeculativeF),
+    .wa2(IndexLHRD),
+    .wd2(LHRNextE),
+    .we2(BranchD),
+    .bwe2('1));
+  // **** replace with small CAM
+  logic [2**m-1:0]        FlushedBits; // one flag per SHB entry
+  always_ff @(posedge clk) begin // Valid bit array, stored inverted: 1 = flushed
+    SpeculativeFlushedF <= #1 FlushedBits[IndexLHRNextF]; // registered flag for the entry being looked up
+    if (reset | FlushD) FlushedBits        <= #1 '1; // reset or FlushD marks every entry flushed
+    if(BranchD & ~StallE & ~FlushE) begin // same condition under which the SHB write port is enabled (we2 with ce2)
+      FlushedBits[IndexLHRD] <= #1 '0; // later nonblocking assignment wins for this bit if both conditions hold
+    end
+  end
+
+  //assign SpeculativeFlushedF = '1;
+  mux2 #(k) LHRMux(LHRSpeculativeF, LHRCommittedF, SpeculativeFlushedF, LHRF); // presumably picks LHRCommittedF when the flag is 1 - confirm mux2 port order
+
+  flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
+    
+  //flopenrc #(k) LHRFReg(clk, reset, FlushD, ~StallF, LHRNextF, LHRF);
+  //assign LHRF = LHRNextF;
+  flopenrc #(k) LHRDReg(clk, reset, FlushD, ~StallD, LHRF, LHRD); // carry the selected history down the pipeline to W
+  flopenrc #(k) LHREReg(clk, reset, FlushE, ~StallE, LHRD, LHRE);
+  flopenrc #(k) LHRMReg(clk, reset, FlushM, ~StallM, LHRE, LHRM);
+  flopenrc #(k) LHRWReg(clk, reset, FlushW, ~StallW, LHRM, LHRW);
+
+  flopenr #(XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); // W-stage copy of PCM, used to form IndexLHRM
+
+endmodule
--- a/Show More
+++ b/Show More