Merge branch 'main' of https://github.com/openhwgroup/cvw into dev

This commit is contained in:
David Harris 2023-05-29 23:24:54 -07:00
commit 9b8a2303a9
159 changed files with 4869 additions and 3066 deletions

View File

@ -84,6 +84,11 @@ void setStats(int enable)
READ_CTR(mhpmcounter10);
READ_CTR(mhpmcounter11);
READ_CTR(mhpmcounter12);
READ_CTR(mhpmcounter13);
READ_CTR(mhpmcounter14);
READ_CTR(mhpmcounter15);
READ_CTR(mhpmcounter16);
READ_CTR(mhpmcounter17);
#undef READ_CTR
}
@ -167,18 +172,21 @@ void _init(int cid, int nc)
counters[12] = read_csr(mhpmcounter12) - counters[12];
counters[13] = read_csr(mhpmcounter13) - counters[13];
counters[14] = read_csr(mhpmcounter14) - counters[14];
counters[15] = read_csr(mhpmcounter15) - counters[15];
counters[16] = read_csr(mhpmcounter16) - counters[16];
counters[17] = read_csr(mhpmcounter17) - counters[17];
ee_printf("Load Stalls %d\n", counters[3]);
ee_printf("D-Cache Accesses %d\n", counters[11]);
ee_printf("D-Cache Misses %d\n", counters[12]);
ee_printf("I-Cache Accesses %d\n", counters[13]);
ee_printf("I-Cache Misses %d\n", counters[14]);
ee_printf("Branches %d\n", counters[5]);
ee_printf("Branches Miss Predictions %d\n", counters[4]);
ee_printf("BTB Misses %d\n", counters[6]);
ee_printf("Jump, JAL, JALR %d\n", counters[7]);
ee_printf("RAS Wrong %d\n", counters[8]);
ee_printf("Returns %d\n", counters[9]);
ee_printf("Load Stalls %d\n", counters[11]);
ee_printf("D-Cache Accesses %d\n", counters[13]);
ee_printf("D-Cache Misses %d\n", counters[14]);
ee_printf("I-Cache Accesses %d\n", counters[16]);
ee_printf("I-Cache Misses %d\n", counters[17]);
ee_printf("Branches %d\n", counters[3]);
ee_printf("Branches Miss Predictions %d\n", counters[7]);
ee_printf("BTB Misses %d\n", counters[8]);
ee_printf("Jump and JR %d\n", counters[4]);
ee_printf("RAS Wrong %d\n", counters[9]);
ee_printf("Returns %d\n", counters[5]);
ee_printf("BP Class Wrong %d\n", counters[10]);
ee_printf("Done printing performance counters\n");

View File

@ -279,12 +279,13 @@ if(sys.argv[1] == '-b'):
dct[PredType] = (currSize, currPercent)
print(dct)
fig, axes = plt.subplots()
marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*'}
colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue'}
marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*', 'tenlocal' : '.', 'eightlocal' : ',', 'fourlocal' : 'x', 'tenlocalahead' : '.', 'eightlocalahead' : ',', 'fourlocalahead' : 'x', 'tenlocalrepair' : 'x'}
colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue', 'tenlocal' : 'lightblue', 'eightlocal' : 'lightblue', 'fourlocal' : 'lightblue', 'tenlocalahead' : 'lightblue', 'eightlocalahead' : 'lightblue', 'fourlocalahead' : 'lightblue', 'tenlocalrepair' : 'lightblue'}
for cat in dct:
(x, y) = dct[cat]
x=[int(2**int(v)) for v in x]
print(x, y)
#print(x, y)
print(cat)
axes.plot(x,y, color=colors[cat])
axes.scatter(x,y, label=cat, marker=marker[cat], color=colors[cat])
#plt.scatter(x, y, label=cat)

157
config/buildroot/config.vh Normal file
View File

@ -0,0 +1,157 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// include shared configuration
`include "wally-shared.vh"
`include "BranchPredictorType.vh"
localparam FPGA = 1;
localparam QEMU = 0;
// RV32 or RV64: XLEN = 32 or 64
localparam XLEN = 32'd64;
// IEEE 754 compliance
localparam IEEE754 = 0;
localparam MISA = (32'h0014112D);
localparam ZICSR_SUPPORTED = 1;
localparam ZIFENCEI_SUPPORTED = 1;
localparam ZICOUNTERS_SUPPORTED = 1;
localparam COUNTERS = 12'd32;
localparam ZFH_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 0;
// LSU microarchitectural Features
localparam BUS_SUPPORTED = 1;
localparam DCACHE_SUPPORTED = 1;
localparam ICACHE_SUPPORTED = 1;
localparam VIRTMEM_SUPPORTED = 1;
localparam VECTORED_INTERRUPTS_SUPPORTED = 1 ;
localparam BIGENDIAN_SUPPORTED = 1;
// TLB configuration. Entries should be a power of 2
localparam ITLB_ENTRIES = 32'd32;
localparam DTLB_ENTRIES = 32'd32;
// Cache configuration. Sizes should be a power of two
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
localparam DCACHE_NUMWAYS = 32'd4;
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
localparam DCACHE_LINELENINBITS = 32'd512;
localparam ICACHE_NUMWAYS = 32'd4;
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
localparam ICACHE_LINELENINBITS = 32'd512;
// Integer Divider Configuration
// IDIV_BITSPERCYCLE must be 1, 2, or 4
localparam IDIV_BITSPERCYCLE = 32'd4;
localparam IDIV_ON_FPU = 1;
// Legal number of PMP entries are 0, 16, or 64
localparam PMP_ENTRIES = 32'd16;
// Address space
localparam RESET_VECTOR = 64'h0000000000001000;
// WFI Timeout Wait
localparam WFI_TIMEOUT_BIT = 32'd16;
// Peripheral Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
localparam DTIM_SUPPORTED = 1'b0;
localparam DTIM_BASE = 64'h80000000;
localparam DTIM_RANGE = 64'h00001FFF;
localparam IROM_SUPPORTED = 1'b0;
localparam IROM_BASE = 64'h80000000;
localparam IROM_RANGE = 64'h00001FFF;
localparam BOOTROM_SUPPORTED = 1'b1;
localparam BOOTROM_BASE = 64'h00001000 ;
localparam BOOTROM_RANGE = 64'h00000FFF;
localparam UNCORE_RAM_SUPPORTED = 1'b1;
localparam UNCORE_RAM_BASE = 64'h80000000;
localparam UNCORE_RAM_RANGE = 64'h07FFFFFF;
localparam EXT_MEM_SUPPORTED = 1'b0;
localparam EXT_MEM_BASE = 64'h80000000;
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
localparam CLINT_SUPPORTED = 1'b1;
localparam CLINT_BASE = 64'h02000000;
localparam CLINT_RANGE = 64'h0000FFFF;
localparam GPIO_SUPPORTED = 1'b1;
localparam GPIO_BASE = 64'h10060000;
localparam GPIO_RANGE = 64'h000000FF;
localparam UART_SUPPORTED = 1'b1;
localparam UART_BASE = 64'h10000000;
localparam UART_RANGE = 64'h00000007;
localparam PLIC_SUPPORTED = 1'b1;
localparam PLIC_BASE = 64'h0C000000;
localparam PLIC_RANGE = 64'h03FFFFFF;
localparam SDC_SUPPORTED = 1'b0;
localparam SDC_BASE = 64'h00012100;
localparam SDC_RANGE = 64'h0000001F;
// Bus Interface width
localparam AHBW = 32'd64;
// Test modes
// Tie GPIO outputs back to inputs
localparam GPIO_LOOPBACK_TEST = 0;
// Hardware configuration
localparam UART_PRESCALE = 32'd0;
// Interrupt configuration
localparam PLIC_NUM_SRC = 32'd53;
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
localparam PLIC_UART_ID = 32'd10;
localparam PLIC_GPIO_ID = 32'd3;
localparam BPRED_SUPPORTED = 1;
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
localparam BPRED_SIZE = 32'd10;
localparam BPRED_NUM_LHR = 32'd6;
localparam BTB_SIZE = 32'd10;
localparam SVADU_SUPPORTED = 1;
localparam ZMMUL_SUPPORTED = 0;
// FPU division architecture
localparam RADIX = 32'h4;
localparam DIVCOPIES = 32'h4;
// bit manipulation
localparam ZBA_SUPPORTED = 0;
localparam ZBB_SUPPORTED = 0;
localparam ZBC_SUPPORTED = 0;
localparam ZBS_SUPPORTED = 0;
// Memory synthesis configuration
localparam USE_SRAM = 0;
`include "test-shared.vh"

View File

@ -132,6 +132,7 @@
`define BPRED_SUPPORTED 1
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
`define BPRED_SIZE 10
`define BPRED_NUM_LHR 6
`define BTB_SIZE 10

View File

@ -141,6 +141,7 @@
`define BPRED_SUPPORTED 1
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
`define BPRED_SIZE 12
`define BPRED_NUM_LHR 6
`define BTB_SIZE 10

158
config/rv32e/config.vh Normal file
View File

@ -0,0 +1,158 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "BranchPredictorType.vh"
localparam FPGA = 0;
localparam QEMU = 0;
// RV32 or RV64: XLEN = 32 or 64
localparam XLEN = 32'd32;
// IEEE 754 compliance
localparam IEEE754 = 0;
// E
localparam MISA = (32'h00000010);
localparam ZICSR_SUPPORTED = 0;
localparam ZIFENCEI_SUPPORTED = 0;
localparam COUNTERS = 12'd0;
localparam ZICOUNTERS_SUPPORTED = 0;
localparam ZFH_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 0;
// LSU microarchitectural Features
localparam BUS_SUPPORTED = 1;
localparam DCACHE_SUPPORTED = 0;
localparam ICACHE_SUPPORTED = 0;
localparam VIRTMEM_SUPPORTED = 0;
localparam VECTORED_INTERRUPTS_SUPPORTED = 0;
localparam BIGENDIAN_SUPPORTED = 0;
// TLB configuration. Entries should be a power of 2
localparam ITLB_ENTRIES = 32'd0;
localparam DTLB_ENTRIES = 32'd0;
// Cache configuration. Sizes should be a power of two
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
localparam DCACHE_NUMWAYS = 32'd4;
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
localparam DCACHE_LINELENINBITS = 32'd512;
localparam ICACHE_NUMWAYS = 32'd4;
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
localparam ICACHE_LINELENINBITS = 32'd512;
// Integer Divider Configuration
// IDIV_BITSPERCYCLE must be 1, 2, or 4
localparam IDIV_BITSPERCYCLE = 32'd1;
localparam IDIV_ON_FPU = 0;
// Legal number of PMP entries are 0, 16, or 64
localparam PMP_ENTRIES = 32'd0;
// Address space
localparam RESET_VECTOR = 64'h80000000;
// WFI Timeout Wait
localparam WFI_TIMEOUT_BIT = 32'd16;
// Peripheral Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
localparam DTIM_SUPPORTED = 1'b0;
localparam DTIM_BASE = 64'h80000000;
localparam DTIM_RANGE = 64'h007FFFFF;
localparam IROM_SUPPORTED = 1'b0;
localparam IROM_BASE = 64'h80000000;
localparam IROM_RANGE = 64'h007FFFFF;
localparam BOOTROM_SUPPORTED = 1'b1;
localparam BOOTROM_BASE = 64'h00001000;
localparam BOOTROM_RANGE = 64'h00000FFF;
localparam UNCORE_RAM_SUPPORTED = 1'b1;
localparam UNCORE_RAM_BASE = 64'h80000000;
localparam UNCORE_RAM_RANGE = 64'h07FFFFFF;
localparam EXT_MEM_SUPPORTED = 1'b0;
localparam EXT_MEM_BASE = 64'h80000000;
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
localparam CLINT_SUPPORTED = 1'b0;
localparam CLINT_BASE = 64'h02000000;
localparam CLINT_RANGE = 64'h0000FFFF;
localparam GPIO_SUPPORTED = 1'b0;
localparam GPIO_BASE = 64'h10060000;
localparam GPIO_RANGE = 64'h000000FF;
localparam UART_SUPPORTED = 1'b0;
localparam UART_BASE = 64'h10000000;
localparam UART_RANGE = 64'h00000007;
localparam PLIC_SUPPORTED = 1'b0;
localparam PLIC_BASE = 64'h0C000000;
localparam PLIC_RANGE = 64'h03FFFFFF;
localparam SDC_SUPPORTED = 1'b0;
localparam SDC_BASE = 64'h00012100;
localparam SDC_RANGE = 64'h0000001F;
// Bus Interface width
localparam AHBW = 32'd32;
// Test modes
// Tie GPIO outputs back to inputs
localparam GPIO_LOOPBACK_TEST = 1;
// Hardware configuration
localparam UART_PRESCALE = 32'd1;
// Interrupt configuration
localparam PLIC_NUM_SRC = 32'd10;
// comment out the following if >=32 sources
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
localparam PLIC_GPIO_ID = 32'd3;
localparam PLIC_UART_ID = 32'd10;
localparam BPRED_SUPPORTED = 0;
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
localparam BPRED_SIZE = 32'd10;
localparam BPRED_NUM_LHR = 32'd6;
localparam BTB_SIZE = 32'd10;
localparam SVADU_SUPPORTED = 0;
localparam ZMMUL_SUPPORTED = 0;
// FPU division architecture
localparam RADIX = 32'd4;
localparam DIVCOPIES = 32'd4;
// bit manipulation
localparam ZBA_SUPPORTED = 0;
localparam ZBB_SUPPORTED = 0;
localparam ZBC_SUPPORTED = 0;
localparam ZBS_SUPPORTED = 0;
// Memory synthesis configuration
localparam USE_SRAM = 0;
`include "test-shared.vh"

View File

@ -0,0 +1,178 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
localparam PA_BITS = 34;
//localparam AHBW = 32;
//localparam XLEN = 32;
//localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 );
////localparam BUS_SUPPORTED = 1'b1;
//localparam ZICSR_SUPPORTED = 1'b0;
localparam M_SUPPORTED = 1'b0;
localparam F_SUPPORTED = 1'b0;
//localparam ZMMUL_SUPPORTED = 1'b0;
//localparam F_SUPPORTED = 1'b0;
//localparam PMP_ENTRIES = 0;
localparam LLEN = 32;
//localparam FPGA = 1'b0;
//localparam QEMU = 1'b0;
// //VPN_SEGMENT_BITS: (LLEN == 32 ? 10 : 9),
// `include "test-shared.vh"
localparam FLEN = 32;
`include "test-shared.vh"
// include shared configuration
//`include "wally-shared.vh"
localparam FPGA = 0;
localparam QEMU = 0;
// RV32 or RV64: XLEN = 32 or 64
localparam XLEN = 32;
// IEEE 754 compliance
localparam IEEE754 = 0;
// E
localparam MISA = (32'h00000010);
localparam ZICSR_SUPPORTED = 0;
localparam ZIFENCEI_SUPPORTED = 0;
localparam COUNTERS = 0;
localparam ZICOUNTERS_SUPPORTED = 0;
localparam ZFH_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 0;
// LSU microarchitectural Features
localparam BUS_SUPPORTED = 1;
localparam DCACHE_SUPPORTED = 0;
localparam ICACHE_SUPPORTED = 0;
localparam VIRTMEM_SUPPORTED = 0;
localparam VECTORED_INTERRUPTS_SUPPORTED = 0;
localparam BIGENDIAN_SUPPORTED = 0;
// TLB configuration. Entries should be a power of 2
localparam ITLB_ENTRIES = 0;
localparam DTLB_ENTRIES = 0;
// Cache configuration. Sizes should be a power of two
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
localparam DCACHE_NUMWAYS = 4;
localparam DCACHE_WAYSIZEINBYTES = 4096;
localparam DCACHE_LINELENINBITS = 512;
localparam ICACHE_NUMWAYS = 4;
localparam ICACHE_WAYSIZEINBYTES = 4096;
localparam ICACHE_LINELENINBITS = 512;
// Integer Divider Configuration
// IDIV_BITSPERCYCLE must be 1, 2, or 4
localparam IDIV_BITSPERCYCLE = 1;
localparam IDIV_ON_FPU = 0;
// Legal number of PMP entries are 0, 16, or 64
localparam PMP_ENTRIES = 0;
// Address space
localparam RESET_VECTOR = 32'h80000000;
// WFI Timeout Wait
localparam WFI_TIMEOUT_BIT = 16;
// Peripheral Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
localparam DTIM_SUPPORTED = 1'b0;
localparam DTIM_BASE = 34'h80000000;
localparam DTIM_RANGE = 34'h007FFFFF;
localparam IROM_SUPPORTED = 1'b0;
localparam IROM_BASE = 34'h80000000;
localparam IROM_RANGE = 34'h007FFFFF;
localparam BOOTROM_SUPPORTED = 1'b1;
localparam BOOTROM_BASE = 34'h00001000;
localparam BOOTROM_RANGE = 34'h00000FFF;
localparam UNCORE_RAM_SUPPORTED = 1'b1;
localparam UNCORE_RAM_BASE = 34'h80000000;
localparam UNCORE_RAM_RANGE = 34'h07FFFFFF;
localparam EXT_MEM_SUPPORTED = 1'b0;
localparam EXT_MEM_BASE = 34'h80000000;
localparam EXT_MEM_RANGE = 34'h07FFFFFF;
localparam CLINT_SUPPORTED = 1'b0;
localparam CLINT_BASE = 34'h02000000;
localparam CLINT_RANGE = 34'h0000FFFF;
localparam GPIO_SUPPORTED = 1'b0;
localparam GPIO_BASE = 34'h10060000;
localparam GPIO_RANGE = 34'h000000FF;
localparam UART_SUPPORTED = 1'b0;
localparam UART_BASE = 34'h10000000;
localparam UART_RANGE = 34'h00000007;
localparam PLIC_SUPPORTED = 1'b0;
localparam PLIC_BASE = 34'h0C000000;
localparam PLIC_RANGE = 34'h03FFFFFF;
localparam SDC_SUPPORTED = 1'b0;
localparam SDC_BASE = 34'h00012100;
localparam SDC_RANGE = 34'h0000001F;
// Bus Interface width
localparam AHBW = 32;
// Test modes
// Tie GPIO outputs back to inputs
localparam GPIO_LOOPBACK_TEST = 1;
// Hardware configuration
localparam UART_PRESCALE = 1;
// Interrupt configuration
localparam PLIC_NUM_SRC = 10;
// comment out the following if >=32 sources
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
localparam PLIC_GPIO_ID = 3;
localparam PLIC_UART_ID = 10;
localparam BPRED_SUPPORTED = 0;
localparam BPRED_TYPE = "BP_GSHARE"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
localparam BPRED_SIZE = 10;
localparam BTB_SIZE = 10;
localparam SVADU_SUPPORTED = 0;
localparam ZMMUL_SUPPORTED = 0;
// FPU division architecture
localparam RADIX = 4;
localparam DIVCOPIES = 4;
// bit manipulation
localparam ZBA_SUPPORTED = 0;
localparam ZBB_SUPPORTED = 0;
localparam ZBC_SUPPORTED = 0;
localparam ZBS_SUPPORTED = 0;
// Memory synthesis configuration
localparam USE_SRAM = 0;

View File

@ -136,6 +136,7 @@
`define BPRED_SUPPORTED 0
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
`define BPRED_SIZE 10
`define BPRED_NUM_LHR 6
`define BTB_SIZE 10
`define SVADU_SUPPORTED 0

158
config/rv32gc/config.vh Normal file
View File

@ -0,0 +1,158 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// include shared configuration
// `include "wally-shared.vh"
`include "BranchPredictorType.vh"
localparam FPGA = 0;
localparam QEMU = 0;
// RV32 or RV64: XLEN = 32 or 64
localparam XLEN = 32'd32;
// IEEE 754 compliance
localparam IEEE754 = 0;
localparam MISA = (32'h00000104 | 1 << 20 | 1 << 18 | 1 << 12 | 1 << 0 | 1 <<3 | 1 << 5);
localparam ZICSR_SUPPORTED = 1;
localparam ZIFENCEI_SUPPORTED = 1;
localparam COUNTERS = 12'd32;
localparam ZICOUNTERS_SUPPORTED = 1;
localparam ZFH_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 1;
// LSU microarchitectural Features
localparam BUS_SUPPORTED = 1;
localparam DCACHE_SUPPORTED = 1;
localparam ICACHE_SUPPORTED = 1;
localparam VIRTMEM_SUPPORTED = 1;
localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
localparam BIGENDIAN_SUPPORTED = 1;
// TLB configuration. Entries should be a power of 2
localparam ITLB_ENTRIES = 32'd32;
localparam DTLB_ENTRIES = 32'd32;
// Cache configuration. Sizes should be a power of two
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
localparam DCACHE_NUMWAYS = 32'd4;
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
localparam DCACHE_LINELENINBITS = 32'd512;
localparam ICACHE_NUMWAYS = 32'd4;
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
localparam ICACHE_LINELENINBITS = 32'd512;
// Integer Divider Configuration
// IDIV_BITSPERCYCLE must be 1, 2, or 4
localparam IDIV_BITSPERCYCLE = 32'd4;
localparam IDIV_ON_FPU = 1;
// Legal number of PMP entries are 0, 16, or 64
localparam PMP_ENTRIES = 32'd16;
// Address space
localparam RESET_VECTOR = 64'h80000000;
// WFI Timeout Wait
localparam WFI_TIMEOUT_BIT = 32'd16;
// Peripheral Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
localparam DTIM_SUPPORTED = 1'b0;
localparam DTIM_BASE = 64'h80000000;
localparam DTIM_RANGE = 64'h007FFFFF;
localparam IROM_SUPPORTED = 1'b0;
localparam IROM_BASE = 64'h80000000;
localparam IROM_RANGE = 64'h007FFFFF;
localparam BOOTROM_SUPPORTED = 1'b1;
localparam BOOTROM_BASE = 64'h00001000;
localparam BOOTROM_RANGE = 64'h00000FFF;
localparam UNCORE_RAM_SUPPORTED = 1'b1;
localparam UNCORE_RAM_BASE = 64'h80000000;
localparam UNCORE_RAM_RANGE = 64'h07FFFFFF;
localparam EXT_MEM_SUPPORTED = 1'b0;
localparam EXT_MEM_BASE = 64'h80000000;
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
localparam CLINT_SUPPORTED = 1'b1;
localparam CLINT_BASE = 64'h02000000;
localparam CLINT_RANGE = 64'h0000FFFF;
localparam GPIO_SUPPORTED = 1'b1;
localparam GPIO_BASE = 64'h10060000;
localparam GPIO_RANGE = 64'h000000FF;
localparam UART_SUPPORTED = 1'b1;
localparam UART_BASE = 64'h10000000;
localparam UART_RANGE = 64'h00000007;
localparam PLIC_SUPPORTED = 1'b1;
localparam PLIC_BASE = 64'h0C000000;
localparam PLIC_RANGE = 64'h03FFFFFF;
localparam SDC_SUPPORTED = 1'b0;
localparam SDC_BASE = 64'h00012100;
localparam SDC_RANGE = 64'h0000001F;
// Bus Interface width
localparam AHBW = 32'd32;
// Test modes
// Tie GPIO outputs back to inputs
localparam GPIO_LOOPBACK_TEST = 1;
// Hardware configuration
localparam UART_PRESCALE = 32'd1;
// Interrupt configuration
localparam PLIC_NUM_SRC = 32'd10;
// comment out the following if >=32 sources
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
localparam PLIC_GPIO_ID = 32'd3;
localparam PLIC_UART_ID = 32'd10;
localparam BPRED_SUPPORTED = 1;
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
localparam BPRED_SIZE = 32'd16;
localparam BPRED_NUM_LHR = 32'd6;
localparam BTB_SIZE = 32'd10;
localparam SVADU_SUPPORTED = 1;
localparam ZMMUL_SUPPORTED = 0;
// FPU division architecture
localparam RADIX = 32'd4;
localparam DIVCOPIES = 32'd4;
// bit manipulation
localparam ZBA_SUPPORTED = 1;
localparam ZBB_SUPPORTED = 1;
localparam ZBC_SUPPORTED = 1;
localparam ZBS_SUPPORTED = 1;
// Memory synthesis configuration
localparam USE_SRAM = 0;
`include "test-shared.vh"

View File

@ -133,8 +133,9 @@
`define PLIC_UART_ID 10
`define BPRED_SUPPORTED 1
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
`define BPRED_TYPE "BP_GSHARE" // "BP_LOCAL_REPAIR" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
`define BPRED_SIZE 16
`define BPRED_NUM_LHR 8
`define BTB_SIZE 10
`define SVADU_SUPPORTED 1

157
config/rv32i/config.vh Normal file
View File

@ -0,0 +1,157 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "BranchPredictorType.vh"
localparam FPGA = 0;
localparam QEMU = 0;
// RV32 or RV64: XLEN = 32 or 64
localparam XLEN = 32'd32;
// IEEE 754 compliance
localparam IEEE754 = 0;
// I
localparam MISA = (32'h00000104);
localparam ZICSR_SUPPORTED = 0;
localparam ZIFENCEI_SUPPORTED = 0;
localparam COUNTERS = 12'd32;
localparam ZICOUNTERS_SUPPORTED = 0;
localparam ZFH_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 0;
// LSU microarchitectural Features
localparam BUS_SUPPORTED = 0;
localparam DCACHE_SUPPORTED = 0;
localparam ICACHE_SUPPORTED = 0;
localparam VIRTMEM_SUPPORTED = 0;
localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
localparam BIGENDIAN_SUPPORTED = 0;
// TLB configuration. Entries should be a power of 2
localparam ITLB_ENTRIES = 32'd32;
localparam DTLB_ENTRIES = 32'd32;
// Cache configuration. Sizes should be a power of two
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
localparam DCACHE_NUMWAYS = 32'd4;
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
localparam DCACHE_LINELENINBITS = 32'd512;
localparam ICACHE_NUMWAYS = 32'd4;
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
localparam ICACHE_LINELENINBITS = 32'd512;
// Integer Divider Configuration
// IDIV_BITSPERCYCLE must be 1, 2, or 4
localparam IDIV_BITSPERCYCLE = 32'd4;
localparam IDIV_ON_FPU = 0;
// Legal number of PMP entries are 0, 16, or 64
localparam PMP_ENTRIES = 32'd0;
// Address space
localparam RESET_VECTOR = 64'h80000000;
// WFI Timeout Wait
localparam WFI_TIMEOUT_BIT = 32'd16;
// Peripheral Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
localparam DTIM_SUPPORTED = 1'b1;
localparam DTIM_BASE = 64'h80000000;
localparam DTIM_RANGE = 64'h007FFFFF;
localparam IROM_SUPPORTED = 1'b1;
localparam IROM_BASE = 64'h80000000;
localparam IROM_RANGE = 64'h007FFFFF;
localparam BOOTROM_SUPPORTED = 1'b0;
localparam BOOTROM_BASE = 64'h00001000;
localparam BOOTROM_RANGE = 64'h00000FFF;
localparam UNCORE_RAM_SUPPORTED = 1'b0;
localparam UNCORE_RAM_BASE = 64'h80000000;
localparam UNCORE_RAM_RANGE = 64'h07FFFFFF;
localparam EXT_MEM_SUPPORTED = 1'b0;
localparam EXT_MEM_BASE = 64'h80000000;
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
localparam CLINT_SUPPORTED = 1'b0;
localparam CLINT_BASE = 64'h02000000;
localparam CLINT_RANGE = 64'h0000FFFF;
localparam GPIO_SUPPORTED = 1'b0;
localparam GPIO_BASE = 64'h10060000;
localparam GPIO_RANGE = 64'h000000FF;
localparam UART_SUPPORTED = 1'b0;
localparam UART_BASE = 64'h10000000;
localparam UART_RANGE = 64'h00000007;
localparam PLIC_SUPPORTED = 1'b0;
localparam PLIC_BASE = 64'h0C000000;
localparam PLIC_RANGE = 64'h03FFFFFF;
localparam SDC_SUPPORTED = 1'b0;
localparam SDC_BASE = 64'h00012100;
localparam SDC_RANGE = 64'h0000001F;
// Bus Interface width
localparam AHBW = 32'd32;
// Test modes
// Tie GPIO outputs back to inputs
localparam GPIO_LOOPBACK_TEST = 1;
// Hardware configuration
localparam UART_PRESCALE = 32'd1;
// Interrupt configuration
localparam PLIC_NUM_SRC = 32'd10;
// comment out the following if >=32 sources
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
localparam PLIC_GPIO_ID = 32'd3;
localparam PLIC_UART_ID = 32'd10;
localparam BPRED_SUPPORTED = 0;
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
localparam BPRED_SIZE = 32'd10;
localparam BPRED_NUM_LHR = 32'd6;
localparam BTB_SIZE = 32'd10;
localparam SVADU_SUPPORTED = 0;
localparam ZMMUL_SUPPORTED = 0;
// FPU division architecture
localparam RADIX = 32'h4;
localparam DIVCOPIES = 32'h4;
// bit manipulation
localparam ZBA_SUPPORTED = 0;
localparam ZBB_SUPPORTED = 0;
localparam ZBC_SUPPORTED = 0;
localparam ZBS_SUPPORTED = 0;
// Memory synthesis configuration
localparam USE_SRAM = 0;
`include "test-shared.vh"

View File

@ -136,6 +136,7 @@
`define BPRED_SUPPORTED 0
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
`define BPRED_SIZE 10
`define BPRED_NUM_LHR 6
`define BTB_SIZE 10
`define SVADU_SUPPORTED 0

156
config/rv32imc/config.vh Normal file
View File

@ -0,0 +1,156 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "BranchPredictorType.vh"
localparam FPGA = 0;
localparam QEMU = 0;
// RV32 or RV64: XLEN = 32 or 64
localparam XLEN = 32'd32;
// IEEE 754 compliance
localparam IEEE754 = 0;
localparam MISA = (32'h00000104 | 1 << 20 | 1 << 18 | 1 << 12);
localparam ZICSR_SUPPORTED = 1;
localparam ZIFENCEI_SUPPORTED = 1;
localparam COUNTERS = 12'd32;
localparam ZICOUNTERS_SUPPORTED = 1;
localparam ZFH_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 0;
// LSU microarchitectural Features
localparam BUS_SUPPORTED = 1;
localparam DCACHE_SUPPORTED = 0;
localparam ICACHE_SUPPORTED = 0;
localparam VIRTMEM_SUPPORTED = 0;
localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
localparam BIGENDIAN_SUPPORTED = 0;
// TLB configuration. Entries should be a power of 2
localparam ITLB_ENTRIES = 32'd0;
localparam DTLB_ENTRIES = 32'd0;
// Cache configuration. Sizes should be a power of two
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
localparam DCACHE_NUMWAYS = 32'd4;
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
localparam DCACHE_LINELENINBITS = 32'd512;
localparam ICACHE_NUMWAYS = 32'd4;
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
localparam ICACHE_LINELENINBITS = 32'd512;
// Integer Divider Configuration
// IDIV_BITSPERCYCLE must be 1, 2, or 4
localparam IDIV_BITSPERCYCLE = 32'd2;
localparam IDIV_ON_FPU = 0;
// Legal number of PMP entries are 0, 16, or 64
localparam PMP_ENTRIES = 32'd0;
// Address space
localparam RESET_VECTOR = 64'h80000000;
// WFI Timeout Wait
localparam WFI_TIMEOUT_BIT = 32'd16;
// Peripheral Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
localparam DTIM_SUPPORTED = 1'b1;
localparam DTIM_BASE = 64'h80000000;
localparam DTIM_RANGE = 64'h007FFFFF;
localparam IROM_SUPPORTED = 1'b1;
localparam IROM_BASE = 64'h80000000;
localparam IROM_RANGE = 64'h007FFFFF;
localparam BOOTROM_SUPPORTED = 1'b0;
localparam BOOTROM_BASE = 64'h00001000;
localparam BOOTROM_RANGE = 64'h00000FFF;
localparam UNCORE_RAM_SUPPORTED = 1'b0;
localparam UNCORE_RAM_BASE = 64'h80000000;
localparam UNCORE_RAM_RANGE = 64'h07FFFFFF;
localparam EXT_MEM_SUPPORTED = 1'b0;
localparam EXT_MEM_BASE = 64'h80000000;
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
localparam CLINT_SUPPORTED = 1'b1;
localparam CLINT_BASE = 64'h02000000;
localparam CLINT_RANGE = 64'h0000FFFF;
localparam GPIO_SUPPORTED = 1'b1;
localparam GPIO_BASE = 64'h10060000;
localparam GPIO_RANGE = 64'h000000FF;
localparam UART_SUPPORTED = 1'b1;
localparam UART_BASE = 64'h10000000;
localparam UART_RANGE = 64'h00000007;
localparam PLIC_SUPPORTED = 1'b1;
localparam PLIC_BASE = 64'h0C000000;
localparam PLIC_RANGE = 64'h03FFFFFF;
localparam SDC_SUPPORTED = 1'b0;
localparam SDC_BASE = 64'h00012100;
localparam SDC_RANGE = 64'h0000001F;
// Bus Interface width
localparam AHBW = 32'd32;
// Test modes
// Tie GPIO outputs back to inputs
localparam GPIO_LOOPBACK_TEST = 1;
// Hardware configuration
localparam UART_PRESCALE = 32'd1;
// Interrupt configuration
localparam PLIC_NUM_SRC = 32'd10;
// comment out the following if >=32 sources
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
localparam PLIC_GPIO_ID = 32'd3;
localparam PLIC_UART_ID = 32'd10;
localparam BPRED_SUPPORTED = 0;
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
localparam BPRED_SIZE = 32'd10;
localparam BPRED_NUM_LHR = 32'd6;
localparam BTB_SIZE = 32'd10;
localparam SVADU_SUPPORTED = 0;
localparam ZMMUL_SUPPORTED = 0;
// FPU division architecture
localparam RADIX = 32'h4;
localparam DIVCOPIES = 32'h4;
// bit manipulation
localparam ZBA_SUPPORTED = 0;
localparam ZBB_SUPPORTED = 0;
localparam ZBC_SUPPORTED = 0;
localparam ZBS_SUPPORTED = 0;
// Memory synthesis configuration
localparam USE_SRAM = 0;
`include "test-shared.vh"

View File

@ -135,6 +135,7 @@
`define BPRED_SUPPORTED 0
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
`define BPRED_SIZE 10
`define BPRED_NUM_LHR 6
`define BTB_SIZE 10
`define SVADU_SUPPORTED 0

159
config/rv64fpquad/config.vh Normal file
View File

@ -0,0 +1,159 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "BranchPredictorType.vh"
localparam FPGA = 0;
localparam QEMU = 0;
// RV32 or RV64: XLEN = 32 or 64
localparam XLEN = 32'd64;
// IEEE 754 compliance
localparam IEEE754 = 0;
// MISA RISC-V configuration per specification
localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 );
localparam ZICSR_SUPPORTED = 1;
localparam ZIFENCEI_SUPPORTED = 1;
localparam COUNTERS = 12'd32;
localparam ZICOUNTERS_SUPPORTED = 1;
localparam ZFH_SUPPORTED = 1;
localparam SSTC_SUPPORTED = 0;
// LSU microarchitectural Features
localparam BUS_SUPPORTED = 1;
localparam DCACHE_SUPPORTED = 1;
localparam ICACHE_SUPPORTED = 1;
localparam VIRTMEM_SUPPORTED = 1;
localparam VECTORED_INTERRUPTS_SUPPORTED = 1 ;
localparam BIGENDIAN_SUPPORTED = 1;
// TLB configuration. Entries should be a power of 2
localparam ITLB_ENTRIES = 32'd32;
localparam DTLB_ENTRIES = 32'd32;
// Cache configuration. Sizes should be a power of two
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
localparam DCACHE_NUMWAYS = 32'd4;
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
localparam DCACHE_LINELENINBITS = 32'd512;
localparam ICACHE_NUMWAYS = 32'd4;
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
localparam ICACHE_LINELENINBITS = 32'd512;
// Integer Divider Configuration
// IDIV_BITSPERCYCLE must be 1, 2, or 4
localparam IDIV_BITSPERCYCLE = 32'd4;
localparam IDIV_ON_FPU = 1;
// Legal number of PMP entries are 0, 16, or 64
localparam PMP_ENTRIES = 32'd16;
// Address space
localparam RESET_VECTOR = 64'h0000000080000000;
// Bus Interface width
localparam AHBW = 32'd64;
// WFI Timeout Wait
localparam WFI_TIMEOUT_BIT = 32'd16;
// Peripheral Physiccal Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file?
localparam DTIM_SUPPORTED = 1'b0;
localparam DTIM_BASE = 64'h80000000;
localparam DTIM_RANGE = 64'h007FFFFF;
localparam IROM_SUPPORTED = 1'b0;
localparam IROM_BASE = 64'h80000000;
localparam IROM_RANGE = 64'h007FFFFF;
localparam BOOTROM_SUPPORTED = 1'b1;
localparam BOOTROM_BASE = 64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
localparam BOOTROM_RANGE = 64'h00000FFF;
localparam UNCORE_RAM_SUPPORTED = 1'b1;
localparam UNCORE_RAM_BASE = 64'h80000000;
localparam UNCORE_RAM_RANGE = 64'h7FFFFFFF;
localparam EXT_MEM_SUPPORTED = 1'b0;
localparam EXT_MEM_BASE = 64'h80000000;
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
localparam CLINT_SUPPORTED = 1'b1;
localparam CLINT_BASE = 64'h02000000;
localparam CLINT_RANGE = 64'h0000FFFF;
localparam GPIO_SUPPORTED = 1'b1;
localparam GPIO_BASE = 64'h10060000;
localparam GPIO_RANGE = 64'h000000FF;
localparam UART_SUPPORTED = 1'b1;
localparam UART_BASE = 64'h10000000;
localparam UART_RANGE = 64'h00000007;
localparam PLIC_SUPPORTED = 1'b1;
localparam PLIC_BASE = 64'h0C000000;
localparam PLIC_RANGE = 64'h03FFFFFF;
localparam SDC_SUPPORTED = 1'b0;
localparam SDC_BASE = 64'h00012100;
localparam SDC_RANGE = 64'h0000001F;
// Test modes
// Tie GPIO outputs back to inputs
localparam GPIO_LOOPBACK_TEST = 1;
// Hardware configuration
localparam UART_PRESCALE = 32'd1;
// Interrupt configuration
localparam PLIC_NUM_SRC = 32'd10;
// comment out the following if >=32 sources
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
localparam PLIC_GPIO_ID = 32'd3;
localparam PLIC_UART_ID = 32'd10;
localparam BPRED_SUPPORTED = 1;
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
localparam BPRED_SIZE = 32'd10;
localparam BPRED_NUM_LHR = 32'd6;
localparam BTB_SIZE = 32'd10;
localparam SVADU_SUPPORTED = 0;
localparam ZMMUL_SUPPORTED = 0;
// FPU division architecture
localparam RADIX = 32'h4;
localparam DIVCOPIES = 32'h4;
// bit manipulation
localparam ZBA_SUPPORTED = 0;
localparam ZBB_SUPPORTED = 0;
localparam ZBC_SUPPORTED = 0;
localparam ZBS_SUPPORTED = 0;
// Memory synthesis configuration
localparam USE_SRAM = 0;
`include "test-shared.vh"

View File

@ -138,6 +138,7 @@
`define BPRED_SUPPORTED 1
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
`define BPRED_SIZE 10
`define BPRED_NUM_LHR 6
`define BTB_SIZE 10
`define SVADU_SUPPORTED 0

162
config/rv64gc/config.vh Normal file
View File

@ -0,0 +1,162 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// include shared configuration
// `include "wally-shared.vh"
`include "BranchPredictorType.vh"
localparam FPGA = 0;
localparam QEMU = 0;
// RV32 or RV64: XLEN = 32 or 64
localparam XLEN = 32'd64;
// IEEE 754 compliance
localparam IEEE754 = 0;
// MISA RISC-V configuration per specification
localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0);
localparam ZICSR_SUPPORTED = 1;
localparam ZIFENCEI_SUPPORTED = 1;
localparam COUNTERS = 12'd32;
localparam ZICOUNTERS_SUPPORTED = 1;
localparam ZFH_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 1;
// LSU microarchitectural Features
localparam BUS_SUPPORTED = 1;
localparam DCACHE_SUPPORTED = 1;
localparam ICACHE_SUPPORTED = 1;
localparam VIRTMEM_SUPPORTED = 1;
localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
localparam BIGENDIAN_SUPPORTED = 1;
// TLB configuration. Entries should be a power of 2
localparam ITLB_ENTRIES = 32'd32;
localparam DTLB_ENTRIES = 32'd32;
// Cache configuration. Sizes should be a power of two
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
localparam DCACHE_NUMWAYS = 32'd4;
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
localparam DCACHE_LINELENINBITS = 32'd512;
localparam ICACHE_NUMWAYS = 32'd4;
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
localparam ICACHE_LINELENINBITS = 32'd512;
// Integer Divider Configuration
// IDIV_BITSPERCYCLE must be 1, 2, or 4
localparam IDIV_BITSPERCYCLE = 32'd4;
localparam IDIV_ON_FPU = 1;
// Legal number of PMP entries are 0, 16, or 64
localparam PMP_ENTRIES = 32'd16;
// Address space
localparam RESET_VECTOR = 64'h0000000080000000;
// Bus Interface width
localparam AHBW = 32'd64;
// WFI Timeout Wait
localparam WFI_TIMEOUT_BIT = 32'd16;
// Peripheral Physical Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file?
localparam DTIM_SUPPORTED = 1'b0;
localparam DTIM_BASE = 64'h80000000;
localparam DTIM_RANGE = 64'h007FFFFF;
localparam IROM_SUPPORTED = 1'b0;
localparam IROM_BASE = 64'h80000000;
localparam IROM_RANGE = 64'h007FFFFF;
localparam BOOTROM_SUPPORTED = 1'b1;
localparam BOOTROM_BASE = 64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder;
localparam BOOTROM_RANGE = 64'h00000FFF;
localparam UNCORE_RAM_SUPPORTED = 1'b1;
localparam UNCORE_RAM_BASE = 64'h80000000;
localparam UNCORE_RAM_RANGE = 64'h7FFFFFFF;
localparam EXT_MEM_SUPPORTED = 1'b0;
localparam EXT_MEM_BASE = 64'h80000000;
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
localparam CLINT_SUPPORTED = 1'b1;
localparam CLINT_BASE = 64'h02000000;
localparam CLINT_RANGE = 64'h0000FFFF;
localparam GPIO_SUPPORTED = 1'b1;
localparam GPIO_BASE = 64'h10060000;
localparam GPIO_RANGE = 64'h000000FF;
localparam UART_SUPPORTED = 1'b1;
localparam UART_BASE = 64'h10000000;
localparam UART_RANGE = 64'h00000007;
localparam PLIC_SUPPORTED = 1'b1;
localparam PLIC_BASE = 64'h0C000000;
localparam PLIC_RANGE = 64'h03FFFFFF;
localparam SDC_SUPPORTED = 1'b0;
localparam SDC_BASE = 64'h00012100;
localparam SDC_RANGE = 64'h0000001F;
// Test modes
// Tie GPIO outputs back to inputs
localparam GPIO_LOOPBACK_TEST = 1;
// Hardware configuration
localparam UART_PRESCALE = 32'd1;
// Interrupt configuration
localparam PLIC_NUM_SRC = 32'd10;
// comment out the following if >=32 sources
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
localparam PLIC_GPIO_ID = 32'd3;
localparam PLIC_UART_ID = 32'd10;
localparam BPRED_SUPPORTED = 1;
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
localparam BPRED_NUM_LHR = 32'd6;
localparam BPRED_SIZE = 32'd10;
localparam BTB_SIZE = 32'd10;
localparam SVADU_SUPPORTED = 1;
localparam ZMMUL_SUPPORTED = 0;
// FPU division architecture
localparam RADIX = 32'h4;
localparam DIVCOPIES = 32'h4;
// bit manipulation
localparam ZBA_SUPPORTED = 1;
localparam ZBB_SUPPORTED = 1;
localparam ZBC_SUPPORTED = 1;
localparam ZBS_SUPPORTED = 1;
// Memory synthesis configuration
localparam USE_SRAM = 0;
`include "test-shared.vh"

View File

@ -136,8 +136,10 @@
`define PLIC_UART_ID 10
`define BPRED_SUPPORTED 1
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
//`define BPRED_TYPE "BP_GLOBAL_BASIC" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
`define BPRED_TYPE "BP_GSHARE" // "BP_LOCAL_REPAIR" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
`define BPRED_SIZE 10
`define BPRED_NUM_LHR 4
`define BTB_SIZE 10
`define SVADU_SUPPORTED 1

159
config/rv64i/config.vh Normal file
View File

@ -0,0 +1,159 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "BranchPredictorType.vh"
localparam FPGA = 0;
localparam QEMU = 0;
// RV32 or RV64: XLEN = 32 or 64
localparam XLEN = 32'd64;
// IEEE 754 compliance
localparam IEEE754 = 0;
// MISA RISC-V configuration per specification
localparam MISA = (32'h00000104);
localparam ZICSR_SUPPORTED = 0;
localparam ZIFENCEI_SUPPORTED = 0;
localparam COUNTERS = 12'd32;
localparam ZICOUNTERS_SUPPORTED = 0;
localparam ZFH_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 0;
// LSU microarchitectural Features
localparam BUS_SUPPORTED = 0;
localparam DCACHE_SUPPORTED = 0;
localparam ICACHE_SUPPORTED = 0;
localparam VIRTMEM_SUPPORTED = 0;
localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
localparam BIGENDIAN_SUPPORTED = 0;
// TLB configuration. Entries should be a power of 2
localparam ITLB_ENTRIES = 32'd0;
localparam DTLB_ENTRIES = 32'd0;
// Cache configuration. Sizes should be a power of two
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
localparam DCACHE_NUMWAYS = 32'd4;
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
localparam DCACHE_LINELENINBITS = 32'd512;
localparam ICACHE_NUMWAYS = 32'd4;
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
localparam ICACHE_LINELENINBITS = 32'd512;
// Integer Divider Configuration
// IDIV_BITSPERCYCLE must be 1, 2, or 4
localparam IDIV_BITSPERCYCLE = 32'd4;
localparam IDIV_ON_FPU = 0;
// Legal number of PMP entries are 0, 16, or 64
localparam PMP_ENTRIES = 32'd0;
// Address space
localparam RESET_VECTOR = 64'h0000000080000000;
// Bus Interface width
localparam AHBW = (XLEN);
// WFI Timeout Wait
localparam WFI_TIMEOUT_BIT = 32'd16;
// Peripheral Physiccal Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file?
localparam DTIM_SUPPORTED = 1'b1;
localparam DTIM_BASE = 64'h80000000;
localparam DTIM_RANGE = 64'h007FFFFF;
localparam IROM_SUPPORTED = 1'b1;
localparam IROM_BASE = 64'h80000000;
localparam IROM_RANGE = 64'h007FFFFF;
localparam BOOTROM_SUPPORTED = 1'b0;
localparam BOOTROM_BASE = 64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
localparam BOOTROM_RANGE = 64'h00000FFF;
localparam UNCORE_RAM_SUPPORTED = 1'b0;
localparam UNCORE_RAM_BASE = 64'h80000000;
localparam UNCORE_RAM_RANGE = 64'h7FFFFFFF;
localparam EXT_MEM_SUPPORTED = 1'b0;
localparam EXT_MEM_BASE = 64'h80000000;
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
localparam CLINT_SUPPORTED = 1'b0;
localparam CLINT_BASE = 64'h02000000;
localparam CLINT_RANGE = 64'h0000FFFF;
localparam GPIO_SUPPORTED = 1'b0;
localparam GPIO_BASE = 64'h10060000;
localparam GPIO_RANGE = 64'h000000FF;
localparam UART_SUPPORTED = 1'b0;
localparam UART_BASE = 64'h10000000;
localparam UART_RANGE = 64'h00000007;
localparam PLIC_SUPPORTED = 1'b0;
localparam PLIC_BASE = 64'h0C000000;
localparam PLIC_RANGE = 64'h03FFFFFF;
localparam SDC_SUPPORTED = 1'b0;
localparam SDC_BASE = 64'h00012100;
localparam SDC_RANGE = 64'h0000001F;
// Test modes
// Tie GPIO outputs back to inputs
localparam GPIO_LOOPBACK_TEST = 1;
// Hardware configuration
localparam UART_PRESCALE = 32'd1;
// Interrupt configuration
localparam PLIC_NUM_SRC = 32'd10;
// comment out the following if >=32 sources
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
localparam PLIC_GPIO_ID = 32'd3;
localparam PLIC_UART_ID = 32'd10;
localparam BPRED_SUPPORTED = 0;
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
localparam BPRED_SIZE = 32'd10;
localparam BPRED_NUM_LHR = 32'd6;
localparam BTB_SIZE = 32'd10;
localparam SVADU_SUPPORTED = 0;
localparam ZMMUL_SUPPORTED = 0;
// FPU division architecture
localparam RADIX = 32'h4;
localparam DIVCOPIES = 32'h4;
// bit manipulation
localparam ZBA_SUPPORTED = 0;
localparam ZBB_SUPPORTED = 0;
localparam ZBC_SUPPORTED = 0;
localparam ZBS_SUPPORTED = 0;
// Memory synthesis configuration
localparam USE_SRAM = 0;
`include "test-shared.vh"

View File

@ -138,6 +138,7 @@
`define BPRED_SUPPORTED 0
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
`define BPRED_SIZE 10
`define BPRED_NUM_LHR 6
`define BTB_SIZE 10
`define SVADU_SUPPORTED 0

View File

@ -0,0 +1,3 @@
typedef enum {BP_TWOBIT, BP_GSHARE, BP_GLOBAL, BP_GSHARE_BASIC,
BP_GLOBAL_BASIC, BP_LOCAL_BASIC, BP_LOCAL_AHEAD, BP_LOCAL_REPAIR} BranchPredictorType;

View File

@ -0,0 +1,165 @@
// Populate parameter structure with values specific to the current configuration
parameter cvw_t P = '{
FPGA : FPGA,
QEMU : QEMU,
XLEN : XLEN,
IEEE754 : IEEE754,
MISA : MISA,
AHBW : AHBW,
ZICSR_SUPPORTED : ZICSR_SUPPORTED,
ZIFENCEI_SUPPORTED : ZIFENCEI_SUPPORTED,
COUNTERS : COUNTERS,
ZICOUNTERS_SUPPORTED : ZICOUNTERS_SUPPORTED,
ZFH_SUPPORTED : ZFH_SUPPORTED,
SSTC_SUPPORTED : SSTC_SUPPORTED,
VIRTMEM_SUPPORTED : VIRTMEM_SUPPORTED,
VECTORED_INTERRUPTS_SUPPORTED : VECTORED_INTERRUPTS_SUPPORTED,
BIGENDIAN_SUPPORTED : BIGENDIAN_SUPPORTED,
SVADU_SUPPORTED : SVADU_SUPPORTED,
ZMMUL_SUPPORTED : ZMMUL_SUPPORTED,
BUS_SUPPORTED : BUS_SUPPORTED,
DCACHE_SUPPORTED : DCACHE_SUPPORTED,
ICACHE_SUPPORTED : ICACHE_SUPPORTED,
ITLB_ENTRIES : ITLB_ENTRIES,
DTLB_ENTRIES : DTLB_ENTRIES,
DCACHE_NUMWAYS : DCACHE_NUMWAYS,
DCACHE_WAYSIZEINBYTES : DCACHE_WAYSIZEINBYTES,
DCACHE_LINELENINBITS : DCACHE_LINELENINBITS,
ICACHE_NUMWAYS : ICACHE_NUMWAYS,
ICACHE_WAYSIZEINBYTES : ICACHE_WAYSIZEINBYTES,
ICACHE_LINELENINBITS : ICACHE_LINELENINBITS,
IDIV_BITSPERCYCLE : IDIV_BITSPERCYCLE,
IDIV_ON_FPU : IDIV_ON_FPU,
PMP_ENTRIES : PMP_ENTRIES,
RESET_VECTOR : RESET_VECTOR,
WFI_TIMEOUT_BIT : WFI_TIMEOUT_BIT,
DTIM_SUPPORTED : DTIM_SUPPORTED,
DTIM_BASE : DTIM_BASE,
DTIM_RANGE : DTIM_RANGE,
IROM_SUPPORTED : IROM_SUPPORTED,
IROM_BASE : IROM_BASE,
IROM_RANGE : IROM_RANGE,
BOOTROM_SUPPORTED : BOOTROM_SUPPORTED,
BOOTROM_BASE : BOOTROM_BASE,
BOOTROM_RANGE : BOOTROM_RANGE,
UNCORE_RAM_SUPPORTED : UNCORE_RAM_SUPPORTED,
UNCORE_RAM_BASE : UNCORE_RAM_BASE,
UNCORE_RAM_RANGE : UNCORE_RAM_RANGE,
EXT_MEM_SUPPORTED : EXT_MEM_SUPPORTED,
EXT_MEM_BASE : EXT_MEM_BASE,
EXT_MEM_RANGE : EXT_MEM_RANGE,
CLINT_SUPPORTED : CLINT_SUPPORTED,
CLINT_BASE : CLINT_BASE,
CLINT_RANGE : CLINT_RANGE,
GPIO_SUPPORTED : GPIO_SUPPORTED,
GPIO_BASE : GPIO_BASE,
GPIO_RANGE : GPIO_RANGE,
UART_SUPPORTED : UART_SUPPORTED,
UART_BASE : UART_BASE,
UART_RANGE : UART_RANGE,
PLIC_SUPPORTED : PLIC_SUPPORTED,
PLIC_BASE : PLIC_BASE,
PLIC_RANGE : PLIC_RANGE,
SDC_SUPPORTED : SDC_SUPPORTED,
SDC_BASE : SDC_BASE,
SDC_RANGE : SDC_RANGE,
GPIO_LOOPBACK_TEST : GPIO_LOOPBACK_TEST,
UART_PRESCALE : UART_PRESCALE ,
PLIC_NUM_SRC : PLIC_NUM_SRC,
PLIC_NUM_SRC_LT_32 : PLIC_NUM_SRC_LT_32,
PLIC_GPIO_ID : PLIC_GPIO_ID,
PLIC_UART_ID : PLIC_UART_ID,
BPRED_SUPPORTED : BPRED_SUPPORTED,
BPRED_TYPE : BPRED_TYPE,
BPRED_SIZE : BPRED_SIZE,
BPRED_NUM_LHR : BPRED_NUM_LHR,
BTB_SIZE : BTB_SIZE,
RADIX : RADIX,
DIVCOPIES : DIVCOPIES,
ZBA_SUPPORTED : ZBA_SUPPORTED,
ZBB_SUPPORTED : ZBB_SUPPORTED,
ZBC_SUPPORTED : ZBC_SUPPORTED,
ZBS_SUPPORTED : ZBS_SUPPORTED,
USE_SRAM : USE_SRAM,
M_MODE : M_MODE,
S_MODE : S_MODE,
U_MODE : U_MODE,
VPN_SEGMENT_BITS : VPN_SEGMENT_BITS,
VPN_BITS : VPN_BITS,
PPN_BITS : PPN_BITS,
PA_BITS : PA_BITS,
SVMODE_BITS : SVMODE_BITS,
ASID_BASE : ASID_BASE,
ASID_BITS : ASID_BITS,
NO_TRANSLATE : NO_TRANSLATE,
SV32 : SV32,
SV39 : SV39,
SV48 : SV48,
A_SUPPORTED : A_SUPPORTED,
B_SUPPORTED : B_SUPPORTED,
C_SUPPORTED : C_SUPPORTED,
D_SUPPORTED : D_SUPPORTED,
E_SUPPORTED : E_SUPPORTED,
F_SUPPORTED : F_SUPPORTED,
I_SUPPORTED : I_SUPPORTED,
M_SUPPORTED : M_SUPPORTED,
Q_SUPPORTED : Q_SUPPORTED,
S_SUPPORTED : S_SUPPORTED,
U_SUPPORTED : U_SUPPORTED,
LOG_XLEN : LOG_XLEN,
PMPCFG_ENTRIES : PMPCFG_ENTRIES,
Q_LEN : Q_LEN,
Q_NE : Q_NE,
Q_NF : Q_NF,
Q_BIAS : Q_BIAS,
Q_FMT : Q_FMT,
D_LEN : D_LEN,
D_NE : D_NE,
D_NF : D_NF,
D_BIAS : D_BIAS,
D_FMT : D_FMT,
S_LEN : S_LEN,
S_NE : S_NE,
S_NF : S_NF,
S_BIAS : S_BIAS,
S_FMT : S_FMT,
H_LEN : H_LEN,
H_NE : H_NE,
H_NF : H_NF,
H_BIAS : H_BIAS,
H_FMT : H_FMT,
FLEN : FLEN,
NE : NE ,
NF : NF ,
FMT : FMT ,
BIAS : BIAS,
FPSIZES : FPSIZES,
FMTBITS : FMTBITS,
LEN1 : LEN1 ,
NE1 : NE1 ,
NF1 : NF1 ,
FMT1 : FMT1 ,
BIAS1 : BIAS1,
LEN2 : LEN2 ,
NE2 : NE2 ,
NF2 : NF2 ,
FMT2 : FMT2 ,
BIAS2 : BIAS2,
CVTLEN : CVTLEN,
LLEN : LLEN,
LOGCVTLEN : LOGCVTLEN,
NORMSHIFTSZ : NORMSHIFTSZ,
LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
CORRSHIFTSZ : CORRSHIFTSZ,
DIVN : DIVN,
LOGR : LOGR,
RK : RK,
LOGRK : LOGRK,
FPDUR : FPDUR,
DURLEN : DURLEN,
DIVb : DIVb,
DIVBLEN : DIVBLEN,
DIVa : DIVa
};

View File

@ -0,0 +1,119 @@
// constants defining different privilege modes
// defined in Table 1.1 of the privileged spec
localparam M_MODE = (2'b11);
localparam S_MODE = (2'b01);
localparam U_MODE = (2'b00);
// Virtual Memory Constants
localparam VPN_SEGMENT_BITS = (XLEN == 32 ? 32'd10 : 32'd9);
localparam VPN_BITS = (XLEN==32 ? (2*VPN_SEGMENT_BITS) : (4*VPN_SEGMENT_BITS));
localparam PPN_BITS = (XLEN==32 ? 32'd22 : 32'd44);
localparam PA_BITS = (XLEN==32 ? 32'd34 : 32'd56);
localparam SVMODE_BITS = (XLEN==32 ? 32'd1 : 32'd4);
localparam ASID_BASE = (XLEN==32 ? 32'd22 : 32'd44);
localparam ASID_BITS = (XLEN==32 ? 32'd9 : 32'd16);
// constants to check SATP_MODE against
// defined in Table 4.3 of the privileged spec
localparam NO_TRANSLATE = 4'd0;
localparam SV32 = 4'd1;
localparam SV39 = 4'd8;
localparam SV48 = 4'd9;
// macros to define supported modes
localparam A_SUPPORTED = ((MISA >> 0) % 2 == 1);
localparam B_SUPPORTED = ((ZBA_SUPPORTED | ZBB_SUPPORTED | ZBC_SUPPORTED | ZBS_SUPPORTED));// not based on MISA
localparam C_SUPPORTED = ((MISA >> 2) % 2 == 1);
localparam D_SUPPORTED = ((MISA >> 3) % 2 == 1);
localparam E_SUPPORTED = ((MISA >> 4) % 2 == 1);
localparam F_SUPPORTED = ((MISA >> 5) % 2 == 1);
localparam I_SUPPORTED = ((MISA >> 8) % 2 == 1);
localparam M_SUPPORTED = ((MISA >> 12) % 2 == 1);
localparam Q_SUPPORTED = ((MISA >> 16) % 2 == 1);
localparam S_SUPPORTED = ((MISA >> 18) % 2 == 1);
localparam U_SUPPORTED = ((MISA >> 20) % 2 == 1);
// N-mode user-level interrupts are depricated per Andrew Waterman 1/13/21
// logarithm of XLEN, used for number of index bits to select
localparam LOG_XLEN = (XLEN == 32 ? 32'd5 : 32'd6);
// Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries)
localparam PMPCFG_ENTRIES = (PMP_ENTRIES/32'd8);
// Floating point constants for Quad, Double, Single, and Half precisions
// Lim: I've made some of these 64 bit to avoid width warnings.
// If errors crop up, try downsizing back to 32.
localparam Q_LEN = 32'd128;
localparam Q_NE = 32'd15;
localparam Q_NF = 32'd112;
localparam Q_BIAS = 32'd16383;
localparam Q_FMT = 2'd3;
localparam D_LEN = 32'd64;
localparam D_NE = 32'd11;
localparam D_NF = 32'd52;
localparam D_BIAS = 32'd1023;
localparam D_FMT = 2'd1;
localparam S_LEN = 32'd32;
localparam S_NE = 32'd8;
localparam S_NF = 32'd23;
localparam S_BIAS = 32'd127;
localparam S_FMT = 2'd0;
localparam H_LEN = 32'd16;
localparam H_NE = 32'd5;
localparam H_NF = 32'd10;
localparam H_BIAS = 32'd15;
localparam H_FMT = 2'd2;
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
localparam FLEN = (Q_SUPPORTED ? Q_LEN : D_SUPPORTED ? D_LEN : S_LEN);
localparam NE = (Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : S_NE);
localparam NF = (Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : S_NF);
localparam FMT = (Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : 2'd0);
localparam BIAS = (Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : S_BIAS);
/* Delete once tested dh 10/10/22
localparam FLEN = (Q_SUPPORTED ? Q_LEN : D_SUPPORTED ? D_LEN : F_SUPPORTED ? S_LEN : H_LEN);
localparam NE = (Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : F_SUPPORTED ? S_NE : H_NE);
localparam NF = (Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : F_SUPPORTED ? S_NF : H_NF);
localparam FMT = (Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : F_SUPPORTED ? 2'd0 : 2'd2);
localparam BIAS = (Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : F_SUPPORTED ? S_BIAS : H_BIAS);*/
// Floating point constants needed for FPU paramerterization
localparam FPSIZES = ((32)'(Q_SUPPORTED)+(32)'(D_SUPPORTED)+(32)'(F_SUPPORTED)+(32)'(ZFH_SUPPORTED));
localparam FMTBITS = ((32)'(FPSIZES>=3)+1);
localparam LEN1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_LEN : (F_SUPPORTED & (FLEN != S_LEN)) ? S_LEN : H_LEN);
localparam NE1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_NE : (F_SUPPORTED & (FLEN != S_LEN)) ? S_NE : H_NE);
localparam NF1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_NF : (F_SUPPORTED & (FLEN != S_LEN)) ? S_NF : H_NF);
localparam FMT1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? 2'd1 : (F_SUPPORTED & (FLEN != S_LEN)) ? 2'd0 : 2'd2);
localparam BIAS1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_BIAS : (F_SUPPORTED & (FLEN != S_LEN)) ? S_BIAS : H_BIAS);
localparam LEN2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_LEN : H_LEN);
localparam NE2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NE : H_NE);
localparam NF2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF : H_NF);
localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2);
localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS);
// division constants
localparam DIVN = (((NF<XLEN) & IDIV_ON_FPU) ? XLEN : NF+2); // standard length of input
localparam LOGR = ($clog2(RADIX)); // r = log(R)
localparam RK = (LOGR*DIVCOPIES); // r*k used for intdiv preproc
localparam LOGRK = ($clog2(RK)); // log2(r*k)
localparam FPDUR = ((DIVN+1+(LOGR*DIVCOPIES))/(LOGR*DIVCOPIES)+(RADIX/4));
localparam DURLEN = ($clog2(FPDUR+1));
localparam DIVb = (FPDUR*LOGR*DIVCOPIES-1); // canonical fdiv size (b)
localparam DIVBLEN = ($clog2(DIVb+1)-1);
localparam DIVa = (DIVb+1-XLEN); // used for idiv on fpu
// largest length in IEU/FPU
localparam CVTLEN = ((NF<XLEN) ? (XLEN) : (NF));
localparam LLEN = (($unsigned(FLEN)<$unsigned(XLEN)) ? ($unsigned(XLEN)) : ($unsigned(FLEN)));
localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6)));
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ));
localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVN+1+NF) > (3*NF+4) ? (DIVN+1+NF) : (3*NF+4)));
// Disable spurious Verilator warnings
/* verilator lint_off STMTDLY */
/* verilator lint_off ASSIGNDLY */
/* verilator lint_off PINCONNECTEMPTY */

View File

@ -46,18 +46,33 @@ configs = [
)
]
# bpdSize = [6, 8, 10, 12, 14, 16]
# bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic', 'local_basic']
# for CurrBPType in bpdType:
# for CurrBPSize in bpdSize:
# name = CurrBPType+str(CurrBPSize)
# configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize)
# tc = TestCase(
# name=name,
# variant="rv32gc",
# cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
# grepstr="")
# configs.append(tc)
bpdSize = [6, 8, 10, 12, 14, 16]
bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic']
LHRSize = [4, 8, 10]
bpdType = ['local_repair']
for CurrBPType in bpdType:
for CurrBPSize in bpdSize:
name = CurrBPType+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize)
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
for CurrLHRSize in LHRSize:
name = str(CurrLHRSize)+CurrBPType+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize) + " +define+BPRED_NUM_LHR=" + str(CurrLHRSize) + " "
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
import os
from multiprocessing import Pool, TimeoutError

View File

@ -8,7 +8,7 @@ basepath=$(dirname $0)/..
for config in rv32e rv64gc rv32gc rv32imc rv32i rv64i rv64fpquad; do
#for config in rv64gc; do
echo "$config linting..."
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes ); then
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/wally/cvw.sv $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes ); then
echo "Exiting after $config lint due to errors or warnings"
exit 1
fi

View File

@ -59,7 +59,7 @@ if {$argc >= 3} {
# default to config/rv64ic, but allow this to be overridden at the command line. For example:
# do wally-pipelined-batch.do ../config/rv32imc rv32imc
if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
# start and run simulation
if { $coverage } {
echo "wally-batch buildroot coverage"
@ -88,7 +88,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
exec ./slack-notifier/slack-notifier.py
} elseif {$2 eq "ahb"} {
vlog -lint -work wkdir/work_${1}_${2}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 +define+RAM_LATENCY=$3 +define+BURST_EN=$4
vlog -lint -work wkdir/work_${1}_${2}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 +define+RAM_LATENCY=$3 +define+BURST_EN=$4
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
vopt wkdir/work_${1}_${2}_${3}_${4}.testbench -work wkdir/work_${1}_${2}_${3}_${4} -G TEST=$2 -o testbenchopt
@ -112,7 +112,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
# **** fix this so we can pass any number of +defines.
# only allows 3 right now
vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7
vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
vopt wkdir/work_${1}_${3}_${4}.testbench -work wkdir/work_${1}_${3}_${4} -G TEST=$4 -o testbenchopt
@ -126,7 +126,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
# power off -r /dut/core/*
} else {
vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286
vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
if {$coverage} {

View File

@ -6,12 +6,6 @@ add wave -noupdate /testbench/reset
add wave -noupdate /testbench/reset_ext
add wave -noupdate /testbench/memfilename
add wave -noupdate /testbench/dut/core/SATP_REGW
add wave -noupdate /testbench/FunctionName/FunctionName/PCD
add wave -noupdate /testbench/FunctionName/FunctionName/PCE
add wave -noupdate /testbench/FunctionName/FunctionName/PCF
add wave -noupdate /testbench/FunctionName/FunctionName/PCM
add wave -noupdate /testbench/FunctionName/FunctionName/PCM_temp
add wave -noupdate /testbench/FunctionName/FunctionName/PCMOld
add wave -noupdate /testbench/dut/core/InstrValidM
add wave -noupdate /testbench/FunctionName/FunctionName/FunctionAddr
add wave -noupdate /testbench/FunctionName/FunctionName/ProgramAddrIndex
@ -38,15 +32,16 @@ add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/LoadPageFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/StoreAmoPageFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InterruptM
add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/core/FlushD
add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/core/FlushE
add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/core/FlushM
add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/core/FlushW
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallF
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallD
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallE
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallM
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallW
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/HPTWInstrAccessFaultM
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallF
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallD
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallE
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallM
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallW
add wave -noupdate -group {instruction pipeline} /testbench/InstrFName
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/PostSpillInstrRawF
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD
@ -66,10 +61,10 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/c/RegWriteD
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/RdD
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE
add wave -noupdate -group {Execution Stage} /testbench/InstrEName
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/PCE
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/InstrE
add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE
add wave -noupdate -expand -group {Memory Stage} /testbench/FunctionName/FunctionName/FunctionName
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrValidM
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM
@ -99,13 +94,13 @@ add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/STVEC_REGW
add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/csr/csru/csru/FRM_REGW
add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/csr/csru/csru/FFLAGS_REGW
add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/csr/csru/csru/STATUS_FS
add wave -noupdate -group Bpred -group {branch update selection inputs} -divider {class check}
add wave -noupdate -group Bpred -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF
add wave -noupdate -group Bpred -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/NextValidPCE
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCF
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F
add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} -divider {class check}
add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF
add wave -noupdate -expand -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/NextValidPCE
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/PCF
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F
add wave -noupdate -group RegFile -expand /testbench/dut/core/ieu/dp/regf/rf
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a1
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a2
@ -120,8 +115,8 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultW
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/A
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/B
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/Result
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/ALUControl
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/ALUResult
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/BALUControl
add wave -noupdate -group alu -divider internals
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs1D
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs2D
@ -152,23 +147,24 @@ add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core
add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core/ebu/ebu/LSUDisable
add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core/ebu/ebu/IFUSelect
add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core/ebu/ebu/LSUSelect
add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core/ebu/ebu/ebufsmarb/CurrState
add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HTRANS
add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HBURST
add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/ebu/ebu/IFUHTRANS
add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/ebu/ebu/IFUHADDR
add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/ebu/ebu/IFUHBURST
add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/ebu/ebu/IFUHREADY
add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/HRDATA
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUReq
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHTRANS
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHSIZE
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHBURST
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHADDR
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/HRDATA
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHWRITE
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHWSTRB
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHWDATA
add wave -noupdate -group AHB -expand -group LSU -color Pink /testbench/dut/core/lsu/LSUHREADY
add wave -noupdate -group AHB -group IFU /testbench/dut/core/ebu/ebu/IFUHTRANS
add wave -noupdate -group AHB -group IFU /testbench/dut/core/ebu/ebu/IFUHADDR
add wave -noupdate -group AHB -group IFU /testbench/dut/core/ebu/ebu/IFUHBURST
add wave -noupdate -group AHB -group IFU /testbench/dut/core/ebu/ebu/IFUHREADY
add wave -noupdate -group AHB -group IFU /testbench/dut/core/HRDATA
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUReq
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHTRANS
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHSIZE
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHBURST
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHADDR
add wave -noupdate -group AHB -group LSU /testbench/dut/core/HRDATA
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHWRITE
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHWSTRB
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHWDATA
add wave -noupdate -group AHB -group LSU -color Pink /testbench/dut/core/lsu/LSUHREADY
add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HCLK
add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HRESETn
add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HREADY
@ -210,12 +206,12 @@ add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/d
add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr
add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/IEUAdrE
add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CAdr
add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet
add wave -noupdate -group lsu -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay}
add wave -noupdate -group lsu -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/HitWay
add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUWriteEn
add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CAdr
add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CacheSet
add wave -noupdate -group lsu -group dcache -group {replacement policy} -color {Orange Red} {/testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUMemory[0]}
add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CurrLRU
add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/NextLRU
@ -235,7 +231,7 @@ add wave -noupdate -group lsu -group dcache -group flush /testbench/dut/core/lsu
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/PAdr
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/CAdr
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/NextLRU
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CurrLRU
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUWriteEn
@ -252,60 +248,59 @@ add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/RAM[62]}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/RAM[62]}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/wordram/CacheDataMem/we}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/wordram/CacheDataMem/RAM}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/CAdr
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidWay}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
@ -323,14 +318,7 @@ add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way3
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag}
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteData
add wave -noupdate -group lsu -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextSet
add wave -noupdate -group lsu -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr
add wave -noupdate -group lsu -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck
add wave -noupdate -group lsu -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord
@ -362,22 +350,27 @@ add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMASt
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/DTLBWalk
add wave -noupdate -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/WalkerState
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/HPTWAdr
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/PTE
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/NextPageType
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/PageType
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/ValidNonLeafPTE
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/SelHPTW
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/HPTWStall
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/DTLBWalk
add wave -noupdate -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/hptw/hptw/WalkerState
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/HPTWAdr
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/PTE
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/NextPageType
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/PageType
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/ValidNonLeafPTE
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/ITLBMissF
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/DTLBMissM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/ITLBWriteF
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/DTLBWriteM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/LSULoadAccessFaultM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/LSUStoreAmoAccessFaultM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/HPTWInstrAccessFaultM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/LoadAccessFaultM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/StoreAmoAccessFaultM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/hptw/ITLBWriteF
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/hptw/DTLBWriteM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSUAccessFaultM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/DCacheStallM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/HPTWInstrAccessFaultF
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSULoadAccessFaultM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSUStoreAmoAccessFaultM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LoadAccessFaultM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/StoreAmoAccessFaultM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/HPTWInstrAccessFault
add wave -noupdate -group plic /testbench/dut/uncore/uncore/plic/plic/UARTIntr
add wave -noupdate -group plic /testbench/dut/uncore/uncore/plic/plic/GPIOIntr
add wave -noupdate -group plic /testbench/dut/uncore/uncore/plic/plic/MExtInt
@ -396,9 +389,9 @@ add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/un
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/uncore/plic/plic/max_priority_with_irqs
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/uncore/plic/plic/irqs_at_max_priority
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/uncore/plic/plic/threshMask
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOPinsIn
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOPinsOut
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOPinsEn
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOIN
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOOUT
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOEN
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOIntr
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/PSEL
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/PADDR
@ -462,7 +455,7 @@ add wave -noupdate -group ifu /testbench/dut/core/ifu/PostSpillInstrRawF
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUStallF
add wave -noupdate -group ifu -group Spill /testbench/dut/core/ifu/Spill/spill/CurrState
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/SpillF
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/IFUCacheBusStallD
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/IFUCacheBusStallF
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/ITLBMissF
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/TakeSpillF
add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HSIZE
@ -483,54 +476,38 @@ add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and
add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr
add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck
add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/VictimWay
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/bwe}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/wordram/CacheDataMem/dout}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/wordram/CacheDataMem/RAM}
add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite
add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/ITLBMissF
add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/VAdr
@ -556,21 +533,21 @@ add wave -noupdate -group ifu -group itlb -expand -group key19 {/testbench/dut/c
add wave -noupdate -expand -group {Performance Counters} -label MCYCLE -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[0]}
add wave -noupdate -expand -group {Performance Counters} -label MINSTRET -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[2]}
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label Branch -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[3]}
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Dir Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[7]}
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {Jump (Not Return)} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[4]}
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label Return -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[5]}
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[6]}
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Dir Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[7]}
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BTA Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[8]}
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {RAS Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[9]}
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP CLASS WRONG} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[10]}
add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Access} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[16]}
add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Miss} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[17]}
add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Miss Cycles} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[18]}
add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {Load Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]}
add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {Store Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]}
add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]}
add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[13]}
add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {D Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[15]}
add wave -noupdate -expand -group {Performance Counters} -expand -group ICACHE -label {I Cache Access} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[16]}
add wave -noupdate -expand -group {Performance Counters} -expand -group ICACHE -label {I Cache Miss} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[17]}
add wave -noupdate -expand -group {Performance Counters} -expand -group ICACHE -label {I Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[18]}
add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {Load Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]}
add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {Store Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]}
add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]}
add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[13]}
add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {D Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[15]}
add wave -noupdate -expand -group {Performance Counters} -group Privileged -label {CSR Write} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[19]}
add wave -noupdate -expand -group {Performance Counters} -group Privileged -label Fence.I {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[20]}
add wave -noupdate -expand -group {Performance Counters} -group Privileged -label sfence.VMA {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[21]}
@ -609,34 +586,22 @@ add wave -noupdate -group uncore /testbench/dut/uncore/uncore/HSELNoneD
add wave -noupdate -group uncore /testbench/dut/uncore/uncore/HSELPLICD
add wave -noupdate -group uncore /testbench/dut/uncore/uncore/HRDATA
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/rd
add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/IndexNextF
add wave -noupdate -group {branch direction} -expand -group {branch outcome} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCSrcE
add wave -noupdate -group {branch direction} -expand -group conditions /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushM
add wave -noupdate -group {branch direction} -expand -group conditions /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushE
add wave -noupdate -group {branch direction} -expand -group ghr /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRF
add wave -noupdate -group {branch direction} -expand -group ghr /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRD
add wave -noupdate -group {branch direction} -expand -group ghr /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRE
add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushD
add wave -noupdate -group {branch direction} -expand -group nextghr2 /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRNextF
add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/IndexE
add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/StallM
add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushM
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRF
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCNextF
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRNextF
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/IndexNextF
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/DCacheAccess
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/ICacheMiss
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/ICacheAccess
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/DCacheMiss
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/InstrValidNotFlushedM
add wave -noupdate /testbench/clk
add wave -noupdate /testbench/HPMCSample/InitialHPMCOUNTERH
add wave -noupdate /testbench/HPMCSample/EndSample
add wave -noupdate /testbench/HPMCSample/StartSample
add wave -noupdate /testbench/dut/core/ifu/PCF
add wave -noupdate /testbench/reset
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPDirPredD
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BranchM
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/NewBPDirPredM
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/mem
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {23 ns} 0} {{Cursor 5} {394987 ns} 1}
quietly wave cursor active 4
WaveRestoreCursors {{Cursor 4} {12208 ns} 1} {{Cursor 4} {435726 ns} 0}
quietly wave cursor active 2
configure wave -namecolwidth 250
configure wave -valuecolwidth 194
configure wave -justifyvalue left
@ -651,4 +616,4 @@ configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {0 ns} {52 ns}
WaveRestoreZoom {435627 ns} {435795 ns}

14
src/cache/cache.sv vendored
View File

@ -27,9 +27,7 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, READ_ONLY_CACHE) (
module cache #(parameter PA_BITS, XLEN, LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, READ_ONLY_CACHE) (
input logic clk,
input logic reset,
input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY
@ -40,7 +38,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
input logic FlushCache, // Flush all dirty lines back to memory
input logic InvalidateCache, // Clear all valid bits
input logic [11:0] NextSet, // Virtual address, but we only use the lower 12 bits.
input logic [`PA_BITS-1:0] PAdr, // Physical address
input logic [PA_BITS-1:0] PAdr, // Physical address
input logic [(WORDLEN-1)/8:0] ByteMask, // Which bytes to write (D$ only)
input logic [WORDLEN-1:0] CacheWriteData, // Data to write to cache (D$ only)
output logic CacheCommitted, // Cache has started bus operation that shouldn't be interrupted
@ -57,7 +55,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
input logic [LOGBWPL-1:0] BeatCount, // Beat in burst
input logic [LINELEN-1:0] FetchBuffer, // Buffer long enough to hold entire cache line arriving from bus
output logic [1:0] CacheBusRW, // [1] Read (cache line fetch) or [0] write bus (cache line writeback)
output logic [`PA_BITS-1:0] CacheBusAdr // Address for bus access
output logic [PA_BITS-1:0] CacheBusAdr // Address for bus access
);
// Cache parameters
@ -65,7 +63,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
localparam OFFSETLEN = $clog2(LINEBYTELEN); // Number of bits in offset field
localparam SETLEN = $clog2(NUMLINES); // Number of set bits
localparam SETTOP = SETLEN+OFFSETLEN; // Number of set plus offset bits
localparam TAGLEN = `PA_BITS - SETTOP; // Number of tag bits
localparam TAGLEN = PA_BITS - SETTOP; // Number of tag bits
localparam CACHEWORDSPERLINE = LINELEN/WORDLEN;// Number of words in cache line
localparam LOGCWPL = $clog2(CACHEWORDSPERLINE);// Log2 of ^
localparam FLUSHADRTHRESHOLD = NUMLINES - 1; // Used to determine when flush is complete
@ -114,7 +112,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
AdrSelMuxSel, CacheSet);
// Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0](
cacheway #(PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0](
.clk, .reset, .CacheEn, .CacheSet, .PAdr, .LineWriteData, .LineByteMask,
.SetValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);
@ -152,7 +150,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
.PAdr(WordOffsetAddr), .ReadDataLine, .ReadDataWord);
// Bus address for fetch, writeback, or flush writeback
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
mux3 #(PA_BITS) CacheBusAdrMux(.d0({PAdr[PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
.s({SelFlush, SelWriteback}), .y(CacheBusAdr));

View File

@ -113,7 +113,7 @@ module cacheLRU
// The root node of the LRU tree will always be selected in LRUUpdate. No mux needed.
assign NextLRU[NUMWAYS-2] = ~WayExpanded[NUMWAYS-2];
mux2 #(1) LRUMuxes[NUMWAYS-3:0](CurrLRU[NUMWAYS-3:0], ~WayExpanded[NUMWAYS-3:0], LRUUpdate[NUMWAYS-3:0], NextLRU[NUMWAYS-3:0]);
if (NUMWAYS > 2) mux2 #(1) LRUMuxes[NUMWAYS-3:0](CurrLRU[NUMWAYS-3:0], ~WayExpanded[NUMWAYS-3:0], LRUUpdate[NUMWAYS-3:0], NextLRU[NUMWAYS-3:0]);
// Compute next victim way.
for(node = NUMWAYS-2; node >= NUMWAYS/2; node--) begin

16
src/cache/cacheway.sv vendored
View File

@ -27,16 +27,14 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
module cacheway #(parameter PA_BITS, XLEN, NUMLINES=512, LINELEN = 256, TAGLEN = 26,
OFFSETLEN = 5, INDEXLEN = 9, READ_ONLY_CACHE = 0) (
input logic clk,
input logic reset,
input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations)
input logic CacheEn, // Enable the cache memory arrays. Disable hold read data constant
input logic [$clog2(NUMLINES)-1:0] CacheSet, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
input logic [`PA_BITS-1:0] PAdr, // Physical address
input logic [PA_BITS-1:0] PAdr, // Physical address
input logic [LINELEN-1:0] LineWriteData, // Final data written to cache (D$ only)
input logic SetValid, // Set the valid bit in the selected way and set
input logic SetDirty, // Set the dirty bit in the selected way and set
@ -54,11 +52,11 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
output logic DirtyWay, // This way is dirty
output logic [TAGLEN-1:0] TagWay); // This way's tag if valid
localparam WORDSPERLINE = LINELEN/`XLEN;
localparam WORDSPERLINE = LINELEN/XLEN;
localparam BYTESPERLINE = LINELEN/8;
localparam LOGWPL = $clog2(WORDSPERLINE);
localparam LOGXLENBYTES = $clog2(`XLEN/8);
localparam BYTESPERWORD = `XLEN/8;
localparam LOGXLENBYTES = $clog2(XLEN/8);
localparam BYTESPERWORD = XLEN/8;
logic [NUMLINES-1:0] ValidBits;
logic [NUMLINES-1:0] DirtyBits;
@ -113,12 +111,12 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
ram1p1rwe #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn),
.addr(CacheSet), .dout(ReadTag),
.din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));
.din(PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));
// AND portion of distributed tag multiplexer
assign TagWay = SelTag ? ReadTag : '0; // AND part of AOMux
assign DirtyWay = SelTag & Dirty & ValidWay;
assign HitWay = ValidWay & (ReadTag == PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]);
assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]);
/////////////////////////////////////////////////////////////////////////////////////////////
// Data Array

View File

@ -27,9 +27,10 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module ahbcacheinterface #(
parameter AHBW,
parameter LLEN,
parameter PA_BITS,
parameter BEATSPERLINE, // Number of AHBW words (beats) in cacheline
parameter AHBWLOGBWPL, // Log2 of ^
parameter LINELEN, // Number of bits in cacheline
@ -44,14 +45,14 @@ module ahbcacheinterface #(
output logic [2:0] HSIZE, // AHB transaction width
output logic [2:0] HBURST, // AHB burst length
// bus interface buses
input logic [`AHBW-1:0] HRDATA, // AHB read data
output logic [`PA_BITS-1:0] HADDR, // AHB address
output logic [`AHBW-1:0] HWDATA, // AHB write data
output logic [`AHBW/8-1:0] HWSTRB, // AHB byte mask
input logic [AHBW-1:0] HRDATA, // AHB read data
output logic [PA_BITS-1:0] HADDR, // AHB address
output logic [AHBW-1:0] HWDATA, // AHB write data
output logic [AHBW/8-1:0] HWSTRB, // AHB byte mask
// cache interface
input logic [`PA_BITS-1:0] CacheBusAdr, // Address of cache line
input logic [`LLEN-1:0] CacheReadDataWordM, // One word of cache line during a writeback
input logic [PA_BITS-1:0] CacheBusAdr, // Address of cache line
input logic [LLEN-1:0] CacheReadDataWordM, // One word of cache line during a writeback
input logic CacheableOrFlushCacheM, // Memory operation is cacheable or flushing D$
input logic Cacheable, // Memory operation is cachable
input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch
@ -61,8 +62,8 @@ module ahbcacheinterface #(
output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr
// uncached interface
input logic [`PA_BITS-1:0] PAdr, // Physical address of uncached memory operation
input logic [`LLEN-1:0] WriteDataM, // IEU write data for uncached store
input logic [PA_BITS-1:0] PAdr, // Physical address of uncached memory operation
input logic [LLEN-1:0] WriteDataM, // IEU write data for uncached store
input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write
input logic [2:0] Funct3, // Size of uncached memory operation
@ -74,11 +75,11 @@ module ahbcacheinterface #(
localparam BeatCountThreshold = BEATSPERLINE - 1; // Largest beat index
logic [`PA_BITS-1:0] LocalHADDR; // Address after selecting between cached and uncached operation
logic [PA_BITS-1:0] LocalHADDR; // Address after selecting between cached and uncached operation
logic [AHBWLOGBWPL-1:0] BeatCountDelayed; // Beat within the cache line in the second (Data) cache stage
logic CaptureEn; // Enable updating the Fetch buffer with valid data from HRDATA
logic [`AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s
logic [`AHBW-1:0] PreHWDATA; // AHB Address phase write data
logic [AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s
logic [AHBW-1:0] PreHWDATA; // AHB Address phase write data
genvar index;
@ -86,35 +87,35 @@ module ahbcacheinterface #(
for (index = 0; index < BEATSPERLINE; index++) begin:fetchbuffer
logic [BEATSPERLINE-1:0] CaptureBeat;
assign CaptureBeat[index] = CaptureEn & (index == BeatCountDelayed);
flopen #(`AHBW) fb(.clk(HCLK), .en(CaptureBeat[index]), .d(HRDATA),
.q(FetchBuffer[(index+1)*`AHBW-1:index*`AHBW]));
flopen #(AHBW) fb(.clk(HCLK), .en(CaptureBeat[index]), .d(HRDATA),
.q(FetchBuffer[(index+1)*AHBW-1:index*AHBW]));
end
mux2 #(`PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR);
assign HADDR = ({{`PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(`AHBW/8)) + LocalHADDR;
mux2 #(PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR);
assign HADDR = ({{PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(AHBW/8)) + LocalHADDR;
mux2 #(3) sizemux(.d0(Funct3), .d1(`AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE));
mux2 #(3) sizemux(.d0(Funct3), .d1(AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE));
// When AHBW is less than LLEN need extra muxes to select the subword from cache's read data.
logic [`AHBW-1:0] CacheReadDataWordAHB;
logic [AHBW-1:0] CacheReadDataWordAHB;
if(LLENPOVERAHBW > 1) begin
logic [`AHBW-1:0] AHBWordSets [(LLENPOVERAHBW)-1:0];
logic [AHBW-1:0] AHBWordSets [(LLENPOVERAHBW)-1:0];
genvar index;
for (index = 0; index < LLENPOVERAHBW; index++) begin:readdatalinesetsmux
assign AHBWordSets[index] = CacheReadDataWordM[(index*`AHBW)+`AHBW-1: (index*`AHBW)];
assign AHBWordSets[index] = CacheReadDataWordM[(index*AHBW)+AHBW-1: (index*AHBW)];
end
assign CacheReadDataWordAHB = AHBWordSets[BeatCount[$clog2(LLENPOVERAHBW)-1:0]];
end else assign CacheReadDataWordAHB = CacheReadDataWordM[`AHBW-1:0];
end else assign CacheReadDataWordAHB = CacheReadDataWordM[AHBW-1:0];
mux2 #(`AHBW) HWDATAMux(.d0(CacheReadDataWordAHB), .d1(WriteDataM[`AHBW-1:0]),
mux2 #(AHBW) HWDATAMux(.d0(CacheReadDataWordAHB), .d1(WriteDataM[AHBW-1:0]),
.s(~(CacheableOrFlushCacheM)), .y(PreHWDATA));
flopen #(`AHBW) wdreg(HCLK, HREADY, PreHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec
flopen #(AHBW) wdreg(HCLK, HREADY, PreHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec
// *** bummer need a second byte mask for bus as it is AHBW rather than LLEN.
// probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0.
swbytemask #(`AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(`AHBW/8)-1:0]), .ByteMask(BusByteMaskM));
swbytemask #(AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(AHBW/8)-1:0]), .ByteMask(BusByteMaskM));
flopen #(`AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[`AHBW/8-1:0], HWSTRB);
flopen #(AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[AHBW/8-1:0], HWSTRB);
buscachefsm #(BeatCountThreshold, AHBWLOGBWPL, READ_ONLY_CACHE) AHBBuscachefsm(
.HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat,

View File

@ -27,9 +27,8 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module ahbinterface #(
parameter XLEN,
parameter LSU = 0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits
)(
input logic HCLK, HRESETn,
@ -37,30 +36,30 @@ module ahbinterface #(
input logic HREADY, // AHB peripheral ready
output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
output logic HWRITE, // AHB 0: Read operation 1: Write operation
input logic [`XLEN-1:0] HRDATA, // AHB read data
output logic [`XLEN-1:0] HWDATA, // AHB write data
output logic [`XLEN/8-1:0] HWSTRB, // AHB byte mask
input logic [XLEN-1:0] HRDATA, // AHB read data
output logic [XLEN-1:0] HWDATA, // AHB write data
output logic [XLEN/8-1:0] HWSTRB, // AHB byte mask
// lsu/ifu interface
input logic Stall, // Core pipeline is stalled
input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write
input logic [`XLEN/8-1:0] ByteMask, // Bytes enables within a word
input logic [`XLEN-1:0] WriteData, // IEU write data for a store
input logic [XLEN/8-1:0] ByteMask, // Bytes enables within a word
input logic [XLEN-1:0] WriteData, // IEU write data for a store
output logic BusStall, // Bus is busy with an in flight memory operation
output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
output logic [(LSU ? `XLEN : 32)-1:0] FetchBuffer // Register to hold HRDATA after arriving from the bus
output logic [(LSU ? XLEN : 32)-1:0] FetchBuffer // Register to hold HRDATA after arriving from the bus
);
logic CaptureEn;
localparam LEN = (LSU ? `XLEN : 32); // 32 bits for IFU, XLEN for LSU
localparam LEN = (LSU ? XLEN : 32); // 32 bits for IFU, XLEN for LSU
flopen #(LEN) fb(.clk(HCLK), .en(CaptureEn), .d(HRDATA[LEN-1:0]), .q(FetchBuffer));
if(LSU) begin
// delay HWDATA by 1 cycle per spec; assumes AHBW = XLEN
flop #(`XLEN) wdreg(HCLK, WriteData, HWDATA);
flop #(`XLEN/8) HWSTRBReg(HCLK, ByteMask, HWSTRB);
flop #(XLEN) wdreg(HCLK, WriteData, HWDATA);
flop #(XLEN/8) HWSTRBReg(HCLK, ByteMask, HWSTRB);
end else begin
assign HWDATA = '0;
assign HWSTRB = '0;

View File

@ -27,7 +27,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`define BURST_EN 1 // Enables burst mode. Disable to show the lost performance.
// HCLK and clk must be the same clock!

View File

@ -27,8 +27,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// HCLK and clk must be the same clock!
module busfsm (
input logic HCLK,

View File

@ -31,9 +31,8 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module controllerinput #(
parameter PA_BITS,
parameter SAVE_ENABLED = 1 // 1: Save manager inputs if Save = 1, 0: Don't save inputs
)(
input logic HCLK,
@ -47,14 +46,14 @@ module controllerinput #(
input logic HWRITEIn, // Manager input. AHB 0: Read operation 1: Write operation
input logic [2:0] HSIZEIn, // Manager input. AHB transaction width
input logic [2:0] HBURSTIn, // Manager input. AHB burst length
input logic [`PA_BITS-1:0] HADDRIn, // Manager input. AHB address
input logic [PA_BITS-1:0] HADDRIn, // Manager input. AHB address
output logic HREADYOut, // Indicate to manager the peripheral is not busy and another manager does not have priority
// controller output
output logic [1:0] HTRANSOut, // Arbitrated manager transaction. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
output logic HWRITEOut, // Arbitrated manager transaction. AHB 0: Read operation 1: Write operation
output logic [2:0] HSIZEOut, // Arbitrated manager transaction. AHB transaction width
output logic [2:0] HBURSTOut, // Arbitrated manager transaction. AHB burst length
output logic [`PA_BITS-1:0] HADDROut, // Arbitrated manager transaction. AHB address
output logic [PA_BITS-1:0] HADDROut, // Arbitrated manager transaction. AHB address
input logic HREADYIn // Peripheral ready
);
@ -62,13 +61,13 @@ module controllerinput #(
logic [2:0] HSIZESave;
logic [2:0] HBURSTSave;
logic [1:0] HTRANSSave;
logic [`PA_BITS-1:0] HADDRSave;
logic [PA_BITS-1:0] HADDRSave;
if (SAVE_ENABLED) begin
flopenr #(1+3+3+2+`PA_BITS) SaveReg(HCLK, ~HRESETn, Save,
flopenr #(1+3+3+2+PA_BITS) SaveReg(HCLK, ~HRESETn, Save,
{HWRITEIn, HSIZEIn, HBURSTIn, HTRANSIn, HADDRIn},
{HWRITESave, HSIZESave, HBURSTSave, HTRANSSave, HADDRSave});
mux2 #(1+3+3+2+`PA_BITS) RestorMux({HWRITEIn, HSIZEIn, HBURSTIn, HTRANSIn, HADDRIn},
mux2 #(1+3+3+2+PA_BITS) RestorMux({HWRITEIn, HSIZEIn, HBURSTIn, HTRANSIn, HADDRIn},
{HWRITESave, HSIZESave, HBURSTSave, HTRANSSave, HADDRSave},
Restore,
{HWRITEOut, HSIZEOut, HBURSTOut, HTRANSOut, HADDROut});

View File

@ -31,33 +31,31 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module ebu (
module ebu #(parameter XLEN, PA_BITS, AHBW)(
input logic clk, reset,
// Signals from IFU
input logic [1:0] IFUHTRANS, // IFU AHB transaction request
input logic [2:0] IFUHSIZE, // IFU AHB transaction size
input logic [2:0] IFUHBURST, // IFU AHB burst length
input logic [`PA_BITS-1:0] IFUHADDR, // IFU AHB address
input logic [PA_BITS-1:0] IFUHADDR, // IFU AHB address
output logic IFUHREADY, // AHB peripheral ready gated by possible non-grant
// Signals from LSU
input logic [1:0] LSUHTRANS, // LSU AHB transaction request
input logic LSUHWRITE, // LSU AHB transaction direction. 1: write, 0: read
input logic [2:0] LSUHSIZE, // LSU AHB size
input logic [2:0] LSUHBURST, // LSU AHB burst length
input logic [`PA_BITS-1:0] LSUHADDR, // LSU AHB address
input logic [`XLEN-1:0] LSUHWDATA, // initially support AHBW = XLEN
input logic [`XLEN/8-1:0] LSUHWSTRB, // AHB byte mask
input logic [PA_BITS-1:0] LSUHADDR, // LSU AHB address
input logic [XLEN-1:0] LSUHWDATA, // initially support AHBW = XLEN
input logic [XLEN/8-1:0] LSUHWSTRB, // AHB byte mask
output logic LSUHREADY, // AHB peripheral. Never gated as LSU always has priority
// AHB-Lite external signals
output logic HCLK, HRESETn,
input logic HREADY, // AHB peripheral ready
input logic HRESP, // AHB peripheral response. 0: OK 1: Error
output logic [`PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration
output logic [`AHBW-1:0] HWDATA, // AHB Write data after arbitration
output logic [`XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration
output logic [PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration
output logic [AHBW-1:0] HWDATA, // AHB Write data after arbitration
output logic [XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration
output logic HWRITE, // AHB transaction direction after arbitration
output logic [2:0] HSIZE, // AHB transaction size after arbitration
output logic [2:0] HBURST, // AHB burst length after arbitration
@ -73,13 +71,13 @@ module ebu (
logic IFUDisable;
logic IFUSelect;
logic [`PA_BITS-1:0] IFUHADDROut;
logic [PA_BITS-1:0] IFUHADDROut;
logic [1:0] IFUHTRANSOut;
logic [2:0] IFUHBURSTOut;
logic [2:0] IFUHSIZEOut;
logic IFUHWRITEOut;
logic [`PA_BITS-1:0] LSUHADDROut;
logic [PA_BITS-1:0] LSUHADDROut;
logic [1:0] LSUHTRANSOut;
logic [2:0] LSUHBURSTOut;
logic [2:0] LSUHSIZEOut;
@ -98,14 +96,14 @@ module ebu (
// input stages and muxing for IFU and LSU
////////////////////////////////////////////////////////////////////////////////////////////////////
controllerinput IFUInput(.HCLK, .HRESETn, .Save(IFUSave), .Restore(IFURestore), .Disable(IFUDisable),
controllerinput #(PA_BITS) IFUInput(.HCLK, .HRESETn, .Save(IFUSave), .Restore(IFURestore), .Disable(IFUDisable),
.Request(IFUReq),
.HWRITEIn(1'b0), .HSIZEIn(IFUHSIZE), .HBURSTIn(IFUHBURST), .HTRANSIn(IFUHTRANS), .HADDRIn(IFUHADDR),
.HWRITEOut(IFUHWRITEOut), .HSIZEOut(IFUHSIZEOut), .HBURSTOut(IFUHBURSTOut), .HREADYOut(IFUHREADY),
.HTRANSOut(IFUHTRANSOut), .HADDROut(IFUHADDROut), .HREADYIn(HREADY));
// LSU always has priority so there should never be a need to save and restore the address phase inputs.
controllerinput #(0) LSUInput(.HCLK, .HRESETn, .Save(1'b0), .Restore(1'b0), .Disable(LSUDisable),
controllerinput #(PA_BITS, 0) LSUInput(.HCLK, .HRESETn, .Save(1'b0), .Restore(1'b0), .Disable(LSUDisable),
.Request(LSUReq),
.HWRITEIn(LSUHWRITE), .HSIZEIn(LSUHSIZE), .HBURSTIn(LSUHBURST), .HTRANSIn(LSUHTRANS), .HADDRIn(LSUHADDR), .HREADYOut(LSUHREADY),
.HWRITEOut(LSUHWRITEOut), .HSIZEOut(LSUHSIZEOut), .HBURSTOut(LSUHBURSTOut),

View File

@ -28,8 +28,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module ebufsmarb (
input logic HCLK,
input logic HRESETn,

View File

@ -25,16 +25,15 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fclassify (
module fclassify import cvw::*; #(parameter cvw_t P) (
input logic Xs, // sign bit
input logic XNaN, // is NaN
input logic XSNaN, // is signaling NaN
input logic XSubnorm, // is Subnormal
input logic XZero, // is zero
input logic XInf, // is infinity
output logic [`XLEN-1:0] ClassRes // classify result
output logic [P.XLEN-1:0] ClassRes // classify result
);
logic PInf, PZero, PNorm, PSubnorm; // is the input a positive infinity/zero/normal/subnormal
@ -63,6 +62,6 @@ module fclassify (
// bit 7 - +Inf
// bit 8 - signaling NaN
// bit 9 - quiet NaN
assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf};
assign ClassRes = {{P.XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf};
endmodule

View File

@ -27,8 +27,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// OpCtrl values
// 110 min
// 101 max
@ -36,23 +34,23 @@
// 001 less than
// 011 less than or equal
module fcmp (
input logic [`FMTBITS-1:0] Fmt, // format of fp number
module fcmp import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] Fmt, // format of fp number
input logic [2:0] OpCtrl, // see above table
input logic Xs, Ys, // input signs
input logic [`NE-1:0] Xe, Ye, // input exponents
input logic [`NF:0] Xm, Ym, // input mantissa
input logic [P.NE-1:0] Xe, Ye, // input exponents
input logic [P.NF:0] Xm, Ym, // input mantissa
input logic XZero, YZero, // is zero
input logic XNaN, YNaN, // is NaN
input logic XSNaN, YSNaN, // is signaling NaN
input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker)
input logic [P.FLEN-1:0] X, Y, // original inputs (before unpacker)
output logic CmpNV, // invalid flag
output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result
output logic [`XLEN-1:0] CmpIntRes // compare integer result
output logic [P.FLEN-1:0] CmpFpRes, // compare floating-point result
output logic [P.XLEN-1:0] CmpIntRes // compare integer result
);
logic LTabs, LT, EQ; // is X < or > or = Y
logic [`FLEN-1:0] NaNRes; // NaN result
logic [P.FLEN-1:0] NaNRes; // NaN result
logic BothZero; // are both inputs zero
logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN
@ -85,44 +83,44 @@ module fcmp (
// for RISC-V, return the canonical NaN
// select the NaN result
if (`FPSIZES == 1)
if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
if (P.FPSIZES == 1)
if(P.IEEE754) assign NaNRes = {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
else assign NaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
else if (`FPSIZES == 2)
if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
else if (P.FPSIZES == 2)
if(P.IEEE754) assign NaNRes = Fmt ? {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, Xs, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
else assign NaNRes = Fmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
else if (`FPSIZES == 3)
else if (P.FPSIZES == 3)
always_comb
case (Fmt)
`FMT:
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
`FMT1:
if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
`FMT2:
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
default: NaNRes = {`FLEN{1'bx}};
P.FMT:
if(P.IEEE754) NaNRes = {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
else NaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
P.FMT1:
if(P.IEEE754) NaNRes = {{P.FLEN-P.LEN1{1'b1}}, Xs, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
else NaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
P.FMT2:
if(P.IEEE754) NaNRes = {{P.FLEN-P.LEN2{1'b1}}, Xs, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
else NaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
default: NaNRes = {P.FLEN{1'bx}};
endcase
else if (`FPSIZES == 4)
else if (P.FPSIZES == 4)
always_comb
case (Fmt)
2'h3:
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
if(P.IEEE754) NaNRes = {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
else NaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
2'h1:
if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
if(P.IEEE754) NaNRes = {{P.FLEN-P.D_LEN{1'b1}}, Xs, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
else NaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
2'h0:
if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
if(P.IEEE754) NaNRes = {{P.FLEN-P.S_LEN{1'b1}}, Xs, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
else NaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
2'h2:
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
if(P.IEEE754) NaNRes = {{P.FLEN-P.H_LEN{1'b1}}, Xs, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
else NaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
endcase
@ -155,6 +153,6 @@ module fcmp (
// - -0 = 0
// - inf = inf and -inf = -inf
// - return 0 if comparison with NaN (unordered)
assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
assign CmpIntRes = {(P.XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
endmodule

View File

@ -25,9 +25,8 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fctrl (
module fctrl import cvw::*; #(parameter cvw_t P) (
input logic clk,
input logic reset,
// input control signals
@ -49,7 +48,7 @@ module fctrl (
// opperation mux selections
output logic FCvtIntE, FCvtIntW, // convert to integer opperation
output logic [2:0] FrmM, // FP rounding mode
output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format
output logic [P.FMTBITS-1:0] FmtE, FmtM, // FP format
output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component
output logic FpLoadStoreM, // FP load or store instruction
output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit
@ -74,7 +73,7 @@ module fctrl (
logic [1:0] PostProcSelD; // select result in the post processing unit
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
logic [2:0] FrmD, FrmE; // FP rounding mode
logic [`FMTBITS-1:0] FmtD; // FP format
logic [P.FMTBITS-1:0] FmtD; // FP format
logic [1:0] Fmt, Fmt2; // format - before possible reduction
logic SupportedFmt; // is the format supported
logic SupportedFmt2; // is the source format supported for fp -> fp
@ -84,10 +83,10 @@ module fctrl (
assign Fmt = Funct7D[1:0];
assign Fmt2 = Rs2D[1:0]; // source format for fcvt fp->fp
assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & `D_SUPPORTED) |
(Fmt == 2'b10 & `ZFH_SUPPORTED) | (Fmt == 2'b11 & `Q_SUPPORTED));
assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & `D_SUPPORTED) |
(Fmt2 == 2'b10 & `ZFH_SUPPORTED) | (Fmt2 == 2'b11 & `Q_SUPPORTED));
assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & P.D_SUPPORTED) |
(Fmt == 2'b10 & P.ZFH_SUPPORTED) | (Fmt == 2'b11 & P.Q_SUPPORTED));
assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & P.D_SUPPORTED) |
(Fmt2 == 2'b10 & P.ZFH_SUPPORTED) | (Fmt2 == 2'b11 & P.Q_SUPPORTED));
// decode the instruction
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt
@ -102,15 +101,15 @@ module fctrl (
case(OpD)
7'b0000111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flw
3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld
3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq
3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh
endcase
7'b0100111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsw
3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd
3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq
3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh
endcase
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub
@ -227,14 +226,14 @@ module fctrl (
// 10 - half
// 11 - quad
if (`FPSIZES == 1)
if (P.FPSIZES == 1)
assign FmtD = 0;
else if (`FPSIZES == 2)begin
else if (P.FPSIZES == 2)begin
logic [1:0] FmtTmp;
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
assign FmtD = (`FMT == FmtTmp);
assign FmtD = (P.FMT == FmtTmp);
end
else if (`FPSIZES == 3|`FPSIZES == 4)
else if (P.FPSIZES == 3|P.FPSIZES == 4)
assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
// Enables indicate that a source register is used and may need stalls. Also indicate special cases for infinity or NaN.
@ -313,7 +312,7 @@ module fctrl (
assign Adr3D = InstrD[31:27];
// D/E pipleine register
flopenrc #(13+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
flopenrc #(13+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE});
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E});
@ -321,11 +320,11 @@ module fctrl (
flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE});
// Integer division on FPU divider
if (`M_SUPPORTED & `IDIV_ON_FPU) assign IDivStartE = IntDivE;
if (P.M_SUPPORTED & P.IDIV_ON_FPU) assign IDivStartE = IntDivE;
else assign IDivStartE = 0;
// E/M pipleine register
flopenrc #(13+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
flopenrc #(13+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE},
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM});

View File

@ -27,23 +27,21 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fcvt (
module fcvt import cvw::*; #(parameter cvw_t P) (
input logic Xs, // input's sign
input logic [`NE-1:0] Xe, // input's exponent
input logic [`NF:0] Xm, // input's fraction
input logic [`XLEN-1:0] Int, // integer input - from IEU
input logic [P.NE-1:0] Xe, // input's exponent
input logic [P.NF:0] Xm, // input's fraction
input logic [P.XLEN-1:0] Int, // integer input - from IEU
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
input logic ToInt, // is fp->int (since it's writting to the integer register)
input logic XZero, // is the input zero
input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half)
output logic [`NE:0] Ce, // the calculated expoent
output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by
input logic [P.FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half)
output logic [P.NE:0] Ce, // the calculated expoent
output logic [P.LOGCVTLEN-1:0] ShiftAmt, // how much to shift by
output logic ResSubnormUf,// does the result underflow or is subnormal
output logic Cs, // the result's sign
output logic IntZero, // is the integer zero?
output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder)
output logic [P.CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder)
);
// OpCtrls:
@ -56,16 +54,16 @@ module fcvt (
// bit 2 bit 1 bit 0
// for example: signed long -> single floating point has the OpCode 101
logic [`FMTBITS-1:0] OutFmt; // format of the output
logic [`XLEN-1:0] PosInt; // the positive integer input
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
logic [`NE-2:0] NewBias; // the bias of the final result
logic [`NE-1:0] OldExp; // the old exponent
logic [P.FMTBITS-1:0] OutFmt; // format of the output
logic [P.XLEN-1:0] PosInt; // the positive integer input
logic [P.XLEN-1:0] TrimInt; // integer trimmed to the correct size
logic [P.NE-2:0] NewBias; // the bias of the final result
logic [P.NE-1:0] OldExp; // the old exponent
logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder)
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
logic [P.CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder)
logic [P.LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
// seperate OpCtrl for code readability
@ -76,9 +74,9 @@ module fcvt (
// choose the ouptut format depending on the opperation
// - fp -> fp: OpCtrl contains the percision of the output
// - int -> fp: Fmt contains the percision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
if (P.FPSIZES == 2)
assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == P.FMT);
else if (P.FPSIZES == 3 | P.FPSIZES == 4)
assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0];
@ -89,7 +87,7 @@ module fcvt (
// 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
assign PosInt = Cs ? -Int : Int;
assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
assign TrimInt = {{P.XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
assign IntZero = ~|TrimInt;
///////////////////////////////////////////////////////////////////////////
@ -99,13 +97,13 @@ module fcvt (
// choose the input to the leading zero counter i.e. priority encoder
// int -> fp : | positive integer | 00000... (if needed) |
// fp -> fp : | fraction | 00000... (if needed) |
assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} :
{Xm, {`CVTLEN-`NF{1'b0}}};
assign LzcInFull = IntToFp ? {TrimInt, {P.CVTLEN-P.XLEN+1{1'b0}}} :
{Xm, {P.CVTLEN-P.NF{1'b0}}};
// used as shifter input in postprocessor
assign LzcIn = LzcInFull[`CVTLEN-1:0];
assign LzcIn = LzcInFull[P.CVTLEN-1:0];
lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
lzc #(P.CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
///////////////////////////////////////////////////////////////////////////
// exp calculations
@ -114,42 +112,42 @@ module fcvt (
// Select the bias of the output
// fp -> int : select 1
// ??? -> fp : pick the new bias depending on the output format
if (`FPSIZES == 1) begin
assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS);
if (P.FPSIZES == 1) begin
assign NewBias = ToInt ? (P.NE-1)'(1) : (P.NE-1)'(P.BIAS);
end else if (`FPSIZES == 2) begin
logic [`NE-2:0] NewBiasToFp;
assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
end else if (P.FPSIZES == 2) begin
logic [P.NE-2:0] NewBiasToFp;
assign NewBiasToFp = OutFmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
assign NewBias = ToInt ? (P.NE-1)'(1) : NewBiasToFp;
end else if (`FPSIZES == 3) begin
logic [`NE-2:0] NewBiasToFp;
end else if (P.FPSIZES == 3) begin
logic [P.NE-2:0] NewBiasToFp;
always_comb
case (OutFmt)
`FMT: NewBiasToFp = (`NE-1)'(`BIAS);
`FMT1: NewBiasToFp = (`NE-1)'(`BIAS1);
`FMT2: NewBiasToFp = (`NE-1)'(`BIAS2);
default: NewBiasToFp = {`NE-1{1'bx}};
P.FMT: NewBiasToFp = (P.NE-1)'(P.BIAS);
P.FMT1: NewBiasToFp = (P.NE-1)'(P.BIAS1);
P.FMT2: NewBiasToFp = (P.NE-1)'(P.BIAS2);
default: NewBiasToFp = {P.NE-1{1'bx}};
endcase
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
assign NewBias = ToInt ? (P.NE-1)'(1) : NewBiasToFp;
end else if (`FPSIZES == 4) begin
logic [`NE-2:0] NewBiasToFp;
end else if (P.FPSIZES == 4) begin
logic [P.NE-2:0] NewBiasToFp;
always_comb
case (OutFmt)
2'h3: NewBiasToFp = (`NE-1)'(`Q_BIAS);
2'h1: NewBiasToFp = (`NE-1)'(`D_BIAS);
2'h0: NewBiasToFp = (`NE-1)'(`S_BIAS);
2'h2: NewBiasToFp = (`NE-1)'(`H_BIAS);
2'h3: NewBiasToFp = (P.NE-1)'(P.Q_BIAS);
2'h1: NewBiasToFp = (P.NE-1)'(P.D_BIAS);
2'h0: NewBiasToFp = (P.NE-1)'(P.S_BIAS);
2'h2: NewBiasToFp = (P.NE-1)'(P.H_BIAS);
endcase
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
assign NewBias = ToInt ? (P.NE-1)'(1) : NewBiasToFp;
end
// select the old exponent
// int -> fp : largest bias + XLEN-1
// fp -> ??? : XExp
assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe;
assign OldExp = IntToFp ? (P.NE)'(P.BIAS)+(P.NE)'(P.XLEN-1) : Xe;
// calculate CalcExp
// fp -> fp :
@ -159,13 +157,13 @@ module fcvt (
// - correct the expoent when there is a normalization shift ( + LeadingZeros+1)
// - the plus 1 is built into the leading zeros by counting the leading zeroes in the mantissa rather than the fraction
// fp -> int : XExp - Largest Bias + 1 - (LeadingZeros+1)
// | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
// | P.XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
// process:
// - start
// | `XLEN zeros | Mantissa | 0's if nessisary |
// | P.XLEN zeros | Mantissa | 0's if nessisary |
//
// - shift left 1 (1)
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
// | P.XLEN-1 zeros |bit| frac | 0's if nessisary |
// . <- binary point
//
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
@ -185,13 +183,13 @@ module fcvt (
// - newBias to make the biased exponent
//
// oldexp - biasold - LeadingZeros + newbias
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros)} + {2'b0, NewBias};
assign Ce = {1'b0, OldExp} - (P.NE+1)'(P.BIAS) - {{P.NE-P.LOGCVTLEN+1{1'b0}}, (LeadingZeros)} + {2'b0, NewBias};
// find if the result is dnormal or underflows
// - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
// - can't underflow an integer to Fp conversion
assign ResSubnormUf = (~|Ce | Ce[`NE])&~XZero&~IntToFp;
assign ResSubnormUf = (~|Ce | Ce[P.NE])&~XZero&~IntToFp;
///////////////////////////////////////////////////////////////////////////
@ -211,8 +209,8 @@ module fcvt (
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
always_comb
if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}};
else if (ResSubnormUf) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0];
if(ToInt) ShiftAmt = Ce[P.LOGCVTLEN-1:0]&{P.LOGCVTLEN{~Ce[P.NE]}};
else if (ResSubnormUf) ShiftAmt = (P.LOGCVTLEN)'(P.NF-1)+Ce[P.LOGCVTLEN-1:0];
else ShiftAmt = LeadingZeros;
@ -227,7 +225,7 @@ module fcvt (
// - otherwise: the floating point input's sign
always_comb
if(IntToFp)
if(Int64) Cs = Int[`XLEN-1]&Signed;
if(Int64) Cs = Int[P.XLEN-1]&Signed;
else Cs = Int[31]&Signed;
else Cs = Xs;

View File

@ -26,15 +26,13 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrt(
module fdivsqrt import cvw::*; #(parameter cvw_t P) (
input logic clk,
input logic reset,
input logic [`FMTBITS-1:0] FmtE,
input logic [P.FMTBITS-1:0] FmtE,
input logic XsE,
input logic [`NF:0] XmE, YmE,
input logic [`NE-1:0] XeE, YeE,
input logic [P.NF:0] XmE, YmE,
input logic [P.NE-1:0] XeE, YeE,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
@ -42,39 +40,39 @@ module fdivsqrt(
input logic StallM,
input logic FlushE,
input logic SqrtE, SqrtM,
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic [2:0] Funct3E, Funct3M,
input logic IntDivE, W64E,
output logic DivStickyM,
output logic FDivBusyE, IFDivStartE, FDivDoneE,
output logic [`NE+1:0] QeM,
output logic [`DIVb:0] QmM,
output logic [`XLEN-1:0] FIntDivResultM
output logic [P.NE+1:0] QeM,
output logic [P.DIVb:0] QmM,
output logic [P.XLEN-1:0] FIntDivResultM
);
// Floating-point division and square root module, with optional integer division and remainder
// Computes X/Y, sqrt(X), A/B, or A%B
logic [`DIVb+3:0] WS, WC; // Partial remainder components
logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend)
logic [`DIVb+3:0] D; // Iterator Divisor
logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values
logic [`DIVb+1:0] FirstC; // Step tracker
logic [P.DIVb+3:0] WS, WC; // Partial remainder components
logic [P.DIVb+3:0] X; // Iterator Initial Value (from dividend)
logic [P.DIVb+3:0] D; // Iterator Divisor
logic [P.DIVb:0] FirstU, FirstUM; // Intermediate result values
logic [P.DIVb+1:0] FirstC; // Step tracker
logic Firstun; // Quotient selection
logic WZeroE; // Early termination flag
logic [`DURLEN-1:0] CyclesE; // FSM cycles
logic [P.DURLEN-1:0] CyclesE; // FSM cycles
logic SpecialCaseM; // Divide by zero, square root of negative, etc.
logic DivStartE; // Enable signal for flops during stall
// Integer div/rem signals
logic BZeroM; // Denominator is zero
logic IntDivM; // Integer operation
logic [`DIVBLEN:0] nM, mM; // Shift amounts
logic [P.DIVBLEN:0] nM, mM; // Shift amounts
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
logic [`XLEN-1:0] AM; // Original Numerator for postprocessor
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
logic ISpecialCaseE; // Integer div/remainder special cases
fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor
fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
.FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
// Int-specific
@ -82,18 +80,18 @@ module fdivsqrt(
.BZeroM, .nM, .mM, .AM,
.IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);
fdivsqrtfsm fdivsqrtfsm( // FSM
fdivsqrtfsm #(P) fdivsqrtfsm( // FSM
.clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
// Int-specific
.IDivStartE, .ISpecialCaseE, .IntDivE);
fdivsqrtiter fdivsqrtiter( // CSA Iterator
fdivsqrtiter #(P) fdivsqrtiter( // CSA Iterator
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D,
.FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor
fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
.SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
.QmM, .WZeroE, .DivStickyM,

View File

@ -26,51 +26,49 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtcycles(
input logic [`FMTBITS-1:0] FmtE,
module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] FmtE,
input logic SqrtE,
input logic IntDivE,
input logic [`DIVBLEN:0] nE,
output logic [`DURLEN-1:0] CyclesE
input logic [P.DIVBLEN:0] nE,
output logic [P.DURLEN-1:0] CyclesE
);
logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits
// DIVN = `NF+3
logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits
// DIVN = P.NF+3
// NS = NF + 1
// N = NS or NS+2 for div/sqrt.
/* verilator lint_off WIDTH */
if (`FPSIZES == 1)
assign Nf = `NF;
else if (`FPSIZES == 2)
if (P.FPSIZES == 1)
assign Nf = P.NF;
else if (P.FPSIZES == 2)
always_comb
case (FmtE)
1'b0: Nf = `NF1;
1'b1: Nf = `NF;
1'b0: Nf = P.NF1;
1'b1: Nf = P.NF;
endcase
else if (`FPSIZES == 3)
else if (P.FPSIZES == 3)
always_comb
case (FmtE)
`FMT: Nf = `NF;
`FMT1: Nf = `NF1;
`FMT2: Nf = `NF2;
P.FMT: Nf = P.NF;
P.FMT1: Nf = P.NF1;
P.FMT2: Nf = P.NF2;
endcase
else if (`FPSIZES == 4)
else if (P.FPSIZES == 4)
always_comb
case(FmtE)
`S_FMT: Nf = `S_NF;
`D_FMT: Nf = `D_NF;
`H_FMT: Nf = `H_NF;
`Q_FMT: Nf = `Q_NF;
P.S_FMT: Nf = P.S_NF;
P.D_FMT: Nf = P.D_NF;
P.H_FMT: Nf = P.H_NF;
P.Q_FMT: Nf = P.Q_NF;
endcase
always_comb begin
if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
if (`IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
else CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
end
/* verilator lint_on WIDTH */
endmodule
endmodule

View File

@ -26,49 +26,47 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtexpcalc(
input logic [`FMTBITS-1:0] Fmt,
input logic [`NE-1:0] Xe, Ye,
module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] Fmt,
input logic [P.NE-1:0] Xe, Ye,
input logic Sqrt,
input logic XZero,
input logic [`DIVBLEN:0] ell, m,
output logic [`NE+1:0] Qe
input logic [P.DIVBLEN:0] ell, m,
output logic [P.NE+1:0] Qe
);
logic [`NE-2:0] Bias;
logic [`NE+1:0] SXExp;
logic [`NE+1:0] SExp;
logic [`NE+1:0] DExp;
logic [P.NE-2:0] Bias;
logic [P.NE+1:0] SXExp;
logic [P.NE+1:0] SExp;
logic [P.NE+1:0] DExp;
if (`FPSIZES == 1) begin
assign Bias = (`NE-1)'(`BIAS);
if (P.FPSIZES == 1) begin
assign Bias = (P.NE-1)'(P.BIAS);
end else if (`FPSIZES == 2) begin
assign Bias = Fmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
end else if (P.FPSIZES == 2) begin
assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
end else if (`FPSIZES == 3) begin
end else if (P.FPSIZES == 3) begin
always_comb
case (Fmt)
`FMT: Bias = (`NE-1)'(`BIAS);
`FMT1: Bias = (`NE-1)'(`BIAS1);
`FMT2: Bias = (`NE-1)'(`BIAS2);
P.FMT: Bias = (P.NE-1)'(P.BIAS);
P.FMT1: Bias = (P.NE-1)'(P.BIAS1);
P.FMT2: Bias = (P.NE-1)'(P.BIAS2);
default: Bias = 'x;
endcase
end else if (`FPSIZES == 4) begin
end else if (P.FPSIZES == 4) begin
always_comb
case (Fmt)
2'h3: Bias = (`NE-1)'(`Q_BIAS);
2'h1: Bias = (`NE-1)'(`D_BIAS);
2'h0: Bias = (`NE-1)'(`S_BIAS);
2'h2: Bias = (`NE-1)'(`H_BIAS);
2'h3: Bias = (P.NE-1)'(P.Q_BIAS);
2'h1: Bias = (P.NE-1)'(P.D_BIAS);
2'h0: Bias = (P.NE-1)'(P.S_BIAS);
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
endcase
end
assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS);
assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
// correct exponent for subnormal input's normalization shifts
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias});
assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias});
assign Qe = Sqrt ? SExp : DExp;
endmodule

View File

@ -26,14 +26,12 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtfgen2 (
module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) (
input logic up, uz,
input logic [`DIVb+3:0] C, U, UM,
output logic [`DIVb+3:0] F
input logic [P.DIVb+3:0] C, U, UM,
output logic [P.DIVb+3:0] F
);
logic [`DIVb+3:0] FP, FN, FZ;
logic [P.DIVb+3:0] FP, FN, FZ;
// Generate for both positive and negative bits
assign FP = ~(U << 1) & C;

View File

@ -26,14 +26,12 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtfgen4 (
module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) (
input logic [3:0] udigit,
input logic [`DIVb+3:0] C, U, UM,
output logic [`DIVb+3:0] F
input logic [P.DIVb+3:0] C, U, UM,
output logic [P.DIVb+3:0] F
);
logic [`DIVb+3:0] F2, F1, F0, FN1, FN2;
logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2;
// Generate for both positive and negative bits
assign F2 = (~U << 2) & (C << 2);
@ -49,4 +47,4 @@ module fdivsqrtfgen4 (
else if (udigit[1]) F = FN1;
else if (udigit[0]) F = FN2;
else F = F0;
endmodule
endmodule

View File

@ -26,9 +26,7 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtfsm(
module fdivsqrtfsm import cvw::*; #(parameter cvw_t P) (
input logic clk, reset,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
@ -39,7 +37,7 @@ module fdivsqrtfsm(
input logic StallM, FlushE,
input logic IntDivE,
input logic ISpecialCaseE,
input logic [`DURLEN-1:0] CyclesE,
input logic [P.DURLEN-1:0] CyclesE,
output logic IFDivStartE,
output logic FDivBusyE, FDivDoneE,
output logic SpecialCaseM
@ -49,16 +47,16 @@ module fdivsqrtfsm(
statetype state;
logic SpecialCaseE, FSpecialCaseE;
logic [`DURLEN-1:0] step;
logic [P.DURLEN-1:0] step;
// FDivStartE and IDivStartE come from fctrl, reflecitng the start of floating-point and possibly integer division
assign IFDivStartE = (FDivStartE | (IDivStartE & `IDIV_ON_FPU)) & (state == IDLE) & ~StallM;
assign IFDivStartE = (FDivStartE | (IDivStartE & P.IDIV_ON_FPU)) & (state == IDLE) & ~StallM;
assign FDivDoneE = (state == DONE);
assign FDivBusyE = (state == BUSY) | IFDivStartE;
// terminate immediately on special cases
assign FSpecialCaseE = XZeroE | | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE;
if (`IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
else assign SpecialCaseE = FSpecialCaseE;
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
@ -78,4 +76,4 @@ module fdivsqrtfsm(
end
end
endmodule
endmodule

View File

@ -26,38 +26,36 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtiter(
module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
input logic clk,
input logic IFDivStartE,
input logic FDivBusyE,
input logic SqrtE,
input logic [`DIVb+3:0] X, D,
output logic [`DIVb:0] FirstU, FirstUM,
output logic [`DIVb+1:0] FirstC,
input logic [P.DIVb+3:0] X, D,
output logic [P.DIVb:0] FirstU, FirstUM,
output logic [P.DIVb+1:0] FirstC,
output logic Firstun,
output logic [`DIVb+3:0] FirstWS, FirstWC
output logic [P.DIVb+3:0] FirstWS, FirstWC
);
/* verilator lint_off UNOPTFLAT */
logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b
logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b
logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
logic [`DIVb:0] UM[`DIVCOPIES:0]; // U1.b
logic [`DIVb:0] UNext[`DIVCOPIES-1:0]; // U1.b
logic [`DIVb:0] UMNext[`DIVCOPIES-1:0]; // U1.b
logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
logic [`DIVb+1:0] initC; // Q2.b
logic [`DIVCOPIES-1:0] un;
logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.b
logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.b
logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.b
logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.b
logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.b
logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.b
logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.b
logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.b
logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.b
logic [P.DIVb+1:0] initC; // Q2.b
logic [P.DIVCOPIES-1:0] un;
logic [`DIVb+3:0] WSN, WCN; // Q4.b
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b
logic [`DIVb+1:0] NextC;
logic [`DIVb:0] UMux, UMMux;
logic [`DIVb:0] initU, initUM;
logic [P.DIVb+3:0] WSN, WCN; // Q4.b
logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.b
logic [P.DIVb+1:0] NextC;
logic [P.DIVb:0] UMux, UMMux;
logic [P.DIVb:0] initU, initUM;
/* verilator lint_on UNOPTFLAT */
// Top Muxes and Registers
@ -66,36 +64,36 @@ module fdivsqrtiter(
// are fed back for the next iteration.
// Residual WS/SC registers/initializaiton mux
mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN);
mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN);
flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
mux2 #(P.DIVb+4) wsmux(WS[P.DIVCOPIES], X, IFDivStartE, WSN);
mux2 #(P.DIVb+4) wcmux(WC[P.DIVCOPIES], '0, IFDivStartE, WCN);
flopen #(P.DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
flopen #(P.DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
// UOTFC Result U and UM registers/initialization mux
// Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 otherwise
assign initU = {SqrtE, {(`DIVb){1'b0}}};
assign initUM = {~SqrtE, {(`DIVb){1'b0}}};
mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
flopen #(`DIVb+1) UReg(clk, FDivBusyE, UMux, U[0]);
flopen #(`DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]);
assign initU = {SqrtE, {(P.DIVb){1'b0}}};
assign initUM = {~SqrtE, {(P.DIVb){1'b0}}};
mux2 #(P.DIVb+1) Umux(UNext[P.DIVCOPIES-1], initU, IFDivStartE, UMux);
mux2 #(P.DIVb+1) UMmux(UMNext[P.DIVCOPIES-1], initUM, IFDivStartE, UMMux);
flopen #(P.DIVb+1) UReg(clk, FDivBusyE, UMux, U[0]);
flopen #(P.DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]);
// C register/initialization mux
// Initialize C to -1 for sqrt and -R for division
logic [1:0] initCUpper;
if(`RADIX == 4) begin
if(P.RADIX == 4) begin
mux2 #(2) cuppermux4(2'b00, 2'b11, SqrtE, initCUpper);
end else begin
mux2 #(2) cuppermux2(2'b10, 2'b11, SqrtE, initCUpper);
end
assign initC = {initCUpper, {`DIVb{1'b0}}};
mux2 #(`DIVb+2) cmux(C[`DIVCOPIES], initC, IFDivStartE, NextC);
flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);
assign initC = {initCUpper, {P.DIVb{1'b0}}};
mux2 #(P.DIVb+2) cmux(C[P.DIVCOPIES], initC, IFDivStartE, NextC);
flopen #(P.DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);
// Divisor Selections
assign DBar = ~D; // for -D
if(`RADIX == 4) begin : d2
if(P.RADIX == 4) begin : d2
assign D2 = D << 1; // for 2D, only used in R4
assign DBar2 = ~D2; // for -2D, only used in R4
end
@ -103,15 +101,15 @@ module fdivsqrtiter(
// k=DIVCOPIES of the recurrence logic
genvar i;
generate
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : iterations
if (`RADIX == 2) begin: stage
fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtE,
for(i=0; $unsigned(i)<P.DIVCOPIES; i++) begin : iterations
if (P.RADIX == 2) begin: stage
fdivsqrtstage2 #(P) fdivsqrtstage(.D, .DBar, .SqrtE,
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
end else begin: stage
logic j1;
assign j1 = (i == 0 & ~C[0][`DIVb-1]);
fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
assign j1 = (i == 0 & ~C[0][P.DIVb-1]);
fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
end

View File

@ -26,51 +26,49 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtpostproc(
module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
input logic clk, reset,
input logic StallM,
input logic [`DIVb+3:0] WS, WC,
input logic [`DIVb+3:0] D,
input logic [`DIVb:0] FirstU, FirstUM,
input logic [`DIVb+1:0] FirstC,
input logic [P.DIVb+3:0] WS, WC,
input logic [P.DIVb+3:0] D,
input logic [P.DIVb:0] FirstU, FirstUM,
input logic [P.DIVb+1:0] FirstC,
input logic SqrtE,
input logic Firstun, SqrtM, SpecialCaseM, NegQuotM,
input logic [`XLEN-1:0] AM,
input logic [P.XLEN-1:0] AM,
input logic RemOpM, ALTBM, BZeroM, AsM, W64M,
input logic [`DIVBLEN:0] nM, mM,
output logic [`DIVb:0] QmM,
input logic [P.DIVBLEN:0] nM, mM,
output logic [P.DIVb:0] QmM,
output logic WZeroE,
output logic DivStickyM,
output logic [`XLEN-1:0] FIntDivResultM
output logic [P.XLEN-1:0] FIntDivResultM
);
logic [`DIVb+3:0] W, Sum;
logic [`DIVb:0] PreQmM;
logic [P.DIVb+3:0] W, Sum;
logic [P.DIVb:0] PreQmM;
logic NegStickyM;
logic weq0E, WZeroM;
logic [`XLEN-1:0] IntDivResultM;
logic [P.XLEN-1:0] IntDivResultM;
//////////////////////////
// Execute Stage: Detect early termination for an exact result
//////////////////////////
// check for early termination on an exact result.
aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0E);
aplusbeq0 #(P.DIVb+4) wspluswceq0(WS, WC, weq0E);
if (`RADIX == 2) begin: R2EarlyTerm
logic [`DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;
logic [`DIVb+2:0] FirstK;
if (P.RADIX == 2) begin: R2EarlyTerm
logic [P.DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;
logic [P.DIVb+2:0] FirstK;
logic wfeq0E;
logic [`DIVb+3:0] WCF, WSF;
logic [P.DIVb+3:0] WCF, WSF;
assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));
assign FZeroSqrtE = {FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root
assign FZeroSqrtE = {FirstUM[P.DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root
assign FZeroDivE = D << 1; // F for divide
mux2 #(`DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
csa #(`DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
mux2 #(P.DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
csa #(P.DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
aplusbeq0 #(P.DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
assign WZeroE = weq0E|(wfeq0E & Firstun);
end else begin
assign WZeroE = weq0E;
@ -91,27 +89,27 @@ module fdivsqrtpostproc(
// Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed.
assign Sum = WC + WS;
assign NegStickyM = Sum[`DIVb+3];
mux2 #(`DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
mux2 #(`DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
assign NegStickyM = Sum[P.DIVb+3];
mux2 #(P.DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
mux2 #(P.DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
// Integer quotient or remainder correctoin, normalization, and special cases
if (`IDIV_ON_FPU) begin:intpostproc // Int supported
logic [`DIVBLEN:0] NormShiftM;
logic [`DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
logic signed [`DIVb+3:0] PreResultM, PreIntResultM;
if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
logic [P.DIVBLEN:0] NormShiftM;
logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
logic signed [P.DIVb+3:0] PreResultM, PreIntResultM;
assign W = $signed(Sum) >>> `LOGR;
assign W = $signed(Sum) >>> P.LOGR;
assign UnsignedQuotM = {3'b000, PreQmM};
// Integer remainder: sticky and sign correction muxes
mux2 #(`DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM);
mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
mux2 #(`DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
mux2 #(P.DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM);
mux2 #(P.DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
// Select quotient or remainder and do normalization shift
mux2 #(`DIVBLEN+1) normshiftmux(((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))), (mM + (`DIVBLEN+1)'(`DIVa)), RemOpM, NormShiftM);
mux2 #(`DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(P.DIVa)), RemOpM, NormShiftM);
mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
assign PreIntResultM = $signed(PreResultM >>> NormShiftM);
// special case logic
@ -119,18 +117,18 @@ module fdivsqrtpostproc(
always_comb
if (BZeroM) begin // Divide by zero
if (RemOpM) IntDivResultM = AM;
else IntDivResultM = {(`XLEN){1'b1}};
else IntDivResultM = {(P.XLEN){1'b1}};
end else if (ALTBM) begin // Numerator is zero
if (RemOpM) IntDivResultM = AM;
else IntDivResultM = '0;
end else IntDivResultM = PreIntResultM[`XLEN-1:0];
end else IntDivResultM = PreIntResultM[P.XLEN-1:0];
// sign extend result for W64
if (`XLEN==64) begin
mux2 #(64) resmux(IntDivResultM[`XLEN-1:0],
{{(`XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
if (P.XLEN==64) begin
mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0],
{{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
W64M, FIntDivResultM);
end else
assign FIntDivResultM = IntDivResultM[`XLEN-1:0];
assign FIntDivResultM = IntDivResultM[P.XLEN-1:0];
end
endmodule

View File

@ -26,56 +26,54 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtpreproc (
module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
input logic clk,
input logic IFDivStartE,
input logic [`NF:0] Xm, Ym,
input logic [`NE-1:0] Xe, Ye,
input logic [`FMTBITS-1:0] FmtE,
input logic [P.NF:0] Xm, Ym,
input logic [P.NE-1:0] Xe, Ye,
input logic [P.FMTBITS-1:0] FmtE,
input logic SqrtE,
input logic XZeroE,
input logic [2:0] Funct3E,
output logic [`NE+1:0] QeM,
output logic [`DIVb+3:0] X, D,
output logic [P.NE+1:0] QeM,
output logic [P.DIVb+3:0] X, D,
// Int-specific
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic IntDivE, W64E,
output logic ISpecialCaseE,
output logic [`DURLEN-1:0] CyclesE,
output logic [`DIVBLEN:0] nM, mM,
output logic [P.DURLEN-1:0] CyclesE,
output logic [P.DIVBLEN:0] nM, mM,
output logic NegQuotM, ALTBM, IntDivM, W64M,
output logic AsM, BZeroM,
output logic [`XLEN-1:0] AM
output logic [P.XLEN-1:0] AM
);
logic [`DIVb-1:0] Xfract, Dfract;
logic [`DIVb:0] PreSqrtX;
logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
logic [`NE+1:0] QeE; // Quotient Exponent (FP only)
logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
logic [P.DIVb-1:0] Xfract, Dfract;
logic [P.DIVb:0] PreSqrtX;
logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
logic [P.NE+1:0] QeE; // Quotient Exponent (FP only)
logic [P.DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
logic NumerZeroE; // Numerator is zero (X or A)
logic AZeroE, BZeroE; // A or B is Zero for integer division
logic SignedDivE; // signed division
logic NegQuotE; // Integer quotient is negative
logic AsE, BsE; // Signs of integer inputs
logic [`XLEN-1:0] AE; // input A after W64 adjustment
logic [P.XLEN-1:0] AE; // input A after W64 adjustment
logic ALTBE;
//////////////////////////////////////////////////////
// Integer Preprocessing
//////////////////////////////////////////////////////
if (`IDIV_ON_FPU) begin:intpreproc // Int Supported
logic [`XLEN-1:0] BE, PosA, PosB;
if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
logic [P.XLEN-1:0] BE, PosA, PosB;
// Extract inputs, signs, zero, depending on W64 mode if applicable
assign SignedDivE = ~Funct3E[0];
// Source handling
if (`XLEN==64) begin // 64-bit, supports W64
if (P.XLEN==64) begin // 64-bit, supports W64
mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
end else begin // 32 bits only
@ -84,21 +82,21 @@ module fdivsqrtpreproc (
end
assign AZeroE = ~(|AE);
assign BZeroE = ~(|BE);
assign AsE = AE[`XLEN-1] & SignedDivE;
assign BsE = BE[`XLEN-1] & SignedDivE;
assign AsE = AE[P.XLEN-1] & SignedDivE;
assign BsE = BE[P.XLEN-1] & SignedDivE;
assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative
// Force integer inputs to be postiive
mux2 #(`XLEN) posamux(AE, -AE, AsE, PosA);
mux2 #(`XLEN) posbmux(BE, -BE, BsE, PosB);
mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
// Select integer or floating point inputs
mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX);
mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD);
mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX);
mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD);
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
end else begin // Int not supported
assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}};
assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}};
assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}};
assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}};
assign NumerZeroE = XZeroE;
end
@ -107,8 +105,8 @@ module fdivsqrtpreproc (
//////////////////////////////////////////////////////
// count leading zeros for Subnorm FP and to normalize integer inputs
lzc #(`DIVb) lzcX (IFX, ell);
lzc #(`DIVb) lzcY (IFD, mE);
lzc #(P.DIVb) lzcX (IFX, ell);
lzc #(P.DIVb) lzcY (IFD, mE);
// Normalization shift: shift off leading one
assign Xfract = (IFX << ell) << 1;
@ -122,28 +120,28 @@ module fdivsqrtpreproc (
// and nE (number of fractional digits)
//////////////////////////////////////////////////////
if (`IDIV_ON_FPU) begin:intrightshift // Int Supported
logic [`DIVBLEN:0] ZeroDiff, p;
if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
logic [P.DIVBLEN:0] ZeroDiff, p;
// calculate number of fractional bits p
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B (A has more leading zeros)
mux2 #(`DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros)
mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
// Integer special cases (terminate immediately)
assign ISpecialCaseE = BZeroE | ALTBE;
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
if (`LOGRK > 0) begin // more than 1 bit per cycle
logic [`LOGRK-1:0] IntTrunc, RightShiftX;
logic [`DIVBLEN:0] TotalIntBits, IntSteps;
if (P.LOGRK > 0) begin // more than 1 bit per cycle
logic [P.LOGRK-1:0] IntTrunc, RightShiftX;
logic [P.DIVBLEN:0] TotalIntBits, IntSteps;
/* verilator lint_off WIDTH */
assign TotalIntBits = `LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
assign IntTrunc = TotalIntBits % `RK; // Truncation check for ceiling operator
assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div
assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits
assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount
assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator
assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div
assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits
assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
/* verilator lint_on WIDTH */
end else begin // radix 2 1 copy doesn't require shifting
@ -167,42 +165,42 @@ module fdivsqrtpreproc (
assign DivX = {3'b000, ~NumerZeroE, Xfract};
// Sqrt is initialized on step one as R(X-1), so depends on Radix
mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
//////////////////////////////////////////////////////
// Selet integer or floating-point operands
//////////////////////////////////////////////////////
if (`IDIV_ON_FPU) begin
mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
if (P.IDIV_ON_FPU) begin
mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
end else begin
assign X = PreShiftX;
end
// Divisior register
flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
// Floating-point exponent
fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM);
// Number of FSM cycles (to FSM)
fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
if (`IDIV_ON_FPU) begin:intpipelineregs
if (P.IDIV_ON_FPU) begin:intpipelineregs
// pipeline registers
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM);
if (`XLEN==64)
flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
if (P.XLEN==64)
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
end

View File

@ -26,8 +26,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtqsel2 (
input logic [3:0] ps, pc,
output logic up, uz, un

View File

@ -26,8 +26,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtqsel4 (
input logic [2:0] Dmsbs,
input logic [4:0] Smsbs,

View File

@ -26,8 +26,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtqsel4cmp (
input logic [2:0] Dmsbs,
input logic [4:0] Smsbs,

View File

@ -26,27 +26,26 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
/* verilator lint_off UNOPTFLAT */
module fdivsqrtstage2 (
input logic [`DIVb+3:0] D, DBar,
input logic [`DIVb:0] U, UM,
input logic [`DIVb+3:0] WS, WC,
input logic [`DIVb+1:0] C,
module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
input logic [P.DIVb+3:0] D, DBar,
input logic [P.DIVb:0] U, UM,
input logic [P.DIVb+3:0] WS, WC,
input logic [P.DIVb+1:0] C,
input logic SqrtE,
output logic un,
output logic [`DIVb+1:0] CNext,
output logic [`DIVb:0] UNext, UMNext,
output logic [`DIVb+3:0] WSNext, WCNext
output logic [P.DIVb+1:0] CNext,
output logic [P.DIVb:0] UNext, UMNext,
output logic [P.DIVb+3:0] WSNext, WCNext
);
/* verilator lint_on UNOPTFLAT */
logic [`DIVb+3:0] Dsel;
logic [P.DIVb+3:0] Dsel;
logic up, uz;
logic [`DIVb+3:0] F;
logic [`DIVb+3:0] AddIn;
logic [`DIVb+3:0] WSA, WCA;
logic [P.DIVb+3:0] F;
logic [P.DIVb+3:0] AddIn;
logic [P.DIVb+3:0] WSA, WCA;
// Qmient Selection logic
// Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
@ -56,10 +55,10 @@ module fdivsqrtstage2 (
// 0000 = 0
// 0010 = -1
// 0001 = -2
fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un);
fdivsqrtqsel2 qsel2(WS[P.DIVb+3:P.DIVb], WC[P.DIVb+3:P.DIVb], up, uz, un);
// Sqrt F generation. Extend C, U, UM to Q4.k
fdivsqrtfgen2 fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
fdivsqrtfgen2 #(P) fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
// Divisor multiple
always_comb
@ -69,16 +68,16 @@ module fdivsqrtstage2 (
// Partial Product Generation
// WSA, WCA = WS + WC - qD
mux2 #(`DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
mux2 #(P.DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
csa #(P.DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
assign WSNext = WSA << 1;
assign WCNext = WCA << 1;
// Shift thermometer code C
assign CNext = {1'b1, C[`DIVb+1:1]};
assign CNext = {1'b1, C[P.DIVb+1:1]};
// Unified On-The-Fly Converter to accumulate result
fdivsqrtuotfc2 uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext);
fdivsqrtuotfc2 #(P) uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext);
endmodule

View File

@ -26,29 +26,27 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtstage4 (
input logic [`DIVb+3:0] D, DBar, D2, DBar2,
input logic [`DIVb:0] U,UM,
input logic [`DIVb+3:0] WS, WC,
input logic [`DIVb+1:0] C,
module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) (
input logic [P.DIVb+3:0] D, DBar, D2, DBar2,
input logic [P.DIVb:0] U,UM,
input logic [P.DIVb+3:0] WS, WC,
input logic [P.DIVb+1:0] C,
input logic SqrtE, j1,
output logic [`DIVb+1:0] CNext,
output logic [P.DIVb+1:0] CNext,
output logic un,
output logic [`DIVb:0] UNext, UMNext,
output logic [`DIVb+3:0] WSNext, WCNext
output logic [P.DIVb:0] UNext, UMNext,
output logic [P.DIVb+3:0] WSNext, WCNext
);
logic [`DIVb+3:0] Dsel;
logic [P.DIVb+3:0] Dsel;
logic [3:0] udigit;
logic [`DIVb+3:0] F;
logic [`DIVb+3:0] AddIn;
logic [P.DIVb+3:0] F;
logic [P.DIVb+3:0] AddIn;
logic [4:0] Smsbs;
logic [2:0] Dmsbs;
logic [7:0] WCmsbs, WSmsbs;
logic CarryIn;
logic [`DIVb+3:0] WSA, WCA;
logic [P.DIVb+3:0] WSA, WCA;
// Digit Selection logic
// u encoding:
@ -57,16 +55,16 @@ module fdivsqrtstage4 (
// 0000 = 0
// 0010 = -1
// 0001 = -2
assign Smsbs = U[`DIVb:`DIVb-4];
assign Dmsbs = D[`DIVb-1:`DIVb-3];
assign WCmsbs = WC[`DIVb+3:`DIVb-4];
assign WSmsbs = WS[`DIVb+3:`DIVb-4];
assign Smsbs = U[P.DIVb:P.DIVb-4];
assign Dmsbs = D[P.DIVb-1:P.DIVb-3];
assign WCmsbs = WC[P.DIVb+3:P.DIVb-4];
assign WSmsbs = WS[P.DIVb+3:P.DIVb-4];
fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
assign un = 1'b0; // unused for radix 4
// F generation logic
fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
fdivsqrtfgen4 #(P) fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
// Divisor multiple logic
always_comb
@ -83,15 +81,15 @@ module fdivsqrtstage4 (
// {WS, WC}}Next = (WS + WC - qD or F) << 2
assign AddIn = SqrtE ? F : Dsel;
assign CarryIn = ~SqrtE & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D
csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
csa #(P.DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
assign WSNext = WSA << 2;
assign WCNext = WCA << 2;
// Shift thermometer code C
assign CNext = {2'b11, C[`DIVb+1:2]};
assign CNext = {2'b11, C[P.DIVb+1:2]};
// On-the-fly converter to accumulate result
fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);
fdivsqrtuotfc4 #(P) fdivsqrtuotfc4(.udigit, .C(CNext[P.DIVb:0]), .U, .UM, .UNext, .UMNext);
endmodule

View File

@ -26,22 +26,20 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
///////////////////////////////
// Unified OTFC, Radix 2 //
///////////////////////////////
module fdivsqrtuotfc2(
module fdivsqrtuotfc2 import cvw::*; #(parameter cvw_t P) (
input logic up, un,
input logic [`DIVb+1:0] C,
input logic [`DIVb:0] U, UM,
output logic [`DIVb:0] UNext, UMNext
input logic [P.DIVb+1:0] C,
input logic [P.DIVb:0] U, UM,
output logic [P.DIVb:0] UNext, UMNext
);
// The on-the-fly converter transfers the divsqrt
// bits to the quotient as they come.
logic [`DIVb:0] K;
logic [P.DIVb:0] K;
assign K = (C[`DIVb:0] & ~(C[`DIVb:0] << 1)); // Thermometer to one hot encoding
assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding
always_comb begin
if (up) begin

View File

@ -26,19 +26,17 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtuotfc4(
module fdivsqrtuotfc4 import cvw::*; #(parameter cvw_t P) (
input logic [3:0] udigit,
input logic [`DIVb:0] U, UM,
input logic [`DIVb:0] C,
output logic [`DIVb:0] UNext, UMNext
input logic [P.DIVb:0] U, UM,
input logic [P.DIVb:0] C,
output logic [P.DIVb:0] UNext, UMNext
);
// The on-the-fly converter transfers the square root
// bits to the quotient as they come.
// Use this otfc for division and square root.
logic [`DIVb:0] K1, K2, K3;
logic [P.DIVb:0] K1, K2, K3;
assign K1 = (C&~(C << 1)); // K
assign K2 = ((C << 1)&~(C << 2)); // 2K
assign K3 = (C & ~(C << 2)); // 3K

View File

@ -26,8 +26,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fhazard(
input logic [4:0] Adr1D, Adr2D, Adr3D, // read data adresses
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses

View File

@ -26,22 +26,20 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fma(
module fma import cvw::*; #(parameter cvw_t P) (
input logic Xs, Ys, Zs, // input's signs
input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format
input logic [P.NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
input logic [P.NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format
input logic XZero, YZero, ZZero, // is the input zero
input logic [2:0] OpCtrl, // operation control
output logic ASticky, // sticky bit that is calculated during alignment
output logic [3*`NF+3:0] Sm, // the positive sum's significand
output logic [3*P.NF+3:0] Sm, // the positive sum's significand
output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A)
output logic As, // the aligned addend's sign (modified Z sign for other opperations)
output logic Ps, // the product's sign
output logic Ss, // the sum's sign
output logic [`NE+1:0] Se, // the sum's exponent
output logic [$clog2(3*`NF+5)-1:0] SCnt // normalization shift count
output logic [P.NE+1:0] Se, // the sum's exponent
output logic [$clog2(3*P.NF+5)-1:0] SCnt // normalization shift count
);
// OpCtrl:
@ -54,12 +52,12 @@ module fma(
// 110 - add
// 111 - sub
logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format
logic [3*`NF+3:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF)
logic [3*`NF+3:0] AmInv; // aligned addend's mantissa possibly inverted
logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf)
logic [2*P.NF+1:0] Pm; // the product's significand in U(2.2Nf) format
logic [3*P.NF+3:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF)
logic [3*P.NF+3:0] AmInv; // aligned addend's mantissa possibly inverted
logic [2*P.NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf)
logic KillProd; // set the product to zero before addition if the product is too small to matter
logic [`NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
logic [P.NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
///////////////////////////////////////////////////////////////////////////////
// Calculate the product
@ -71,10 +69,10 @@ module fma(
// calculate the product's exponent
fmaexpadd expadd(.Xe, .Ye, .XZero, .YZero, .Pe);
fmaexpadd #(P) expadd(.Xe, .Ye, .XZero, .YZero, .Pe);
// multiplication of the mantissa's
fmamult mult(.Xm, .Ym, .Pm);
fmamult #(P) mult(.Xm, .Ym, .Pm);
// calculate the signs and take the opperation into account
fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA);
@ -82,15 +80,15 @@ module fma(
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd);
fmaalign #(P) align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd);
// ///////////////////////////////////////////////////////////////////////////////
// // Addition/LZA
// ///////////////////////////////////////////////////////////////////////////////
fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);
fmaadd #(P) add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);
fmalza #(3*`NF+4) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt);
fmalza #(3*P.NF+4, P.NF) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt);
endmodule

View File

@ -26,25 +26,23 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmaadd(
input logic [3*`NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
input logic [`NE-1:0] Ze, // exponent of Z
module fmaadd import cvw::*; #(parameter cvw_t P) (
input logic [3*P.NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
input logic [P.NE-1:0] Ze, // exponent of Z
input logic Ps, // the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
input logic [`NE+1:0] Pe, // product's exponet
input logic [2*`NF+1:0] Pm, // the product's mantissa
input logic [P.NE+1:0] Pe, // product's exponet
input logic [2*P.NF+1:0] Pm, // the product's mantissa
input logic InvA, // invert the aligned addend
input logic KillProd, // should the product be set to 0
input logic ASticky, // Alighed addend's sticky bit
output logic [3*`NF+3:0] AmInv, // aligned addend possibly inverted
output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed
output logic [3*P.NF+3:0] AmInv, // aligned addend possibly inverted
output logic [2*P.NF+1:0] PmKilled, // the product's mantissa possibly killed
output logic Ss, // sum's sign
output logic [`NE+1:0] Se, // sum's exponent
output logic [3*`NF+3:0] Sm // the positive sum
output logic [P.NE+1:0] Se, // sum's exponent
output logic [3*P.NF+3:0] Sm // the positive sum
);
logic [3*`NF+3:0] PreSum, NegPreSum; // possibly negitive sum
logic [3*P.NF+3:0] PreSum, NegPreSum; // possibly negitive sum
logic NegSum; // was the sum negitive
///////////////////////////////////////////////////////////////////////////////
@ -52,9 +50,9 @@ module fmaadd(
///////////////////////////////////////////////////////////////////////////////
// Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition
assign AmInv = {3*`NF+4{InvA}}^Am;
assign AmInv = {3*P.NF+4{InvA}}^Am;
// Kill the product if the product is too small to effect the addition (determined in fma1.sv)
assign PmKilled = {2*`NF+2{~KillProd}}&Pm;
assign PmKilled = {2*P.NF+2{~KillProd}}&Pm;
// Do the addition
// - calculate a positive and negitive sum in parallel
// if there was a small negitive number killed in the alignment stage one needs to be subtracted from the sum
@ -63,8 +61,8 @@ module fmaadd(
// addend - prod where product is killed (and not exactly zero) then don't add +1 from negation
// ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA
// in this case this result is only ever selected when InvA=1 so we can remove &InvA
assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*`NF+4{1'b0}}, (~ASticky|KillProd)&InvA};
assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*`NF+2)'(0), ~ASticky|~KillProd, 1'b0};
assign {NegSum, PreSum} = {{P.NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*P.NF+4{1'b0}}, (~ASticky|KillProd)&InvA};
assign NegPreSum = Am + {{P.NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*P.NF+2)'(0), ~ASticky|~KillProd, 1'b0};
// Choose the positive sum and accompanying LZA result.
assign Sm = NegSum ? NegPreSum : PreSum;

View File

@ -27,20 +27,18 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmaalign(
input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
input logic [`NF:0] Zm, // significand in U(0.NF) format]
module fmaalign import cvw::*; #(parameter cvw_t P) (
input logic [P.NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
input logic [P.NF:0] Zm, // significand in U(0.NF) format]
input logic XZero, YZero, ZZero,// is the input zero
output logic [3*`NF+3:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
output logic [3*P.NF+3:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
output logic ASticky, // Sticky bit calculated from the aliged addend
output logic KillProd // should the product be set to zero
);
logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*`NF+3:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*`NF+3:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
logic [P.NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*P.NF+3:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*P.NF+3:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
logic KillZ; // should the addend be killed
///////////////////////////////////////////////////////////////////////////////
@ -51,16 +49,16 @@ module fmaalign(
// - negitive means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+2) - {2'b0, Ze};
assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)} + (P.NE+2)'(P.NF+2) - {2'b0, Ze};
// Defualt Addition with only inital left shift
// | 53'b0 | 106'b(product) | 1'b0 |
// | addnend |
assign ZmPreshifted = {Zm,(3*`NF+3)'(0)};
assign ZmPreshifted = {Zm,(3*P.NF+3)'(0)};
assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(3));
assign KillProd = (ACnt[P.NE+1]&~ZZero)|XZero|YZero;
assign KillZ = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(3));
always_comb begin
// If the product is too small to effect the sum, kill the product
@ -68,7 +66,7 @@ module fmaalign(
// | 53'b0 | 106'b(product) | 1'b0 |
// | addnend |
if (KillProd) begin
ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+1)'(0)};
ZmShifted = {(P.NF+2)'(0), Zm, (2*P.NF+1)'(0)};
ASticky = ~(XZero|YZero);
// If the addend is too small to effect the addition
@ -86,12 +84,12 @@ module fmaalign(
// | addnend |
end else begin
ZmShifted = ZmPreshifted >> ACnt;
ASticky = |(ZmShifted[`NF-1:0]);
ASticky = |(ZmShifted[P.NF-1:0]);
end
end
assign Am = ZmShifted[4*`NF+3:`NF];
assign Am = ZmShifted[4*P.NF+3:P.NF];
endmodule

View File

@ -26,18 +26,16 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmaexpadd(
input logic [`NE-1:0] Xe, Ye, // input's exponents
module fmaexpadd import cvw::*; #(parameter cvw_t P) (
input logic [P.NE-1:0] Xe, Ye, // input's exponents
input logic XZero, YZero, // are the inputs zero
output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2
output logic [P.NE+1:0] Pe // product's exponent B^(1023)NE+2
);
logic PZero; // is the product zero?
// kill the exponent if the product is zero - either X or Y is 0
assign PZero = XZero | YZero;
assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)});
assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)});
endmodule

View File

@ -27,11 +27,9 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmalza #(WIDTH) (
module fmalza #(WIDTH, NF) (
input logic [WIDTH-1:0] A, // addend
input logic [2*`NF+1:0] Pm, // product
input logic [2*NF+1:0] Pm, // product
input logic Cin, // carry in
input logic sub, // subtraction
output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result
@ -42,7 +40,7 @@ module fmalza #(WIDTH) (
logic [WIDTH-1:0] P, G, K; // propagate, generate, kill for each column
logic [WIDTH-1:0] Pp1, Gm1, Km1; // propagate shifted right by 1, generate/kill shifted left 1
assign B = {{(`NF+1){1'b0}}, Pm, 1'b0}; // Zero extend product
assign B = {{(NF+1){1'b0}}, Pm, 1'b0}; // Zero extend product
assign P = A^B;
assign G = A&B;

View File

@ -26,11 +26,9 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmamult(
input logic [`NF:0] Xm, Ym, // x and y significand
output logic [2*`NF+1:0] Pm // product's significand
module fmamult import cvw::*; #(parameter cvw_t P) (
input logic [P.NF:0] Xm, Ym, // x and y significand
output logic [2*P.NF+1:0] Pm // product's significand
);
assign Pm = Xm * Ym;

View File

@ -26,8 +26,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmasign(
input logic [2:0] OpCtrl, // opperation contol
input logic Xs, Ys, Zs, // sign of the inputs

View File

@ -26,9 +26,7 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fpu (
module fpu import cvw::*; #(parameter cvw_t P) (
input logic clk,
input logic reset,
// Hazards
@ -44,7 +42,7 @@ module fpu (
// Execute stage
input logic [2:0] Funct3E, // Funct fields of instruction specify type of operations
input logic IntDivE, W64E, // Integer division on FPU
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU)
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU)
input logic [4:0] RdE, // which FP register to write to (from IEU)
output logic FWriteIntE, // integer register write enable (to IEU)
output logic FCvtIntE, // Convert to int (to IEU)
@ -53,16 +51,16 @@ module fpu (
input logic [4:0] RdM, // which FP register to write to (from IEU)
output logic FRegWriteM, // FP register write enable (to privileged unit)
output logic FpLoadStoreM, // Fp load instruction? (to LSU)
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU)
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
output logic [P.FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU)
output logic [P.XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to IFU)
output logic [4:0] SetFflagsM, // FPU flags (to privileged unit)
// Writeback stage
input logic [4:0] RdW, // which FP register to write to (from IEU)
input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
input logic [P.FLEN-1:0] ReadDataW, // Read data (from LSU)
output logic [P.XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
output logic FCvtIntW, // select FCvtIntRes (to IEU)
output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU)
output logic [P.XLEN-1:0] FIntDivResultW // Result from integer division (to IEU)
);
// RISC-V FPU specifics:
@ -72,7 +70,7 @@ module fpu (
// control signals
logic FRegWriteW; // FP register write enable
logic [2:0] FrmM; // FP rounding mode
logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
logic [P.FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
logic FDivStartE, IDivStartE; // Start division or squareroot
logic FWriteIntM; // Write to integer register
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
@ -86,20 +84,20 @@ module fpu (
logic FRegWriteE; // Write floating-point register
// regfile signals
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [P.FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [P.FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
logic [P.XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [P.FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
logic [P.FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XsE, YsE, ZsE; // input's sign - execute stage
logic XsM, YsM; // input's sign - memory stage
logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
logic [`NE-1:0] ZeM; // input's exponent - memory stage
logic [`NF:0] XmE, YmE, ZmE; // input's significand - execute stage
logic [`NF:0] XmM, YmM, ZmM; // input's significand - memory stage
logic [P.NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
logic [P.NE-1:0] ZeM; // input's exponent - memory stage
logic [P.NF:0] XmE, YmE, ZmE; // input's significand - execute stage
logic [P.NF:0] XmM, YmM, ZmM; // input's significand - memory stage
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
@ -110,56 +108,56 @@ module fpu (
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XExpMaxE; // is the exponent all ones (max value)
logic [`FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations
logic [P.FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations
// Fma Signals
logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting
logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying
logic [3*`NF+3:0] SmE, SmM; // Sum significand
logic [3*P.NF+3:0] SmE, SmM; // Sum significand
logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output
logic [`NE+1:0] SeE,SeM; // Sum exponent
logic [P.NE+1:0] SeE,SeM; // Sum exponent
logic InvAE, InvAM; // Invert addend
logic AsE, AsM; // Addend sign
logic PsE, PsM; // Product sign
logic SsE, SsM; // Sum sign
logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count
logic [$clog2(3*P.NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count
// Cvt Signals
logic [`NE:0] CeE, CeM; // convert intermediate expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic [P.NE:0] CeE, CeM; // convert intermediate expoent
logic [P.LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal
logic CsE, CsM; // convert result sign
logic IntZeroE, IntZeroM; // is the integer zero?
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
logic [`XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
logic [P.CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
logic [P.XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
// divide signals
logic [`DIVb:0] QmM; // fdivsqrt signifcand
logic [`NE+1:0] QeM; // fdivsqrt exponent
logic [P.DIVb:0] QmM; // fdivsqrt signifcand
logic [P.NE+1:0] QeM; // fdivsqrt exponent
logic DivStickyM; // fdivsqrt sticky bit
logic FDivDoneE, IFDivStartE; // fdivsqrt control signals
logic [`XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
logic [P.XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
// result and flag signals
logic [`XLEN-1:0] ClassResE; // classify result
logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max)
logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le)
logic [P.XLEN-1:0] ClassResE; // classify result
logic [P.FLEN-1:0] CmpFpResE; // compare result to FPU (min/max)
logic [P.XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le)
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
logic [`FLEN-1:0] PostProcResM; // Postprocessor output
logic [P.FLEN-1:0] SgnResE; // sign injection result
logic [P.XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
logic [P.FLEN-1:0] PostProcResM; // Postprocessor output
logic [4:0] PostProcFlgM; // Postprocessor flags
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result
logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register
logic [P.FLEN-1:0] FpResM, FpResW; // FPU preliminary result
logic [P.FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
logic [P.FLEN-1:0] FResultW; // final FP result being written to the FP register
// other signals
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed
logic [P.FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv
logic [P.FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [P.FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed
logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt
logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
logic [P.FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
logic mvsgn; // sign bit for extending move
//////////////////////////////////////////////////////////////////////////////////////////
@ -167,7 +165,7 @@ module fpu (
//////////////////////////////////////////////////////////////////////////////////////////
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
fctrl #(P) fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
.Funct3E, .IntDivE, .InstrD,
.StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
.reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
@ -177,15 +175,15 @@ module fpu (
.Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E);
// FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
fregfile #(P.FLEN) fregfile (.clk, .reset, .we4(FRegWriteW),
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]),
.a4(RdW), .wd4(FResultW),
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
// D/E pipeline registers
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(P.FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(P.FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(P.FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
//////////////////////////////////////////////////////////////////////////////////////////
// Execute Stage: hazards, forwarding, unpacking, execution units
@ -197,37 +195,37 @@ module fpu (
.XEnD, .YEnD, .ZEnD, .FPUStallD, .ForwardXE, .ForwardYE, .ForwardZE);
// forwarding muxs
mux3 #(`FLEN) fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE);
mux3 #(`FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE);
mux3 #(`FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE);
mux3 #(P.FLEN) fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE);
mux3 #(P.FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE);
mux3 #(P.FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE);
// Select NAN-boxed value of Y = 1.0 in proper format for fma to add/subtract X*Y+Z
if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)};
else if(`FPSIZES == 2)
mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)},
{{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)},
{{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)},
{2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
if(P.FPSIZES == 1) assign BoxedOneE = {2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)};
else if(P.FPSIZES == 2)
mux2 #(P.FLEN) fonemux ({{P.FLEN-P.LEN1{1'b1}}, 2'b0, {P.NE1-1{1'b1}}, (P.NF1)'(0)}, {2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
else if(P.FPSIZES == 3 | P.FPSIZES == 4)
mux4 #(P.FLEN) fonemux ({{P.FLEN-P.S_LEN{1'b1}}, 2'b0, {P.S_NE-1{1'b1}}, (P.S_NF)'(0)},
{{P.FLEN-P.D_LEN{1'b1}}, 2'b0, {P.D_NE-1{1'b1}}, (P.D_NF)'(0)},
{{P.FLEN-P.H_LEN{1'b1}}, 2'b0, {P.H_NE-1{1'b1}}, (P.H_NF)'(0)},
{2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
assign FmaAddSubE = OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10);
mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract
mux2 #(P.FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract
// Select NAN-boxed value of Z = 0.0 in proper format for FMA for multiply X*Y+Z
// For add and subtract, Z comes from second source operand
if(`FPSIZES == 1) assign BoxedZeroE = 0;
else if(`FPSIZES == 2)
mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}},
{{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}},
{{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}},
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
if(P.FPSIZES == 1) assign BoxedZeroE = 0;
else if(P.FPSIZES == 2)
mux2 #(P.FLEN) fmulzeromux ({{P.FLEN-P.LEN1{1'b1}}, {P.LEN1{1'b0}}}, (P.FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
else if(P.FPSIZES == 3 | P.FPSIZES == 4)
mux4 #(P.FLEN) fmulzeromux ({{P.FLEN-P.S_LEN{1'b1}}, {P.S_LEN{1'b0}}},
{{P.FLEN-P.D_LEN{1'b1}}, {P.D_LEN{1'b0}}},
{{P.FLEN-P.H_LEN{1'b1}}, {P.H_LEN{1'b0}}},
(P.FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
assign FmaZSelE = {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]};
mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE);
mux3 #(P.FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE);
// unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infifnity
unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
unpack #(P) unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
.Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE),
.XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE),
.YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE),
@ -235,99 +233,99 @@ module fpu (
.ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE), .XPostBox(XPostBoxE));
// fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub
fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE),
fma #(P) fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .OpCtrl(OpCtrlE),
.As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE));
// divide and square root: fdiv, fsqrt, optionally integer division
fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
.QmM, .FIntDivResultM);
// compare: fmin/fmax, flt/fle/feq
fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
.Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE),
.XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE),
.CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));
// sign injection: fsgnj/fsgnjx/fsgnjn
fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XPostBoxE), .Fmt(FmtE), .SgnRes(SgnResE));
fsgninj #(P) fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XPostBoxE), .Fmt(FmtE), .SgnRes(SgnResE));
// classify: fclass
fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE),
fclassify #(P) fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE),
.XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));
// convert: fcvt.*.*
fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE),
fcvt #(P) fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE),
.ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE),
.ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE));
// NaN Box SrcA to convert integer to requested FP size
if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE};
else if(`FPSIZES == 2)
mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ForwardedSrcAE[`S_LEN-1:0]},
{{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]},
{{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]},
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
if(P.FPSIZES == 1) assign AlignedSrcAE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE};
else if(P.FPSIZES == 2)
mux2 #(P.FLEN) SrcAMux ({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
else if(P.FPSIZES == 3 | P.FPSIZES == 4)
mux4 #(P.FLEN) SrcAMux ({{P.FLEN-P.S_LEN{1'b1}}, ForwardedSrcAE[P.S_LEN-1:0]},
{{P.FLEN-P.D_LEN{1'b1}}, ForwardedSrcAE[P.D_LEN-1:0]},
{{P.FLEN-P.H_LEN{1'b1}}, ForwardedSrcAE[P.H_LEN-1:0]},
{{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
// select a result that may be written to the FP register
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
mux3 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
// select the result that may be written to the integer register with fmv - to IEU
if(`FPSIZES == 1) begin
assign mvsgn = XE[`FLEN-1];
if(P.FPSIZES == 1) begin
assign mvsgn = XE[P.FLEN-1];
assign SgnExtXE = XE;
end else if(`FPSIZES == 2) begin
mux2 #(1) sgnmux (XE[`LEN1-1], XE[`FLEN-1],FmtE, mvsgn);
mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{mvsgn}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE);
end else if(`FPSIZES == 3 | `FPSIZES == 4) begin
mux4 #(1) sgnmux (XE[`H_LEN-1], XE[`S_LEN-1], XE[`D_LEN-1], XE[`LLEN-1], FmtE, mvsgn);
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{mvsgn}}, XE[`H_LEN-1:0]},
{{`FLEN-`S_LEN{mvsgn}}, XE[`S_LEN-1:0]},
{{`FLEN-`D_LEN{mvsgn}}, XE[`D_LEN-1:0]},
end else if(P.FPSIZES == 2) begin
mux2 #(1) sgnmux (XE[P.LEN1-1], XE[P.FLEN-1],FmtE, mvsgn);
mux2 #(P.FLEN) sgnextmux ({{P.FLEN-P.LEN1{mvsgn}}, XE[P.LEN1-1:0]}, XE, FmtE, SgnExtXE);
end else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin
mux4 #(1) sgnmux (XE[P.H_LEN-1], XE[P.S_LEN-1], XE[P.D_LEN-1], XE[P.LLEN-1], FmtE, mvsgn);
mux4 #(P.FLEN) fmulzeromux ({{P.FLEN-P.H_LEN{mvsgn}}, XE[P.H_LEN-1:0]},
{{P.FLEN-P.S_LEN{mvsgn}}, XE[P.S_LEN-1:0]},
{{P.FLEN-P.D_LEN{mvsgn}}, XE[P.D_LEN-1:0]},
XE, FmtE, SgnExtXE);
end
if (`FLEN>`XLEN)
assign IntSrcXE = SgnExtXE[`XLEN-1:0];
if (P.FLEN>P.XLEN)
assign IntSrcXE = SgnExtXE[P.XLEN-1:0];
else
assign IntSrcXE = {{`XLEN-`FLEN{mvsgn}}, SgnExtXE};
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
assign IntSrcXE = {{P.XLEN-P.FLEN{mvsgn}}, SgnExtXE};
mux3 #(P.XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
// E/M pipe registers
// Need to stall during divsqrt iterations to avoid capturing bad flags from stale forwarded sources
assign StallUnpackedM = StallM | (FDivBusyE & ~IFDivStartE | FDivDoneE);
flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
flopenrc #(P.NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
flopenrc #(P.NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
flopenrc #(P.FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
flopenrc #(P.XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(P.FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
flopenr #(13) EMFpReg5 (clk, reset, ~StallUnpackedM,
{XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
flopenrc #(3*`NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
flopenrc #($clog2(3*`NF+5)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM,
flopenrc #(3*P.NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
flopenrc #($clog2(3*P.NF+5)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
{FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
{FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM});
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CeE, CvtShiftAmtE, CvtResSubnormUfE, CsE, IntZeroE, CvtLzcInE},
{CeM, CvtShiftAmtM, CvtResSubnormUfM, CsM, IntZeroM, CvtLzcInM});
flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM);
flopenrc #(P.FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM);
//////////////////////////////////////////////////////////////////////////////////////////
// Memory Stage: postprocessor and result muxes
//////////////////////////////////////////////////////////////////////////////////////////
postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
postprocess #(P) postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM),
.FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
@ -337,18 +335,18 @@ module fpu (
// FPU flag selection - to privileged
mux2 #(5) FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
mux2 #(`FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);
mux2 #(P.FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);
// M/W pipe registers
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW);
flopenrc #(P.FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(P.XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(P.XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW);
//////////////////////////////////////////////////////////////////////////////////////////
// Writeback Stage: result mux
//////////////////////////////////////////////////////////////////////////////////////////
// select the result to be written to the FP register
mux2 #(`FLEN) FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW);
mux2 #(P.FLEN) FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW);
endmodule // fpu

View File

@ -26,17 +26,15 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fregfile (
module fregfile #(parameter FLEN) (
input logic clk, reset,
input logic we4, // write enable
input logic [4:0] a1, a2, a3, a4, // adresses
input logic [`FLEN-1:0] wd4, // write data
output logic [`FLEN-1:0] rd1, rd2, rd3 // read data
input logic [FLEN-1:0] wd4, // write data
output logic [FLEN-1:0] rd1, rd2, rd3 // read data
);
logic [`FLEN-1:0] rf[31:0];
logic [FLEN-1:0] rf[31:0];
integer i;
// three ported register file

View File

@ -26,14 +26,12 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fsgninj (
module fsgninj import cvw::*; #(parameter cvw_t P) (
input logic Xs, Ys, // X and Y sign bits
input logic [`FLEN-1:0] X, // X
input logic [`FMTBITS-1:0] Fmt, // format
input logic [P.FLEN-1:0] X, // X
input logic [P.FMTBITS-1:0] Fmt, // format
input logic [1:0] OpCtrl, // operation control
output logic [`FLEN-1:0] SgnRes // result
output logic [P.FLEN-1:0] SgnRes // result
);
logic ResSgn; // result sign
@ -50,30 +48,30 @@ module fsgninj (
// - uses NaN-blocking format
// - if there are any unused bits the most significant bits are filled with 1s
if (`FPSIZES == 1)
assign SgnRes = {ResSgn, X[`FLEN-2:0]};
else if (`FPSIZES == 2)
assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
else if (`FPSIZES == 3) begin
if (P.FPSIZES == 1)
assign SgnRes = {ResSgn, X[P.FLEN-2:0]};
else if (P.FPSIZES == 2)
assign SgnRes = {~Fmt|ResSgn, X[P.FLEN-2:P.LEN1], Fmt ? X[P.LEN1-1] : ResSgn, X[P.LEN1-2:0]};
else if (P.FPSIZES == 3) begin
logic [2:0] SgnBits;
always_comb
case (Fmt)
`FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
`FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
`FMT2: SgnBits = {2'b11, ResSgn};
P.FMT: SgnBits = {ResSgn, X[P.LEN1-1], X[P.LEN2-1]};
P.FMT1: SgnBits = {1'b1, ResSgn, X[P.LEN2-1]};
P.FMT2: SgnBits = {2'b11, ResSgn};
default: SgnBits = {3{1'bx}};
endcase
assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
end else if (`FPSIZES == 4) begin
assign SgnRes = {SgnBits[2], X[P.FLEN-2:P.LEN1], SgnBits[1], X[P.LEN1-2:P.LEN2], SgnBits[0], X[P.LEN2-2:0]};
end else if (P.FPSIZES == 4) begin
logic [3:0] SgnBits;
always_comb
case (Fmt)
`Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
`D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]};
`S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
`H_FMT: SgnBits = {3'b111, ResSgn};
P.Q_FMT: SgnBits = {ResSgn, X[P.D_LEN-1], X[P.S_LEN-1], X[P.H_LEN-1]};
P.D_FMT: SgnBits = {1'b1, ResSgn, X[P.S_LEN-1], X[P.H_LEN-1]};
P.S_FMT: SgnBits = {2'b11, ResSgn, X[P.H_LEN-1]};
P.H_FMT: SgnBits = {3'b111, ResSgn};
endcase
assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
assign SgnRes = {SgnBits[3], X[P.Q_LEN-2:P.D_LEN], SgnBits[2], X[P.D_LEN-2:P.S_LEN], SgnBits[1], X[P.S_LEN-2:P.H_LEN], SgnBits[0], X[P.H_LEN-2:0]};
end
endmodule

View File

@ -26,22 +26,20 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module cvtshiftcalc(
module cvtshiftcalc import cvw::*; #(parameter cvw_t P) (
input logic XZero, // is the input zero?
input logic ToInt, // to integer conversion?
input logic IntToFp, // interger to floating point conversion?
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [`NE:0] CvtCe, // the calculated expoent
input logic [`NF:0] Xm, // input mantissas
input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb)
input logic [P.FMTBITS-1:0] OutFmt, // output format
input logic [P.NE:0] CvtCe, // the calculated expoent
input logic [P.NF:0] Xm, // input mantissas
input logic [P.CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb)
input logic CvtResSubnormUf, // is the conversion result subnormal or underlows
output logic CvtResUf, // does the cvt result unerflow
output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted
output logic [P.CVTLEN+P.NF:0] CvtShiftIn // number to be shifted
);
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
logic [$clog2(P.NF):0] ResNegNF; // the result's fraction length negated (-NF)
///////////////////////////////////////////////////////////////////////////
// shifter
@ -49,7 +47,7 @@ module cvtshiftcalc(
// seclect the input to the shifter
// fp -> int:
// | `XLEN zeros | mantissa | 0's if nessisary |
// | P.XLEN zeros | mantissa | 0's if nessisary |
// .
// Other problems:
// - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
@ -57,7 +55,7 @@ module cvtshiftcalc(
// - ex: for the case 0010000.... (double)
// ??? -> fp:
// - if result is subnormal or underflowed then we want to shift right i.e. shift right then shift left:
// | `NF-1 zeros | mantissa | 0's if nessisary |
// | P.NF-1 zeros | mantissa | 0's if nessisary |
// .
// - otherwise:
// | LzcInM | 0's if nessisary |
@ -67,33 +65,33 @@ module cvtshiftcalc(
// get rid of round bit if needed
// | add sticky bit if needed
// | |
if (ToInt) CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}};
else if (CvtResSubnormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}};
else CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}};
if (ToInt) CvtShiftIn = {{P.XLEN{1'b0}}, Xm[P.NF]&~CvtCe[P.NE], Xm[P.NF-1]|(CvtCe[P.NE]&Xm[P.NF]), Xm[P.NF-2:0], {P.CVTLEN-P.XLEN{1'b0}}};
else if (CvtResSubnormUf) CvtShiftIn = {{P.NF-1{1'b0}}, Xm, {P.CVTLEN-P.NF+1{1'b0}}};
else CvtShiftIn = {CvtLzcIn, {P.NF+1{1'b0}}};
// choose the negative of the fraction size
if (`FPSIZES == 1) begin
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
if (P.FPSIZES == 1) begin
assign ResNegNF = -($clog2(P.NF)+1)'(P.NF);
end else if (`FPSIZES == 2) begin
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
end else if (P.FPSIZES == 2) begin
assign ResNegNF = OutFmt ? -($clog2(P.NF)+1)'(P.NF) : -($clog2(P.NF)+1)'(P.NF1);
end else if (`FPSIZES == 3) begin
end else if (P.FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
P.FMT: ResNegNF = -($clog2(P.NF)+1)'(P.NF);
P.FMT1: ResNegNF = -($clog2(P.NF)+1)'(P.NF1);
P.FMT2: ResNegNF = -($clog2(P.NF)+1)'(P.NF2);
default: ResNegNF = 1'bx;
endcase
end else if (`FPSIZES == 4) begin
end else if (P.FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
2'h3: ResNegNF = -($clog2(P.NF)+1)'(P.Q_NF);
2'h1: ResNegNF = -($clog2(P.NF)+1)'(P.D_NF);
2'h0: ResNegNF = -($clog2(P.NF)+1)'(P.S_NF);
2'h2: ResNegNF = -($clog2(P.NF)+1)'(P.H_NF);
endcase
end
@ -102,6 +100,6 @@ module cvtshiftcalc(
// determine if the result underflows ??? -> fp
// - if the first 1 is shifted out of the result then the result underflows
// - can't underflow an integer to fp conversions
assign CvtResUf = ($signed(CvtCe) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp;
assign CvtResUf = ($signed(CvtCe) < $signed({{P.NE-$clog2(P.NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp;
endmodule
endmodule

View File

@ -26,24 +26,22 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////`include "wally-config.vh"
`include "wally-config.vh"
module divshiftcalc(
input logic [`DIVb:0] DivQm, // divsqrt significand
input logic [`NE+1:0] DivQe, // divsqrt exponent
output logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount
output logic [`NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input
module divshiftcalc import cvw::*; #(parameter cvw_t P) (
input logic [P.DIVb:0] DivQm, // divsqrt significand
input logic [P.NE+1:0] DivQe, // divsqrt exponent
output logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount
output logic [P.NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input
output logic DivResSubnorm, // is the divsqrt result subnormal
output logic DivSubnormShiftPos // is the subnormal shift amount positive
);
logic [`LOGNORMSHIFTSZ-1:0] NormShift; // normalized result shift amount
logic [`LOGNORMSHIFTSZ-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negitive)
logic [`NE+1:0] DivSubnormShift; // subnormal result shift amount
logic [P.LOGNORMSHIFTSZ-1:0] NormShift; // normalized result shift amount
logic [P.LOGNORMSHIFTSZ-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negitive)
logic [P.NE+1:0] DivSubnormShift; // subnormal result shift amount
// is the result subnormal
// if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes
assign DivResSubnorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);
assign DivResSubnorm = DivQe[P.NE+1]|(~|DivQe[P.NE+1:0]);
// if the result is subnormal
// 00000000x.xxxxxx... Exp = DivQe
@ -51,8 +49,8 @@ module divshiftcalc(
// .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivQe+NF+1-1
assign DivSubnormShift = (`NE+2)'(`NF)+DivQe;
assign DivSubnormShiftPos = ~DivSubnormShift[`NE+1];
assign DivSubnormShift = (P.NE+2)'(P.NF)+DivQe;
assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1];
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivQe
@ -62,13 +60,13 @@ module divshiftcalc(
// 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
// inital Left shift amount = NF
// shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit
assign NormShift = (`LOGNORMSHIFTSZ)'(`NF);
assign NormShift = (P.LOGNORMSHIFTSZ)'(P.NF);
// if the shift amount is negitive then don't shift (keep sticky bit)
// need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES)
assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[`LOGNORMSHIFTSZ-1:0] : '0;
assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZ-1:0] : '0;
assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
// pre-shift the divider result for normalization
assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb-1-`NF{1'b0}}};
assign DivShiftIn = {{P.NF{1'b0}}, DivQm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
endmodule

View File

@ -25,18 +25,17 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module flags(
module flags import cvw::*; #(parameter cvw_t P) (
input logic Xs, // X sign
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [P.FMTBITS-1:0] OutFmt, // output format
input logic InfIn, // is a Inf input being used
input logic XInf, YInf, ZInf, // inputs are infinity
input logic NaNIn, // is a NaN input being used
input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs
input logic XZero, YZero, // inputs are zero
input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
input logic [`NE+1:0] Me, // exponent of the normalized sum
input logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow
input logic [P.NE+1:0] Me, // exponent of the normalized sum
// rounding
input logic Plus1, // do you add one for rounding
input logic Round, Guard, Sticky, // bits used to determine rounding
@ -47,7 +46,7 @@ module flags(
input logic IntToFp, // convert integer to floating point
input logic Int64, // convert to 64 bit integer
input logic Signed, // convert to a signed integer
input logic [`NE:0] CvtCe, // the calculated expoent - Cvt
input logic [P.NE:0] CvtCe, // the calculated expoent - Cvt
input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits
// divsqrt
input logic DivOp, // conversion opperation?
@ -92,33 +91,33 @@ module flags(
// - any of the bits after the most significan 1 is one
// - the most signifcant in 65 or 33 is still a one in the number and
// one of the later bits is one
if (`FPSIZES == 1) begin
assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
if (P.FPSIZES == 1) begin
assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
assign ShiftGtIntSz = (|FullRe[P.NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 2) begin
assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
end else if (P.FPSIZES == 2) begin
assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 3) begin
assign ShiftGtIntSz = (|FullRe[P.NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (P.FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
`FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
`FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
default: ResExpGteMax = 1'bx;
endcase
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
assign ShiftGtIntSz = (|FullRe[P.NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 4) begin
end else if (P.FPSIZES == 4) begin
always_comb
case (OutFmt)
`Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
`D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
`S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
`H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
endcase
assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
assign ShiftGtIntSz = (|FullRe[P.Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end
@ -127,7 +126,7 @@ module flags(
// | and the exponent isn't negitive
// | | if the input isnt infinity or NaN
// | | |
assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero);
///////////////////////////////////////////////////////////////////////////////
// Underflow
@ -141,7 +140,7 @@ module flags(
// | | | | and if the result is not exact
// | | | | | and if the input isnt infinity or NaN
// | | | | | |
assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
///////////////////////////////////////////////////////////////////////////////
@ -156,7 +155,7 @@ module flags(
// if the res is too small to be represented and not 0
// | and if the res is not invalid (outside the integer bounds)
// | |
assign IntInexact = ((CvtCe[`NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid;
assign IntInexact = ((CvtCe[P.NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid;
// select the inexact flag to output
assign Inexact = ToInt ? IntInexact : FpInexact;
@ -178,7 +177,7 @@ module flags(
// | | | | or the res rounds up out of bounds
// | | | | and the res didn't underflow
// | | | | |
assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[P.NE+1])|((Xs&~Signed)&(~((CvtCe[P.NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
// |
// or when the positive res rounds up out of range

View File

@ -26,21 +26,19 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmashiftcalc(
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [`NE+1:0] FmaSe, // sum's exponent
input logic [3*`NF+3:0] FmaSm, // the positive sum
input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // normalization shift count
output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
module fmashiftcalc import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [P.NE+1:0] FmaSe, // sum's exponent
input logic [3*P.NF+3:0] FmaSm, // the positive sum
input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // normalization shift count
output logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
output logic FmaSZero, // is the result subnormal - calculated before LZA corection
output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
output logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt, // normalization shift count
output logic [3*`NF+5:0] FmaShiftIn // is the sum zero
output logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt, // normalization shift count
output logic [3*P.NF+5:0] FmaShiftIn // is the sum zero
);
logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias
logic [`NE+1:0] BiasCorr; // correction for bias
logic [P.NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the P.FLEN bias
logic [P.NE+1:0] BiasCorr; // correction for bias
///////////////////////////////////////////////////////////////////////////////
// Normalization
@ -50,75 +48,75 @@ module fmashiftcalc(
assign FmaSZero = ~(|FmaSm);
// calculate the sum's exponent
assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+5)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3);
assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+5)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+3);
//convert the sum's exponent into the proper percision
if (`FPSIZES == 1) begin
if (P.FPSIZES == 1) begin
assign NormSumExp = PreNormSumExp;
end else if (`FPSIZES == 2) begin
assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS);
end else if (P.FPSIZES == 2) begin
assign BiasCorr = Fmt ? (P.NE+2)'(0) : (P.NE+2)'(P.BIAS1-P.BIAS);
assign NormSumExp = PreNormSumExp+BiasCorr;
end else if (`FPSIZES == 3) begin
end else if (P.FPSIZES == 3) begin
always_comb begin
case (Fmt)
`FMT: BiasCorr = '0;
`FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS);
`FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS);
P.FMT: BiasCorr = '0;
P.FMT1: BiasCorr = (P.NE+2)'(P.BIAS1-P.BIAS);
P.FMT2: BiasCorr = (P.NE+2)'(P.BIAS2-P.BIAS);
default: BiasCorr = 'x;
endcase
end
assign NormSumExp = PreNormSumExp+BiasCorr;
end else if (`FPSIZES == 4) begin
end else if (P.FPSIZES == 4) begin
always_comb begin
case (Fmt)
2'h3: BiasCorr = '0;
2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS);
2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS);
2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS);
2'h1: BiasCorr = (P.NE+2)'(P.D_BIAS-P.Q_BIAS);
2'h0: BiasCorr = (P.NE+2)'(P.S_BIAS-P.Q_BIAS);
2'h2: BiasCorr = (P.NE+2)'(P.H_BIAS-P.Q_BIAS);
endcase
end
assign NormSumExp = PreNormSumExp+BiasCorr;
end
// determine if the result is subnormal: (NormSumExp <= 0) & (NormSumExp >= -FracLen) & ~FmaSZero
if (`FPSIZES == 1) begin
if (P.FPSIZES == 1) begin
logic Sum0LEZ, Sum0GEFL;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2));
assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
end else if (`FPSIZES == 2) begin
end else if (P.FPSIZES == 2) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS1));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF1-2+P.BIAS-P.BIAS1)) | ~|PreNormSumExp;
assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
end else if (`FPSIZES == 3) begin
end else if (P.FPSIZES == 3) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2));
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp;
assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS1));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF1-2+P.BIAS-P.BIAS1)) | ~|PreNormSumExp;
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS2));
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF2-2+P.BIAS-P.BIAS2)) | ~|PreNormSumExp;
always_comb begin
case (Fmt)
`FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
`FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
`FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
P.FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
P.FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
P.FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
default: FmaPreResultSubnorm = 1'bx;
endcase
end
end else if (`FPSIZES == 4) begin
end else if (P.FPSIZES == 4) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp;
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS));
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp;
assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS));
assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp;
assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.D_BIAS));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.D_NF-2+P.BIAS-P.D_BIAS)) | ~|PreNormSumExp;
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.S_BIAS));
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.S_NF-2+P.BIAS-P.S_BIAS)) | ~|PreNormSumExp;
assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.H_BIAS));
assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.H_NF-2+P.BIAS-P.H_BIAS)) | ~|PreNormSumExp;
always_comb begin
case (Fmt)
2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
@ -132,6 +130,6 @@ module fmashiftcalc(
// set and calculate the shift input and amount
// - shift once if killing a product and the result is subnormal
assign FmaShiftIn = {2'b0, FmaSm};
if (`FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2): FmaSCnt+1;
else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2)+BiasCorr[$clog2(3*`NF+5)-1:0]: FmaSCnt+1;
if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+2): FmaSCnt+1;
else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+2)+BiasCorr[$clog2(3*P.NF+5)-1:0]: FmaSCnt+1;
endmodule

View File

@ -25,26 +25,25 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module negateintres(
module negateintres import cvw::*; #(parameter cvw_t P) (
input logic Signed, // is the integer input signed
input logic Int64, // is the integer input 64-bits
input logic Plus1, // should one be added for rounding?
input logic Xs, // X sign
input logic [`NORMSHIFTSZ-1:0] Shifted, // output from normalization shifter
input logic [P.NORMSHIFTSZ-1:0] Shifted, // output from normalization shifter
output logic [1:0] CvtNegResMsbs, // most signigficant bits of possibly negated result
output logic [`XLEN+1:0] CvtNegRes // possibly negated integer result
output logic [P.XLEN+1:0] CvtNegRes // possibly negated integer result
);
logic [`XLEN+1:0] CvtPreRes; // integer result with rounding
logic [P.XLEN+1:0] CvtPreRes; // integer result with rounding
logic [2:0] CvtNegResMsbs3; // first three msbs of possibly negated result
// round and negate the positive res if needed
assign CvtPreRes = {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
mux2 #(`XLEN+2) resmux(CvtPreRes, -CvtPreRes, Xs, CvtNegRes);
assign CvtPreRes = {2'b0, Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.XLEN]}+{{P.XLEN+1{1'b0}}, Plus1};
mux2 #(P.XLEN+2) resmux(CvtPreRes, -CvtPreRes, Xs, CvtNegRes);
// select 2 most significant bits
mux2 #(3) msb3mux(CvtNegRes[33:31], CvtNegRes[`XLEN+1:`XLEN-1], Int64, CvtNegResMsbs3);
mux2 #(3) msb3mux(CvtNegRes[33:31], CvtNegRes[P.XLEN+1:P.XLEN-1], Int64, CvtNegResMsbs3);
mux2 #(2) msb2mux(CvtNegResMsbs3[2:1], CvtNegResMsbs3[1:0], Signed, CvtNegResMsbs);
endmodule
endmodule

View File

@ -25,8 +25,6 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// convert shift
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
@ -72,11 +70,11 @@
// | Nf 0's | Qm | << calculated shift amount
// .
module normshift(
input logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt, // shift amount
input logic [`NORMSHIFTSZ-1:0] ShiftIn, // number to be shifted
output logic [`NORMSHIFTSZ-1:0] Shifted // shifted result
module normshift import cvw::*; #(parameter cvw_t P) (
input logic [P.LOGNORMSHIFTSZ-1:0] ShiftAmt, // shift amount
input logic [P.NORMSHIFTSZ-1:0] ShiftIn, // number to be shifted
output logic [P.NORMSHIFTSZ-1:0] Shifted // shifted result
);
assign Shifted = ShiftIn << ShiftAmt;
endmodule
endmodule

View File

@ -26,14 +26,12 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module postprocess (
module postprocess import cvw::*; #(parameter cvw_t P) (
// general signals
input logic Xs, Ys, // input signs
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
input logic [P.NF:0] Xm, Ym, Zm, // input mantissas
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
input logic XZero, YZero, // inputs are zero
input logic XInf, YInf, ZInf, // inputs are infinity
@ -44,63 +42,63 @@ module postprocess (
input logic FmaAs, // the modified Z sign - depends on instruction
input logic FmaPs, // the product's sign
input logic FmaSs, // Sum sign
input logic [`NE+1:0] FmaSe, // the sum's exponent
input logic [3*`NF+3:0] FmaSm, // the positive sum
input logic [P.NE+1:0] FmaSe, // the sum's exponent
input logic [3*P.NF+3:0] FmaSm, // the positive sum
input logic FmaASticky, // sticky bit that is calculated during alignment
input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // the normalization shift count
input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // the normalization shift count
//divide signals
input logic DivSticky, // divider sticky bit
input logic [`NE+1:0] DivQe, // divsqrt exponent
input logic [`DIVb:0] DivQm, // divsqrt significand
input logic [P.NE+1:0] DivQe, // divsqrt exponent
input logic [P.DIVb:0] DivQm, // divsqrt significand
// conversion signals
input logic CvtCs, // the result's sign
input logic [`NE:0] CvtCe, // the calculated expoent
input logic [P.NE:0] CvtCe, // the calculated expoent
input logic CvtResSubnormUf, // the convert result is subnormal or underflows
input logic [`LOGCVTLEN-1:0] CvtShiftAmt,// how much to shift by
input logic [P.LOGCVTLEN-1:0] CvtShiftAmt,// how much to shift by
input logic ToInt, // is fp->int (since it's writting to the integer register)
input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb)
input logic [P.CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb)
input logic IntZero, // is the integer input zero
// final results
output logic [`FLEN-1:0] PostProcRes,// postprocessor final result
output logic [P.FLEN-1:0] PostProcRes,// postprocessor final result
output logic [4:0] PostProcFlg,// postprocesser flags
output logic [`XLEN-1:0] FCvtIntRes // the integer conversion result
output logic [P.XLEN-1:0] FCvtIntRes // the integer conversion result
);
// general signals
logic Rs; // result sign
logic [`NF-1:0] Rf; // Result fraction
logic [`NE-1:0] Re; // Result exponent
logic [P.NF-1:0] Rf; // Result fraction
logic [P.NE-1:0] Re; // Result exponent
logic Ms; // norMalized sign
logic [`CORRSHIFTSZ-1:0] Mf; // norMalized fraction
logic [`NE+1:0] Me; // normalized exponent
logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow
logic [P.CORRSHIFTSZ-1:0] Mf; // norMalized fraction
logic [P.NE+1:0] Me; // normalized exponent
logic [P.NE+1:0] FullRe; // Re with bits to determine sign and overflow
logic UfPlus1; // do you add one (for determining underflow flag)
logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt; // normalization shift amount
logic [`NORMSHIFTSZ-1:0] ShiftIn; // input to normalization shift
logic [`NORMSHIFTSZ-1:0] Shifted; // the ouput of the normalized shifter (before shift correction)
logic [P.LOGNORMSHIFTSZ-1:0] ShiftAmt; // normalization shift amount
logic [P.NORMSHIFTSZ-1:0] ShiftIn; // input to normalization shift
logic [P.NORMSHIFTSZ-1:0] Shifted; // the ouput of the normalized shifter (before shift correction)
logic Plus1; // add one to the final result?
logic Overflow; // overflow flag used to select results
logic Invalid; // invalid flag used to select results
logic Guard, Round, Sticky; // bits needed to determine rounding
logic [`FMTBITS-1:0] OutFmt; // output format
logic [P.FMTBITS-1:0] OutFmt; // output format
// fma signals
logic [`NE+1:0] FmaMe; // exponent of the normalized sum
logic [P.NE+1:0] FmaMe; // exponent of the normalized sum
logic FmaSZero; // is the sum zero
logic [3*`NF+5:0] FmaShiftIn; // fma shift input
logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results
logic [3*P.NF+5:0] FmaShiftIn; // fma shift input
logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results
logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection
logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma
logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma
// division singals
logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount
logic [`NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
logic [`NE+1:0] Qe; // divsqrt corrected exponent after corretion shift
logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount
logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
logic [P.NE+1:0] Qe; // divsqrt corrected exponent after corretion shift
logic DivByZero; // divide by zero flag
logic DivResSubnorm; // is the divsqrt result subnormal
logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed)
// conversion signals
logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted for converter
logic [P.CVTLEN+P.NF:0] CvtShiftIn; // number to be shifted for converter
logic [1:0] CvtNegResMsbs; // most significant bits of possibly negated int result
logic [`XLEN+1:0] CvtNegRes; // possibly negated integer result
logic [P.XLEN+1:0] CvtNegRes; // possibly negated integer result
logic CvtResUf; // did the convert result underflow
logic IntInvalid; // invalid integer flag
// readability signals
@ -132,9 +130,9 @@ module postprocess (
// choose the ouptut format depending on the opperation
// - fp -> fp: OpCtrl contains the percision of the output
// - otherwise: Fmt contains the percision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
if (P.FPSIZES == 2)
assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == P.FMT);
else if (P.FPSIZES == 3 | P.FPSIZES == 4)
assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
///////////////////////////////////////////////////////////////////////////////
@ -142,40 +140,40 @@ module postprocess (
///////////////////////////////////////////////////////////////////////////////
// final claulations before shifting
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,
cvtshiftcalc #(P) cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
.FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
// select which unit's output to shift
always_comb
case(PostProcSel)
2'b10: begin // fma
ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt};
ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+6){1'b0}}};
ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(3*P.NF+5){1'b0}}, FmaShiftAmt};
ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+6){1'b0}}};
end
2'b00: begin // cvt
ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt};
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt};
ShiftIn = {CvtShiftIn, {P.NORMSHIFTSZ-P.CVTLEN-P.NF-1{1'b0}}};
end
2'b01: begin //divsqrt
ShiftAmt = DivShiftAmt;
ShiftIn = DivShiftIn;
end
default: begin
ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}};
ShiftIn = {`NORMSHIFTSZ{1'bx}};
ShiftAmt = {P.LOGNORMSHIFTSZ{1'bx}};
ShiftIn = {P.NORMSHIFTSZ{1'bx}};
end
endcase
// main normalization shift
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
normshift #(P) normshift (.ShiftIn, .ShiftAmt, .Shifted);
// correct for LZA/divsqrt error
shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
shiftcorrection #(P) shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf);
///////////////////////////////////////////////////////////////////////////////
@ -191,7 +189,7 @@ module postprocess (
// calulate result sign used in rounding unit
roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
.Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf,
.DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
@ -206,7 +204,7 @@ module postprocess (
// Flags
///////////////////////////////////////////////////////////////////////////////
flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
flags #(P) flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
.Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
.NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero,
.Guard, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
@ -216,9 +214,9 @@ module postprocess (
// Select the result
///////////////////////////////////////////////////////////////////////////////
negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
negateintres #(P) negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
specialcase #(P) specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
.IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf,
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
.XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes);

View File

@ -26,8 +26,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module resultsign(
input logic [2:0] Frm, // rounding mode
input logic FmaOp, // is the operation an Fma
@ -77,4 +75,4 @@ module resultsign(
else if(FmaSZero&FmaOp) Rs = Zeros;
else Rs = Ms;
endmodule
endmodule

View File

@ -26,42 +26,32 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// what position is XLEN in?
// options:
// 1: XLEN > NF > NF1
// 2: NF > XLEN > NF1
// 3: NF > NF1 > XLEN
// single and double will always be smaller than XLEN
`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
module round(
input logic [`FMTBITS-1:0] OutFmt, // output format
module round import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] OutFmt, // output format
input logic [2:0] Frm, // rounding mode
input logic [1:0] PostProcSel, // select the postprocessor output
input logic Ms, // normalized sign
input logic [`CORRSHIFTSZ-1:0] Mf, // normalized fraction
input logic [P.CORRSHIFTSZ-1:0] Mf, // normalized fraction
// fma
input logic FmaOp, // is an fma opperation being done?
input logic [`NE+1:0] FmaMe, // exponent of the normalized sum for fma
input logic [P.NE+1:0] FmaMe, // exponent of the normalized sum for fma
input logic FmaASticky, // addend's sticky bit
// divsqrt
input logic DivOp, // is a division opperation being done
input logic DivSticky, // divsqrt sticky bit
input logic [`NE+1:0] Qe, // the divsqrt calculated expoent
input logic [P.NE+1:0] Qe, // the divsqrt calculated expoent
// cvt
input logic CvtOp, // is a convert opperation being done
input logic ToInt, // is the cvt op a cvt to integer
input logic CvtResSubnormUf, // is the cvt result subnormal or underflow
input logic CvtResUf, // does the cvt result underflow
input logic [`NE:0] CvtCe, // the cvt calculated expoent
input logic [P.NE:0] CvtCe, // the cvt calculated expoent
// outputs
output logic [`NE+1:0] Me, // normalied fraction
output logic [P.NE+1:0] Me, // normalied fraction
output logic UfPlus1, // do you add one to the result if given an unbounded exponent
output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
output logic [`NE-1:0] Re, // Result exponent
output logic [`NF-1:0] Rf, // Result fractionNormS
output logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow
output logic [P.NE-1:0] Re, // Result exponent
output logic [P.NF-1:0] Rf, // Result fractionNormS
output logic Sticky, // sticky bit
output logic Plus1, // do you add one to the final result
output logic Round, Guard // bits needed to calculate rounding
@ -69,7 +59,7 @@ module round(
logic UfCalcPlus1; // calculated plus one for unbounded exponent
logic NormSticky; // normalized sum's sticky bit
logic [`NF-1:0] RoundFrac; // rounded fraction
logic [P.NF-1:0] RoundFrac; // rounded fraction
logic FpRes; // is the result a floating point
logic IntRes; // is the result an integer
logic FpGuard, FpRound; // floating point round/guard bits
@ -77,8 +67,17 @@ module round(
logic LsbRes; // lsb of result
logic CalcPlus1; // calculated plus1
logic FpPlus1; // do you add one to the fp result
logic [`FLEN:0] RoundAdd; // how much to add to the result
logic [P.FLEN:0] RoundAdd; // how much to add to the result
// what position is XLEN in?
// options:
// 1: XLEN > NF > NF1
// 2: NF > XLEN > NF1
// 3: NF > NF1 > XLEN
// single and double will always be smaller than XLEN
//`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
localparam XLENPOS = P.XLEN > P.NF ? 1 : P.XLEN > P.NF1 ? 2 : 3;
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
@ -115,68 +114,68 @@ module round(
assign FpRes = ~IntRes;
// sticky bit calculation
if (`FPSIZES == 1) begin
if (P.FPSIZES == 1) begin
// 1: XLEN > NF
// | XLEN |
// | NF |1|1|
// ^ ^ if floating point result
// ^ if not an FMA result
if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
// 2: NF > XLEN
if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&IntRes) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
end else if (`FPSIZES == 2) begin
end else if (P.FPSIZES == 2) begin
// XLEN is either 64 or 32
// so half and single are always smaller then XLEN
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~OutFmt) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&IntRes) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
end else if (`FPSIZES == 3) begin
end else if (P.FPSIZES == 3) begin
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT1)) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT1)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
end else if (`FPSIZES == 4) begin
end else if (P.FPSIZES == 4) begin
// Quad precision will always be greater than XLEN
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
(|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
(|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
// 3: NF > NF1 > XLEN
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
(|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
end
@ -184,40 +183,40 @@ module round(
// only add the Addend sticky if doing an FMA opperation
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivSticky&DivOp;
assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[P.NE+1]&FmaOp | DivSticky&DivOp;
// determine round and LSB of the rounded value
// - underflow round bit is used to determint the underflow flag
if (`FPSIZES == 1) begin
assign FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
assign FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
assign FpRound = Mf[`CORRSHIFTSZ-`NF-2];
if (P.FPSIZES == 1) begin
assign FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
assign FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
end else if (`FPSIZES == 2) begin
assign FpGuard = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
assign FpLsbRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
end else if (P.FPSIZES == 2) begin
assign FpGuard = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1];
assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1];
assign FpRound = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2];
end else if (`FPSIZES == 3) begin
end else if (P.FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
FpRound = Mf[`CORRSHIFTSZ-`NF-2];
P.FMT: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
end
`FMT1: begin
FpGuard = Mf[`CORRSHIFTSZ-`NF1-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`NF1];
FpRound = Mf[`CORRSHIFTSZ-`NF1-2];
P.FMT1: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.NF1-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1];
FpRound = Mf[P.CORRSHIFTSZ-P.NF1-2];
end
`FMT2: begin
FpGuard = Mf[`CORRSHIFTSZ-`NF2-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`NF2];
FpRound = Mf[`CORRSHIFTSZ-`NF2-2];
P.FMT2: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.NF2-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2];
FpRound = Mf[P.CORRSHIFTSZ-P.NF2-2];
end
default: begin
FpGuard = 1'bx;
@ -225,35 +224,35 @@ module round(
FpRound = 1'bx;
end
endcase
end else if (`FPSIZES == 4) begin
end else if (P.FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
FpGuard = Mf[`CORRSHIFTSZ-`Q_NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`Q_NF];
FpRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
FpGuard = Mf[P.CORRSHIFTSZ-P.Q_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.Q_NF-2];
end
2'h1: begin
FpGuard = Mf[`CORRSHIFTSZ-`D_NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`D_NF];
FpRound = Mf[`CORRSHIFTSZ-`D_NF-2];
FpGuard = Mf[P.CORRSHIFTSZ-P.D_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.D_NF-2];
end
2'h0: begin
FpGuard = Mf[`CORRSHIFTSZ-`S_NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`S_NF];
FpRound = Mf[`CORRSHIFTSZ-`S_NF-2];
FpGuard = Mf[P.CORRSHIFTSZ-P.S_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.S_NF-2];
end
2'h2: begin
FpGuard = Mf[`CORRSHIFTSZ-`H_NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`H_NF];
FpRound = Mf[`CORRSHIFTSZ-`H_NF-2];
FpGuard = Mf[P.CORRSHIFTSZ-P.H_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.H_NF-2];
end
endcase
end
assign Guard = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpGuard;
assign LsbRes = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLsbRes;
assign Round = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpRound;
assign Guard = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN-1] : FpGuard;
assign LsbRes = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN] : FpLsbRes;
assign Round = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN-2] : FpRound;
always_comb begin
@ -287,26 +286,26 @@ module round(
// place Plus1 into the proper position for the format
if (`FPSIZES == 1) begin
assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
if (P.FPSIZES == 1) begin
assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};
end else if (`FPSIZES == 2) begin
end else if (P.FPSIZES == 2) begin
// \/FLEN+1
// | NE+2 | NF |
// '-NE+2-^----NF1----^
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
// P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};
end else if (`FPSIZES == 3) begin
assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
end else if (P.FPSIZES == 3) begin
assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};
end else if (`FPSIZES == 4)
assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
end else if (P.FPSIZES == 4)
assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
// trim unneeded bits from fraction
assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF];
@ -314,7 +313,7 @@ module round(
always_comb
case(PostProcSel)
2'b10: Me = FmaMe; // fma
2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
// 2'b01: Me = DivDone ? Qe : '0; // divide
2'b01: Me = Qe; // divide
default: Me = '0;
@ -325,7 +324,7 @@ module round(
// round the result
// - if the fraction overflows one should be added to the exponent
assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
assign Re = FullRe[`NE-1:0];
assign Re = FullRe[P.NE-1:0];
endmodule

View File

@ -25,7 +25,6 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module roundsign(
input logic Xs, // x sign
@ -47,4 +46,4 @@ module roundsign(
// Select sign for rounding calulation
assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
endmodule
endmodule

View File

@ -26,53 +26,51 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module shiftcorrection(
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
module shiftcorrection import cvw::*; #(parameter cvw_t P) (
input logic [P.NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
// divsqrt
input logic DivOp, // is it a divsqrt opperation
input logic DivResSubnorm, // is the divsqrt result subnormal
input logic [`NE+1:0] DivQe, // the divsqrt result's exponent
input logic [P.NE+1:0] DivQe, // the divsqrt result's exponent
input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
//fma
input logic FmaOp, // is it an fma opperation
input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
input logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
input logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
input logic FmaSZero,
// output
output logic [`NE+1:0] FmaMe, // exponent of the normalized sum
output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
output logic [`NE+1:0] Qe // corrected exponent for divider
output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum
output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
output logic [P.NE+1:0] Qe // corrected exponent for divider
);
logic [3*`NF+3:0] CorrSumShifted; // the shifted sum after LZA correction
logic [`CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted
logic [`CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift
logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction
logic [P.CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted
logic [P.CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift
logic ResSubnorm; // is the result Subnormal
logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction
logic LeftShiftQm; // should the divsqrt result be shifted one to the left
// LZA correction
assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1];
assign LZAPlus1 = Shifted[P.NORMSHIFTSZ-1];
// correct the shifting error caused by the LZA
// - the only possible mantissa for a plus two is all zeroes
// - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
mux2 #(`NORMSHIFTSZ-2) lzacorrmux(Shifted[`NORMSHIFTSZ-3:0], Shifted[`NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);
mux2 #(P.NORMSHIFTSZ-2) lzacorrmux(Shifted[P.NORMSHIFTSZ-3:0], Shifted[P.NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);
// correct the shifting of the divsqrt caused by producing a result in (2, .5] range
// condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm)
assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
assign CorrQm0 = Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
assign CorrQm1 = Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1];
mux2 #(`CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2];
assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1];
mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
// if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
always_comb
if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}};
if(FmaOp) Mf = {CorrSumShifted, {P.CORRSHIFTSZ-(3*P.NF+4){1'b0}}};
else if (DivOp&~DivResSubnorm) Mf = CorrQmShifted;
else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
else Mf = Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.CORRSHIFTSZ];
// Determine sum's exponent
// main exponent issues:
@ -82,12 +80,12 @@ module shiftcorrection(
// - if the result was calulated to be subnorm but it's norm and the LZA was off by 2
// if plus1 If plus2 kill if the result Zero or actually subnormal
// | | |
assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, FmaPreResultSubnorm}) & {`NE+2{~(FmaSZero|ResSubnorm)}};
assign FmaMe = (NormSumExp+{{P.NE+1{1'b0}}, LZAPlus1} +{{P.NE+1{1'b0}}, FmaPreResultSubnorm}) & {P.NE+2{~(FmaSZero|ResSubnorm)}};
// recalculate if the result is subnormal after LZA correction
assign ResSubnorm = FmaPreResultSubnorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1];
assign ResSubnorm = FmaPreResultSubnorm&~Shifted[P.NORMSHIFTSZ-2]&~Shifted[P.NORMSHIFTSZ-1];
// the quotent is in the range [.5,2) if there is no early termination
// if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift
assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(`NE+1)'(0), ~LZAPlus1};
endmodule
assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1};
endmodule

View File

@ -26,14 +26,12 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module specialcase(
module specialcase import cvw::*; #(parameter cvw_t P) (
input logic Xs, // X sign
input logic [`NF:0] Xm, Ym, Zm, // input significand's
input logic [P.NF:0] Xm, Ym, Zm, // input significand's
input logic XNaN, YNaN, ZNaN, // are the inputs NaN
input logic [2:0] Frm, // rounding mode
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [P.FMTBITS-1:0] OutFmt, // output format
input logic InfIn, // are any inputs infinity
input logic NaNIn, // are any input NaNs
input logic XInf, YInf, // are X or Y inifnity
@ -41,9 +39,9 @@ module specialcase(
input logic Plus1, // do you add one for rounding
input logic Rs, // the result's sign
input logic Invalid, Overflow, // flags to choose the result
input logic [`NE-1:0] Re, // Result exponent
input logic [`NE+1:0] FullRe, // Result full exponent
input logic [`NF-1:0] Rf, // Result fraction
input logic [P.NE-1:0] Re, // Result exponent
input logic [P.NE+1:0] FullRe, // Result full exponent
input logic [P.NF-1:0] Rf, // Result fraction
// fma
input logic FmaOp, // is it a fma opperation
// divsqrt
@ -55,23 +53,23 @@ module specialcase(
input logic IntToFp, // is cvt int -> fp opperation
input logic Int64, // is the integer 64 bits
input logic Signed, // is the integer signed
input logic [`NE:0] CvtCe, // the calculated expoent for cvt
input logic [P.NE:0] CvtCe, // the calculated expoent for cvt
input logic IntInvalid, // integer invalid flag to choose the result
input logic CvtResUf, // does the convert result underflow
input logic [`XLEN+1:0] CvtNegRes, // the possibly negated of the integer result
input logic [P.XLEN+1:0] CvtNegRes, // the possibly negated of the integer result
// outputs
output logic [`FLEN-1:0] PostProcRes,// final result
output logic [`XLEN-1:0] FCvtIntRes // final integer result
output logic [P.FLEN-1:0] PostProcRes,// final result
output logic [P.XLEN-1:0] FCvtIntRes // final integer result
);
logic [`FLEN-1:0] XNaNRes; // X is NaN result
logic [`FLEN-1:0] YNaNRes; // Y is NaN result
logic [`FLEN-1:0] ZNaNRes; // Z is NaN result
logic [`FLEN-1:0] InvalidRes; // Invalid result result
logic [`FLEN-1:0] UfRes; // underflowed result result
logic [`FLEN-1:0] OfRes; // overflowed result result
logic [`FLEN-1:0] NormRes; // normal result
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
logic [P.FLEN-1:0] XNaNRes; // X is NaN result
logic [P.FLEN-1:0] YNaNRes; // Y is NaN result
logic [P.FLEN-1:0] ZNaNRes; // Z is NaN result
logic [P.FLEN-1:0] InvalidRes; // Invalid result result
logic [P.FLEN-1:0] UfRes; // underflowed result result
logic [P.FLEN-1:0] OfRes; // overflowed result result
logic [P.FLEN-1:0] NormRes; // normal result
logic [P.XLEN-1:0] OfIntRes; // the overflow result for integer output
logic OfResMax; // does the of result output maximum norm fp number
logic KillRes; // kill the result for underflow
logic SelOfRes; // should the overflow result be selected
@ -82,158 +80,158 @@ module specialcase(
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
// select correct outputs for special cases
if (`FPSIZES == 1) begin
if (P.FPSIZES == 1) begin
//NaN res selection depending on standard
if(`IEEE754) begin
assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
if(P.IEEE754) begin
assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
assign ZNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]};
assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
end else begin
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
end
assign OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
assign UfRes = {Rs, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
assign OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
assign NormRes = {Rs, Re, Rf};
end else if (`FPSIZES == 2) begin
if(`IEEE754) begin
assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else if (P.FPSIZES == 2) begin
if(P.IEEE754) begin
assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
assign ZNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.NF1]};
assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
end else begin
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
end
always_comb
if(OutFmt)
if(OfResMax) OfRes = {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}};
else OfRes = {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
if(OfResMax) OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
else OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
else
if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}};
else OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
assign UfRes = OutFmt ? {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
assign NormRes = OutFmt ? {Rs, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
if(OfResMax) OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
else OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
end else if (`FPSIZES == 3) begin
end else if (P.FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
P.FMT: begin
if(P.IEEE754) begin
XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
ZNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]};
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
end
OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {Rs, Re, Rf};
end
`FMT1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
P.FMT1: begin
if(P.IEEE754) begin
XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
ZNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.NF1]};
InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
UfRes = {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
OfRes = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
end
`FMT2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]};
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
P.FMT2: begin
if(P.IEEE754) begin
XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
ZNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.NF2]};
InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2{1'b1}}, (`NF2)'(0)};
UfRes = {{`FLEN-`LEN2{1'b1}}, Rs, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`LEN2{1'b1}}, Rs, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]};
OfRes = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
end
default: begin
if(`IEEE754) begin
XNaNRes = (`FLEN)'(0);
YNaNRes = (`FLEN)'(0);
ZNaNRes = (`FLEN)'(0);
InvalidRes = (`FLEN)'(0);
if(P.IEEE754) begin
XNaNRes = (P.FLEN)'(0);
YNaNRes = (P.FLEN)'(0);
ZNaNRes = (P.FLEN)'(0);
InvalidRes = (P.FLEN)'(0);
end else begin
InvalidRes = (`FLEN)'(0);
InvalidRes = (P.FLEN)'(0);
end
OfRes = (`FLEN)'(0);
UfRes = (`FLEN)'(0);
NormRes = (`FLEN)'(0);
OfRes = (P.FLEN)'(0);
UfRes = (P.FLEN)'(0);
NormRes = (P.FLEN)'(0);
end
endcase
end else if (`FPSIZES == 4) begin
end else if (P.FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
if(P.IEEE754) begin
XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
ZNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]};
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
end
OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {Rs, Re, Rf};
end
2'h1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]};
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
if(P.IEEE754) begin
XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
ZNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.D_NF]};
InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE{1'b1}}, (`D_NF)'(0)};
UfRes = {{`FLEN-`D_LEN{1'b1}}, Rs, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`D_LEN{1'b1}}, Rs, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]};
OfRes = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
end
2'h0: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]};
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
if(P.IEEE754) begin
XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
ZNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.S_NF]};
InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE{1'b1}}, (`S_NF)'(0)};
UfRes = {{`FLEN-`S_LEN{1'b1}}, Rs, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`S_LEN{1'b1}}, Rs, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]};
OfRes = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
end
2'h2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]};
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
if(P.IEEE754) begin
XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
ZNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.H_NF]};
InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE{1'b1}}, (`H_NF)'(0)};
OfRes = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};
// zero is exact if dividing by infinity so don't add 1
UfRes = {{`FLEN-`H_LEN{1'b1}}, Rs, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`H_LEN{1'b1}}, Rs, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]};
UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
end
endcase
end
@ -242,13 +240,13 @@ module specialcase(
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
// calculate if the overflow result should be selected
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
// output infinity with result sign if divide by zero
if(`IEEE754)
if(P.IEEE754)
always_comb
if(XNaN&~(IntToFp&CvtOp)) PostProcRes = XNaNRes;
else if(YNaN&~CvtOp) PostProcRes = YNaNRes;
@ -283,14 +281,14 @@ module specialcase(
always_comb
if(Signed)
if(Xs&~NaNIn) // signed negitive
if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}};
else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
if(Int64) OfIntRes = {1'b1, {P.XLEN-1{1'b0}}};
else OfIntRes = {{P.XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
else // signed positive
if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}};
else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
if(Int64) OfIntRes = {1'b0, {P.XLEN-1{1'b1}}};
else OfIntRes = {{P.XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
else
if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive
else OfIntRes = {`XLEN{1'b1}}; // unsigned positive
if(Xs&~NaNIn) OfIntRes = {P.XLEN{1'b0}}; // unsigned negitive
else OfIntRes = {P.XLEN{1'b1}}; // unsigned positive
// select the integer output
@ -301,9 +299,9 @@ module specialcase(
// - otherwise output the normal res (trmined and sign extended if nessisary)
always_comb
if(IntInvalid) FCvtIntRes = OfIntRes;
else if(CvtCe[`NE])
if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}};
else FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1};
else if(Int64) FCvtIntRes = CvtNegRes[`XLEN-1:0];
else FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
endmodule
else if(CvtCe[P.NE])
if(Xs&Signed&Plus1) FCvtIntRes = {{P.XLEN{1'b1}}};
else FCvtIntRes = {{P.XLEN-1{1'b0}}, Plus1};
else if(Int64) FCvtIntRes = CvtNegRes[P.XLEN-1:0];
else FCvtIntRes = {{P.XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
endmodule

View File

@ -25,41 +25,40 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module unpack (
input logic [`FLEN-1:0] X, Y, Z, // inputs from register file
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
module unpack import cvw::*; #(parameter cvw_t P) (
input logic [P.FLEN-1:0] X, Y, Z, // inputs from register file
input logic [P.FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
input logic XEn, YEn, ZEn, // input enables
output logic Xs, Ys, Zs, // sign bits of XYZ
output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
output logic [P.NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
output logic [P.NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
output logic XNaN, YNaN, ZNaN, // is XYZ a NaN
output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
output logic XSubnorm, // is X subnormal
output logic XZero, YZero, ZZero, // is XYZ zero
output logic XInf, YInf, ZInf, // is XYZ infinity
output logic XExpMax, // does X have the maximum exponent (NaN or Inf)
output logic [`FLEN-1:0] XPostBox // X after being properly NaN-boxed
output logic [P.FLEN-1:0] XPostBox // X after being properly NaN-boxed
);
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
logic YExpMax, ZExpMax; // is the exponent all 1s
unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn),
unpackinput #(P) unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn),
.NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
.Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero),
.Subnorm(XSubnorm), .PostBox(XPostBox));
unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn),
unpackinput #(P) unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn),
.NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
.Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero),
.Subnorm(), .PostBox());
unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn),
unpackinput #(P) unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn),
.NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
.Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero),
.Subnorm(), .PostBox());
endmodule
endmodule

View File

@ -25,15 +25,14 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module unpackinput (
input logic [`FLEN-1:0] In, // inputs from register file
module unpackinput import cvw::*; #(parameter cvw_t P) (
input logic [P.FLEN-1:0] In, // inputs from register file
input logic En, // enable the input
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
input logic [P.FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic Sgn, // sign bits of the number
output logic [`NE-1:0] Exp, // exponent of the number (converted to largest supported precision)
output logic [`NF:0] Man, // mantissa of the number (converted to largest supported precision)
output logic [P.NE-1:0] Exp, // exponent of the number (converted to largest supported precision)
output logic [P.NF:0] Man, // mantissa of the number (converted to largest supported precision)
output logic NaN, // is the number a NaN
output logic SNaN, // is the number a signaling NaN
output logic Zero, // is the number zero
@ -42,29 +41,29 @@ module unpackinput (
output logic FracZero, // is the fraction zero
output logic ExpMax, // does In have the maximum exponent (NaN or Inf)
output logic Subnorm, // is the number subnormal
output logic [`FLEN-1:0] PostBox // Number reboxed correctly as a NaN
output logic [P.FLEN-1:0] PostBox // Number reboxed correctly as a NaN
);
logic [`NF-1:0] Frac; // Fraction of XYZ
logic [P.NF-1:0] Frac; // Fraction of XYZ
logic BadNaNBox; // incorrectly NaN Boxed
if (`FPSIZES == 1) begin // if there is only one floating point format supported
if (P.FPSIZES == 1) begin // if there is only one floating point format supported
assign BadNaNBox = 0;
assign Sgn = In[`FLEN-1]; // sign bit
assign Frac = In[`NF-1:0]; // fraction (no assumed 1)
assign ExpNonZero = |In[`FLEN-2:`NF]; // is the exponent non-zero
assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; // exponent. subnormal numbers have effective biased exponent of 1
assign ExpMax = &In[`FLEN-2:`NF]; // is the exponent all 1's
assign Sgn = In[P.FLEN-1]; // sign bit
assign Frac = In[P.NF-1:0]; // fraction (no assumed 1)
assign ExpNonZero = |In[P.FLEN-2:P.NF]; // is the exponent non-zero
assign Exp = {In[P.FLEN-2:P.NF+1], In[P.NF]|~ExpNonZero}; // exponent. subnormal numbers have effective biased exponent of 1
assign ExpMax = &In[P.FLEN-2:P.NF]; // is the exponent all 1's
assign PostBox = In;
end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported
end else if (P.FPSIZES == 2) begin // if there are 2 floating point formats supported
// largest format | smaller format
//----------------------------------
// `FLEN | `LEN1 length of floating point number
// `NE | `NE1 length of exponent
// `NF | `NF1 length of fraction
// `BIAS | `BIAS1 exponent's bias value
// `FMT | `FMT1 precision's format value - Q=11 D=01 Sticky=00 H=10
// P.FLEN | P.LEN1 length of floating point number
// P.NE | P.NE1 length of exponent
// P.NF | P.NF1 length of fraction
// P.BIAS | P.BIAS1 exponent's bias value
// P.FMT | P.FMT1 precision's format value - Q=11 D=01 Sticky=00 H=10
// Possible combinantions specified by spec:
// double and single
@ -76,22 +75,22 @@ module unpackinput (
// quad and half
// double and half
assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing
always_comb
if (BadNaNBox) begin
// PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, In[`LEN1-`NE1-3:0]};
PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, {(`LEN1-`NE1-2){1'b0}}};
// PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
end else
PostBox = In;
// choose sign bit depending on format - 1=larger precsion 0=smaller precision
assign Sgn = Fmt ? In[`FLEN-1] : (BadNaNBox ? 0 : In[`LEN1-1]); // improperly boxed NaNs are treated as positive
assign Sgn = Fmt ? In[P.FLEN-1] : (BadNaNBox ? 0 : In[P.LEN1-1]); // improperly boxed NaNs are treated as positive
// extract the fraction, add trailing zeroes to the mantissa if nessisary
assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
assign Frac = Fmt ? In[P.NF-1:0] : {In[P.NF1-1:0], (P.NF-P.NF1)'(0)};
// is the exponent non-zero
assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];
assign ExpNonZero = Fmt ? |In[P.FLEN-2:P.NF] : |In[P.LEN1-2:P.NF1];
// example double to single conversion:
// 1023 = 0011 1111 1111
@ -103,21 +102,21 @@ module unpackinput (
// extract the exponent, converting the smaller exponent into the larger precision if nessisary
// - if the original precision had a Subnormal number convert the exponent value 1
assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
assign Exp = Fmt ? {In[P.FLEN-2:P.NF+1], In[P.NF]|~ExpNonZero} : {In[P.LEN1-2], {P.NE-P.NE1{~In[P.LEN1-2]}}, In[P.LEN1-3:P.NF1+1], In[P.NF1]|~ExpNonZero};
// is the exponent all 1's
assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
assign ExpMax = Fmt ? &In[P.FLEN-2:P.NF] : &In[P.LEN1-2:P.NF1];
end else if (`FPSIZES == 3) begin // three floating point precsions supported
end else if (P.FPSIZES == 3) begin // three floating point precsions supported
// largest format | larger format | smallest format
//---------------------------------------------------
// `FLEN | `LEN1 | `LEN2 length of floating point number
// `NE | `NE1 | `NE2 length of exponent
// `NF | `NF1 | `NF2 length of fraction
// `BIAS | `BIAS1 | `BIAS2 exponent's bias value
// `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 Sticky=00 H=10
// P.FLEN | P.LEN1 | P.LEN2 length of floating point number
// P.NE | P.NE1 | P.NE2 length of exponent
// P.NF | P.NF1 | P.NF2 length of fraction
// P.BIAS | P.BIAS1 | P.BIAS2 exponent's bias value
// P.FMT | P.FMT1 | P.FMT2 precision's format value - Q=11 D=01 Sticky=00 H=10
// Possible combinantions specified by spec:
// quad and double and single
@ -130,20 +129,20 @@ module unpackinput (
// Check NaN boxing
always_comb
case (Fmt)
`FMT: BadNaNBox = 0;
`FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
`FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
P.FMT: BadNaNBox = 0;
P.FMT1: BadNaNBox = ~&In[P.FLEN-1:P.LEN1];
P.FMT2: BadNaNBox = ~&In[P.FLEN-1:P.LEN2];
default: BadNaNBox = 1'bx;
endcase
always_comb
if (BadNaNBox) begin
case (Fmt)
`FMT: PostBox = In;
// `FMT1: PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, In[`LEN1-`NE1-3:0]};
// `FMT2: PostBox = {{(`FLEN-`LEN2){1'b1}}, 1'b1, {(`NE2+1){1'b1}}, In[`LEN2-`NE2-3:0]};
`FMT1: PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, {(`LEN1-`NE1-2){1'b0}}};
`FMT2: PostBox = {{(`FLEN-`LEN2){1'b1}}, 1'b1, {(`NE2+1){1'b1}}, {(`LEN2-`NE2-2){1'b0}}};
P.FMT: PostBox = In;
// P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
// P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, In[P.LEN2-P.NE2-3:0]};
P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}};
default: PostBox = 'x;
endcase
end else
@ -154,27 +153,27 @@ module unpackinput (
if (BadNaNBox) Sgn = 0; // improperly boxed NaNs are treated as positive
else
case (Fmt)
`FMT: Sgn = In[`FLEN-1];
`FMT1: Sgn = In[`LEN1-1];
`FMT2: Sgn = In[`LEN2-1];
P.FMT: Sgn = In[P.FLEN-1];
P.FMT1: Sgn = In[P.LEN1-1];
P.FMT2: Sgn = In[P.LEN2-1];
default: Sgn = 1'bx;
endcase
// extract the fraction
always_comb
case (Fmt)
`FMT: Frac = In[`NF-1:0];
`FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
`FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
default: Frac = {`NF{1'bx}};
P.FMT: Frac = In[P.NF-1:0];
P.FMT1: Frac = {In[P.NF1-1:0], (P.NF-P.NF1)'(0)};
P.FMT2: Frac = {In[P.NF2-1:0], (P.NF-P.NF2)'(0)};
default: Frac = {P.NF{1'bx}};
endcase
// is the exponent non-zero
always_comb
case (Fmt)
`FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double)
`FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single)
`FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
P.FMT: ExpNonZero = |In[P.FLEN-2:P.NF]; // if input is largest precision (P.FLEN - ie quad or double)
P.FMT1: ExpNonZero = |In[P.LEN1-2:P.NF1]; // if input is larger precsion (P.LEN1 - double or single)
P.FMT2: ExpNonZero = |In[P.LEN2-2:P.NF2]; // if input is smallest precsion (P.LEN2 - single or half)
default: ExpNonZero = 1'bx;
endcase
@ -189,50 +188,50 @@ module unpackinput (
// convert the larger precision's exponent to use the largest precision's bias
always_comb
case (Fmt)
`FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
`FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
`FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero};
default: Exp = {`NE{1'bx}};
P.FMT: Exp = {In[P.FLEN-2:P.NF+1], In[P.NF]|~ExpNonZero};
P.FMT1: Exp = {In[P.LEN1-2], {P.NE-P.NE1{~In[P.LEN1-2]}}, In[P.LEN1-3:P.NF1+1], In[P.NF1]|~ExpNonZero};
P.FMT2: Exp = {In[P.LEN2-2], {P.NE-P.NE2{~In[P.LEN2-2]}}, In[P.LEN2-3:P.NF2+1], In[P.NF2]|~ExpNonZero};
default: Exp = {P.NE{1'bx}};
endcase
// is the exponent all 1's
always_comb
case (Fmt)
`FMT: ExpMax = &In[`FLEN-2:`NF];
`FMT1: ExpMax = &In[`LEN1-2:`NF1];
`FMT2: ExpMax = &In[`LEN2-2:`NF2];
P.FMT: ExpMax = &In[P.FLEN-2:P.NF];
P.FMT1: ExpMax = &In[P.LEN1-2:P.NF1];
P.FMT2: ExpMax = &In[P.LEN2-2:P.NF2];
default: ExpMax = 1'bx;
endcase
end else if (`FPSIZES == 4) begin // if all precsisons are supported - quad, double, single, and half
end else if (P.FPSIZES == 4) begin // if all precsisons are supported - quad, double, single, and half
// quad | double | single | half
//-------------------------------------------------------------------
// `Q_LEN | `D_LEN | `S_LEN | `H_LEN length of floating point number
// `Q_NE | `D_NE | `S_NE | `H_NE length of exponent
// `Q_NF | `D_NF | `S_NF | `H_NF length of fraction
// `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value
// `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 Sticky=00 H=10
// P.Q_LEN | P.D_LEN | P.S_LEN | P.H_LEN length of floating point number
// P.Q_NE | P.D_NE | P.S_NE | P.H_NE length of exponent
// P.Q_NF | P.D_NF | P.S_NF | P.H_NF length of fraction
// P.Q_BIAS | P.D_BIAS | P.S_BIAS | P.H_BIAS exponent's bias value
// P.Q_FMT | P.D_FMT | P.S_FMT | P.H_FMT precision's format value - Q=11 D=01 Sticky=00 H=10
// Check NaN boxing
always_comb
case (Fmt)
2'b11: BadNaNBox = 0;
2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN];
2'b01: BadNaNBox = ~&In[P.Q_LEN-1:P.D_LEN];
2'b00: BadNaNBox = ~&In[P.Q_LEN-1:P.S_LEN];
2'b10: BadNaNBox = ~&In[P.Q_LEN-1:P.H_LEN];
endcase
always_comb
if (BadNaNBox) begin
case (Fmt)
2'b11: PostBox = In;
// 2'b01: PostBox = {{(`Q_LEN-`D_LEN){1'b1}}, 1'b1, {(`D_NE+1){1'b1}}, In[`D_LEN-`D_NE-3:0]};
// 2'b00: PostBox = {{(`Q_LEN-`S_LEN){1'b1}}, 1'b1, {(`S_NE+1){1'b1}}, In[`S_LEN-`S_NE-3:0]};
// 2'b10: PostBox = {{(`Q_LEN-`H_LEN){1'b1}}, 1'b1, {(`H_NE+1){1'b1}}, In[`H_LEN-`H_NE-3:0]};
2'b01: PostBox = {{(`Q_LEN-`D_LEN){1'b1}}, 1'b1, {(`D_NE+1){1'b1}}, {(`D_LEN-`D_NE-2){1'b0}}};
2'b00: PostBox = {{(`Q_LEN-`S_LEN){1'b1}}, 1'b1, {(`S_NE+1){1'b1}}, {(`S_LEN-`S_NE-2){1'b0}}};
2'b10: PostBox = {{(`Q_LEN-`H_LEN){1'b1}}, 1'b1, {(`H_NE+1){1'b1}}, {(`H_LEN-`H_NE-2){1'b0}}};
// 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, In[P.D_LEN-P.D_NE-3:0]};
// 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, In[P.S_LEN-P.S_NE-3:0]};
// 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, In[P.H_LEN-P.H_NE-3:0]};
2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}};
2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}};
2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}};
endcase
end else
PostBox = In;
@ -242,29 +241,29 @@ module unpackinput (
if (BadNaNBox) Sgn = 0; // improperly boxed NaNs are treated as positive
else
case (Fmt)
2'b11: Sgn = In[`Q_LEN-1];
2'b01: Sgn = In[`D_LEN-1];
2'b00: Sgn = In[`S_LEN-1];
2'b10: Sgn = In[`H_LEN-1];
2'b11: Sgn = In[P.Q_LEN-1];
2'b01: Sgn = In[P.D_LEN-1];
2'b00: Sgn = In[P.S_LEN-1];
2'b10: Sgn = In[P.H_LEN-1];
endcase
// extract the fraction
always_comb
case (Fmt)
2'b11: Frac = In[`Q_NF-1:0];
2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
2'b10: Frac = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
2'b11: Frac = In[P.Q_NF-1:0];
2'b01: Frac = {In[P.D_NF-1:0], (P.Q_NF-P.D_NF)'(0)};
2'b00: Frac = {In[P.S_NF-1:0], (P.Q_NF-P.S_NF)'(0)};
2'b10: Frac = {In[P.H_NF-1:0], (P.Q_NF-P.H_NF)'(0)};
endcase
// is the exponent non-zero
always_comb
case (Fmt)
2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF];
2'b10: ExpNonZero = |In[`H_LEN-2:`H_NF];
2'b11: ExpNonZero = |In[P.Q_LEN-2:P.Q_NF];
2'b01: ExpNonZero = |In[P.D_LEN-2:P.D_NF];
2'b00: ExpNonZero = |In[P.S_LEN-2:P.S_NF];
2'b10: ExpNonZero = |In[P.H_LEN-2:P.H_NF];
endcase
@ -280,20 +279,20 @@ module unpackinput (
// 1 is added to the exponent if the input is zero or subnormal
always_comb
case (Fmt)
2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
2'b10: Exp = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero};
2'b11: Exp = {In[P.Q_LEN-2:P.Q_NF+1], In[P.Q_NF]|~ExpNonZero};
2'b01: Exp = {In[P.D_LEN-2], {P.Q_NE-P.D_NE{~In[P.D_LEN-2]}}, In[P.D_LEN-3:P.D_NF+1], In[P.D_NF]|~ExpNonZero};
2'b00: Exp = {In[P.S_LEN-2], {P.Q_NE-P.S_NE{~In[P.S_LEN-2]}}, In[P.S_LEN-3:P.S_NF+1], In[P.S_NF]|~ExpNonZero};
2'b10: Exp = {In[P.H_LEN-2], {P.Q_NE-P.H_NE{~In[P.H_LEN-2]}}, In[P.H_LEN-3:P.H_NF+1], In[P.H_NF]|~ExpNonZero};
endcase
// is the exponent all 1's
always_comb
case (Fmt)
2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
2'b00: ExpMax = &In[`S_LEN-2:`S_NF];
2'b10: ExpMax = &In[`H_LEN-2:`H_NF];
2'b11: ExpMax = &In[P.Q_LEN-2:P.Q_NF];
2'b01: ExpMax = &In[P.D_LEN-2:P.D_NF];
2'b00: ExpMax = &In[P.S_LEN-2:P.S_NF];
2'b10: ExpMax = &In[P.H_LEN-2:P.H_NF];
endcase
end
@ -302,9 +301,9 @@ module unpackinput (
assign FracZero = ~|Frac & ~BadNaNBox; // is the fraction zero?
assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if Subnormal or zero) to create the significand
assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN?
assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN?
assign SNaN = NaN&~Frac[P.NF-1]&~BadNaNBox; // is the input a singnaling NaN?
assign Inf = ExpMax & FracZero & En; // is the input infinity?
assign Zero = ~ExpNonZero & FracZero; // is the input zero?
assign Subnorm = ~ExpNonZero & ~FracZero & ~BadNaNBox; // is the input subnormal
endmodule
endmodule

View File

@ -26,8 +26,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module hazard (
// Detect hazards
input logic BPWrongE, CSRWriteFenceM, RetM, TrapM,

View File

@ -27,9 +27,7 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module bmuctrl(
module bmuctrl import cvw::*; #(parameter cvw_t P) (
input logic clk, reset,
// Decode stage control signals
input logic StallD, FlushD, // Stall, flush Decode stage
@ -76,13 +74,13 @@ module bmuctrl(
always_comb begin
// BALUSelect_BSelect_ZBBSelect_BRegWrite_BALUSrcB_BW64_BALUOp_BSubArithD_RotateD_MaskD_PreShiftD_IllegalBitmanipInstrD
BMUControlsD = `BMUCTRLW'b000_00_000_0_0_0_0_0_0_0_0_1; // default: Illegal bmu instruction;
if (`ZBA_SUPPORTED) begin
if (P.ZBA_SUPPORTED) begin
casez({OpD, Funct7D, Funct3D})
17'b0110011_0010000_010: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh1add
17'b0110011_0010000_100: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh2add
17'b0110011_0010000_110: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh3add
endcase
if (`XLEN==64)
if (P.XLEN==64)
casez({OpD, Funct7D, Funct3D})
17'b0111011_0010000_010: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0; // sh1add.uw
17'b0111011_0010000_100: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0; // sh2add.uw
@ -91,7 +89,7 @@ module bmuctrl(
17'b0011011_000010?_001: BMUControlsD = `BMUCTRLW'b001_01_000_1_1_1_1_0_0_0_0_0; // slli.uw
endcase
end
if (`ZBB_SUPPORTED) begin
if (P.ZBB_SUPPORTED) begin
casez({OpD, Funct7D, Funct3D})
17'b0110011_0110000_001: BMUControlsD = `BMUCTRLW'b001_01_111_1_0_0_1_0_1_0_0_0; // rol
17'b0110011_0110000_101: BMUControlsD = `BMUCTRLW'b001_01_111_1_0_0_1_0_1_0_0_0; // ror
@ -100,13 +98,13 @@ module bmuctrl(
else if ((Rs2D[4:2]==3'b000) & ~(Rs2D[1] & Rs2D[0]))
BMUControlsD = `BMUCTRLW'b000_10_000_1_1_0_1_0_0_0_0_0; // count instruction
// // coverage off: This case can't occur in RV64
// 17'b0110011_0000100_100: if (`XLEN == 32)
// 17'b0110011_0000100_100: if (P.XLEN == 32)
// BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0; // zexth (rv32)
// // coverage on
17'b0110011_0100000_111: BMUControlsD = `BMUCTRLW'b111_01_111_1_0_0_1_1_0_0_0_0; // andn
17'b0110011_0100000_110: BMUControlsD = `BMUCTRLW'b110_01_111_1_0_0_1_1_0_0_0_0; // orn
17'b0110011_0100000_100: BMUControlsD = `BMUCTRLW'b100_01_111_1_0_0_1_1_0_0_0_0; // xnor
17'b0010011_011010?_101: if ((`XLEN == 32 ^ Funct7D[0]) & (Rs2D == 5'b11000))
17'b0010011_011010?_101: if ((P.XLEN == 32 ^ Funct7D[0]) & (Rs2D == 5'b11000))
BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // rev8
17'b0010011_0010100_101: if (Rs2D[4:0] == 5'b00111)
BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // orc.b
@ -115,12 +113,12 @@ module bmuctrl(
17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // min
17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // minu
endcase
if (`XLEN==32)
if (P.XLEN==32)
casez({OpD, Funct7D, Funct3D})
17'b0110011_0000100_100: BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0; // zexth (rv32)
17'b0010011_0110000_101: BMUControlsD = `BMUCTRLW'b001_00_111_1_1_0_1_0_1_0_0_0; // rori (rv32)
endcase
else if (`XLEN==64)
else if (P.XLEN==64)
casez({OpD, Funct7D, Funct3D})
17'b0111011_0000100_100: BMUControlsD = `BMUCTRLW'b000_10_001_1_0_0_1_0_0_0_0_0; // zexth (rv64)
17'b0111011_0110000_001: BMUControlsD = `BMUCTRLW'b001_00_111_1_0_1_1_0_1_0_0_0; // rolw
@ -131,25 +129,25 @@ module bmuctrl(
BMUControlsD = `BMUCTRLW'b000_10_000_1_1_1_1_0_0_0_0_0; // count word instruction
endcase
end
if (`ZBC_SUPPORTED)
if (P.ZBC_SUPPORTED)
casez({OpD, Funct7D, Funct3D})
17'b0110011_0000101_0??: BMUControlsD = `BMUCTRLW'b000_11_000_1_0_0_1_0_0_0_0_0; // ZBC instruction
endcase
if (`ZBS_SUPPORTED) begin // ZBS
if (P.ZBS_SUPPORTED) begin // ZBS
casez({OpD, Funct7D, Funct3D})
17'b0110011_0100100_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_0_0_1_1_0_1_0_0; // bclr
17'b0110011_0100100_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_0_0_1_1_0_1_0_0; // bext
17'b0110011_0110100_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_0_0_1_0_0_1_0_0; // binv
17'b0110011_0010100_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_0_0_1_0_0_1_0_0; // bset
endcase
if (`XLEN==32) // ZBS 64-bit
if (P.XLEN==32) // ZBS 64-bit
casez({OpD, Funct7D, Funct3D})
17'b0010011_0100100_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_1_0_1_1_0_1_0_0; // bclri
17'b0010011_0100100_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_1_0_1_1_0_1_0_0; // bexti
17'b0010011_0110100_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_1_0_1_0_0_1_0_0; // binvi
17'b0010011_0010100_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_1_0_1_0_0_1_0_0; // bseti
endcase
else if (`XLEN==64) // ZBS 64-bit
else if (P.XLEN==64) // ZBS 64-bit
casez({OpD, Funct7D, Funct3D})
17'b0010011_010010?_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_1_0_1_1_0_1_0_0; // bclri (rv64)
17'b0010011_010010?_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_1_0_1_1_0_1_0_0; // bexti (rv64)
@ -157,7 +155,7 @@ module bmuctrl(
17'b0010011_001010?_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_1_0_1_0_0_1_0_0; // bseti (rv64)
endcase
end
if (`ZBB_SUPPORTED | `ZBS_SUPPORTED) // rv32i/64i shift instructions need BMU ALUSelect when BMU shifter is used
if (P.ZBB_SUPPORTED | P.ZBS_SUPPORTED) // rv32i/64i shift instructions need BMU ALUSelect when BMU shifter is used
casez({OpD, Funct7D, Funct3D})
17'b0110011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_0_0_1_0_0_0_0_0; // sra, srl, sll
17'b0010011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_1_0_1_0_0_0_0_0; // srai, srli, slli
@ -176,5 +174,5 @@ module bmuctrl(
assign ALUSelectD = BALUOpD ? BALUSelectD : (ALUOpD ? Funct3D : 3'b000);
// BMU Execute stage pipieline control register
flopenrc#(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE});
flopenrc #(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE});
endmodule

View File

@ -27,10 +27,7 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module controller(
module controller import cvw::*; #(parameter cvw_t P) (
input logic clk, reset,
// Decode stage control signals
input logic StallD, FlushD, // Stall, flush Decode stage
@ -142,30 +139,30 @@ module controller(
// Be rigorous about detecting illegal instructions if CSRs or bit manipulation is supported
// otherwise be cheap
if (`ZICSR_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED | `ZBC_SUPPORTED | `ZBS_SUPPORTED) begin:legalcheck // Exact integer decoding
if (P.ZICSR_SUPPORTED | P.ZBA_SUPPORTED | P.ZBB_SUPPORTED | P.ZBC_SUPPORTED | P.ZBS_SUPPORTED) begin:legalcheck // Exact integer decoding
logic Funct7ZeroD, Funct7b5D, IShiftD, INoShiftD;
logic Funct7ShiftZeroD, Funct7Shiftb5D;
assign Funct7ZeroD = (Funct7D == 7'b0000000); // most R-type instructions
assign Funct7b5D = (Funct7D == 7'b0100000); // srai, sub
assign Funct7ShiftZeroD = (`XLEN==64) ? (Funct7D[6:1] == 6'b000000) : Funct7ZeroD;
assign Funct7Shiftb5D = (`XLEN==64) ? (Funct7D[6:1] == 6'b010000) : Funct7b5D;
assign Funct7ShiftZeroD = (P.XLEN==64) ? (Funct7D[6:1] == 6'b000000) : Funct7ZeroD;
assign Funct7Shiftb5D = (P.XLEN==64) ? (Funct7D[6:1] == 6'b010000) : Funct7b5D;
assign IShiftD = (Funct3D == 3'b001 & Funct7ShiftZeroD) | (Funct3D == 3'b101 & (Funct7ShiftZeroD | Funct7Shiftb5D)); // slli, srli, srai, or w forms
assign INoShiftD = ((Funct3D != 3'b001) & (Funct3D != 3'b101));
assign IFunctD = IShiftD | INoShiftD;
assign RFunctD = ((Funct3D == 3'b000 | Funct3D == 3'b101) & Funct7b5D) | Funct7ZeroD;
assign MFunctD = (Funct7D == 7'b0000001) & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
assign MFunctD = (Funct7D == 7'b0000001) & (P.M_SUPPORTED | (P.ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
assign LFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 | Funct3D == 3'b100 | Funct3D == 3'b101 |
((`XLEN == 64) & (Funct3D == 3'b011 | Funct3D == 3'b110));
((P.XLEN == 64) & (Funct3D == 3'b011 | Funct3D == 3'b110));
assign SFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 |
((`XLEN == 64) & (Funct3D == 3'b011));
((P.XLEN == 64) & (Funct3D == 3'b011));
assign BFunctD = (Funct3D[2:1] != 2'b01); // legal branches
assign JFunctD = (Funct3D == 3'b000);
assign IWValidFunct3D = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b101;
end else begin:legalcheck2
assign IFunctD = 1; // Don't bother to separate out shift decoding
assign RFunctD = ~Funct7D[0]; // Not a multiply
assign MFunctD = Funct7D[0] & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
assign MFunctD = Funct7D[0] & (P.M_SUPPORTED | (P.ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
assign LFunctD = 1; // don't bother to check Funct3 for loads
assign SFunctD = 1; // don't bother to check Funct3 for stores
assign BFunctD = 1; // don't bother to check Funct3 for branches
@ -182,19 +179,19 @@ module controller(
7'b0000011: if (LFunctD)
ControlsD = `CTRLW'b1_000_01_10_001_0_0_0_0_0_0_0_0_0_00_0; // loads
7'b0000111: ControlsD = `CTRLW'b0_000_01_10_001_0_0_0_0_0_0_0_0_0_00_1; // flw - only legal if FP supported
7'b0001111: if (`ZIFENCEI_SUPPORTED)
7'b0001111: if (P.ZIFENCEI_SUPPORTED)
ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_1_0_00_0; // fence
else
ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_0; // fence treated as nop
7'b0010011: if (IFunctD)
ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_0_0_0_0_0_00_0; // I-type ALU
7'b0010111: ControlsD = `CTRLW'b1_100_11_00_000_0_0_0_0_0_0_0_0_0_00_0; // auipc
7'b0011011: if (IFunctD & IWValidFunct3D & `XLEN == 64)
7'b0011011: if (IFunctD & IWValidFunct3D & P.XLEN == 64)
ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_1_0_0_0_0_00_0; // IW-type ALU for RV64i
7'b0100011: if (SFunctD)
ControlsD = `CTRLW'b0_001_01_01_000_0_0_0_0_0_0_0_0_0_00_0; // stores
7'b0100111: ControlsD = `CTRLW'b0_001_01_01_000_0_0_0_0_0_0_0_0_0_00_1; // fsw - only legal if FP supported
7'b0101111: if (`A_SUPPORTED) begin
7'b0101111: if (P.A_SUPPORTED) begin
if (InstrD[31:27] == 5'b00010)
ControlsD = `CTRLW'b1_000_00_10_001_0_0_0_0_0_0_0_0_0_01_0; // lr
else if (InstrD[31:27] == 5'b00011)
@ -207,16 +204,16 @@ module controller(
else if (MFunctD)
ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_0_0_0_0_1_00_0; // Multiply/divide
7'b0110111: ControlsD = `CTRLW'b1_100_01_00_000_0_0_0_1_0_0_0_0_0_00_0; // lui
7'b0111011: if (RFunctD & (`XLEN == 64))
7'b0111011: if (RFunctD & (P.XLEN == 64))
ControlsD = `CTRLW'b1_000_00_00_000_0_1_0_0_1_0_0_0_0_00_0; // R-type W instructions for RV64i
else if (MFunctD & (`XLEN == 64))
else if (MFunctD & (P.XLEN == 64))
ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_1_0_0_0_1_00_0; // W-type Multiply/Divide
7'b1100011: if (BFunctD)
ControlsD = `CTRLW'b0_010_11_00_000_1_0_0_0_0_0_0_0_0_00_0; // branches
7'b1100111: if (JFunctD)
ControlsD = `CTRLW'b1_000_01_00_000_0_0_1_1_0_0_0_0_0_00_0; // jalr
7'b1101111: ControlsD = `CTRLW'b1_011_11_00_000_0_0_1_1_0_0_0_0_0_00_0; // jal
7'b1110011: if (`ZICSR_SUPPORTED) begin
7'b1110011: if (P.ZICSR_SUPPORTED) begin
if (Funct3D == 3'b000)
ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_1_0_0_00_0; // privileged; decoded further in priveleged modules
else
@ -229,7 +226,7 @@ module controller(
// Unswizzle control bits
// Squash control signals if coming from an illegal compressed instruction
// On RV32E, can't write to upper 16 registers. Checking reads to upper 16 is more costly so disregard them.
assign IllegalERegAdrD = `E_SUPPORTED & `ZICSR_SUPPORTED & ControlsD[`CTRLW-1] & InstrD[11];
assign IllegalERegAdrD = P.E_SUPPORTED & P.ZICSR_SUPPORTED & ControlsD[`CTRLW-1] & InstrD[11];
//assign IllegalBaseInstrD = 1'b0;
assign {BaseRegWriteD, ImmSrcD, ALUSrcAD, BaseALUSrcBD, MemRWD,
ResultSrcD, BranchD, ALUOpD, JumpD, ALUResultSrcD, BaseW64D, CSRReadD,
@ -247,17 +244,17 @@ module controller(
assign BaseSubArithD = ALUOpD & (subD | sraD | sltD | sltuD);
// bit manipulation Configuration Block
if (`ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED | `ZBC_SUPPORTED) begin: bitmanipi //change the conditional expression to OR any Z supported flags
if (P.ZBS_SUPPORTED | P.ZBA_SUPPORTED | P.ZBB_SUPPORTED | P.ZBC_SUPPORTED) begin: bitmanipi //change the conditional expression to OR any Z supported flags
logic IllegalBitmanipInstrD; // Unrecognized B instruction
logic BRegWriteD; // Indicates if it is a R type BMU instruction in decode stage
logic BW64D; // Indicates if it is a W type BMU instruction in decode stage
logic BSubArithD; // TRUE for BMU ext, clr, andn, orn, xnor
logic BALUSrcBD; // BMU alu src select signal
bmuctrl bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD,
bmuctrl #(P) bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD,
.BRegWriteD, .BALUSrcBD, .BW64D, .BSubArithD, .IllegalBitmanipInstrD, .StallE, .FlushE,
.ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BALUControlE);
if (`ZBA_SUPPORTED) begin
if (P.ZBA_SUPPORTED) begin
// ALU Decoding is more comprehensive when ZBA is supported. slt and slti conflicts with sh1add, sh1add.uw
assign sltD = (Funct3D == 3'b010 & (~(Funct7D[4]) | ~OpD[5])) ;
end else assign sltD = (Funct3D == 3'b010);
@ -290,7 +287,7 @@ module controller(
// Fences
// Ordinary fence is presently a nop
// fence.i flushes the D$ and invalidates the I$ if Zifencei is supported and I$ is implemented
if (`ZIFENCEI_SUPPORTED & `ICACHE_SUPPORTED) begin:fencei
if (P.ZIFENCEI_SUPPORTED & P.ICACHE_SUPPORTED) begin:fencei
logic FenceID;
assign FenceID = FenceXD & (Funct3D == 3'b001); // is it a FENCE.I instruction?
assign InvalidateICacheD = FenceID;
@ -338,5 +335,5 @@ module controller(
// the synchronous DTIM cannot read immediately after write
// a cache cannot read or write immediately after a write
assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & `DCACHE_SUPPORTED)) | (|AtomicD));
assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED)) | (|AtomicD));
endmodule

View File

@ -27,16 +27,14 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module datapath (
module datapath import cvw::*; #(parameter cvw_t P) (
input logic clk, reset,
// Decode stage signals
input logic [2:0] ImmSrcD, // Selects type of immediate extension
input logic [31:0] InstrD, // Instruction in Decode stage
// Execute stage signals
input logic [`XLEN-1:0] PCE, // PC in Execute stage
input logic [`XLEN-1:0] PCLinkE, // PC + 4 (of instruction in Execute stage)
input logic [P.XLEN-1:0] PCE, // PC in Execute stage
input logic [P.XLEN-1:0] PCLinkE, // PC + 4 (of instruction in Execute stage)
input logic [2:0] Funct3E, // Funct3 field of instruction in Execute stage
input logic StallE, FlushE, // Stall, flush Execute stage
input logic [1:0] ForwardAE, ForwardBE, // Forward ALU operands from later stages
@ -51,24 +49,24 @@ module datapath (
input logic [2:0] ZBBSelectE, // ZBB mux select signal
input logic [2:0] BALUControlE, // ALU Control signals for B instructions in Execute Stage
output logic [1:0] FlagsE, // Comparison flags ({eq, lt})
output logic [`XLEN-1:0] IEUAdrE, // Address computed by ALU
output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B
output logic [P.XLEN-1:0] IEUAdrE, // Address computed by ALU
output logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B
// Memory stage signals
input logic StallM, FlushM, // Stall, flush Memory stage
input logic FWriteIntM, FCvtIntW, // FPU writes integer register file, FPU converts float to int
input logic [`XLEN-1:0] FIntResM, // FPU integer result
output logic [`XLEN-1:0] SrcAM, // ALU's Source A in Memory stage to privilege unit for CSR writes
output logic [`XLEN-1:0] WriteDataM, // Write data in Memory stage
input logic [P.XLEN-1:0] FIntResM, // FPU integer result
output logic [P.XLEN-1:0] SrcAM, // ALU's Source A in Memory stage to privilege unit for CSR writes
output logic [P.XLEN-1:0] WriteDataM, // Write data in Memory stage
// Writeback stage signals
input logic StallW, FlushW, // Stall, flush Writeback stage
input logic RegWriteW, IntDivW, // Write register file, integer divide instruction
input logic SquashSCW, // Squash a store conditional when a conflict arose
input logic [2:0] ResultSrcW, // Select source of result to write back to register file
input logic [`XLEN-1:0] FCvtIntResW, // FPU convert fp to integer result
input logic [`XLEN-1:0] ReadDataW, // Read data from LSU
input logic [`XLEN-1:0] CSRReadValW, // CSR read result
input logic [`XLEN-1:0] MDUResultW, // MDU (Multiply/divide unit) result
input logic [`XLEN-1:0] FIntDivResultW, // FPU's integer divide result
input logic [P.XLEN-1:0] FCvtIntResW, // FPU convert fp to integer result
input logic [P.XLEN-1:0] ReadDataW, // Read data from LSU
input logic [P.XLEN-1:0] CSRReadValW, // CSR read result
input logic [P.XLEN-1:0] MDUResultW, // MDU (Multiply/divide unit) result
input logic [P.XLEN-1:0] FIntDivResultW, // FPU's integer divide result
// Hazard Unit signals
output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, // Register sources to read in Decode or Execute stage
output logic [4:0] RdE, RdM, RdW // Register destinations in Execute, Memory, or Writeback stage
@ -76,64 +74,64 @@ module datapath (
// Fetch stage signals
// Decode stage signals
logic [`XLEN-1:0] R1D, R2D; // Read data from Rs1 (RD1), Rs2 (RD2)
logic [`XLEN-1:0] ImmExtD; // Extended immediate in Decode stage
logic [P.XLEN-1:0] R1D, R2D; // Read data from Rs1 (RD1), Rs2 (RD2)
logic [P.XLEN-1:0] ImmExtD; // Extended immediate in Decode stage
logic [4:0] RdD; // Destination register in Decode stage
// Execute stage signals
logic [`XLEN-1:0] R1E, R2E; // Source operands read from register file
logic [`XLEN-1:0] ImmExtE; // Extended immediate in Execute stage
logic [`XLEN-1:0] SrcAE, SrcBE; // ALU operands
logic [`XLEN-1:0] ALUResultE, AltResultE, IEUResultE; // ALU result, Alternative result (ImmExtE or PC+4), result of execution stage
logic [P.XLEN-1:0] R1E, R2E; // Source operands read from register file
logic [P.XLEN-1:0] ImmExtE; // Extended immediate in Execute stage
logic [P.XLEN-1:0] SrcAE, SrcBE; // ALU operands
logic [P.XLEN-1:0] ALUResultE, AltResultE, IEUResultE; // ALU result, Alternative result (ImmExtE or PC+4), result of execution stage
// Memory stage signals
logic [`XLEN-1:0] IEUResultM; // Result from execution stage
logic [`XLEN-1:0] IFResultM; // Result from either IEU or single-cycle FPU op writing an integer register
logic [P.XLEN-1:0] IEUResultM; // Result from execution stage
logic [P.XLEN-1:0] IFResultM; // Result from either IEU or single-cycle FPU op writing an integer register
// Writeback stage signals
logic [`XLEN-1:0] SCResultW; // Store Conditional result
logic [`XLEN-1:0] ResultW; // Result to write to register file
logic [`XLEN-1:0] IFResultW; // Result from either IEU or single-cycle FPU op writing an integer register
logic [`XLEN-1:0] IFCvtResultW; // Result from IEU, signle-cycle FPU op, or 2-cycle FCVT float to int
logic [`XLEN-1:0] MulDivResultW; // Multiply always comes from MDU. Divide could come from MDU or FPU (when using fdivsqrt for integer division)
logic [P.XLEN-1:0] SCResultW; // Store Conditional result
logic [P.XLEN-1:0] ResultW; // Result to write to register file
logic [P.XLEN-1:0] IFResultW; // Result from either IEU or single-cycle FPU op writing an integer register
logic [P.XLEN-1:0] IFCvtResultW; // Result from IEU, signle-cycle FPU op, or 2-cycle FCVT float to int
logic [P.XLEN-1:0] MulDivResultW; // Multiply always comes from MDU. Divide could come from MDU or FPU (when using fdivsqrt for integer division)
// Decode stage
assign Rs1D = InstrD[19:15];
assign Rs2D = InstrD[24:20];
assign RdD = InstrD[11:7];
regfile regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, R1D, R2D);
extend ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ImmExtD);
regfile #(P.XLEN, P.E_SUPPORTED) regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, R1D, R2D);
extend #(P.XLEN, P.A_SUPPORTED) ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ImmExtD);
// Execute stage pipeline register and logic
flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E);
flopenrc #(`XLEN) RD2EReg(clk, reset, FlushE, ~StallE, R2D, R2E);
flopenrc #(`XLEN) ImmExtEReg(clk, reset, FlushE, ~StallE, ImmExtD, ImmExtE);
flopenrc #(P.XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E);
flopenrc #(P.XLEN) RD2EReg(clk, reset, FlushE, ~StallE, R2D, R2E);
flopenrc #(P.XLEN) ImmExtEReg(clk, reset, FlushE, ~StallE, ImmExtD, ImmExtE);
flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E);
flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E);
flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE);
mux3 #(`XLEN) faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE);
mux3 #(`XLEN) fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE);
comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE);
mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE);
mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE);
alu #(`XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE);
mux2 #(`XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE);
mux2 #(`XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE);
mux3 #(P.XLEN) faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE);
mux3 #(P.XLEN) fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE);
comparator #(P.XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE);
mux2 #(P.XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE);
mux2 #(P.XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE);
alu #(P.XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE);
mux2 #(P.XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE);
mux2 #(P.XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE);
// Memory stage pipeline register
flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM);
flopenrc #(`XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM);
flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM);
flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM);
flopenrc #(P.XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM);
flopenrc #(P.XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM);
flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM);
flopenrc #(P.XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM);
// Writeback stage pipeline register and logic
flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
flopenrc #(P.XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
// floating point inputs: FIntResM comes from fclass, fcmp, fmv; FCvtIntResW comes from fcvt
if (`F_SUPPORTED) begin:fpmux
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
if (`IDIV_ON_FPU) begin
mux2 #(`XLEN) divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW);
if (P.F_SUPPORTED) begin:fpmux
mux2 #(P.XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
mux2 #(P.XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
if (P.IDIV_ON_FPU) begin
mux2 #(P.XLEN) divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW);
end else begin
assign MulDivResultW = MDUResultW;
end
@ -142,9 +140,9 @@ module datapath (
assign IFCvtResultW = IFResultW;
assign MulDivResultW = MDUResultW;
end
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW);
mux5 #(P.XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW);
// handle Store Conditional result if atomic extension supported
if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW};
if (P.A_SUPPORTED) assign SCResultW = {{(P.XLEN-1){1'b0}}, SquashSCW};
else assign SCResultW = 0;
endmodule
endmodule

View File

@ -27,29 +27,27 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module extend (
module extend #(parameter XLEN, A_SUPPORTED) (
input logic [31:7] InstrD, // All instruction bits except opcode (lower 7 bits)
input logic [2:0] ImmSrcD, // Select what kind of extension to perform
output logic [`XLEN-1:0 ] ImmExtD); // Extended immediate
output logic [XLEN-1:0 ] ImmExtD); // Extended immediate
localparam [`XLEN-1:0] undefined = {(`XLEN){1'bx}}; // could change to 0 after debug
localparam [XLEN-1:0] undefined = {(XLEN){1'bx}}; // could change to 0 after debug
always_comb
case(ImmSrcD)
// I-type
3'b000: ImmExtD = {{(`XLEN-12){InstrD[31]}}, InstrD[31:20]};
3'b000: ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[31:20]};
// S-type (stores)
3'b001: ImmExtD = {{(`XLEN-12){InstrD[31]}}, InstrD[31:25], InstrD[11:7]};
3'b001: ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[31:25], InstrD[11:7]};
// B-type (branches)
3'b010: ImmExtD = {{(`XLEN-12){InstrD[31]}}, InstrD[7], InstrD[30:25], InstrD[11:8], 1'b0};
3'b010: ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[7], InstrD[30:25], InstrD[11:8], 1'b0};
// J-type (jal)
3'b011: ImmExtD = {{(`XLEN-20){InstrD[31]}}, InstrD[19:12], InstrD[20], InstrD[30:21], 1'b0};
3'b011: ImmExtD = {{(XLEN-20){InstrD[31]}}, InstrD[19:12], InstrD[20], InstrD[30:21], 1'b0};
// U-type (lui, auipc)
3'b100: ImmExtD = {{(`XLEN-31){InstrD[31]}}, InstrD[30:12], 12'b0};
3'b100: ImmExtD = {{(XLEN-31){InstrD[31]}}, InstrD[30:12], 12'b0};
// Store Conditional: zero offset
3'b101: if (`A_SUPPORTED) ImmExtD = 0;
3'b101: if (A_SUPPORTED) ImmExtD = 0;
else ImmExtD = undefined;
default: ImmExtD = undefined; // undefined
endcase

View File

@ -27,8 +27,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module forward(
// Detect hazards
input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, // Source and destination registers

View File

@ -26,45 +26,44 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module ieu (
module ieu import cvw::*; #(parameter cvw_t P) (
input logic clk, reset,
// Decode stage signals
input logic [31:0] InstrD, // Instruction
input logic IllegalIEUFPUInstrD, // Illegal instruction
output logic IllegalBaseInstrD, // Illegal I-type instruction, or illegal RV32 access to upper 16 registers
// Execute stage signals
input logic [`XLEN-1:0] PCE, // PC
input logic [`XLEN-1:0] PCLinkE, // PC + 4
input logic [P.XLEN-1:0] PCE, // PC
input logic [P.XLEN-1:0] PCLinkE, // PC + 4
output logic PCSrcE, // Select next PC (between PC+4 and IEUAdrE)
input logic FWriteIntE, FCvtIntE, // FPU writes to integer register file, FPU converts float to int
output logic [`XLEN-1:0] IEUAdrE, // Memory address
output logic [P.XLEN-1:0] IEUAdrE, // Memory address
output logic IntDivE, W64E, // Integer divide, RV64 W-type instruction
output logic [2:0] Funct3E, // Funct3 instruction field
output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU src inputs before the mux choosing between them and PCE to put in srcA/B
output logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU src inputs before the mux choosing between them and PCE to put in srcA/B
output logic [4:0] RdE, // Destination register
// Memory stage signals
input logic SquashSCW, // Squash store conditional, from LSU
output logic [1:0] MemRWM, // Read/write control goes to LSU
output logic [1:0] AtomicM, // Atomic control goes to LSU
output logic [`XLEN-1:0] WriteDataM, // Write data to LSU
output logic [P.XLEN-1:0] WriteDataM, // Write data to LSU
output logic [2:0] Funct3M, // Funct3 (size and signedness) to LSU
output logic [`XLEN-1:0] SrcAM, // ALU SrcA to Privileged unit and FPU
output logic [P.XLEN-1:0] SrcAM, // ALU SrcA to Privileged unit and FPU
output logic [4:0] RdM, // Destination register
input logic [`XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp)
input logic [P.XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp)
output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$
output logic InstrValidD, InstrValidE, InstrValidM,// Instruction is valid
output logic BranchD, BranchE,
output logic JumpD, JumpE,
// Writeback stage signals
input logic [`XLEN-1:0] FIntDivResultW, // Integer divide result from FPU fdivsqrt)
input logic [`XLEN-1:0] CSRReadValW, // CSR read value,
input logic [`XLEN-1:0] MDUResultW, // multiply/divide unit result
input logic [`XLEN-1:0] FCvtIntResW, // FPU's float to int conversion result
input logic [P.XLEN-1:0] FIntDivResultW, // Integer divide result from FPU fdivsqrt)
input logic [P.XLEN-1:0] CSRReadValW, // CSR read value,
input logic [P.XLEN-1:0] MDUResultW, // multiply/divide unit result
input logic [P.XLEN-1:0] FCvtIntResW, // FPU's float to int conversion result
input logic FCvtIntW, // FPU converts float to int
output logic [4:0] RdW, // Destination register
input logic [`XLEN-1:0] ReadDataW, // LSU's read data
input logic [P.XLEN-1:0] ReadDataW, // LSU's read data
// Hazard unit signals
input logic StallD, StallE, StallM, StallW, // Stall signals from hazard unit
input logic FlushD, FlushE, FlushM, FlushW, // Flush signals
@ -96,7 +95,7 @@ module ieu (
logic BranchSignedE; // Branch does signed comparison on operands
logic MDUE; // Multiply/divide instruction
controller c(
controller #(P) c(
.clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD,
.IllegalIEUFPUInstrD, .IllegalBaseInstrD, .StallE, .FlushE, .FlagsE, .FWriteIntE,
.PCSrcE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .MemReadE, .CSRReadE,
@ -105,7 +104,7 @@ controller c(
.RegWriteM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM,
.StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD);
datapath dp(
datapath #(P) dp(
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, .W64E, .SubArithE,
.Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .JumpE, .BranchSignedE,
.PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, .BSelectE, .ZBBSelectE, .BALUControlE,

View File

@ -27,18 +27,16 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module regfile (
module regfile #(parameter XLEN, E_SUPPORTED) (
input logic clk, reset,
input logic we3, // Write enable
input logic [4:0] a1, a2, a3, // Source registers to read (a1, a2), destination register to write (a3)
input logic [`XLEN-1:0] wd3, // Write data for port 3
output logic [`XLEN-1:0] rd1, rd2); // Read data for ports 1, 2
input logic [XLEN-1:0] wd3, // Write data for port 3
output logic [XLEN-1:0] rd1, rd2); // Read data for ports 1, 2
localparam NUMREGS = `E_SUPPORTED ? 16 : 32; // only 16 registers in E mode
localparam NUMREGS = E_SUPPORTED ? 16 : 32; // only 16 registers in E mode
logic [`XLEN-1:0] rf[NUMREGS-1:1];
logic [XLEN-1:0] rf[NUMREGS-1:1];
integer i;
// Three ported register file

View File

@ -27,9 +27,7 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module RASPredictor #(parameter int StackSize = 16 )(
module RASPredictor import cvw::*; #(parameter cvw_t P, StackSize = 16 )(
input logic clk,
input logic reset,
input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM,
@ -37,15 +35,15 @@ module RASPredictor #(parameter int StackSize = 16 )(
input logic ReturnD,
input logic ReturnE, CallE, // Instr class
input logic BPReturnF,
input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a call
output logic [`XLEN-1:0] RASPCF // Top of the stack
input logic [P.XLEN-1:0] PCLinkE, // PC of instruction after a call
output logic [P.XLEN-1:0] RASPCF // Top of the stack
);
logic CounterEn;
localparam Depth = $clog2(StackSize);
logic [Depth-1:0] NextPtr, Ptr, P1, M1, IncDecPtr;
logic [StackSize-1:0] [`XLEN-1:0] memory;
logic [StackSize-1:0] [P.XLEN-1:0] memory;
integer index;
logic PopF;
@ -85,7 +83,7 @@ module RASPredictor #(parameter int StackSize = 16 )(
always_ff @ (posedge clk) begin
if(reset) begin
for(index=0; index<StackSize; index++)
memory[index] <= {`XLEN{1'b0}};
memory[index] <= {P.XLEN{1'b0}};
end else if(PushE) begin
memory[NextPtr] <= #1 PCLinkE;
end

View File

@ -26,27 +26,25 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`define INSTR_CLASS_PRED 1
module bpred (
module bpred import cvw::*; #(parameter cvw_t P) (
input logic clk, reset,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushD, FlushE, FlushM, FlushW,
// Fetch stage
// the prediction
input logic [31:0] InstrD, // Decompressed decode stage instruction. Used to decode instruction class
input logic [`XLEN-1:0] PCNextF, // Next Fetch Address
input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4
output logic [`XLEN-1:0] PC1NextF, // Branch Predictor predicted or corrected fetch address on miss prediction
output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage
input logic [P.XLEN-1:0] PCNextF, // Next Fetch Address
input logic [P.XLEN-1:0] PCPlus2or4F, // PCF+2/4
output logic [P.XLEN-1:0] PC1NextF, // Branch Predictor predicted or corrected fetch address on miss prediction
output logic [P.XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage
// Update Predictor
input logic [`XLEN-1:0] PCF, // Fetch stage instruction address
input logic [`XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took
input logic [`XLEN-1:0] PCE, // Execution stage instruction address
input logic [`XLEN-1:0] PCM, // Memory stage instruction address
input logic [P.XLEN-1:0] PCF, // Fetch stage instruction address
input logic [P.XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took
input logic [P.XLEN-1:0] PCE, // Execution stage instruction address
input logic [P.XLEN-1:0] PCM, // Memory stage instruction address
input logic [31:0] PostSpillInstrRawF, // Instruction
@ -55,9 +53,9 @@ module bpred (
input logic BranchD, BranchE,
input logic JumpD, JumpE,
input logic PCSrcE, // Executation stage branch is taken
input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address
input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
input logic [P.XLEN-1:0] IEUAdrE, // The branch/jump target address
input logic [P.XLEN-1:0] IEUAdrM, // The branch/jump target address
input logic [P.XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as call, return, jr (not return), j, br
// Report branch prediction status
@ -71,21 +69,21 @@ module bpred (
logic [1:0] BPDirPredF;
logic [`XLEN-1:0] BPBTAF, RASPCF;
logic [P.XLEN-1:0] BPBTAF, RASPCF;
logic BPPCWrongE;
logic IClassWrongE;
logic BPDirPredWrongE;
logic BPPCSrcF;
logic [`XLEN-1:0] BPPCF;
logic [`XLEN-1:0] PC0NextF;
logic [`XLEN-1:0] PCCorrectE;
logic [P.XLEN-1:0] BPPCF;
logic [P.XLEN-1:0] PC0NextF;
logic [P.XLEN-1:0] PCCorrectE;
logic [3:0] WrongPredInstrClassD;
logic BTBTargetWrongE;
logic RASTargetWrongE;
logic [`XLEN-1:0] BPBTAD;
logic [P.XLEN-1:0] BPBTAD;
logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF;
logic BPBranchF, BPJumpF, BPReturnF, BPCallF;
@ -95,57 +93,58 @@ module bpred (
logic BranchM, JumpM, ReturnM, CallM;
logic BranchW, JumpW, ReturnW, CallW;
logic BPReturnWrongD;
logic [`XLEN-1:0] BPBTAE;
logic [P.XLEN-1:0] BPBTAE;
// Part 1 branch direction prediction
// look into the 2 port Sram model. something is wrong.
if (`BPRED_TYPE == "BP_TWOBIT") begin:Predictor
twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW,
if (P.BPRED_TYPE == BP_TWOBIT) begin:Predictor
twoBitPredictor #(P.XLEN, P.BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW,
.FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
.BranchE, .BranchM, .PCSrcE);
end else if (`BPRED_TYPE == "BP_GSHARE") begin:Predictor
gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
end else if (P.BPRED_TYPE == BP_GSHARE) begin:Predictor
gshare #(P.XLEN, P.BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM, .BPDirPredF, .BPDirPredWrongE,
.BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW,
.PCSrcE);
end else if (`BPRED_TYPE == "BP_GLOBAL") begin:Predictor
gshare #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
end else if (P.BPRED_TYPE == BP_GLOBAL) begin:Predictor
gshare #(P.XLEN, P.BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM, .BPDirPredF, .BPDirPredWrongE,
.BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW,
.PCSrcE);
end else if (`BPRED_TYPE == "BP_GSHARE_BASIC") begin:Predictor
gsharebasic #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
end else if (P.BPRED_TYPE == BP_GSHARE_BASIC) begin:Predictor
gsharebasic #(P.XLEN, P.BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
.BranchE, .BranchM, .PCSrcE);
end else if (`BPRED_TYPE == "BP_GLOBAL_BASIC") begin:Predictor
gsharebasic #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
end else if (P.BPRED_TYPE == BP_GLOBAL_BASIC) begin:Predictor
gsharebasic #(P.XLEN, P.BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
.BranchE, .BranchM, .PCSrcE);
end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor
// *** Fix me
/* -----\/----- EXCLUDED -----\/-----
localHistoryPredictor DirPredictor(.clk,
.reset, .StallF, .StallE,
.LookUpPC(PCNextF),
.Prediction(BPDirPredF),
// update
.UpdatePC(PCE),
.UpdateEN(InstrClassE[0] & ~StallE),
.PCSrcE,
.UpdatePrediction(InstrClassE[0]));
-----/\----- EXCLUDED -----/\----- */
end else if (P.BPRED_TYPE == BP_LOCAL_BASIC) begin:Predictor
localbpbasic #(P.XLEN, P.BPRED_NUM_LHR, P.BPRED_SIZE) DirPredictor(.clk, .reset,
.StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
.BranchE, .BranchM, .PCSrcE);
end else if (P.BPRED_TYPE == BP_LOCAL_AHEAD) begin:Predictor
localaheadbp #(P.XLEN, P.BPRED_NUM_LHR, P.BPRED_SIZE) DirPredictor(.clk, .reset,
.StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCM, .BPDirPredD(BPDirPredF), .BPDirPredWrongE,
.BranchE, .BranchM, .PCSrcE);
end else if (P.BPRED_TYPE == BP_LOCAL_REPAIR) begin:Predictor
localrepairbp #(P.XLEN, P.BPRED_NUM_LHR, P.BPRED_SIZE) DirPredictor(.clk, .reset,
.StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCE, .PCM, .BPDirPredD(BPDirPredF), .BPDirPredWrongE,
.BranchD, .BranchE, .BranchM, .PCSrcE);
end
// Part 2 Branch target address prediction
// BTB contains target address for all CFI
btb #(`BTB_SIZE)
btb #(P, P.BTB_SIZE)
TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM,
.BPBTAF, .BPBTAD, .BPBTAE,
@ -157,13 +156,13 @@ module bpred (
.InstrClassM({CallM, ReturnM, JumpM, BranchM}),
.InstrClassW({CallW, ReturnW, JumpW, BranchW}));
icpred #(`INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
icpred #(P, `INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW,
.CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF,
.BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .IClassWrongM, .IClassWrongE, .BPReturnWrongD);
// Part 3 RAS
RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
RASPredictor #(P) RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
.BPReturnF, .ReturnD, .ReturnE, .CallE,
.BPReturnWrongD, .RASPCF, .PCLinkE);
@ -179,21 +178,21 @@ module bpred (
// Output the predicted PC or corrected PC on miss-predict.
assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF;
mux2 #(`XLEN) pcmuxbp(BPBTAF, RASPCF, BPReturnF, BPPCF);
mux2 #(P.XLEN) pcmuxbp(BPBTAF, RASPCF, BPReturnF, BPPCF);
// Selects the BP or PC+2/4.
mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PC0NextF);
mux2 #(P.XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PC0NextF);
// If the prediction is wrong select the correct address.
mux2 #(`XLEN) pcmux1(PC0NextF, PCCorrectE, BPWrongE, PC1NextF);
mux2 #(P.XLEN) pcmux1(PC0NextF, PCCorrectE, BPWrongE, PC1NextF);
// Correct branch/jump target.
mux2 #(`XLEN) pccorrectemux(PCLinkE, IEUAdrE, PCSrcE, PCCorrectE);
mux2 #(P.XLEN) pccorrectemux(PCLinkE, IEUAdrE, PCSrcE, PCCorrectE);
// If the fence/csrw was predicted as a taken branch then we select PCF, rather than PCE.
// Effectively this is PCM+4 or the non-existant PCLinkM
if(`INSTR_CLASS_PRED) mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPWrongM, NextValidPCE);
if(`INSTR_CLASS_PRED) mux2 #(P.XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPWrongM, NextValidPCE);
else assign NextValidPCE = PCE;
if(`ZICOUNTERS_SUPPORTED) begin
logic [`XLEN-1:0] RASPCD, RASPCE;
if(P.ZICOUNTERS_SUPPORTED) begin
logic [P.XLEN-1:0] RASPCD, RASPCE;
logic BTAWrongE, RASPredPCWrongE;
// performance counters
// 1. class (class wrong / minstret) (IClassWrongM / csr) // Correct now
@ -209,8 +208,8 @@ module bpred (
assign BTAWrongE = (BPBTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE;
assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE;
flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
flopenrc #(P.XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
flopenrc #(P.XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM,
{BPDirPredWrongE, BTAWrongE, RASPredPCWrongE},
{BPDirPredWrongM, BTAWrongM, RASPredPCWrongM});

View File

@ -28,22 +28,20 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module btb #(parameter Depth = 10 ) (
module btb import cvw::*; #(parameter cvw_t P, Depth = 10 ) (
input logic clk,
input logic reset,
input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW,
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, // PC at various stages
output logic [`XLEN-1:0] BPBTAF, // BTB's guess at PC
output logic [`XLEN-1:0] BPBTAD,
output logic [`XLEN-1:0] BPBTAE,
input logic [P.XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, // PC at various stages
output logic [P.XLEN-1:0] BPBTAF, // BTB's guess at PC
output logic [P.XLEN-1:0] BPBTAD,
output logic [P.XLEN-1:0] BPBTAE,
output logic [3:0] BTBIClassF, // BTB's guess at instruction class
// update
input logic IClassWrongM, // BTB's instruction class guess was wrong
input logic IClassWrongE,
input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb
input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb
input logic [P.XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb
input logic [P.XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb
input logic [3:0] InstrClassD, // Instruction class to insert into btb
input logic [3:0] InstrClassE, // Instruction class to insert into btb
input logic [3:0] InstrClassM, // Instruction class to insert into btb
@ -51,12 +49,12 @@ module btb #(parameter Depth = 10 ) (
);
logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex;
logic [`XLEN-1:0] ResetPC;
logic [P.XLEN-1:0] ResetPC;
logic MatchD, MatchE, MatchM, MatchW, MatchX;
logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF;
logic [`XLEN+3:0] TableBTBPredF;
logic [`XLEN-1:0] IEUAdrW;
logic [`XLEN-1:0] PCW;
logic [P.XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF;
logic [P.XLEN+3:0] TableBTBPredF;
logic [P.XLEN-1:0] IEUAdrW;
logic [P.XLEN-1:0] PCW;
logic BTBWrongE, BPBTAWrongE;
logic BTBWrongM, BPBTAWrongM;
@ -75,7 +73,7 @@ module btb #(parameter Depth = 10 ) (
// during reset. The BTB must produce a non X PC1NextF to allow the simulation to run.
// While the mux could be included in IFU it is not necessary for the IROM/I$/bus.
// For now it is optimal to leave it here.
assign ResetPC = `RESET_VECTOR;
assign ResetPC = P.RESET_VECTOR[P.XLEN-1:0];
assign PCNextFIndex = reset ? ResetPC[Depth+1:2] : {PCNextF[Depth+1] ^ PCNextF[1], PCNextF[Depth:2]};
assign MatchD = PCFIndex == PCDIndex;
@ -93,22 +91,22 @@ module btb #(parameter Depth = 10 ) (
// An optimization may be using a PC relative address.
ram2p1r1wbe #(2**Depth, `XLEN+4) memory(
ram2p1r1wbe #(2**Depth, P.XLEN+4) memory(
.clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF),
.ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1));
flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BPBTAF, BPBTAD);
flopenrc #(P.XLEN) BTBD(clk, reset, FlushD, ~StallD, BPBTAF, BPBTAD);
// BPBTAE is not strickly necessary. However it is used by two parts of wally.
// 1. It gates updates to the BTB when the prediction does not change. This save power.
// 2. BPBTAWrongE is used by the performance counters to track when the BTB's BPBTA or instruction class is wrong.
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BPBTAD, BPBTAE);
flopenrc #(P.XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BPBTAD, BPBTAE);
assign BPBTAWrongE = (BPBTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]);
flopenrc #(1) BPBTAWrongMReg(clk, reset, FlushM, ~StallM, BPBTAWrongE, BPBTAWrongM);
assign BTBWrongM = BPBTAWrongM | IClassWrongM;
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);
flopenr #(P.XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
flopenr #(P.XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);
endmodule

View File

@ -27,9 +27,9 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module gshare #(parameter k = 10,
module gshare #(parameter XLEN,
parameter k = 10,
parameter integer TYPE = 1) (
input logic clk,
input logic reset,
@ -38,7 +38,7 @@ module gshare #(parameter k = 10,
output logic [1:0] BPDirPredF,
output logic BPDirPredWrongE,
// update
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,
input logic [XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,
input logic BPBranchF, BranchD, BranchE, BranchM, BranchW, PCSrcE
);

View File

@ -27,9 +27,8 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module gsharebasic #(parameter k = 10,
module gsharebasic #(parameter XLEN,
parameter k = 10,
parameter TYPE = 1) (
input logic clk,
input logic reset,
@ -38,7 +37,7 @@ module gsharebasic #(parameter k = 10,
output logic [1:0] BPDirPredF,
output logic BPDirPredWrongE,
// update
input logic [`XLEN-1:0] PCNextF, PCM,
input logic [XLEN-1:0] PCNextF, PCM,
input logic BranchE, BranchM, PCSrcE
);

View File

@ -26,10 +26,7 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module icpred #(parameter INSTR_CLASS_PRED = 1)(
module icpred import cvw::*; #(parameter cvw_t P, INSTR_CLASS_PRED = 1)(
input logic clk, reset,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushD, FlushE, FlushM, FlushW,
@ -56,10 +53,10 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)(
logic ccall, cj, cjr, ccallr, CJumpF, CBranchF;
logic NCJumpF, NCBranchF;
if(`C_SUPPORTED) begin
if(P.C_SUPPORTED) begin
logic [4:0] CompressedOpcF;
assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]};
assign ccall = CompressedOpcF == 5'h09 & `XLEN == 32;
assign ccall = CompressedOpcF == 5'h09 & P.XLEN == 32;
assign cj = CompressedOpcF == 5'h0d;
assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
assign ccallr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
@ -72,13 +69,13 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)(
assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F;
assign NCBranchF = PostSpillInstrRawF[6:0] == 7'h63;
assign BPBranchF = NCBranchF | (`C_SUPPORTED & CBranchF);
assign BPJumpF = NCJumpF | (`C_SUPPORTED & (CJumpF));
assign BPBranchF = NCBranchF | (P.C_SUPPORTED & CBranchF);
assign BPJumpF = NCJumpF | (P.C_SUPPORTED & (CJumpF));
assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // returnurn must returnurn to ra or r5
(`C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
(P.C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
assign BPCallF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // call(r) must link to ra or x5
(`C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
(P.C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
end else begin
// This section connects the BTB's instruction class prediction.

View File

@ -1,130 +0,0 @@
///////////////////////////////////////////
// locallHistoryPredictor.sv
//
// Written: Shreya Sanghai
// Email: ssanghai@hmc.edu
// Created: March 16, 2021
// Modified:
//
// Purpose: Global History Branch predictor with parameterized global history register
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module localHistoryPredictor #(parameter m = 6, // 2^m = number of local history branches
k = 10) ( // number of past branches stored
input logic clk,
input logic reset,
input logic StallF, StallE,
input logic [`XLEN-1:0] LookUpPC,
output logic [1:0] Prediction,
// update
input logic [`XLEN-1:0] UpdatePC,
input logic UpdateEN, PCSrcE,
input logic [1:0] UpdatePrediction
);
logic [2**m-1:0][k-1:0] LHRNextF;
logic [k-1:0] LHRF, ForwardLHRNext, LHRFNext;
logic [m-1:0] LookUpPCIndex, UpdatePCIndex;
logic [1:0] PredictionMemory;
logic DoForwarding, DoForwardingF, DoForwardingPHT, DoForwardingPHTF;
logic [1:0] UpdatePredictionF;
assign LHRFNext = {PCSrcE, LHRF[k-1:1]};
assign UpdatePCIndex = {UpdatePC[m+1] ^ UpdatePC[1], UpdatePC[m:2]};
assign LookUpPCIndex = {LookUpPC[m+1] ^ LookUpPC[1], LookUpPC[m:2]};
// INCASE we do ahead pipelining
// ram2p1r1wb #(m,k) LHR(.clk(clk)),
// .reset(reset),
// .RA1(LookUpPCIndex), // need hashing function to get correct PC address
// .RD1(LHRF),
// .REN1(~StallF),
// .WA1(UpdatePCIndex),
// .WD1(LHRENExt),
// .WEN1(UpdateEN),
// .BitWEN1(2'b11));
genvar index;
for (index = 0; index < 2**m; index = index +1) begin:localhist
flopenr #(k) LocalHistoryRegister(.clk, .reset, .en(UpdateEN & (index == UpdatePCIndex)),
.d(LHRFNext), .q(LHRNextF[index]));
end
// need to forward when updating to the same address as reading.
// first we compare to see if the update and lookup addreses are the same
assign DoForwarding = LookUpPCIndex == UpdatePCIndex;
assign ForwardLHRNext = DoForwarding ? LHRFNext :LHRNextF[LookUpPCIndex];
// Make Prediction by reading the correct address in the PHT and also update the new address in the PHT
// LHR referes to the address that the past k branches points to in the prediction stage
// LHRE refers to the address that the past k branches points to in the exectution stage
ram2p1r1wb #(k, 2) PHT(.clk(clk),
.reset(reset),
.ra1(ForwardLHRNext),
.rd1(PredictionMemory),
.ren1(~StallF),
.wa2(LHRFNext),
.wd2(UpdatePrediction),
.wen2(UpdateEN),
.bwe2(2'b11));
assign DoForwardingPHT = LHRFNext == ForwardLHRNext;
// register the update value and the forwarding signal into the Fetch stage
// TODO: add stall logic ***
flopr #(1) DoForwardingReg(.clk(clk),
.reset(reset),
.d(DoForwardingPHT),
.q(DoForwardingPHTF));
flopr #(2) UpdatePredictionReg(.clk(clk),
.reset(reset),
.d(UpdatePrediction),
.q(UpdatePredictionF));
assign Prediction = DoForwardingPHTF ? UpdatePredictionF : PredictionMemory;
//pipeline for LHR
flopenrc #(k) LHRFReg(.clk(clk),
.reset(reset),
.en(~StallF),
.clear(1'b0),
.d(ForwardLHRNext),
.q(LHRF));
/*
flopenrc #(k) LHRDReg(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.d(LHRF),
.q(LHRD));
flopenrc #(k) LHREReg(.clk(clk),
.reset(reset),
.en(~StallE),
.clear(FlushE),
.d(LHRD),
.q(LHRE));
*/
endmodule

View File

@ -0,0 +1,114 @@
///////////////////////////////////////////
// localaheadbp
//
// Written: Ross Thompson
// Email: ross1728@gmail.com
// Created: 16 March 2021
//
// Purpose: local history branch predictor with ahead pipelining and SRAM memories.
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module localaheadbp #(parameter XLEN,
parameter m = 6, // 2^m = number of local history branches
parameter k = 10) ( // number of past branches stored
input logic clk,
input logic reset,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushD, FlushE, FlushM, FlushW,
output logic [1:0] BPDirPredD,
output logic BPDirPredWrongE,
// update
input logic [XLEN-1:0] PCNextF, PCM,
input logic BranchE, BranchM, PCSrcE
);
logic [k-1:0] IndexNextF, IndexM;
//logic [1:0] BPDirPredD, BPDirPredE;
logic [1:0] BPDirPredE;
logic [1:0] BPDirPredM;
logic [1:0] NewBPDirPredE, NewBPDirPredM, NewBPDirPredW;
logic [k-1:0] LHRF, LHRD, LHRE, LHRM, LHRW, LHRNextF;
logic [k-1:0] LHRNextW;
logic PCSrcM;
logic [2**m-1:0][k-1:0] LHRArray;
logic [m-1:0] IndexLHRNextF, IndexLHRM;
logic [XLEN-1:0] PCW;
logic UpdateM;
//assign IndexNextF = LHR;
assign IndexM = LHRW;
ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
.ce1(~StallD), .ce2(~StallW & ~FlushW),
.ra1(LHRF),
.rd1(BPDirPredD),
.wa2(IndexM),
.wd2(NewBPDirPredW),
.we2(BranchM),
.bwe2(1'b1));
//flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD);
flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE);
flopenrc #(2) PredictionRegM(clk, reset, FlushM, ~StallM, BPDirPredE, BPDirPredM);
satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredM), .NewState(NewBPDirPredM));
//flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM);
flopenrc #(2) NewPredictionRegW(clk, reset, FlushW, ~StallW, NewBPDirPredM, NewBPDirPredW);
assign BPDirPredWrongE = PCSrcE != BPDirPredM[1] & BranchE;
// This is the main difference between global and local history basic implementations. In global,
// the ghr wraps back into itself directly without
// being pipelined. I.E. GHR is not read in F and then pipelined to M where it is updated. Instead
// GHR is both read and update in M. GHR is still pipelined so that the PHT is updated with the correct
// GHR. Local history in contrast must pipeline the specific history register read during F and then update
// that same one in M. This implementation does not forward if a branch matches in the D, E, or M stages.
assign LHRNextW = BranchM ? {PCSrcM, LHRW[k-1:1]} : LHRW;
// this is local history
//genvar index;
//assign UpdateM = BranchM & ~StallW & ~FlushW;
assign IndexLHRM = {PCW[m+1] ^ PCW[1], PCW[m:2]};
assign IndexLHRNextF = {PCNextF[m+1] ^ PCNextF[1], PCNextF[m:2]};
ram2p1r1wbe #(2**m, k) BHT(.clk(clk),
.ce1(~StallF), .ce2(~StallW & ~FlushW),
.ra1(IndexLHRNextF),
.rd1(LHRF),
.wa2(IndexLHRM),
.wd2(LHRNextW),
.we2(BranchM),
.bwe2('1));
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
//flopenrc #(k) LHRFReg(clk, reset, FlushD, ~StallF, LHRNextF, LHRF);
//assign LHRF = LHRNextF;
flopenrc #(k) LHRDReg(clk, reset, FlushD, ~StallD, LHRF, LHRD);
flopenrc #(k) LHREReg(clk, reset, FlushE, ~StallE, LHRD, LHRE);
flopenrc #(k) LHRMReg(clk, reset, FlushM, ~StallM, LHRE, LHRM);
flopenrc #(k) LHRWReg(clk, reset, FlushW, ~StallW, LHRM, LHRW);
flopenr #(XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
endmodule

View File

@ -0,0 +1,105 @@
///////////////////////////////////////////
// localbpbasic
//
// Written: Ross Thompson
// Email: ross1728@gmail.com
// Created: 16 March 2021
//
// Purpose: Local history branch predictor. Basic implementation without any repair and flop memories.
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module localbpbasic #(parameter XLEN,
parameter m = 6, // 2^m = number of local history branches
parameter k = 10) ( // number of past branches stored
input logic clk,
input logic reset,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushD, FlushE, FlushM, FlushW,
output logic [1:0] BPDirPredF,
output logic BPDirPredWrongE,
// update
input logic [XLEN-1:0] PCNextF, PCM,
input logic BranchE, BranchM, PCSrcE
);
logic [k-1:0] IndexNextF, IndexM;
logic [1:0] BPDirPredD, BPDirPredE;
logic [1:0] NewBPDirPredE, NewBPDirPredM;
logic [k-1:0] LHRF, LHRD, LHRE, LHRM, LHR;
logic [k-1:0] LHRNextW;
logic PCSrcM;
logic [2**m-1:0][k-1:0] LHRArray;
logic [m-1:0] IndexLHRNextF, IndexLHRM;
logic UpdateM;
assign IndexNextF = LHR;
assign IndexM = LHRM;
ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
.ce1(~StallF), .ce2(~StallW & ~FlushW),
.ra1(IndexNextF),
.rd1(BPDirPredF),
.wa2(IndexM),
.wd2(NewBPDirPredM),
.we2(BranchM),
.bwe2(1'b1));
flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD);
flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE);
satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE));
flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM);
assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE;
// This is the main difference between global and local history basic implementations. In global,
// the ghr wraps back into itself directly without
// being pipelined. I.E. GHR is not read in F and then pipelined to M where it is updated. Instead
// GHR is both read and update in M. GHR is still pipelined so that the PHT is updated with the correct
// GHR. Local history in contrast must pipeline the specific history register read during F and then update
// that same one in M. This implementation does not forward if a branch matches in the D, E, or M stages.
assign LHRNextW = BranchM ? {PCSrcM, LHRM[k-1:1]} : LHRM;
// this is local history
genvar index;
assign UpdateM = BranchM & ~StallW & ~FlushW;
assign IndexLHRM = {PCM[m+1] ^ PCM[1], PCM[m:2]};
for (index = 0; index < 2**m; index = index +1) begin:localhist
flopenr #(k) LocalHistoryRegister(.clk, .reset, .en(UpdateM & (index == IndexLHRM)),
.d(LHRNextW), .q(LHRArray[index]));
end
assign IndexLHRNextF = {PCNextF[m+1] ^ PCNextF[1], PCNextF[m:2]};
assign LHR = LHRArray[IndexLHRNextF];
// this is global history
//flopenr #(k) LHRReg(clk, reset, ~StallM & ~FlushM & BranchM, LHRNextW, LHR);
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
flopenrc #(k) LHRFReg(clk, reset, FlushD, ~StallF, LHR, LHRF);
flopenrc #(k) LHRDReg(clk, reset, FlushD, ~StallD, LHRF, LHRD);
flopenrc #(k) LHREReg(clk, reset, FlushE, ~StallE, LHRD, LHRE);
flopenrc #(k) LHRMReg(clk, reset, FlushM, ~StallM, LHRE, LHRM);
endmodule

View File

@ -0,0 +1,135 @@
///////////////////////////////////////////
// localrepairbp
//
// Written: Ross Thompson
// Email: ross1728@gmail.com
// Created: 15 April 2023
//
// Purpose: Local history branch predictor with speculation and repair using CBH.
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module localrepairbp #(parameter XLEN,
parameter m = 6, // 2^m = number of local history branches
parameter k = 10) ( // number of past branches stored
input logic clk,
input logic reset,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushD, FlushE, FlushM, FlushW,
output logic [1:0] BPDirPredD,
output logic BPDirPredWrongE,
// update
input logic [XLEN-1:0] PCNextF, PCE, PCM,
input logic BranchD, BranchE, BranchM, PCSrcE
);
//logic [1:0] BPDirPredD, BPDirPredE;
logic [1:0] BPDirPredE;
logic [1:0] BPDirPredM;
logic [1:0] NewBPDirPredE, NewBPDirPredM, NewBPDirPredW;
logic [k-1:0] LHRF, LHRD, LHRE, LHRM, LHRW, LHRNextF;
logic [k-1:0] LHRNextW;
logic PCSrcM;
logic [2**m-1:0][k-1:0] LHRArray;
logic [m-1:0] IndexLHRNextF, IndexLHRM;
logic [XLEN-1:0] PCW;
logic [k-1:0] LHRCommittedF, LHRSpeculativeF;
logic [m-1:0] IndexLHRD;
logic [k-1:0] LHRNextE;
logic SpeculativeFlushedF;
ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
.ce1(~StallD), .ce2(~StallW & ~FlushW),
.ra1(LHRF),
.rd1(BPDirPredD),
.wa2(LHRW),
.wd2(NewBPDirPredW),
.we2(BranchM),
.bwe2(1'b1));
//flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD);
flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE);
flopenrc #(2) PredictionRegM(clk, reset, FlushM, ~StallM, BPDirPredE, BPDirPredM);
satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredM), .NewState(NewBPDirPredM));
//flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM);
flopenrc #(2) NewPredictionRegW(clk, reset, FlushW, ~StallW, NewBPDirPredM, NewBPDirPredW);
assign BPDirPredWrongE = PCSrcE != BPDirPredM[1] & BranchE;
// This is the main difference between global and local history basic implementations. In global,
// the ghr wraps back into itself directly without
// being pipelined. I.E. GHR is not read in F and then pipelined to M where it is updated. Instead
// GHR is both read and update in M. GHR is still pipelined so that the PHT is updated with the correct
// GHR. Local history in contrast must pipeline the specific history register read during F and then update
// that same one in M. This implementation does not forward if a branch matches in the D, E, or M stages.
assign LHRNextW = BranchM ? {PCSrcM, LHRW[k-1:1]} : LHRW;
// this is local history
assign IndexLHRM = {PCW[m+1] ^ PCW[1], PCW[m:2]};
assign IndexLHRNextF = {PCNextF[m+1] ^ PCNextF[1], PCNextF[m:2]};
ram2p1r1wbe #(2**m, k) BHT(.clk(clk),
.ce1(~StallF), .ce2(~StallW & ~FlushW),
.ra1(IndexLHRNextF),
.rd1(LHRCommittedF),
.wa2(IndexLHRM),
.wd2(LHRNextW),
.we2(BranchM),
.bwe2('1));
assign IndexLHRD = {PCE[m+1] ^ PCE[1], PCE[m:2]};
assign LHRNextE = BranchD ? {BPDirPredD[1], LHRE[k-1:1]} : LHRE;
// *** replace with a small CAM
ram2p1r1wbe #(2**m, k) SHB(.clk(clk),
.ce1(~StallF), .ce2(~StallE & ~FlushE),
.ra1(IndexLHRNextF),
.rd1(LHRSpeculativeF),
.wa2(IndexLHRD),
.wd2(LHRNextE),
.we2(BranchD),
.bwe2('1));
// **** replace with small CAM
logic [2**m-1:0] FlushedBits;
always_ff @(posedge clk) begin // Valid bit array,
SpeculativeFlushedF <= #1 FlushedBits[IndexLHRNextF];
if (reset | FlushD) FlushedBits <= #1 '1;
if(BranchD & ~StallE & ~FlushE) begin
FlushedBits[IndexLHRD] <= #1 '0;
end
end
//assign SpeculativeFlushedF = '1;
mux2 #(k) LHRMux(LHRSpeculativeF, LHRCommittedF, SpeculativeFlushedF, LHRF);
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
//flopenrc #(k) LHRFReg(clk, reset, FlushD, ~StallF, LHRNextF, LHRF);
//assign LHRF = LHRNextF;
flopenrc #(k) LHRDReg(clk, reset, FlushD, ~StallD, LHRF, LHRD);
flopenrc #(k) LHREReg(clk, reset, FlushE, ~StallE, LHRD, LHRE);
flopenrc #(k) LHRMReg(clk, reset, FlushM, ~StallM, LHRE, LHRM);
flopenrc #(k) LHRWReg(clk, reset, FlushW, ~StallW, LHRM, LHRW);
flopenr #(XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
endmodule

Some files were not shown because too many files have changed in this diff Show More