mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Merge branch 'main' of https://github.com/openhwgroup/cvw into dev
This commit is contained in:
commit
9b8a2303a9
@ -84,6 +84,11 @@ void setStats(int enable)
|
||||
READ_CTR(mhpmcounter10);
|
||||
READ_CTR(mhpmcounter11);
|
||||
READ_CTR(mhpmcounter12);
|
||||
READ_CTR(mhpmcounter13);
|
||||
READ_CTR(mhpmcounter14);
|
||||
READ_CTR(mhpmcounter15);
|
||||
READ_CTR(mhpmcounter16);
|
||||
READ_CTR(mhpmcounter17);
|
||||
|
||||
#undef READ_CTR
|
||||
}
|
||||
@ -167,18 +172,21 @@ void _init(int cid, int nc)
|
||||
counters[12] = read_csr(mhpmcounter12) - counters[12];
|
||||
counters[13] = read_csr(mhpmcounter13) - counters[13];
|
||||
counters[14] = read_csr(mhpmcounter14) - counters[14];
|
||||
counters[15] = read_csr(mhpmcounter15) - counters[15];
|
||||
counters[16] = read_csr(mhpmcounter16) - counters[16];
|
||||
counters[17] = read_csr(mhpmcounter17) - counters[17];
|
||||
|
||||
ee_printf("Load Stalls %d\n", counters[3]);
|
||||
ee_printf("D-Cache Accesses %d\n", counters[11]);
|
||||
ee_printf("D-Cache Misses %d\n", counters[12]);
|
||||
ee_printf("I-Cache Accesses %d\n", counters[13]);
|
||||
ee_printf("I-Cache Misses %d\n", counters[14]);
|
||||
ee_printf("Branches %d\n", counters[5]);
|
||||
ee_printf("Branches Miss Predictions %d\n", counters[4]);
|
||||
ee_printf("BTB Misses %d\n", counters[6]);
|
||||
ee_printf("Jump, JAL, JALR %d\n", counters[7]);
|
||||
ee_printf("RAS Wrong %d\n", counters[8]);
|
||||
ee_printf("Returns %d\n", counters[9]);
|
||||
ee_printf("Load Stalls %d\n", counters[11]);
|
||||
ee_printf("D-Cache Accesses %d\n", counters[13]);
|
||||
ee_printf("D-Cache Misses %d\n", counters[14]);
|
||||
ee_printf("I-Cache Accesses %d\n", counters[16]);
|
||||
ee_printf("I-Cache Misses %d\n", counters[17]);
|
||||
ee_printf("Branches %d\n", counters[3]);
|
||||
ee_printf("Branches Miss Predictions %d\n", counters[7]);
|
||||
ee_printf("BTB Misses %d\n", counters[8]);
|
||||
ee_printf("Jump and JR %d\n", counters[4]);
|
||||
ee_printf("RAS Wrong %d\n", counters[9]);
|
||||
ee_printf("Returns %d\n", counters[5]);
|
||||
ee_printf("BP Class Wrong %d\n", counters[10]);
|
||||
ee_printf("Done printing performance counters\n");
|
||||
|
||||
|
@ -279,12 +279,13 @@ if(sys.argv[1] == '-b'):
|
||||
dct[PredType] = (currSize, currPercent)
|
||||
print(dct)
|
||||
fig, axes = plt.subplots()
|
||||
marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*'}
|
||||
colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue'}
|
||||
marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*', 'tenlocal' : '.', 'eightlocal' : ',', 'fourlocal' : 'x', 'tenlocalahead' : '.', 'eightlocalahead' : ',', 'fourlocalahead' : 'x', 'tenlocalrepair' : 'x'}
|
||||
colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue', 'tenlocal' : 'lightblue', 'eightlocal' : 'lightblue', 'fourlocal' : 'lightblue', 'tenlocalahead' : 'lightblue', 'eightlocalahead' : 'lightblue', 'fourlocalahead' : 'lightblue', 'tenlocalrepair' : 'lightblue'}
|
||||
for cat in dct:
|
||||
(x, y) = dct[cat]
|
||||
x=[int(2**int(v)) for v in x]
|
||||
print(x, y)
|
||||
#print(x, y)
|
||||
print(cat)
|
||||
axes.plot(x,y, color=colors[cat])
|
||||
axes.scatter(x,y, label=cat, marker=marker[cat], color=colors[cat])
|
||||
#plt.scatter(x, y, label=cat)
|
||||
|
157
config/buildroot/config.vh
Normal file
157
config/buildroot/config.vh
Normal file
@ -0,0 +1,157 @@
|
||||
//////////////////////////////////////////
|
||||
// config.vh (buildroot configuration)
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 4 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Specify which features are configured
|
||||
// Macros to determine which modes are supported based on MISA
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// include shared configuration
|
||||
`include "wally-shared.vh"
|
||||
`include "BranchPredictorType.vh"
|
||||
|
||||
localparam FPGA = 1;
|
||||
localparam QEMU = 0;
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
localparam XLEN = 32'd64;
|
||||
|
||||
// IEEE 754 compliance
|
||||
localparam IEEE754 = 0;
|
||||
|
||||
localparam MISA = (32'h0014112D);
|
||||
localparam ZICSR_SUPPORTED = 1;
|
||||
localparam ZIFENCEI_SUPPORTED = 1;
|
||||
localparam ZICOUNTERS_SUPPORTED = 1;
|
||||
localparam COUNTERS = 12'd32;
|
||||
localparam ZFH_SUPPORTED = 0;
|
||||
localparam SSTC_SUPPORTED = 0;
|
||||
|
||||
// LSU microarchitectural Features
|
||||
localparam BUS_SUPPORTED = 1;
|
||||
localparam DCACHE_SUPPORTED = 1;
|
||||
localparam ICACHE_SUPPORTED = 1;
|
||||
localparam VIRTMEM_SUPPORTED = 1;
|
||||
localparam VECTORED_INTERRUPTS_SUPPORTED = 1 ;
|
||||
localparam BIGENDIAN_SUPPORTED = 1;
|
||||
|
||||
// TLB configuration. Entries should be a power of 2
|
||||
localparam ITLB_ENTRIES = 32'd32;
|
||||
localparam DTLB_ENTRIES = 32'd32;
|
||||
|
||||
// Cache configuration. Sizes should be a power of two
|
||||
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
|
||||
localparam DCACHE_NUMWAYS = 32'd4;
|
||||
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam DCACHE_LINELENINBITS = 32'd512;
|
||||
localparam ICACHE_NUMWAYS = 32'd4;
|
||||
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam ICACHE_LINELENINBITS = 32'd512;
|
||||
|
||||
// Integer Divider Configuration
|
||||
// IDIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
localparam IDIV_BITSPERCYCLE = 32'd4;
|
||||
localparam IDIV_ON_FPU = 1;
|
||||
|
||||
// Legal numbers of PMP entries are 0, 16, or 64
|
||||
localparam PMP_ENTRIES = 32'd16;
|
||||
|
||||
// Address space
|
||||
localparam RESET_VECTOR = 64'h0000000000001000;
|
||||
|
||||
// WFI Timeout Wait
|
||||
localparam WFI_TIMEOUT_BIT = 32'd16;
|
||||
|
||||
// Peripheral Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
localparam DTIM_SUPPORTED = 1'b0;
|
||||
localparam DTIM_BASE = 64'h80000000;
|
||||
localparam DTIM_RANGE = 64'h00001FFF;
|
||||
localparam IROM_SUPPORTED = 1'b0;
|
||||
localparam IROM_BASE = 64'h80000000;
|
||||
localparam IROM_RANGE = 64'h00001FFF;
|
||||
localparam BOOTROM_SUPPORTED = 1'b1;
|
||||
localparam BOOTROM_BASE = 64'h00001000 ;
|
||||
localparam BOOTROM_RANGE = 64'h00000FFF;
|
||||
localparam UNCORE_RAM_SUPPORTED = 1'b1;
|
||||
localparam UNCORE_RAM_BASE = 64'h80000000;
|
||||
localparam UNCORE_RAM_RANGE = 64'h07FFFFFF;
|
||||
localparam EXT_MEM_SUPPORTED = 1'b0;
|
||||
localparam EXT_MEM_BASE = 64'h80000000;
|
||||
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
|
||||
localparam CLINT_SUPPORTED = 1'b1;
|
||||
localparam CLINT_BASE = 64'h02000000;
|
||||
localparam CLINT_RANGE = 64'h0000FFFF;
|
||||
localparam GPIO_SUPPORTED = 1'b1;
|
||||
localparam GPIO_BASE = 64'h10060000;
|
||||
localparam GPIO_RANGE = 64'h000000FF;
|
||||
localparam UART_SUPPORTED = 1'b1;
|
||||
localparam UART_BASE = 64'h10000000;
|
||||
localparam UART_RANGE = 64'h00000007;
|
||||
localparam PLIC_SUPPORTED = 1'b1;
|
||||
localparam PLIC_BASE = 64'h0C000000;
|
||||
localparam PLIC_RANGE = 64'h03FFFFFF;
|
||||
localparam SDC_SUPPORTED = 1'b0;
|
||||
localparam SDC_BASE = 64'h00012100;
|
||||
localparam SDC_RANGE = 64'h0000001F;
|
||||
|
||||
// Bus Interface width
|
||||
localparam AHBW = 32'd64;
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
localparam GPIO_LOOPBACK_TEST = 0;
|
||||
|
||||
// Hardware configuration
|
||||
localparam UART_PRESCALE = 32'd0;
|
||||
|
||||
// Interrupt configuration
|
||||
localparam PLIC_NUM_SRC = 32'd53;
|
||||
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
|
||||
localparam PLIC_UART_ID = 32'd10;
|
||||
localparam PLIC_GPIO_ID = 32'd3;
|
||||
|
||||
localparam BPRED_SUPPORTED = 1;
|
||||
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
localparam BPRED_SIZE = 32'd10;
|
||||
localparam BPRED_NUM_LHR = 32'd6;
|
||||
localparam BTB_SIZE = 32'd10;
|
||||
|
||||
|
||||
localparam SVADU_SUPPORTED = 1;
|
||||
localparam ZMMUL_SUPPORTED = 0;
|
||||
|
||||
// FPU division architecture
|
||||
localparam RADIX = 32'h4;
|
||||
localparam DIVCOPIES = 32'h4;
|
||||
|
||||
// bit manipulation
|
||||
localparam ZBA_SUPPORTED = 0;
|
||||
localparam ZBB_SUPPORTED = 0;
|
||||
localparam ZBC_SUPPORTED = 0;
|
||||
localparam ZBS_SUPPORTED = 0;
|
||||
|
||||
// Memory synthesis configuration
|
||||
localparam USE_SRAM = 0;
|
||||
|
||||
`include "test-shared.vh"
|
@ -132,6 +132,7 @@
|
||||
`define BPRED_SUPPORTED 1
|
||||
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
`define BPRED_SIZE 10
|
||||
`define BPRED_NUM_LHR 6
|
||||
`define BTB_SIZE 10
|
||||
|
||||
|
||||
|
@ -141,6 +141,7 @@
|
||||
`define BPRED_SUPPORTED 1
|
||||
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
`define BPRED_SIZE 12
|
||||
`define BPRED_NUM_LHR 6
|
||||
`define BTB_SIZE 10
|
||||
|
||||
|
||||
|
158
config/rv32e/config.vh
Normal file
158
config/rv32e/config.vh
Normal file
@ -0,0 +1,158 @@
|
||||
//////////////////////////////////////////
|
||||
// config.vh (rv32e configuration)
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 4 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Specify which features are configured
|
||||
// Macros to determine which modes are supported based on MISA
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "BranchPredictorType.vh"
|
||||
|
||||
localparam FPGA = 0;
|
||||
localparam QEMU = 0;
|
||||
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
localparam XLEN = 32'd32;
|
||||
|
||||
// IEEE 754 compliance
|
||||
localparam IEEE754 = 0;
|
||||
|
||||
// E
|
||||
localparam MISA = (32'h00000010);
|
||||
localparam ZICSR_SUPPORTED = 0;
|
||||
localparam ZIFENCEI_SUPPORTED = 0;
|
||||
localparam COUNTERS = 12'd0;
|
||||
localparam ZICOUNTERS_SUPPORTED = 0;
|
||||
localparam ZFH_SUPPORTED = 0;
|
||||
localparam SSTC_SUPPORTED = 0;
|
||||
|
||||
// LSU microarchitectural Features
|
||||
localparam BUS_SUPPORTED = 1;
|
||||
localparam DCACHE_SUPPORTED = 0;
|
||||
localparam ICACHE_SUPPORTED = 0;
|
||||
localparam VIRTMEM_SUPPORTED = 0;
|
||||
localparam VECTORED_INTERRUPTS_SUPPORTED = 0;
|
||||
localparam BIGENDIAN_SUPPORTED = 0;
|
||||
|
||||
// TLB configuration. Entries should be a power of 2
|
||||
localparam ITLB_ENTRIES = 32'd0;
|
||||
localparam DTLB_ENTRIES = 32'd0;
|
||||
|
||||
// Cache configuration. Sizes should be a power of two
|
||||
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
|
||||
localparam DCACHE_NUMWAYS = 32'd4;
|
||||
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam DCACHE_LINELENINBITS = 32'd512;
|
||||
localparam ICACHE_NUMWAYS = 32'd4;
|
||||
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam ICACHE_LINELENINBITS = 32'd512;
|
||||
|
||||
// Integer Divider Configuration
|
||||
// IDIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
localparam IDIV_BITSPERCYCLE = 32'd1;
|
||||
localparam IDIV_ON_FPU = 0;
|
||||
|
||||
// Legal numbers of PMP entries are 0, 16, or 64
|
||||
localparam PMP_ENTRIES = 32'd0;
|
||||
|
||||
// Address space
|
||||
localparam RESET_VECTOR = 64'h80000000;
|
||||
|
||||
// WFI Timeout Wait
|
||||
localparam WFI_TIMEOUT_BIT = 32'd16;
|
||||
|
||||
// Peripheral Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
localparam DTIM_SUPPORTED = 1'b0;
|
||||
localparam DTIM_BASE = 64'h80000000;
|
||||
localparam DTIM_RANGE = 64'h007FFFFF;
|
||||
localparam IROM_SUPPORTED = 1'b0;
|
||||
localparam IROM_BASE = 64'h80000000;
|
||||
localparam IROM_RANGE = 64'h007FFFFF;
|
||||
localparam BOOTROM_SUPPORTED = 1'b1;
|
||||
localparam BOOTROM_BASE = 64'h00001000;
|
||||
localparam BOOTROM_RANGE = 64'h00000FFF;
|
||||
localparam UNCORE_RAM_SUPPORTED = 1'b1;
|
||||
localparam UNCORE_RAM_BASE = 64'h80000000;
|
||||
localparam UNCORE_RAM_RANGE = 64'h07FFFFFF;
|
||||
localparam EXT_MEM_SUPPORTED = 1'b0;
|
||||
localparam EXT_MEM_BASE = 64'h80000000;
|
||||
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
|
||||
localparam CLINT_SUPPORTED = 1'b0;
|
||||
localparam CLINT_BASE = 64'h02000000;
|
||||
localparam CLINT_RANGE = 64'h0000FFFF;
|
||||
localparam GPIO_SUPPORTED = 1'b0;
|
||||
localparam GPIO_BASE = 64'h10060000;
|
||||
localparam GPIO_RANGE = 64'h000000FF;
|
||||
localparam UART_SUPPORTED = 1'b0;
|
||||
localparam UART_BASE = 64'h10000000;
|
||||
localparam UART_RANGE = 64'h00000007;
|
||||
localparam PLIC_SUPPORTED = 1'b0;
|
||||
localparam PLIC_BASE = 64'h0C000000;
|
||||
localparam PLIC_RANGE = 64'h03FFFFFF;
|
||||
localparam SDC_SUPPORTED = 1'b0;
|
||||
localparam SDC_BASE = 64'h00012100;
|
||||
localparam SDC_RANGE = 64'h0000001F;
|
||||
|
||||
// Bus Interface width
|
||||
localparam AHBW = 32'd32;
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
localparam GPIO_LOOPBACK_TEST = 1;
|
||||
|
||||
// Hardware configuration
|
||||
localparam UART_PRESCALE = 32'd1;
|
||||
|
||||
// Interrupt configuration
|
||||
localparam PLIC_NUM_SRC = 32'd10;
|
||||
// PLIC_NUM_SRC_LT_32 is derived from PLIC_NUM_SRC; no manual edit is needed when there are >= 32 sources
|
||||
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
|
||||
localparam PLIC_GPIO_ID = 32'd3;
|
||||
localparam PLIC_UART_ID = 32'd10;
|
||||
|
||||
localparam BPRED_SUPPORTED = 0;
|
||||
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
localparam BPRED_SIZE = 32'd10;
|
||||
localparam BPRED_NUM_LHR = 32'd6;
|
||||
localparam BTB_SIZE = 32'd10;
|
||||
|
||||
localparam SVADU_SUPPORTED = 0;
|
||||
localparam ZMMUL_SUPPORTED = 0;
|
||||
|
||||
// FPU division architecture
|
||||
localparam RADIX = 32'd4;
|
||||
localparam DIVCOPIES = 32'd4;
|
||||
|
||||
// bit manipulation
|
||||
localparam ZBA_SUPPORTED = 0;
|
||||
localparam ZBB_SUPPORTED = 0;
|
||||
localparam ZBC_SUPPORTED = 0;
|
||||
localparam ZBS_SUPPORTED = 0;
|
||||
|
||||
// Memory synthesis configuration
|
||||
localparam USE_SRAM = 0;
|
||||
|
||||
`include "test-shared.vh"
|
||||
|
178
config/rv32e/rv32e-config.vh
Normal file
178
config/rv32e/rv32e-config.vh
Normal file
@ -0,0 +1,178 @@
|
||||
//////////////////////////////////////////
|
||||
// rv32e-config.vh
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 4 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Specify which features are configured
|
||||
// Macros to determine which modes are supported based on MISA
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
localparam PA_BITS = 34;
|
||||
//localparam AHBW = 32;
|
||||
//localparam XLEN = 32;
|
||||
//localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 );
|
||||
////localparam BUS_SUPPORTED = 1'b1;
|
||||
//localparam ZICSR_SUPPORTED = 1'b0;
|
||||
localparam M_SUPPORTED = 1'b0;
|
||||
localparam F_SUPPORTED = 1'b0;
|
||||
//localparam ZMMUL_SUPPORTED = 1'b0;
|
||||
//localparam F_SUPPORTED = 1'b0;
|
||||
//localparam PMP_ENTRIES = 0;
|
||||
localparam LLEN = 32;
|
||||
//localparam FPGA = 1'b0;
|
||||
//localparam QEMU = 1'b0;
|
||||
// //VPN_SEGMENT_BITS: (LLEN == 32 ? 10 : 9),
|
||||
// `include "test-shared.vh"
|
||||
localparam FLEN = 32;
|
||||
|
||||
`include "test-shared.vh"
|
||||
|
||||
|
||||
|
||||
// include shared configuration
|
||||
//`include "wally-shared.vh"
|
||||
|
||||
localparam FPGA = 0;
|
||||
localparam QEMU = 0;
|
||||
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
localparam XLEN = 32;
|
||||
|
||||
// IEEE 754 compliance
|
||||
localparam IEEE754 = 0;
|
||||
|
||||
// E
|
||||
localparam MISA = (32'h00000010);
|
||||
localparam ZICSR_SUPPORTED = 0;
|
||||
localparam ZIFENCEI_SUPPORTED = 0;
|
||||
localparam COUNTERS = 0;
|
||||
localparam ZICOUNTERS_SUPPORTED = 0;
|
||||
localparam ZFH_SUPPORTED = 0;
|
||||
localparam SSTC_SUPPORTED = 0;
|
||||
|
||||
// LSU microarchitectural Features
|
||||
localparam BUS_SUPPORTED = 1;
|
||||
localparam DCACHE_SUPPORTED = 0;
|
||||
localparam ICACHE_SUPPORTED = 0;
|
||||
localparam VIRTMEM_SUPPORTED = 0;
|
||||
localparam VECTORED_INTERRUPTS_SUPPORTED = 0;
|
||||
localparam BIGENDIAN_SUPPORTED = 0;
|
||||
|
||||
// TLB configuration. Entries should be a power of 2
|
||||
localparam ITLB_ENTRIES = 0;
|
||||
localparam DTLB_ENTRIES = 0;
|
||||
|
||||
// Cache configuration. Sizes should be a power of two
|
||||
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
|
||||
localparam DCACHE_NUMWAYS = 4;
|
||||
localparam DCACHE_WAYSIZEINBYTES = 4096;
|
||||
localparam DCACHE_LINELENINBITS = 512;
|
||||
localparam ICACHE_NUMWAYS = 4;
|
||||
localparam ICACHE_WAYSIZEINBYTES = 4096;
|
||||
localparam ICACHE_LINELENINBITS = 512;
|
||||
|
||||
// Integer Divider Configuration
|
||||
// IDIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
localparam IDIV_BITSPERCYCLE = 1;
|
||||
localparam IDIV_ON_FPU = 0;
|
||||
|
||||
// Legal numbers of PMP entries are 0, 16, or 64
|
||||
localparam PMP_ENTRIES = 0;
|
||||
|
||||
// Address space
|
||||
localparam RESET_VECTOR = 32'h80000000;
|
||||
|
||||
// WFI Timeout Wait
|
||||
localparam WFI_TIMEOUT_BIT = 16;
|
||||
|
||||
// Peripheral Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
localparam DTIM_SUPPORTED = 1'b0;
|
||||
localparam DTIM_BASE = 34'h80000000;
|
||||
localparam DTIM_RANGE = 34'h007FFFFF;
|
||||
localparam IROM_SUPPORTED = 1'b0;
|
||||
localparam IROM_BASE = 34'h80000000;
|
||||
localparam IROM_RANGE = 34'h007FFFFF;
|
||||
localparam BOOTROM_SUPPORTED = 1'b1;
|
||||
localparam BOOTROM_BASE = 34'h00001000;
|
||||
localparam BOOTROM_RANGE = 34'h00000FFF;
|
||||
localparam UNCORE_RAM_SUPPORTED = 1'b1;
|
||||
localparam UNCORE_RAM_BASE = 34'h80000000;
|
||||
localparam UNCORE_RAM_RANGE = 34'h07FFFFFF;
|
||||
localparam EXT_MEM_SUPPORTED = 1'b0;
|
||||
localparam EXT_MEM_BASE = 34'h80000000;
|
||||
localparam EXT_MEM_RANGE = 34'h07FFFFFF;
|
||||
localparam CLINT_SUPPORTED = 1'b0;
|
||||
localparam CLINT_BASE = 34'h02000000;
|
||||
localparam CLINT_RANGE = 34'h0000FFFF;
|
||||
localparam GPIO_SUPPORTED = 1'b0;
|
||||
localparam GPIO_BASE = 34'h10060000;
|
||||
localparam GPIO_RANGE = 34'h000000FF;
|
||||
localparam UART_SUPPORTED = 1'b0;
|
||||
localparam UART_BASE = 34'h10000000;
|
||||
localparam UART_RANGE = 34'h00000007;
|
||||
localparam PLIC_SUPPORTED = 1'b0;
|
||||
localparam PLIC_BASE = 34'h0C000000;
|
||||
localparam PLIC_RANGE = 34'h03FFFFFF;
|
||||
localparam SDC_SUPPORTED = 1'b0;
|
||||
localparam SDC_BASE = 34'h00012100;
|
||||
localparam SDC_RANGE = 34'h0000001F;
|
||||
|
||||
// Bus Interface width
|
||||
localparam AHBW = 32;
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
localparam GPIO_LOOPBACK_TEST = 1;
|
||||
|
||||
// Hardware configuration
|
||||
localparam UART_PRESCALE = 1;
|
||||
|
||||
// Interrupt configuration
|
||||
localparam PLIC_NUM_SRC = 10;
|
||||
// PLIC_NUM_SRC_LT_32 is derived from PLIC_NUM_SRC; no manual edit is needed when there are >= 32 sources
|
||||
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
|
||||
localparam PLIC_GPIO_ID = 3;
|
||||
localparam PLIC_UART_ID = 10;
|
||||
|
||||
localparam BPRED_SUPPORTED = 0;
|
||||
localparam BPRED_TYPE = "BP_GSHARE"; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
localparam BPRED_SIZE = 10;
|
||||
localparam BTB_SIZE = 10;
|
||||
|
||||
localparam SVADU_SUPPORTED = 0;
|
||||
localparam ZMMUL_SUPPORTED = 0;
|
||||
|
||||
// FPU division architecture
|
||||
localparam RADIX = 4;
|
||||
localparam DIVCOPIES = 4;
|
||||
|
||||
// bit manipulation
|
||||
localparam ZBA_SUPPORTED = 0;
|
||||
localparam ZBB_SUPPORTED = 0;
|
||||
localparam ZBC_SUPPORTED = 0;
|
||||
localparam ZBS_SUPPORTED = 0;
|
||||
|
||||
// Memory synthesis configuration
|
||||
localparam USE_SRAM = 0;
|
||||
|
@ -136,6 +136,7 @@
|
||||
`define BPRED_SUPPORTED 0
|
||||
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
`define BPRED_SIZE 10
|
||||
`define BPRED_NUM_LHR 6
|
||||
`define BTB_SIZE 10
|
||||
|
||||
`define SVADU_SUPPORTED 0
|
||||
|
158
config/rv32gc/config.vh
Normal file
158
config/rv32gc/config.vh
Normal file
@ -0,0 +1,158 @@
|
||||
//////////////////////////////////////////
|
||||
// config.vh (rv32gc configuration)
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 4 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Specify which features are configured
|
||||
// Macros to determine which modes are supported based on MISA
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// include shared configuration
|
||||
// `include "wally-shared.vh"
|
||||
`include "BranchPredictorType.vh"
|
||||
|
||||
localparam FPGA = 0;
|
||||
localparam QEMU = 0;
|
||||
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
localparam XLEN = 32'd32;
|
||||
|
||||
// IEEE 754 compliance
|
||||
localparam IEEE754 = 0;
|
||||
|
||||
localparam MISA = (32'h00000104 | 1 << 20 | 1 << 18 | 1 << 12 | 1 << 0 | 1 <<3 | 1 << 5);
|
||||
localparam ZICSR_SUPPORTED = 1;
|
||||
localparam ZIFENCEI_SUPPORTED = 1;
|
||||
localparam COUNTERS = 12'd32;
|
||||
localparam ZICOUNTERS_SUPPORTED = 1;
|
||||
localparam ZFH_SUPPORTED = 0;
|
||||
localparam SSTC_SUPPORTED = 1;
|
||||
|
||||
// LSU microarchitectural Features
|
||||
localparam BUS_SUPPORTED = 1;
|
||||
localparam DCACHE_SUPPORTED = 1;
|
||||
localparam ICACHE_SUPPORTED = 1;
|
||||
localparam VIRTMEM_SUPPORTED = 1;
|
||||
localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
|
||||
localparam BIGENDIAN_SUPPORTED = 1;
|
||||
|
||||
// TLB configuration. Entries should be a power of 2
|
||||
localparam ITLB_ENTRIES = 32'd32;
|
||||
localparam DTLB_ENTRIES = 32'd32;
|
||||
|
||||
// Cache configuration. Sizes should be a power of two
|
||||
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
|
||||
localparam DCACHE_NUMWAYS = 32'd4;
|
||||
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam DCACHE_LINELENINBITS = 32'd512;
|
||||
localparam ICACHE_NUMWAYS = 32'd4;
|
||||
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam ICACHE_LINELENINBITS = 32'd512;
|
||||
|
||||
// Integer Divider Configuration
|
||||
// IDIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
localparam IDIV_BITSPERCYCLE = 32'd4;
|
||||
localparam IDIV_ON_FPU = 1;
|
||||
|
||||
// Legal numbers of PMP entries are 0, 16, or 64
|
||||
localparam PMP_ENTRIES = 32'd16;
|
||||
|
||||
// Address space
|
||||
localparam RESET_VECTOR = 64'h80000000;
|
||||
|
||||
// WFI Timeout Wait
|
||||
localparam WFI_TIMEOUT_BIT = 32'd16;
|
||||
|
||||
// Peripheral Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
localparam DTIM_SUPPORTED = 1'b0;
|
||||
localparam DTIM_BASE = 64'h80000000;
|
||||
localparam DTIM_RANGE = 64'h007FFFFF;
|
||||
localparam IROM_SUPPORTED = 1'b0;
|
||||
localparam IROM_BASE = 64'h80000000;
|
||||
localparam IROM_RANGE = 64'h007FFFFF;
|
||||
localparam BOOTROM_SUPPORTED = 1'b1;
|
||||
localparam BOOTROM_BASE = 64'h00001000;
|
||||
localparam BOOTROM_RANGE = 64'h00000FFF;
|
||||
localparam UNCORE_RAM_SUPPORTED = 1'b1;
|
||||
localparam UNCORE_RAM_BASE = 64'h80000000;
|
||||
localparam UNCORE_RAM_RANGE = 64'h07FFFFFF;
|
||||
localparam EXT_MEM_SUPPORTED = 1'b0;
|
||||
localparam EXT_MEM_BASE = 64'h80000000;
|
||||
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
|
||||
localparam CLINT_SUPPORTED = 1'b1;
|
||||
localparam CLINT_BASE = 64'h02000000;
|
||||
localparam CLINT_RANGE = 64'h0000FFFF;
|
||||
localparam GPIO_SUPPORTED = 1'b1;
|
||||
localparam GPIO_BASE = 64'h10060000;
|
||||
localparam GPIO_RANGE = 64'h000000FF;
|
||||
localparam UART_SUPPORTED = 1'b1;
|
||||
localparam UART_BASE = 64'h10000000;
|
||||
localparam UART_RANGE = 64'h00000007;
|
||||
localparam PLIC_SUPPORTED = 1'b1;
|
||||
localparam PLIC_BASE = 64'h0C000000;
|
||||
localparam PLIC_RANGE = 64'h03FFFFFF;
|
||||
localparam SDC_SUPPORTED = 1'b0;
|
||||
localparam SDC_BASE = 64'h00012100;
|
||||
localparam SDC_RANGE = 64'h0000001F;
|
||||
|
||||
// Bus Interface width
|
||||
localparam AHBW = 32'd32;
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
localparam GPIO_LOOPBACK_TEST = 1;
|
||||
|
||||
// Hardware configuration
|
||||
localparam UART_PRESCALE = 32'd1;
|
||||
|
||||
// Interrupt configuration
|
||||
localparam PLIC_NUM_SRC = 32'd10;
|
||||
// PLIC_NUM_SRC_LT_32 is derived from PLIC_NUM_SRC; no manual edit is needed when there are >= 32 sources
|
||||
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
|
||||
localparam PLIC_GPIO_ID = 32'd3;
|
||||
localparam PLIC_UART_ID = 32'd10;
|
||||
|
||||
localparam BPRED_SUPPORTED = 1;
|
||||
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
localparam BPRED_SIZE = 32'd16;
|
||||
localparam BPRED_NUM_LHR = 32'd6;
|
||||
localparam BTB_SIZE = 32'd10;
|
||||
|
||||
localparam SVADU_SUPPORTED = 1;
|
||||
localparam ZMMUL_SUPPORTED = 0;
|
||||
|
||||
// FPU division architecture
|
||||
localparam RADIX = 32'd4;
|
||||
localparam DIVCOPIES = 32'd4;
|
||||
|
||||
// bit manipulation
|
||||
localparam ZBA_SUPPORTED = 1;
|
||||
localparam ZBB_SUPPORTED = 1;
|
||||
localparam ZBC_SUPPORTED = 1;
|
||||
localparam ZBS_SUPPORTED = 1;
|
||||
|
||||
// Memory synthesis configuration
|
||||
localparam USE_SRAM = 0;
|
||||
|
||||
`include "test-shared.vh"
|
@ -133,8 +133,9 @@
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define BPRED_SUPPORTED 1
|
||||
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
`define BPRED_TYPE "BP_GSHARE" // "BP_LOCAL_REPAIR" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
`define BPRED_SIZE 16
|
||||
`define BPRED_NUM_LHR 8
|
||||
`define BTB_SIZE 10
|
||||
|
||||
`define SVADU_SUPPORTED 1
|
||||
|
157
config/rv32i/config.vh
Normal file
157
config/rv32i/config.vh
Normal file
@ -0,0 +1,157 @@
|
||||
//////////////////////////////////////////
|
||||
// config.vh (rv32i configuration)
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 4 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Specify which features are configured
|
||||
// Macros to determine which modes are supported based on MISA
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "BranchPredictorType.vh"
|
||||
|
||||
localparam FPGA = 0;
|
||||
localparam QEMU = 0;
|
||||
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
localparam XLEN = 32'd32;
|
||||
|
||||
// IEEE 754 compliance
|
||||
localparam IEEE754 = 0;
|
||||
|
||||
// I
|
||||
localparam MISA = (32'h00000104);
|
||||
localparam ZICSR_SUPPORTED = 0;
|
||||
localparam ZIFENCEI_SUPPORTED = 0;
|
||||
localparam COUNTERS = 12'd32;
|
||||
localparam ZICOUNTERS_SUPPORTED = 0;
|
||||
localparam ZFH_SUPPORTED = 0;
|
||||
localparam SSTC_SUPPORTED = 0;
|
||||
|
||||
// LSU microarchitectural Features
|
||||
localparam BUS_SUPPORTED = 0;
|
||||
localparam DCACHE_SUPPORTED = 0;
|
||||
localparam ICACHE_SUPPORTED = 0;
|
||||
localparam VIRTMEM_SUPPORTED = 0;
|
||||
localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
|
||||
localparam BIGENDIAN_SUPPORTED = 0;
|
||||
|
||||
// TLB configuration. Entries should be a power of 2
|
||||
localparam ITLB_ENTRIES = 32'd32;
|
||||
localparam DTLB_ENTRIES = 32'd32;
|
||||
|
||||
// Cache configuration. Sizes should be a power of two
|
||||
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
|
||||
localparam DCACHE_NUMWAYS = 32'd4;
|
||||
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam DCACHE_LINELENINBITS = 32'd512;
|
||||
localparam ICACHE_NUMWAYS = 32'd4;
|
||||
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam ICACHE_LINELENINBITS = 32'd512;
|
||||
|
||||
// Integer Divider Configuration
|
||||
// IDIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
localparam IDIV_BITSPERCYCLE = 32'd4;
|
||||
localparam IDIV_ON_FPU = 0;
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
localparam PMP_ENTRIES = 32'd0;
|
||||
|
||||
// Address space
|
||||
localparam RESET_VECTOR = 64'h80000000;
|
||||
|
||||
// WFI Timeout Wait
|
||||
localparam WFI_TIMEOUT_BIT = 32'd16;
|
||||
|
||||
// Peripheral Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
localparam DTIM_SUPPORTED = 1'b1;
|
||||
localparam DTIM_BASE = 64'h80000000;
|
||||
localparam DTIM_RANGE = 64'h007FFFFF;
|
||||
localparam IROM_SUPPORTED = 1'b1;
|
||||
localparam IROM_BASE = 64'h80000000;
|
||||
localparam IROM_RANGE = 64'h007FFFFF;
|
||||
localparam BOOTROM_SUPPORTED = 1'b0;
|
||||
localparam BOOTROM_BASE = 64'h00001000;
|
||||
localparam BOOTROM_RANGE = 64'h00000FFF;
|
||||
localparam UNCORE_RAM_SUPPORTED = 1'b0;
|
||||
localparam UNCORE_RAM_BASE = 64'h80000000;
|
||||
localparam UNCORE_RAM_RANGE = 64'h07FFFFFF;
|
||||
localparam EXT_MEM_SUPPORTED = 1'b0;
|
||||
localparam EXT_MEM_BASE = 64'h80000000;
|
||||
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
|
||||
localparam CLINT_SUPPORTED = 1'b0;
|
||||
localparam CLINT_BASE = 64'h02000000;
|
||||
localparam CLINT_RANGE = 64'h0000FFFF;
|
||||
localparam GPIO_SUPPORTED = 1'b0;
|
||||
localparam GPIO_BASE = 64'h10060000;
|
||||
localparam GPIO_RANGE = 64'h000000FF;
|
||||
localparam UART_SUPPORTED = 1'b0;
|
||||
localparam UART_BASE = 64'h10000000;
|
||||
localparam UART_RANGE = 64'h00000007;
|
||||
localparam PLIC_SUPPORTED = 1'b0;
|
||||
localparam PLIC_BASE = 64'h0C000000;
|
||||
localparam PLIC_RANGE = 64'h03FFFFFF;
|
||||
localparam SDC_SUPPORTED = 1'b0;
|
||||
localparam SDC_BASE = 64'h00012100;
|
||||
localparam SDC_RANGE = 64'h0000001F;
|
||||
|
||||
// Bus Interface width
|
||||
localparam AHBW = 32'd32;
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
localparam GPIO_LOOPBACK_TEST = 1;
|
||||
|
||||
// Hardware configuration
|
||||
localparam UART_PRESCALE = 32'd1;
|
||||
|
||||
// Interrupt configuration
|
||||
localparam PLIC_NUM_SRC = 32'd10;
|
||||
// derived flag: evaluates to 1 when PLIC_NUM_SRC is below 32, 0 otherwise (computed; no manual edit needed)
|
||||
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
|
||||
localparam PLIC_GPIO_ID = 32'd3;
|
||||
localparam PLIC_UART_ID = 32'd10;
|
||||
|
||||
localparam BPRED_SUPPORTED = 0;
|
||||
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
localparam BPRED_SIZE = 32'd10;
|
||||
localparam BPRED_NUM_LHR = 32'd6;
|
||||
localparam BTB_SIZE = 32'd10;
|
||||
|
||||
localparam SVADU_SUPPORTED = 0;
|
||||
localparam ZMMUL_SUPPORTED = 0;
|
||||
|
||||
// FPU division architecture
|
||||
localparam RADIX = 32'h4;
|
||||
localparam DIVCOPIES = 32'h4;
|
||||
|
||||
// bit manipulation
|
||||
localparam ZBA_SUPPORTED = 0;
|
||||
localparam ZBB_SUPPORTED = 0;
|
||||
localparam ZBC_SUPPORTED = 0;
|
||||
localparam ZBS_SUPPORTED = 0;
|
||||
|
||||
// Memory synthesis configuration
|
||||
localparam USE_SRAM = 0;
|
||||
|
||||
`include "test-shared.vh"
|
@ -136,6 +136,7 @@
|
||||
`define BPRED_SUPPORTED 0
|
||||
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
`define BPRED_SIZE 10
|
||||
`define BPRED_NUM_LHR 6
|
||||
`define BTB_SIZE 10
|
||||
|
||||
`define SVADU_SUPPORTED 0
|
||||
|
156
config/rv32imc/config.vh
Normal file
156
config/rv32imc/config.vh
Normal file
@ -0,0 +1,156 @@
|
||||
//////////////////////////////////////////
|
||||
// wally-config.vh
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 4 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Specify which features are configured
|
||||
// Macros to determine which modes are supported based on MISA
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "BranchPredictorType.vh"
|
||||
|
||||
localparam FPGA = 0;
|
||||
localparam QEMU = 0;
|
||||
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
localparam XLEN = 32'd32;
|
||||
|
||||
// IEEE 754 compliance
|
||||
localparam IEEE754 = 0;
|
||||
|
||||
localparam MISA = (32'h00000104 | 1 << 20 | 1 << 18 | 1 << 12);
|
||||
localparam ZICSR_SUPPORTED = 1;
|
||||
localparam ZIFENCEI_SUPPORTED = 1;
|
||||
localparam COUNTERS = 12'd32;
|
||||
localparam ZICOUNTERS_SUPPORTED = 1;
|
||||
localparam ZFH_SUPPORTED = 0;
|
||||
localparam SSTC_SUPPORTED = 0;
|
||||
|
||||
// LSU microarchitectural Features
|
||||
localparam BUS_SUPPORTED = 1;
|
||||
localparam DCACHE_SUPPORTED = 0;
|
||||
localparam ICACHE_SUPPORTED = 0;
|
||||
localparam VIRTMEM_SUPPORTED = 0;
|
||||
localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
|
||||
localparam BIGENDIAN_SUPPORTED = 0;
|
||||
|
||||
// TLB configuration. Entries should be a power of 2
|
||||
localparam ITLB_ENTRIES = 32'd0;
|
||||
localparam DTLB_ENTRIES = 32'd0;
|
||||
|
||||
// Cache configuration. Sizes should be a power of two
|
||||
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
|
||||
localparam DCACHE_NUMWAYS = 32'd4;
|
||||
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam DCACHE_LINELENINBITS = 32'd512;
|
||||
localparam ICACHE_NUMWAYS = 32'd4;
|
||||
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam ICACHE_LINELENINBITS = 32'd512;
|
||||
|
||||
// Integer Divider Configuration
|
||||
// IDIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
localparam IDIV_BITSPERCYCLE = 32'd2;
|
||||
localparam IDIV_ON_FPU = 0;
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
localparam PMP_ENTRIES = 32'd0;
|
||||
|
||||
// Address space
|
||||
localparam RESET_VECTOR = 64'h80000000;
|
||||
|
||||
// WFI Timeout Wait
|
||||
localparam WFI_TIMEOUT_BIT = 32'd16;
|
||||
|
||||
// Peripheral Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
localparam DTIM_SUPPORTED = 1'b1;
|
||||
localparam DTIM_BASE = 64'h80000000;
|
||||
localparam DTIM_RANGE = 64'h007FFFFF;
|
||||
localparam IROM_SUPPORTED = 1'b1;
|
||||
localparam IROM_BASE = 64'h80000000;
|
||||
localparam IROM_RANGE = 64'h007FFFFF;
|
||||
localparam BOOTROM_SUPPORTED = 1'b0;
|
||||
localparam BOOTROM_BASE = 64'h00001000;
|
||||
localparam BOOTROM_RANGE = 64'h00000FFF;
|
||||
localparam UNCORE_RAM_SUPPORTED = 1'b0;
|
||||
localparam UNCORE_RAM_BASE = 64'h80000000;
|
||||
localparam UNCORE_RAM_RANGE = 64'h07FFFFFF;
|
||||
localparam EXT_MEM_SUPPORTED = 1'b0;
|
||||
localparam EXT_MEM_BASE = 64'h80000000;
|
||||
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
|
||||
localparam CLINT_SUPPORTED = 1'b1;
|
||||
localparam CLINT_BASE = 64'h02000000;
|
||||
localparam CLINT_RANGE = 64'h0000FFFF;
|
||||
localparam GPIO_SUPPORTED = 1'b1;
|
||||
localparam GPIO_BASE = 64'h10060000;
|
||||
localparam GPIO_RANGE = 64'h000000FF;
|
||||
localparam UART_SUPPORTED = 1'b1;
|
||||
localparam UART_BASE = 64'h10000000;
|
||||
localparam UART_RANGE = 64'h00000007;
|
||||
localparam PLIC_SUPPORTED = 1'b1;
|
||||
localparam PLIC_BASE = 64'h0C000000;
|
||||
localparam PLIC_RANGE = 64'h03FFFFFF;
|
||||
localparam SDC_SUPPORTED = 1'b0;
|
||||
localparam SDC_BASE = 64'h00012100;
|
||||
localparam SDC_RANGE = 64'h0000001F;
|
||||
|
||||
// Bus Interface width
|
||||
localparam AHBW = 32'd32;
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
localparam GPIO_LOOPBACK_TEST = 1;
|
||||
|
||||
// Hardware configuration
|
||||
localparam UART_PRESCALE = 32'd1;
|
||||
|
||||
// Interrupt configuration
|
||||
localparam PLIC_NUM_SRC = 32'd10;
|
||||
// derived flag: evaluates to 1 when PLIC_NUM_SRC is below 32, 0 otherwise (computed; no manual edit needed)
|
||||
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
|
||||
localparam PLIC_GPIO_ID = 32'd3;
|
||||
localparam PLIC_UART_ID = 32'd10;
|
||||
|
||||
localparam BPRED_SUPPORTED = 0;
|
||||
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
localparam BPRED_SIZE = 32'd10;
|
||||
localparam BPRED_NUM_LHR = 32'd6;
|
||||
localparam BTB_SIZE = 32'd10;
|
||||
|
||||
localparam SVADU_SUPPORTED = 0;
|
||||
localparam ZMMUL_SUPPORTED = 0;
|
||||
|
||||
// FPU division architecture
|
||||
localparam RADIX = 32'h4;
|
||||
localparam DIVCOPIES = 32'h4;
|
||||
|
||||
// bit manipulation
|
||||
localparam ZBA_SUPPORTED = 0;
|
||||
localparam ZBB_SUPPORTED = 0;
|
||||
localparam ZBC_SUPPORTED = 0;
|
||||
localparam ZBS_SUPPORTED = 0;
|
||||
|
||||
// Memory synthesis configuration
|
||||
localparam USE_SRAM = 0;
|
||||
|
||||
`include "test-shared.vh"
|
@ -135,6 +135,7 @@
|
||||
`define BPRED_SUPPORTED 0
|
||||
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
`define BPRED_SIZE 10
|
||||
`define BPRED_NUM_LHR 6
|
||||
`define BTB_SIZE 10
|
||||
|
||||
`define SVADU_SUPPORTED 0
|
||||
|
159
config/rv64fpquad/config.vh
Normal file
159
config/rv64fpquad/config.vh
Normal file
@ -0,0 +1,159 @@
|
||||
//////////////////////////////////////////
|
||||
// wally-config.vh
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 4 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Specify which features are configured
|
||||
// Macros to determine which modes are supported based on MISA
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "BranchPredictorType.vh"
|
||||
|
||||
localparam FPGA = 0;
|
||||
localparam QEMU = 0;
|
||||
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
localparam XLEN = 32'd64;
|
||||
|
||||
// IEEE 754 compliance
|
||||
localparam IEEE754 = 0;
|
||||
|
||||
// MISA RISC-V configuration per specification
|
||||
localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 );
|
||||
localparam ZICSR_SUPPORTED = 1;
|
||||
localparam ZIFENCEI_SUPPORTED = 1;
|
||||
localparam COUNTERS = 12'd32;
|
||||
localparam ZICOUNTERS_SUPPORTED = 1;
|
||||
localparam ZFH_SUPPORTED = 1;
|
||||
localparam SSTC_SUPPORTED = 0;
|
||||
|
||||
// LSU microarchitectural Features
|
||||
localparam BUS_SUPPORTED = 1;
|
||||
localparam DCACHE_SUPPORTED = 1;
|
||||
localparam ICACHE_SUPPORTED = 1;
|
||||
localparam VIRTMEM_SUPPORTED = 1;
|
||||
localparam VECTORED_INTERRUPTS_SUPPORTED = 1 ;
|
||||
localparam BIGENDIAN_SUPPORTED = 1;
|
||||
|
||||
// TLB configuration. Entries should be a power of 2
|
||||
localparam ITLB_ENTRIES = 32'd32;
|
||||
localparam DTLB_ENTRIES = 32'd32;
|
||||
|
||||
// Cache configuration. Sizes should be a power of two
|
||||
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
|
||||
localparam DCACHE_NUMWAYS = 32'd4;
|
||||
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam DCACHE_LINELENINBITS = 32'd512;
|
||||
localparam ICACHE_NUMWAYS = 32'd4;
|
||||
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam ICACHE_LINELENINBITS = 32'd512;
|
||||
|
||||
// Integer Divider Configuration
|
||||
// IDIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
localparam IDIV_BITSPERCYCLE = 32'd4;
|
||||
localparam IDIV_ON_FPU = 1;
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
localparam PMP_ENTRIES = 32'd16;
|
||||
|
||||
// Address space
|
||||
localparam RESET_VECTOR = 64'h0000000080000000;
|
||||
|
||||
// Bus Interface width
|
||||
localparam AHBW = 32'd64;
|
||||
|
||||
// WFI Timeout Wait
|
||||
localparam WFI_TIMEOUT_BIT = 32'd16;
|
||||
|
||||
// Peripheral Physical Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file?
|
||||
localparam DTIM_SUPPORTED = 1'b0;
|
||||
localparam DTIM_BASE = 64'h80000000;
|
||||
localparam DTIM_RANGE = 64'h007FFFFF;
|
||||
localparam IROM_SUPPORTED = 1'b0;
|
||||
localparam IROM_BASE = 64'h80000000;
|
||||
localparam IROM_RANGE = 64'h007FFFFF;
|
||||
localparam BOOTROM_SUPPORTED = 1'b1;
|
||||
localparam BOOTROM_BASE = 64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
localparam BOOTROM_RANGE = 64'h00000FFF;
|
||||
localparam UNCORE_RAM_SUPPORTED = 1'b1;
|
||||
localparam UNCORE_RAM_BASE = 64'h80000000;
|
||||
localparam UNCORE_RAM_RANGE = 64'h7FFFFFFF;
|
||||
localparam EXT_MEM_SUPPORTED = 1'b0;
|
||||
localparam EXT_MEM_BASE = 64'h80000000;
|
||||
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
|
||||
localparam CLINT_SUPPORTED = 1'b1;
|
||||
localparam CLINT_BASE = 64'h02000000;
|
||||
localparam CLINT_RANGE = 64'h0000FFFF;
|
||||
localparam GPIO_SUPPORTED = 1'b1;
|
||||
localparam GPIO_BASE = 64'h10060000;
|
||||
localparam GPIO_RANGE = 64'h000000FF;
|
||||
localparam UART_SUPPORTED = 1'b1;
|
||||
localparam UART_BASE = 64'h10000000;
|
||||
localparam UART_RANGE = 64'h00000007;
|
||||
localparam PLIC_SUPPORTED = 1'b1;
|
||||
localparam PLIC_BASE = 64'h0C000000;
|
||||
localparam PLIC_RANGE = 64'h03FFFFFF;
|
||||
localparam SDC_SUPPORTED = 1'b0;
|
||||
localparam SDC_BASE = 64'h00012100;
|
||||
localparam SDC_RANGE = 64'h0000001F;
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
localparam GPIO_LOOPBACK_TEST = 1;
|
||||
|
||||
// Hardware configuration
|
||||
localparam UART_PRESCALE = 32'd1;
|
||||
|
||||
// Interrupt configuration
|
||||
localparam PLIC_NUM_SRC = 32'd10;
|
||||
// derived flag: evaluates to 1 when PLIC_NUM_SRC is below 32, 0 otherwise (computed; no manual edit needed)
|
||||
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
|
||||
localparam PLIC_GPIO_ID = 32'd3;
|
||||
localparam PLIC_UART_ID = 32'd10;
|
||||
|
||||
localparam BPRED_SUPPORTED = 1;
|
||||
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
localparam BPRED_SIZE = 32'd10;
|
||||
localparam BPRED_NUM_LHR = 32'd6;
|
||||
localparam BTB_SIZE = 32'd10;
|
||||
|
||||
localparam SVADU_SUPPORTED = 0;
|
||||
localparam ZMMUL_SUPPORTED = 0;
|
||||
|
||||
// FPU division architecture
|
||||
localparam RADIX = 32'h4;
|
||||
localparam DIVCOPIES = 32'h4;
|
||||
|
||||
// bit manipulation
|
||||
localparam ZBA_SUPPORTED = 0;
|
||||
localparam ZBB_SUPPORTED = 0;
|
||||
localparam ZBC_SUPPORTED = 0;
|
||||
localparam ZBS_SUPPORTED = 0;
|
||||
|
||||
// Memory synthesis configuration
|
||||
localparam USE_SRAM = 0;
|
||||
|
||||
`include "test-shared.vh"
|
@ -138,6 +138,7 @@
|
||||
`define BPRED_SUPPORTED 1
|
||||
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
`define BPRED_SIZE 10
|
||||
`define BPRED_NUM_LHR 6
|
||||
`define BTB_SIZE 10
|
||||
|
||||
`define SVADU_SUPPORTED 0
|
||||
|
162
config/rv64gc/config.vh
Normal file
162
config/rv64gc/config.vh
Normal file
@ -0,0 +1,162 @@
|
||||
//////////////////////////////////////////
|
||||
// wally-config.vh
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 4 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Specify which features are configured
|
||||
// Macros to determine which modes are supported based on MISA
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// include shared configuration
|
||||
// `include "wally-shared.vh"
|
||||
|
||||
`include "BranchPredictorType.vh"
|
||||
|
||||
localparam FPGA = 0;
|
||||
localparam QEMU = 0;
|
||||
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
localparam XLEN = 32'd64;
|
||||
|
||||
// IEEE 754 compliance
|
||||
localparam IEEE754 = 0;
|
||||
|
||||
// MISA RISC-V configuration per specification
|
||||
localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0);
|
||||
localparam ZICSR_SUPPORTED = 1;
|
||||
localparam ZIFENCEI_SUPPORTED = 1;
|
||||
localparam COUNTERS = 12'd32;
|
||||
localparam ZICOUNTERS_SUPPORTED = 1;
|
||||
localparam ZFH_SUPPORTED = 0;
|
||||
localparam SSTC_SUPPORTED = 1;
|
||||
|
||||
// LSU microarchitectural Features
|
||||
localparam BUS_SUPPORTED = 1;
|
||||
localparam DCACHE_SUPPORTED = 1;
|
||||
localparam ICACHE_SUPPORTED = 1;
|
||||
localparam VIRTMEM_SUPPORTED = 1;
|
||||
localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
|
||||
localparam BIGENDIAN_SUPPORTED = 1;
|
||||
|
||||
// TLB configuration. Entries should be a power of 2
|
||||
localparam ITLB_ENTRIES = 32'd32;
|
||||
localparam DTLB_ENTRIES = 32'd32;
|
||||
|
||||
// Cache configuration. Sizes should be a power of two
|
||||
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
|
||||
localparam DCACHE_NUMWAYS = 32'd4;
|
||||
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam DCACHE_LINELENINBITS = 32'd512;
|
||||
localparam ICACHE_NUMWAYS = 32'd4;
|
||||
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam ICACHE_LINELENINBITS = 32'd512;
|
||||
|
||||
// Integer Divider Configuration
|
||||
// IDIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
localparam IDIV_BITSPERCYCLE = 32'd4;
|
||||
localparam IDIV_ON_FPU = 1;
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
localparam PMP_ENTRIES = 32'd16;
|
||||
|
||||
// Address space
|
||||
localparam RESET_VECTOR = 64'h0000000080000000;
|
||||
|
||||
// Bus Interface width
|
||||
localparam AHBW = 32'd64;
|
||||
|
||||
// WFI Timeout Wait
|
||||
localparam WFI_TIMEOUT_BIT = 32'd16;
|
||||
|
||||
// Peripheral Physical Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file?
|
||||
localparam DTIM_SUPPORTED = 1'b0;
|
||||
localparam DTIM_BASE = 64'h80000000;
|
||||
localparam DTIM_RANGE = 64'h007FFFFF;
|
||||
localparam IROM_SUPPORTED = 1'b0;
|
||||
localparam IROM_BASE = 64'h80000000;
|
||||
localparam IROM_RANGE = 64'h007FFFFF;
|
||||
localparam BOOTROM_SUPPORTED = 1'b1;
|
||||
localparam BOOTROM_BASE = 64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
localparam BOOTROM_RANGE = 64'h00000FFF;
|
||||
localparam UNCORE_RAM_SUPPORTED = 1'b1;
|
||||
localparam UNCORE_RAM_BASE = 64'h80000000;
|
||||
localparam UNCORE_RAM_RANGE = 64'h7FFFFFFF;
|
||||
localparam EXT_MEM_SUPPORTED = 1'b0;
|
||||
localparam EXT_MEM_BASE = 64'h80000000;
|
||||
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
|
||||
localparam CLINT_SUPPORTED = 1'b1;
|
||||
localparam CLINT_BASE = 64'h02000000;
|
||||
localparam CLINT_RANGE = 64'h0000FFFF;
|
||||
localparam GPIO_SUPPORTED = 1'b1;
|
||||
localparam GPIO_BASE = 64'h10060000;
|
||||
localparam GPIO_RANGE = 64'h000000FF;
|
||||
localparam UART_SUPPORTED = 1'b1;
|
||||
localparam UART_BASE = 64'h10000000;
|
||||
localparam UART_RANGE = 64'h00000007;
|
||||
localparam PLIC_SUPPORTED = 1'b1;
|
||||
localparam PLIC_BASE = 64'h0C000000;
|
||||
localparam PLIC_RANGE = 64'h03FFFFFF;
|
||||
localparam SDC_SUPPORTED = 1'b0;
|
||||
localparam SDC_BASE = 64'h00012100;
|
||||
localparam SDC_RANGE = 64'h0000001F;
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
localparam GPIO_LOOPBACK_TEST = 1;
|
||||
|
||||
// Hardware configuration
|
||||
localparam UART_PRESCALE = 32'd1;
|
||||
|
||||
// Interrupt configuration
|
||||
localparam PLIC_NUM_SRC = 32'd10;
|
||||
// derived flag: evaluates to 1 when PLIC_NUM_SRC is below 32, 0 otherwise (computed; no manual edit needed)
|
||||
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
|
||||
localparam PLIC_GPIO_ID = 32'd3;
|
||||
localparam PLIC_UART_ID = 32'd10;
|
||||
|
||||
localparam BPRED_SUPPORTED = 1;
|
||||
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
localparam BPRED_NUM_LHR = 32'd6;
|
||||
localparam BPRED_SIZE = 32'd10;
|
||||
localparam BTB_SIZE = 32'd10;
|
||||
|
||||
localparam SVADU_SUPPORTED = 1;
|
||||
localparam ZMMUL_SUPPORTED = 0;
|
||||
|
||||
// FPU division architecture
|
||||
localparam RADIX = 32'h4;
|
||||
localparam DIVCOPIES = 32'h4;
|
||||
|
||||
// bit manipulation
|
||||
localparam ZBA_SUPPORTED = 1;
|
||||
localparam ZBB_SUPPORTED = 1;
|
||||
localparam ZBC_SUPPORTED = 1;
|
||||
localparam ZBS_SUPPORTED = 1;
|
||||
|
||||
// Memory synthesis configuration
|
||||
localparam USE_SRAM = 0;
|
||||
|
||||
`include "test-shared.vh"
|
@ -136,8 +136,10 @@
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define BPRED_SUPPORTED 1
|
||||
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
//`define BPRED_TYPE "BP_GLOBAL_BASIC" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
`define BPRED_TYPE "BP_GSHARE" // "BP_LOCAL_REPAIR" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
`define BPRED_SIZE 10
|
||||
`define BPRED_NUM_LHR 4
|
||||
`define BTB_SIZE 10
|
||||
|
||||
`define SVADU_SUPPORTED 1
|
||||
|
159
config/rv64i/config.vh
Normal file
159
config/rv64i/config.vh
Normal file
@ -0,0 +1,159 @@
|
||||
//////////////////////////////////////////
|
||||
// wally-config.vh
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 4 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Specify which features are configured
|
||||
// Macros to determine which modes are supported based on MISA
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "BranchPredictorType.vh"
|
||||
|
||||
localparam FPGA = 0;
|
||||
localparam QEMU = 0;
|
||||
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
localparam XLEN = 32'd64;
|
||||
|
||||
// IEEE 754 compliance
|
||||
localparam IEEE754 = 0;
|
||||
|
||||
// MISA RISC-V configuration per specification
|
||||
localparam MISA = (32'h00000104);
|
||||
localparam ZICSR_SUPPORTED = 0;
|
||||
localparam ZIFENCEI_SUPPORTED = 0;
|
||||
localparam COUNTERS = 12'd32;
|
||||
localparam ZICOUNTERS_SUPPORTED = 0;
|
||||
localparam ZFH_SUPPORTED = 0;
|
||||
localparam SSTC_SUPPORTED = 0;
|
||||
|
||||
// LSU microarchitectural Features
|
||||
localparam BUS_SUPPORTED = 0;
|
||||
localparam DCACHE_SUPPORTED = 0;
|
||||
localparam ICACHE_SUPPORTED = 0;
|
||||
localparam VIRTMEM_SUPPORTED = 0;
|
||||
localparam VECTORED_INTERRUPTS_SUPPORTED = 1;
|
||||
localparam BIGENDIAN_SUPPORTED = 0;
|
||||
|
||||
// TLB configuration. Entries should be a power of 2
|
||||
localparam ITLB_ENTRIES = 32'd0;
|
||||
localparam DTLB_ENTRIES = 32'd0;
|
||||
|
||||
// Cache configuration. Sizes should be a power of two
|
||||
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
|
||||
localparam DCACHE_NUMWAYS = 32'd4;
|
||||
localparam DCACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam DCACHE_LINELENINBITS = 32'd512;
|
||||
localparam ICACHE_NUMWAYS = 32'd4;
|
||||
localparam ICACHE_WAYSIZEINBYTES = 32'd4096;
|
||||
localparam ICACHE_LINELENINBITS = 32'd512;
|
||||
|
||||
// Integer Divider Configuration
|
||||
// IDIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
localparam IDIV_BITSPERCYCLE = 32'd4;
|
||||
localparam IDIV_ON_FPU = 0;
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
localparam PMP_ENTRIES = 32'd0;
|
||||
|
||||
// Address space
|
||||
localparam RESET_VECTOR = 64'h0000000080000000;
|
||||
|
||||
// Bus Interface width
|
||||
localparam AHBW = (XLEN);
|
||||
|
||||
// WFI Timeout Wait
|
||||
localparam WFI_TIMEOUT_BIT = 32'd16;
|
||||
|
||||
// Peripheral Physical Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file?
|
||||
localparam DTIM_SUPPORTED = 1'b1;
|
||||
localparam DTIM_BASE = 64'h80000000;
|
||||
localparam DTIM_RANGE = 64'h007FFFFF;
|
||||
localparam IROM_SUPPORTED = 1'b1;
|
||||
localparam IROM_BASE = 64'h80000000;
|
||||
localparam IROM_RANGE = 64'h007FFFFF;
|
||||
localparam BOOTROM_SUPPORTED = 1'b0;
|
||||
localparam BOOTROM_BASE = 64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
localparam BOOTROM_RANGE = 64'h00000FFF;
|
||||
localparam UNCORE_RAM_SUPPORTED = 1'b0;
|
||||
localparam UNCORE_RAM_BASE = 64'h80000000;
|
||||
localparam UNCORE_RAM_RANGE = 64'h7FFFFFFF;
|
||||
localparam EXT_MEM_SUPPORTED = 1'b0;
|
||||
localparam EXT_MEM_BASE = 64'h80000000;
|
||||
localparam EXT_MEM_RANGE = 64'h07FFFFFF;
|
||||
localparam CLINT_SUPPORTED = 1'b0;
|
||||
localparam CLINT_BASE = 64'h02000000;
|
||||
localparam CLINT_RANGE = 64'h0000FFFF;
|
||||
localparam GPIO_SUPPORTED = 1'b0;
|
||||
localparam GPIO_BASE = 64'h10060000;
|
||||
localparam GPIO_RANGE = 64'h000000FF;
|
||||
localparam UART_SUPPORTED = 1'b0;
|
||||
localparam UART_BASE = 64'h10000000;
|
||||
localparam UART_RANGE = 64'h00000007;
|
||||
localparam PLIC_SUPPORTED = 1'b0;
|
||||
localparam PLIC_BASE = 64'h0C000000;
|
||||
localparam PLIC_RANGE = 64'h03FFFFFF;
|
||||
localparam SDC_SUPPORTED = 1'b0;
|
||||
localparam SDC_BASE = 64'h00012100;
|
||||
localparam SDC_RANGE = 64'h0000001F;
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
localparam GPIO_LOOPBACK_TEST = 1;
|
||||
|
||||
// Hardware configuration
|
||||
localparam UART_PRESCALE = 32'd1;
|
||||
|
||||
// Interrupt configuration
|
||||
localparam PLIC_NUM_SRC = 32'd10;
|
||||
// derived flag: evaluates to 1 when PLIC_NUM_SRC is below 32, 0 otherwise (computed; no manual edit needed)
|
||||
localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 32);
|
||||
localparam PLIC_GPIO_ID = 32'd3;
|
||||
localparam PLIC_UART_ID = 32'd10;
|
||||
|
||||
localparam BPRED_SUPPORTED = 0;
|
||||
localparam BranchPredictorType BPRED_TYPE = BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
localparam BPRED_SIZE = 32'd10;
|
||||
localparam BPRED_NUM_LHR = 32'd6;
|
||||
localparam BTB_SIZE = 32'd10;
|
||||
|
||||
localparam SVADU_SUPPORTED = 0;
|
||||
localparam ZMMUL_SUPPORTED = 0;
|
||||
|
||||
// FPU division architecture
|
||||
localparam RADIX = 32'h4;
|
||||
localparam DIVCOPIES = 32'h4;
|
||||
|
||||
// bit manipulation
|
||||
localparam ZBA_SUPPORTED = 0;
|
||||
localparam ZBB_SUPPORTED = 0;
|
||||
localparam ZBC_SUPPORTED = 0;
|
||||
localparam ZBS_SUPPORTED = 0;
|
||||
|
||||
// Memory synthesis configuration
|
||||
localparam USE_SRAM = 0;
|
||||
|
||||
`include "test-shared.vh"
|
@ -138,6 +138,7 @@
|
||||
`define BPRED_SUPPORTED 0
|
||||
`define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
`define BPRED_SIZE 10
|
||||
`define BPRED_NUM_LHR 6
|
||||
`define BTB_SIZE 10
|
||||
|
||||
`define SVADU_SUPPORTED 0
|
||||
|
3
config/shared/BranchPredictorType.vh
Normal file
3
config/shared/BranchPredictorType.vh
Normal file
@ -0,0 +1,3 @@
|
||||
typedef enum {BP_TWOBIT, BP_GSHARE, BP_GLOBAL, BP_GSHARE_BASIC,
|
||||
BP_GLOBAL_BASIC, BP_LOCAL_BASIC, BP_LOCAL_AHEAD, BP_LOCAL_REPAIR} BranchPredictorType;
|
||||
|
165
config/shared/parameter-defs.vh
Normal file
165
config/shared/parameter-defs.vh
Normal file
@ -0,0 +1,165 @@
|
||||
|
||||
// Populate parameter structure with values specific to the current configuration
|
||||
|
||||
parameter cvw_t P = '{
|
||||
FPGA : FPGA,
|
||||
QEMU : QEMU,
|
||||
XLEN : XLEN,
|
||||
IEEE754 : IEEE754,
|
||||
MISA : MISA,
|
||||
AHBW : AHBW,
|
||||
ZICSR_SUPPORTED : ZICSR_SUPPORTED,
|
||||
ZIFENCEI_SUPPORTED : ZIFENCEI_SUPPORTED,
|
||||
COUNTERS : COUNTERS,
|
||||
ZICOUNTERS_SUPPORTED : ZICOUNTERS_SUPPORTED,
|
||||
ZFH_SUPPORTED : ZFH_SUPPORTED,
|
||||
SSTC_SUPPORTED : SSTC_SUPPORTED,
|
||||
VIRTMEM_SUPPORTED : VIRTMEM_SUPPORTED,
|
||||
VECTORED_INTERRUPTS_SUPPORTED : VECTORED_INTERRUPTS_SUPPORTED,
|
||||
BIGENDIAN_SUPPORTED : BIGENDIAN_SUPPORTED,
|
||||
SVADU_SUPPORTED : SVADU_SUPPORTED,
|
||||
ZMMUL_SUPPORTED : ZMMUL_SUPPORTED,
|
||||
BUS_SUPPORTED : BUS_SUPPORTED,
|
||||
DCACHE_SUPPORTED : DCACHE_SUPPORTED,
|
||||
ICACHE_SUPPORTED : ICACHE_SUPPORTED,
|
||||
ITLB_ENTRIES : ITLB_ENTRIES,
|
||||
DTLB_ENTRIES : DTLB_ENTRIES,
|
||||
DCACHE_NUMWAYS : DCACHE_NUMWAYS,
|
||||
DCACHE_WAYSIZEINBYTES : DCACHE_WAYSIZEINBYTES,
|
||||
DCACHE_LINELENINBITS : DCACHE_LINELENINBITS,
|
||||
ICACHE_NUMWAYS : ICACHE_NUMWAYS,
|
||||
ICACHE_WAYSIZEINBYTES : ICACHE_WAYSIZEINBYTES,
|
||||
ICACHE_LINELENINBITS : ICACHE_LINELENINBITS,
|
||||
IDIV_BITSPERCYCLE : IDIV_BITSPERCYCLE,
|
||||
IDIV_ON_FPU : IDIV_ON_FPU,
|
||||
PMP_ENTRIES : PMP_ENTRIES,
|
||||
RESET_VECTOR : RESET_VECTOR,
|
||||
WFI_TIMEOUT_BIT : WFI_TIMEOUT_BIT,
|
||||
DTIM_SUPPORTED : DTIM_SUPPORTED,
|
||||
DTIM_BASE : DTIM_BASE,
|
||||
DTIM_RANGE : DTIM_RANGE,
|
||||
IROM_SUPPORTED : IROM_SUPPORTED,
|
||||
IROM_BASE : IROM_BASE,
|
||||
IROM_RANGE : IROM_RANGE,
|
||||
BOOTROM_SUPPORTED : BOOTROM_SUPPORTED,
|
||||
BOOTROM_BASE : BOOTROM_BASE,
|
||||
BOOTROM_RANGE : BOOTROM_RANGE,
|
||||
UNCORE_RAM_SUPPORTED : UNCORE_RAM_SUPPORTED,
|
||||
UNCORE_RAM_BASE : UNCORE_RAM_BASE,
|
||||
UNCORE_RAM_RANGE : UNCORE_RAM_RANGE,
|
||||
EXT_MEM_SUPPORTED : EXT_MEM_SUPPORTED,
|
||||
EXT_MEM_BASE : EXT_MEM_BASE,
|
||||
EXT_MEM_RANGE : EXT_MEM_RANGE,
|
||||
CLINT_SUPPORTED : CLINT_SUPPORTED,
|
||||
CLINT_BASE : CLINT_BASE,
|
||||
CLINT_RANGE : CLINT_RANGE,
|
||||
GPIO_SUPPORTED : GPIO_SUPPORTED,
|
||||
GPIO_BASE : GPIO_BASE,
|
||||
GPIO_RANGE : GPIO_RANGE,
|
||||
UART_SUPPORTED : UART_SUPPORTED,
|
||||
UART_BASE : UART_BASE,
|
||||
UART_RANGE : UART_RANGE,
|
||||
PLIC_SUPPORTED : PLIC_SUPPORTED,
|
||||
PLIC_BASE : PLIC_BASE,
|
||||
PLIC_RANGE : PLIC_RANGE,
|
||||
SDC_SUPPORTED : SDC_SUPPORTED,
|
||||
SDC_BASE : SDC_BASE,
|
||||
SDC_RANGE : SDC_RANGE,
|
||||
GPIO_LOOPBACK_TEST : GPIO_LOOPBACK_TEST,
|
||||
UART_PRESCALE : UART_PRESCALE ,
|
||||
PLIC_NUM_SRC : PLIC_NUM_SRC,
|
||||
PLIC_NUM_SRC_LT_32 : PLIC_NUM_SRC_LT_32,
|
||||
PLIC_GPIO_ID : PLIC_GPIO_ID,
|
||||
PLIC_UART_ID : PLIC_UART_ID,
|
||||
BPRED_SUPPORTED : BPRED_SUPPORTED,
|
||||
BPRED_TYPE : BPRED_TYPE,
|
||||
BPRED_SIZE : BPRED_SIZE,
|
||||
BPRED_NUM_LHR : BPRED_NUM_LHR,
|
||||
BTB_SIZE : BTB_SIZE,
|
||||
RADIX : RADIX,
|
||||
DIVCOPIES : DIVCOPIES,
|
||||
ZBA_SUPPORTED : ZBA_SUPPORTED,
|
||||
ZBB_SUPPORTED : ZBB_SUPPORTED,
|
||||
ZBC_SUPPORTED : ZBC_SUPPORTED,
|
||||
ZBS_SUPPORTED : ZBS_SUPPORTED,
|
||||
USE_SRAM : USE_SRAM,
|
||||
M_MODE : M_MODE,
|
||||
S_MODE : S_MODE,
|
||||
U_MODE : U_MODE,
|
||||
VPN_SEGMENT_BITS : VPN_SEGMENT_BITS,
|
||||
VPN_BITS : VPN_BITS,
|
||||
PPN_BITS : PPN_BITS,
|
||||
PA_BITS : PA_BITS,
|
||||
SVMODE_BITS : SVMODE_BITS,
|
||||
ASID_BASE : ASID_BASE,
|
||||
ASID_BITS : ASID_BITS,
|
||||
NO_TRANSLATE : NO_TRANSLATE,
|
||||
SV32 : SV32,
|
||||
SV39 : SV39,
|
||||
SV48 : SV48,
|
||||
A_SUPPORTED : A_SUPPORTED,
|
||||
B_SUPPORTED : B_SUPPORTED,
|
||||
C_SUPPORTED : C_SUPPORTED,
|
||||
D_SUPPORTED : D_SUPPORTED,
|
||||
E_SUPPORTED : E_SUPPORTED,
|
||||
F_SUPPORTED : F_SUPPORTED,
|
||||
I_SUPPORTED : I_SUPPORTED,
|
||||
M_SUPPORTED : M_SUPPORTED,
|
||||
Q_SUPPORTED : Q_SUPPORTED,
|
||||
S_SUPPORTED : S_SUPPORTED,
|
||||
U_SUPPORTED : U_SUPPORTED,
|
||||
LOG_XLEN : LOG_XLEN,
|
||||
PMPCFG_ENTRIES : PMPCFG_ENTRIES,
|
||||
Q_LEN : Q_LEN,
|
||||
Q_NE : Q_NE,
|
||||
Q_NF : Q_NF,
|
||||
Q_BIAS : Q_BIAS,
|
||||
Q_FMT : Q_FMT,
|
||||
D_LEN : D_LEN,
|
||||
D_NE : D_NE,
|
||||
D_NF : D_NF,
|
||||
D_BIAS : D_BIAS,
|
||||
D_FMT : D_FMT,
|
||||
S_LEN : S_LEN,
|
||||
S_NE : S_NE,
|
||||
S_NF : S_NF,
|
||||
S_BIAS : S_BIAS,
|
||||
S_FMT : S_FMT,
|
||||
H_LEN : H_LEN,
|
||||
H_NE : H_NE,
|
||||
H_NF : H_NF,
|
||||
H_BIAS : H_BIAS,
|
||||
H_FMT : H_FMT,
|
||||
FLEN : FLEN,
|
||||
NE : NE ,
|
||||
NF : NF ,
|
||||
FMT : FMT ,
|
||||
BIAS : BIAS,
|
||||
FPSIZES : FPSIZES,
|
||||
FMTBITS : FMTBITS,
|
||||
LEN1 : LEN1 ,
|
||||
NE1 : NE1 ,
|
||||
NF1 : NF1 ,
|
||||
FMT1 : FMT1 ,
|
||||
BIAS1 : BIAS1,
|
||||
LEN2 : LEN2 ,
|
||||
NE2 : NE2 ,
|
||||
NF2 : NF2 ,
|
||||
FMT2 : FMT2 ,
|
||||
BIAS2 : BIAS2,
|
||||
CVTLEN : CVTLEN,
|
||||
LLEN : LLEN,
|
||||
LOGCVTLEN : LOGCVTLEN,
|
||||
NORMSHIFTSZ : NORMSHIFTSZ,
|
||||
LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
|
||||
CORRSHIFTSZ : CORRSHIFTSZ,
|
||||
DIVN : DIVN,
|
||||
LOGR : LOGR,
|
||||
RK : RK,
|
||||
LOGRK : LOGRK,
|
||||
FPDUR : FPDUR,
|
||||
DURLEN : DURLEN,
|
||||
DIVb : DIVb,
|
||||
DIVBLEN : DIVBLEN,
|
||||
DIVa : DIVa
|
||||
};
|
119
config/shared/test-shared.vh
Normal file
119
config/shared/test-shared.vh
Normal file
@ -0,0 +1,119 @@
|
||||
// constants defining different privilege modes
|
||||
// defined in Table 1.1 of the privileged spec
|
||||
localparam M_MODE = (2'b11);
|
||||
localparam S_MODE = (2'b01);
|
||||
localparam U_MODE = (2'b00);
|
||||
|
||||
// Virtual Memory Constants
|
||||
localparam VPN_SEGMENT_BITS = (XLEN == 32 ? 32'd10 : 32'd9);
|
||||
localparam VPN_BITS = (XLEN==32 ? (2*VPN_SEGMENT_BITS) : (4*VPN_SEGMENT_BITS));
|
||||
localparam PPN_BITS = (XLEN==32 ? 32'd22 : 32'd44);
|
||||
localparam PA_BITS = (XLEN==32 ? 32'd34 : 32'd56);
|
||||
localparam SVMODE_BITS = (XLEN==32 ? 32'd1 : 32'd4);
|
||||
localparam ASID_BASE = (XLEN==32 ? 32'd22 : 32'd44);
|
||||
localparam ASID_BITS = (XLEN==32 ? 32'd9 : 32'd16);
|
||||
|
||||
// constants to check SATP_MODE against
|
||||
// defined in Table 4.3 of the privileged spec
|
||||
localparam NO_TRANSLATE = 4'd0;
|
||||
localparam SV32 = 4'd1;
|
||||
localparam SV39 = 4'd8;
|
||||
localparam SV48 = 4'd9;
|
||||
|
||||
// macros to define supported modes
|
||||
localparam A_SUPPORTED = ((MISA >> 0) % 2 == 1);
|
||||
localparam B_SUPPORTED = ((ZBA_SUPPORTED | ZBB_SUPPORTED | ZBC_SUPPORTED | ZBS_SUPPORTED));// not based on MISA
|
||||
localparam C_SUPPORTED = ((MISA >> 2) % 2 == 1);
|
||||
localparam D_SUPPORTED = ((MISA >> 3) % 2 == 1);
|
||||
localparam E_SUPPORTED = ((MISA >> 4) % 2 == 1);
|
||||
localparam F_SUPPORTED = ((MISA >> 5) % 2 == 1);
|
||||
localparam I_SUPPORTED = ((MISA >> 8) % 2 == 1);
|
||||
localparam M_SUPPORTED = ((MISA >> 12) % 2 == 1);
|
||||
localparam Q_SUPPORTED = ((MISA >> 16) % 2 == 1);
|
||||
localparam S_SUPPORTED = ((MISA >> 18) % 2 == 1);
|
||||
localparam U_SUPPORTED = ((MISA >> 20) % 2 == 1);
|
||||
// N-mode user-level interrupts are deprecated per Andrew Waterman 1/13/21
|
||||
|
||||
// logarithm of XLEN, used for number of index bits to select
|
||||
localparam LOG_XLEN = (XLEN == 32 ? 32'd5 : 32'd6);
|
||||
|
||||
// Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries)
|
||||
localparam PMPCFG_ENTRIES = (PMP_ENTRIES/32'd8);
|
||||
|
||||
// Floating point constants for Quad, Double, Single, and Half precisions
|
||||
// Lim: I've made some of these 64 bit to avoid width warnings.
|
||||
// If errors crop up, try downsizing back to 32.
|
||||
localparam Q_LEN = 32'd128;
|
||||
localparam Q_NE = 32'd15;
|
||||
localparam Q_NF = 32'd112;
|
||||
localparam Q_BIAS = 32'd16383;
|
||||
localparam Q_FMT = 2'd3;
|
||||
localparam D_LEN = 32'd64;
|
||||
localparam D_NE = 32'd11;
|
||||
localparam D_NF = 32'd52;
|
||||
localparam D_BIAS = 32'd1023;
|
||||
localparam D_FMT = 2'd1;
|
||||
localparam S_LEN = 32'd32;
|
||||
localparam S_NE = 32'd8;
|
||||
localparam S_NF = 32'd23;
|
||||
localparam S_BIAS = 32'd127;
|
||||
localparam S_FMT = 2'd0;
|
||||
localparam H_LEN = 32'd16;
|
||||
localparam H_NE = 32'd5;
|
||||
localparam H_NF = 32'd10;
|
||||
localparam H_BIAS = 32'd15;
|
||||
localparam H_FMT = 2'd2;
|
||||
|
||||
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
|
||||
localparam FLEN = (Q_SUPPORTED ? Q_LEN : D_SUPPORTED ? D_LEN : S_LEN);
|
||||
localparam NE = (Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : S_NE);
|
||||
localparam NF = (Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : S_NF);
|
||||
localparam FMT = (Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : 2'd0);
|
||||
localparam BIAS = (Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : S_BIAS);
|
||||
/* Delete once tested dh 10/10/22
|
||||
|
||||
localparam FLEN = (Q_SUPPORTED ? Q_LEN : D_SUPPORTED ? D_LEN : F_SUPPORTED ? S_LEN : H_LEN);
|
||||
localparam NE = (Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : F_SUPPORTED ? S_NE : H_NE);
|
||||
localparam NF = (Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : F_SUPPORTED ? S_NF : H_NF);
|
||||
localparam FMT = (Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : F_SUPPORTED ? 2'd0 : 2'd2);
|
||||
localparam BIAS = (Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : F_SUPPORTED ? S_BIAS : H_BIAS);*/
|
||||
|
||||
// Floating point constants needed for FPU parameterization
|
||||
localparam FPSIZES = ((32)'(Q_SUPPORTED)+(32)'(D_SUPPORTED)+(32)'(F_SUPPORTED)+(32)'(ZFH_SUPPORTED));
|
||||
localparam FMTBITS = ((32)'(FPSIZES>=3)+1);
|
||||
localparam LEN1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_LEN : (F_SUPPORTED & (FLEN != S_LEN)) ? S_LEN : H_LEN);
|
||||
localparam NE1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_NE : (F_SUPPORTED & (FLEN != S_LEN)) ? S_NE : H_NE);
|
||||
localparam NF1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_NF : (F_SUPPORTED & (FLEN != S_LEN)) ? S_NF : H_NF);
|
||||
localparam FMT1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? 2'd1 : (F_SUPPORTED & (FLEN != S_LEN)) ? 2'd0 : 2'd2);
|
||||
localparam BIAS1 = ((D_SUPPORTED & (FLEN != D_LEN)) ? D_BIAS : (F_SUPPORTED & (FLEN != S_LEN)) ? S_BIAS : H_BIAS);
|
||||
localparam LEN2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_LEN : H_LEN);
|
||||
localparam NE2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NE : H_NE);
|
||||
localparam NF2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF : H_NF);
|
||||
localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2);
|
||||
localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS);
|
||||
|
||||
// division constants
|
||||
localparam DIVN = (((NF<XLEN) & IDIV_ON_FPU) ? XLEN : NF+2); // standard length of input
|
||||
localparam LOGR = ($clog2(RADIX)); // r = log(R)
|
||||
localparam RK = (LOGR*DIVCOPIES); // r*k used for intdiv preproc
|
||||
localparam LOGRK = ($clog2(RK)); // log2(r*k)
|
||||
localparam FPDUR = ((DIVN+1+(LOGR*DIVCOPIES))/(LOGR*DIVCOPIES)+(RADIX/4));
|
||||
localparam DURLEN = ($clog2(FPDUR+1));
|
||||
localparam DIVb = (FPDUR*LOGR*DIVCOPIES-1); // canonical fdiv size (b)
|
||||
localparam DIVBLEN = ($clog2(DIVb+1)-1);
|
||||
localparam DIVa = (DIVb+1-XLEN); // used for idiv on fpu
|
||||
|
||||
// largest length in IEU/FPU
|
||||
localparam CVTLEN = ((NF<XLEN) ? (XLEN) : (NF));
|
||||
localparam LLEN = (($unsigned(FLEN)<$unsigned(XLEN)) ? ($unsigned(XLEN)) : ($unsigned(FLEN)));
|
||||
localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
|
||||
localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6)));
|
||||
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ));
|
||||
localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVN+1+NF) > (3*NF+4) ? (DIVN+1+NF) : (3*NF+4)));
|
||||
|
||||
|
||||
// Disable spurious Verilator warnings
|
||||
|
||||
/* verilator lint_off STMTDLY */
|
||||
/* verilator lint_off ASSIGNDLY */
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
@ -46,18 +46,33 @@ configs = [
|
||||
)
|
||||
]
|
||||
|
||||
# bpdSize = [6, 8, 10, 12, 14, 16]
|
||||
# bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic', 'local_basic']
|
||||
# for CurrBPType in bpdType:
|
||||
# for CurrBPSize in bpdSize:
|
||||
# name = CurrBPType+str(CurrBPSize)
|
||||
# configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize)
|
||||
# tc = TestCase(
|
||||
# name=name,
|
||||
# variant="rv32gc",
|
||||
# cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
|
||||
# grepstr="")
|
||||
# configs.append(tc)
|
||||
|
||||
bpdSize = [6, 8, 10, 12, 14, 16]
|
||||
bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic']
|
||||
LHRSize = [4, 8, 10]
|
||||
bpdType = ['local_repair']
|
||||
for CurrBPType in bpdType:
|
||||
for CurrBPSize in bpdSize:
|
||||
name = CurrBPType+str(CurrBPSize)
|
||||
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize)
|
||||
tc = TestCase(
|
||||
name=name,
|
||||
variant="rv32gc",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
|
||||
grepstr="")
|
||||
configs.append(tc)
|
||||
for CurrLHRSize in LHRSize:
|
||||
name = str(CurrLHRSize)+CurrBPType+str(CurrBPSize)
|
||||
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize) + " +define+BPRED_NUM_LHR=" + str(CurrLHRSize) + " "
|
||||
tc = TestCase(
|
||||
name=name,
|
||||
variant="rv32gc",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
|
||||
grepstr="")
|
||||
configs.append(tc)
|
||||
|
||||
import os
|
||||
from multiprocessing import Pool, TimeoutError
|
||||
|
@ -8,7 +8,7 @@ basepath=$(dirname $0)/..
|
||||
for config in rv32e rv64gc rv32gc rv32imc rv32i rv64i rv64fpquad; do
|
||||
#for config in rv64gc; do
|
||||
echo "$config linting..."
|
||||
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes ); then
|
||||
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/wally/cvw.sv $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes ); then
|
||||
echo "Exiting after $config lint due to errors or warnings"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -59,7 +59,7 @@ if {$argc >= 3} {
|
||||
# default to config/rv64ic, but allow this to be overridden at the command line. For example:
|
||||
# do wally-pipelined-batch.do ../config/rv32imc rv32imc
|
||||
if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
|
||||
vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
|
||||
vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
|
||||
# start and run simulation
|
||||
if { $coverage } {
|
||||
echo "wally-batch buildroot coverage"
|
||||
@ -88,7 +88,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
|
||||
exec ./slack-notifier/slack-notifier.py
|
||||
|
||||
} elseif {$2 eq "ahb"} {
|
||||
vlog -lint -work wkdir/work_${1}_${2}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 +define+RAM_LATENCY=$3 +define+BURST_EN=$4
|
||||
vlog -lint -work wkdir/work_${1}_${2}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 +define+RAM_LATENCY=$3 +define+BURST_EN=$4
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
vopt wkdir/work_${1}_${2}_${3}_${4}.testbench -work wkdir/work_${1}_${2}_${3}_${4} -G TEST=$2 -o testbenchopt
|
||||
@ -112,7 +112,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
|
||||
# **** fix this so we can pass any number of +defines.
|
||||
# only allows 3 right now
|
||||
|
||||
vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7
|
||||
vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
vopt wkdir/work_${1}_${3}_${4}.testbench -work wkdir/work_${1}_${3}_${4} -G TEST=$4 -o testbenchopt
|
||||
@ -126,7 +126,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
|
||||
# power off -r /dut/core/*
|
||||
|
||||
} else {
|
||||
vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286
|
||||
vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../src/wally/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
if {$coverage} {
|
||||
|
335
sim/wave.do
335
sim/wave.do
@ -6,12 +6,6 @@ add wave -noupdate /testbench/reset
|
||||
add wave -noupdate /testbench/reset_ext
|
||||
add wave -noupdate /testbench/memfilename
|
||||
add wave -noupdate /testbench/dut/core/SATP_REGW
|
||||
add wave -noupdate /testbench/FunctionName/FunctionName/PCD
|
||||
add wave -noupdate /testbench/FunctionName/FunctionName/PCE
|
||||
add wave -noupdate /testbench/FunctionName/FunctionName/PCF
|
||||
add wave -noupdate /testbench/FunctionName/FunctionName/PCM
|
||||
add wave -noupdate /testbench/FunctionName/FunctionName/PCM_temp
|
||||
add wave -noupdate /testbench/FunctionName/FunctionName/PCMOld
|
||||
add wave -noupdate /testbench/dut/core/InstrValidM
|
||||
add wave -noupdate /testbench/FunctionName/FunctionName/FunctionAddr
|
||||
add wave -noupdate /testbench/FunctionName/FunctionName/ProgramAddrIndex
|
||||
@ -38,15 +32,16 @@ add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv
|
||||
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/LoadPageFaultM
|
||||
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/StoreAmoPageFaultM
|
||||
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InterruptM
|
||||
add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/core/FlushD
|
||||
add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/core/FlushE
|
||||
add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/core/FlushM
|
||||
add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/core/FlushW
|
||||
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallF
|
||||
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallD
|
||||
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallE
|
||||
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallM
|
||||
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallW
|
||||
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/HPTWInstrAccessFaultM
|
||||
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD
|
||||
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE
|
||||
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM
|
||||
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW
|
||||
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallF
|
||||
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallD
|
||||
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallE
|
||||
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallM
|
||||
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallW
|
||||
add wave -noupdate -group {instruction pipeline} /testbench/InstrFName
|
||||
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/PostSpillInstrRawF
|
||||
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD
|
||||
@ -66,10 +61,10 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/c/RegWriteD
|
||||
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/RdD
|
||||
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D
|
||||
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D
|
||||
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE
|
||||
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE
|
||||
add wave -noupdate -group {Execution Stage} /testbench/InstrEName
|
||||
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE
|
||||
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/PCE
|
||||
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/InstrE
|
||||
add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName
|
||||
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE
|
||||
add wave -noupdate -expand -group {Memory Stage} /testbench/FunctionName/FunctionName/FunctionName
|
||||
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrValidM
|
||||
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM
|
||||
@ -99,13 +94,13 @@ add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/STVEC_REGW
|
||||
add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/csr/csru/csru/FRM_REGW
|
||||
add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/csr/csru/csru/FFLAGS_REGW
|
||||
add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/csr/csru/csru/STATUS_FS
|
||||
add wave -noupdate -group Bpred -group {branch update selection inputs} -divider {class check}
|
||||
add wave -noupdate -group Bpred -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF
|
||||
add wave -noupdate -group Bpred -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/NextValidPCE
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCF
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F
|
||||
add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} -divider {class check}
|
||||
add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF
|
||||
add wave -noupdate -expand -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE
|
||||
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF
|
||||
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/NextValidPCE
|
||||
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/PCF
|
||||
add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F
|
||||
add wave -noupdate -group RegFile -expand /testbench/dut/core/ieu/dp/regf/rf
|
||||
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a1
|
||||
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a2
|
||||
@ -120,8 +115,8 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core
|
||||
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultW
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/A
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/B
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/Result
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/ALUControl
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/ALUResult
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/BALUControl
|
||||
add wave -noupdate -group alu -divider internals
|
||||
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs1D
|
||||
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs2D
|
||||
@ -152,23 +147,24 @@ add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core
|
||||
add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core/ebu/ebu/LSUDisable
|
||||
add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core/ebu/ebu/IFUSelect
|
||||
add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core/ebu/ebu/LSUSelect
|
||||
add wave -noupdate -group AHB -expand -group multicontroller /testbench/dut/core/ebu/ebu/ebufsmarb/CurrState
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HTRANS
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HBURST
|
||||
add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/ebu/ebu/IFUHTRANS
|
||||
add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/ebu/ebu/IFUHADDR
|
||||
add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/ebu/ebu/IFUHBURST
|
||||
add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/ebu/ebu/IFUHREADY
|
||||
add wave -noupdate -group AHB -expand -group IFU /testbench/dut/core/HRDATA
|
||||
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUReq
|
||||
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHTRANS
|
||||
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHSIZE
|
||||
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHBURST
|
||||
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHADDR
|
||||
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/HRDATA
|
||||
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHWRITE
|
||||
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHWSTRB
|
||||
add wave -noupdate -group AHB -expand -group LSU /testbench/dut/core/ebu/ebu/LSUHWDATA
|
||||
add wave -noupdate -group AHB -expand -group LSU -color Pink /testbench/dut/core/lsu/LSUHREADY
|
||||
add wave -noupdate -group AHB -group IFU /testbench/dut/core/ebu/ebu/IFUHTRANS
|
||||
add wave -noupdate -group AHB -group IFU /testbench/dut/core/ebu/ebu/IFUHADDR
|
||||
add wave -noupdate -group AHB -group IFU /testbench/dut/core/ebu/ebu/IFUHBURST
|
||||
add wave -noupdate -group AHB -group IFU /testbench/dut/core/ebu/ebu/IFUHREADY
|
||||
add wave -noupdate -group AHB -group IFU /testbench/dut/core/HRDATA
|
||||
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUReq
|
||||
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHTRANS
|
||||
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHSIZE
|
||||
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHBURST
|
||||
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHADDR
|
||||
add wave -noupdate -group AHB -group LSU /testbench/dut/core/HRDATA
|
||||
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHWRITE
|
||||
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHWSTRB
|
||||
add wave -noupdate -group AHB -group LSU /testbench/dut/core/ebu/ebu/LSUHWDATA
|
||||
add wave -noupdate -group AHB -group LSU -color Pink /testbench/dut/core/lsu/LSUHREADY
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HCLK
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HRESETn
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/ebu/HREADY
|
||||
@ -210,12 +206,12 @@ add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/d
|
||||
add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr
|
||||
add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/IEUAdrE
|
||||
add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/IEUAdrM
|
||||
add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CAdr
|
||||
add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet
|
||||
add wave -noupdate -group lsu -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay}
|
||||
add wave -noupdate -group lsu -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
|
||||
add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/HitWay
|
||||
add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUWriteEn
|
||||
add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CAdr
|
||||
add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CacheSet
|
||||
add wave -noupdate -group lsu -group dcache -group {replacement policy} -color {Orange Red} {/testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUMemory[0]}
|
||||
add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CurrLRU
|
||||
add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/NextLRU
|
||||
@ -235,7 +231,7 @@ add wave -noupdate -group lsu -group dcache -group flush /testbench/dut/core/lsu
|
||||
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay
|
||||
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr
|
||||
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/PAdr
|
||||
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/CAdr
|
||||
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet
|
||||
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/NextLRU
|
||||
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CurrLRU
|
||||
add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUWriteEn
|
||||
@ -252,60 +248,59 @@ add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/RAM[62]}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/RAM[62]}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/wordram/CacheDataMem/we}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/CAdr
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidWay}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
|
||||
@ -323,14 +318,7 @@ add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way3
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag}
|
||||
add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
|
||||
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr
|
||||
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr
|
||||
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache
|
||||
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall
|
||||
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM
|
||||
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteData
|
||||
add wave -noupdate -group lsu -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
|
||||
add wave -noupdate -group lsu -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit
|
||||
add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextSet
|
||||
add wave -noupdate -group lsu -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr
|
||||
add wave -noupdate -group lsu -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck
|
||||
add wave -noupdate -group lsu -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord
|
||||
@ -362,22 +350,27 @@ add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMASt
|
||||
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF
|
||||
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM
|
||||
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/DTLBWalk
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/WalkerState
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/HPTWAdr
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/PTE
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/NextPageType
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/PageType
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/ValidNonLeafPTE
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/SelHPTW
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/HPTWStall
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/DTLBWalk
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/hptw/hptw/WalkerState
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/HPTWAdr
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/PTE
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/NextPageType
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/PageType
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/hptw/hptw/ValidNonLeafPTE
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/ITLBMissF
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/DTLBMissM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/ITLBWriteF
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/DTLBWriteM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/LSULoadAccessFaultM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/LSUStoreAmoAccessFaultM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/HPTWInstrAccessFaultM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/LoadAccessFaultM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/StoreAmoAccessFaultM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/hptw/ITLBWriteF
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/hptw/DTLBWriteM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSUAccessFaultM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/DCacheStallM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/HPTWInstrAccessFaultF
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSULoadAccessFaultM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSUStoreAmoAccessFaultM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LoadAccessFaultM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/StoreAmoAccessFaultM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/HPTWInstrAccessFault
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/uncore/plic/plic/UARTIntr
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/uncore/plic/plic/GPIOIntr
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/uncore/plic/plic/MExtInt
|
||||
@ -396,9 +389,9 @@ add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/un
|
||||
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/uncore/plic/plic/max_priority_with_irqs
|
||||
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/uncore/plic/plic/irqs_at_max_priority
|
||||
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/uncore/plic/plic/threshMask
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOPinsIn
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOPinsOut
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOPinsEn
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOIN
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOOUT
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOEN
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/GPIOIntr
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/PSEL
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/uncore/gpio/gpio/PADDR
|
||||
@ -462,7 +455,7 @@ add wave -noupdate -group ifu /testbench/dut/core/ifu/PostSpillInstrRawF
|
||||
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUStallF
|
||||
add wave -noupdate -group ifu -group Spill /testbench/dut/core/ifu/Spill/spill/CurrState
|
||||
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/SpillF
|
||||
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/IFUCacheBusStallD
|
||||
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/IFUCacheBusStallF
|
||||
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/ITLBMissF
|
||||
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/TakeSpillF
|
||||
add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HSIZE
|
||||
@ -483,54 +476,38 @@ add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and
|
||||
add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr
|
||||
add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck
|
||||
add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/VictimWay
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/bwe}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/wordram/CacheDataMem/dout}
|
||||
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/wordram/CacheDataMem/RAM}
|
||||
add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite
|
||||
add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/ITLBMissF
|
||||
add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/VAdr
|
||||
@ -556,21 +533,21 @@ add wave -noupdate -group ifu -group itlb -expand -group key19 {/testbench/dut/c
|
||||
add wave -noupdate -expand -group {Performance Counters} -label MCYCLE -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[0]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -label MINSTRET -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[2]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label Branch -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[3]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Dir Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[7]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {Jump (Not Return)} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[4]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label Return -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[5]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[6]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Dir Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[7]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BTA Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[8]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {RAS Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[9]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP CLASS WRONG} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[10]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Access} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[16]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Miss} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[17]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Miss Cycles} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[18]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {Load Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {Store Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[13]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {D Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[15]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group ICACHE -label {I Cache Access} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[16]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group ICACHE -label {I Cache Miss} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[17]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group ICACHE -label {I Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[18]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {Load Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {Store Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[13]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {D Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[15]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -group Privileged -label {CSR Write} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[19]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -group Privileged -label Fence.I {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[20]}
|
||||
add wave -noupdate -expand -group {Performance Counters} -group Privileged -label sfence.VMA {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[21]}
|
||||
@ -609,34 +586,22 @@ add wave -noupdate -group uncore /testbench/dut/uncore/uncore/HSELNoneD
|
||||
add wave -noupdate -group uncore /testbench/dut/uncore/uncore/HSELPLICD
|
||||
add wave -noupdate -group uncore /testbench/dut/uncore/uncore/HRDATA
|
||||
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/rd
|
||||
add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/IndexNextF
|
||||
add wave -noupdate -group {branch direction} -expand -group {branch outcome} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCSrcE
|
||||
add wave -noupdate -group {branch direction} -expand -group conditions /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushM
|
||||
add wave -noupdate -group {branch direction} -expand -group conditions /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushE
|
||||
add wave -noupdate -group {branch direction} -expand -group ghr /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRF
|
||||
add wave -noupdate -group {branch direction} -expand -group ghr /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRD
|
||||
add wave -noupdate -group {branch direction} -expand -group ghr /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRE
|
||||
add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushD
|
||||
add wave -noupdate -group {branch direction} -expand -group nextghr2 /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRNextF
|
||||
add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/IndexE
|
||||
add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/StallM
|
||||
add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushM
|
||||
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRF
|
||||
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCNextF
|
||||
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRNextF
|
||||
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/IndexNextF
|
||||
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/DCacheAccess
|
||||
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/ICacheMiss
|
||||
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/ICacheAccess
|
||||
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/DCacheMiss
|
||||
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/InstrValidNotFlushedM
|
||||
add wave -noupdate /testbench/clk
|
||||
add wave -noupdate /testbench/HPMCSample/InitialHPMCOUNTERH
|
||||
add wave -noupdate /testbench/HPMCSample/EndSample
|
||||
add wave -noupdate /testbench/HPMCSample/StartSample
|
||||
add wave -noupdate /testbench/dut/core/ifu/PCF
|
||||
add wave -noupdate /testbench/reset
|
||||
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPDirPredD
|
||||
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BranchM
|
||||
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/NewBPDirPredM
|
||||
add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/mem
|
||||
TreeUpdate [SetDefaultTree]
|
||||
WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {23 ns} 0} {{Cursor 5} {394987 ns} 1}
|
||||
quietly wave cursor active 4
|
||||
WaveRestoreCursors {{Cursor 4} {12208 ns} 1} {{Cursor 4} {435726 ns} 0}
|
||||
quietly wave cursor active 2
|
||||
configure wave -namecolwidth 250
|
||||
configure wave -valuecolwidth 194
|
||||
configure wave -justifyvalue left
|
||||
@ -651,4 +616,4 @@ configure wave -griddelta 40
|
||||
configure wave -timeline 0
|
||||
configure wave -timelineunits ns
|
||||
update
|
||||
WaveRestoreZoom {0 ns} {52 ns}
|
||||
WaveRestoreZoom {435627 ns} {435795 ns}
|
||||
|
14
src/cache/cache.sv
vendored
14
src/cache/cache.sv
vendored
@ -27,9 +27,7 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, READ_ONLY_CACHE) (
|
||||
module cache #(parameter PA_BITS, XLEN, LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, READ_ONLY_CACHE) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY
|
||||
@ -40,7 +38,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
input logic FlushCache, // Flush all dirty lines back to memory
|
||||
input logic InvalidateCache, // Clear all valid bits
|
||||
input logic [11:0] NextSet, // Virtual address, but we only use the lower 12 bits.
|
||||
input logic [`PA_BITS-1:0] PAdr, // Physical address
|
||||
input logic [PA_BITS-1:0] PAdr, // Physical address
|
||||
input logic [(WORDLEN-1)/8:0] ByteMask, // Which bytes to write (D$ only)
|
||||
input logic [WORDLEN-1:0] CacheWriteData, // Data to write to cache (D$ only)
|
||||
output logic CacheCommitted, // Cache has started bus operation that shouldn't be interrupted
|
||||
@ -57,7 +55,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
input logic [LOGBWPL-1:0] BeatCount, // Beat in burst
|
||||
input logic [LINELEN-1:0] FetchBuffer, // Buffer long enough to hold entire cache line arriving from bus
|
||||
output logic [1:0] CacheBusRW, // [1] Read (cache line fetch) or [0] write bus (cache line writeback)
|
||||
output logic [`PA_BITS-1:0] CacheBusAdr // Address for bus access
|
||||
output logic [PA_BITS-1:0] CacheBusAdr // Address for bus access
|
||||
);
|
||||
|
||||
// Cache parameters
|
||||
@ -65,7 +63,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
localparam OFFSETLEN = $clog2(LINEBYTELEN); // Number of bits in offset field
|
||||
localparam SETLEN = $clog2(NUMLINES); // Number of set bits
|
||||
localparam SETTOP = SETLEN+OFFSETLEN; // Number of set plus offset bits
|
||||
localparam TAGLEN = `PA_BITS - SETTOP; // Number of tag bits
|
||||
localparam TAGLEN = PA_BITS - SETTOP; // Number of tag bits
|
||||
localparam CACHEWORDSPERLINE = LINELEN/WORDLEN;// Number of words in cache line
|
||||
localparam LOGCWPL = $clog2(CACHEWORDSPERLINE);// Log2 of ^
|
||||
localparam FLUSHADRTHRESHOLD = NUMLINES - 1; // Used to determine when flush is complete
|
||||
@ -114,7 +112,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
AdrSelMuxSel, CacheSet);
|
||||
|
||||
// Array of cache ways, along with victim, hit, dirty, and read merging logic
|
||||
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0](
|
||||
cacheway #(PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0](
|
||||
.clk, .reset, .CacheEn, .CacheSet, .PAdr, .LineWriteData, .LineByteMask,
|
||||
.SetValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay,
|
||||
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);
|
||||
@ -152,7 +150,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
.PAdr(WordOffsetAddr), .ReadDataLine, .ReadDataWord);
|
||||
|
||||
// Bus address for fetch, writeback, or flush writeback
|
||||
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
mux3 #(PA_BITS) CacheBusAdrMux(.d0({PAdr[PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
|
||||
.d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
|
||||
.s({SelFlush, SelWriteback}), .y(CacheBusAdr));
|
||||
|
2
src/cache/cacheLRU.sv
vendored
2
src/cache/cacheLRU.sv
vendored
@ -113,7 +113,7 @@ module cacheLRU
|
||||
|
||||
// The root node of the LRU tree will always be selected in LRUUpdate. No mux needed.
|
||||
assign NextLRU[NUMWAYS-2] = ~WayExpanded[NUMWAYS-2];
|
||||
mux2 #(1) LRUMuxes[NUMWAYS-3:0](CurrLRU[NUMWAYS-3:0], ~WayExpanded[NUMWAYS-3:0], LRUUpdate[NUMWAYS-3:0], NextLRU[NUMWAYS-3:0]);
|
||||
if (NUMWAYS > 2) mux2 #(1) LRUMuxes[NUMWAYS-3:0](CurrLRU[NUMWAYS-3:0], ~WayExpanded[NUMWAYS-3:0], LRUUpdate[NUMWAYS-3:0], NextLRU[NUMWAYS-3:0]);
|
||||
|
||||
// Compute next victim way.
|
||||
for(node = NUMWAYS-2; node >= NUMWAYS/2; node--) begin
|
||||
|
16
src/cache/cacheway.sv
vendored
16
src/cache/cacheway.sv
vendored
@ -27,16 +27,14 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
||||
module cacheway #(parameter PA_BITS, XLEN, NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
||||
OFFSETLEN = 5, INDEXLEN = 9, READ_ONLY_CACHE = 0) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations)
|
||||
input logic CacheEn, // Enable the cache memory arrays. Disable hold read data constant
|
||||
input logic [$clog2(NUMLINES)-1:0] CacheSet, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
|
||||
input logic [`PA_BITS-1:0] PAdr, // Physical address
|
||||
input logic [PA_BITS-1:0] PAdr, // Physical address
|
||||
input logic [LINELEN-1:0] LineWriteData, // Final data written to cache (D$ only)
|
||||
input logic SetValid, // Set the valid bit in the selected way and set
|
||||
input logic SetDirty, // Set the dirty bit in the selected way and set
|
||||
@ -54,11 +52,11 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
||||
output logic DirtyWay, // This way is dirty
|
||||
output logic [TAGLEN-1:0] TagWay); // This way's tag if valid
|
||||
|
||||
localparam WORDSPERLINE = LINELEN/`XLEN;
|
||||
localparam WORDSPERLINE = LINELEN/XLEN;
|
||||
localparam BYTESPERLINE = LINELEN/8;
|
||||
localparam LOGWPL = $clog2(WORDSPERLINE);
|
||||
localparam LOGXLENBYTES = $clog2(`XLEN/8);
|
||||
localparam BYTESPERWORD = `XLEN/8;
|
||||
localparam LOGXLENBYTES = $clog2(XLEN/8);
|
||||
localparam BYTESPERWORD = XLEN/8;
|
||||
|
||||
logic [NUMLINES-1:0] ValidBits;
|
||||
logic [NUMLINES-1:0] DirtyBits;
|
||||
@ -113,12 +111,12 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
||||
|
||||
ram1p1rwe #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn),
|
||||
.addr(CacheSet), .dout(ReadTag),
|
||||
.din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));
|
||||
.din(PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));
|
||||
|
||||
// AND portion of distributed tag multiplexer
|
||||
assign TagWay = SelTag ? ReadTag : '0; // AND part of AOMux
|
||||
assign DirtyWay = SelTag & Dirty & ValidWay;
|
||||
assign HitWay = ValidWay & (ReadTag == PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]);
|
||||
assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Data Array
|
||||
|
@ -27,9 +27,10 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module ahbcacheinterface #(
|
||||
parameter AHBW,
|
||||
parameter LLEN,
|
||||
parameter PA_BITS,
|
||||
parameter BEATSPERLINE, // Number of AHBW words (beats) in cacheline
|
||||
parameter AHBWLOGBWPL, // Log2 of ^
|
||||
parameter LINELEN, // Number of bits in cacheline
|
||||
@ -44,14 +45,14 @@ module ahbcacheinterface #(
|
||||
output logic [2:0] HSIZE, // AHB transaction width
|
||||
output logic [2:0] HBURST, // AHB burst length
|
||||
// bus interface buses
|
||||
input logic [`AHBW-1:0] HRDATA, // AHB read data
|
||||
output logic [`PA_BITS-1:0] HADDR, // AHB address
|
||||
output logic [`AHBW-1:0] HWDATA, // AHB write data
|
||||
output logic [`AHBW/8-1:0] HWSTRB, // AHB byte mask
|
||||
input logic [AHBW-1:0] HRDATA, // AHB read data
|
||||
output logic [PA_BITS-1:0] HADDR, // AHB address
|
||||
output logic [AHBW-1:0] HWDATA, // AHB write data
|
||||
output logic [AHBW/8-1:0] HWSTRB, // AHB byte mask
|
||||
|
||||
// cache interface
|
||||
input logic [`PA_BITS-1:0] CacheBusAdr, // Address of cache line
|
||||
input logic [`LLEN-1:0] CacheReadDataWordM, // One word of cache line during a writeback
|
||||
input logic [PA_BITS-1:0] CacheBusAdr, // Address of cache line
|
||||
input logic [LLEN-1:0] CacheReadDataWordM, // One word of cache line during a writeback
|
||||
input logic CacheableOrFlushCacheM, // Memory operation is cacheable or flushing D$
|
||||
input logic Cacheable, // Memory operation is cachable
|
||||
input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch
|
||||
@ -61,8 +62,8 @@ module ahbcacheinterface #(
|
||||
output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr
|
||||
|
||||
// uncached interface
|
||||
input logic [`PA_BITS-1:0] PAdr, // Physical address of uncached memory operation
|
||||
input logic [`LLEN-1:0] WriteDataM, // IEU write data for uncached store
|
||||
input logic [PA_BITS-1:0] PAdr, // Physical address of uncached memory operation
|
||||
input logic [LLEN-1:0] WriteDataM, // IEU write data for uncached store
|
||||
input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write
|
||||
input logic [2:0] Funct3, // Size of uncached memory operation
|
||||
|
||||
@ -74,11 +75,11 @@ module ahbcacheinterface #(
|
||||
|
||||
|
||||
localparam BeatCountThreshold = BEATSPERLINE - 1; // Largest beat index
|
||||
logic [`PA_BITS-1:0] LocalHADDR; // Address after selecting between cached and uncached operation
|
||||
logic [PA_BITS-1:0] LocalHADDR; // Address after selecting between cached and uncached operation
|
||||
logic [AHBWLOGBWPL-1:0] BeatCountDelayed; // Beat within the cache line in the second (Data) cache stage
|
||||
logic CaptureEn; // Enable updating the Fetch buffer with valid data from HRDATA
|
||||
logic [`AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s
|
||||
logic [`AHBW-1:0] PreHWDATA; // AHB Address phase write data
|
||||
logic [AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s
|
||||
logic [AHBW-1:0] PreHWDATA; // AHB Address phase write data
|
||||
|
||||
genvar index;
|
||||
|
||||
@ -86,35 +87,35 @@ module ahbcacheinterface #(
|
||||
for (index = 0; index < BEATSPERLINE; index++) begin:fetchbuffer
|
||||
logic [BEATSPERLINE-1:0] CaptureBeat;
|
||||
assign CaptureBeat[index] = CaptureEn & (index == BeatCountDelayed);
|
||||
flopen #(`AHBW) fb(.clk(HCLK), .en(CaptureBeat[index]), .d(HRDATA),
|
||||
.q(FetchBuffer[(index+1)*`AHBW-1:index*`AHBW]));
|
||||
flopen #(AHBW) fb(.clk(HCLK), .en(CaptureBeat[index]), .d(HRDATA),
|
||||
.q(FetchBuffer[(index+1)*AHBW-1:index*AHBW]));
|
||||
end
|
||||
|
||||
mux2 #(`PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR);
|
||||
assign HADDR = ({{`PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(`AHBW/8)) + LocalHADDR;
|
||||
mux2 #(PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR);
|
||||
assign HADDR = ({{PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(AHBW/8)) + LocalHADDR;
|
||||
|
||||
mux2 #(3) sizemux(.d0(Funct3), .d1(`AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE));
|
||||
mux2 #(3) sizemux(.d0(Funct3), .d1(AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE));
|
||||
|
||||
// When AHBW is less than LLEN need extra muxes to select the subword from cache's read data.
|
||||
logic [`AHBW-1:0] CacheReadDataWordAHB;
|
||||
logic [AHBW-1:0] CacheReadDataWordAHB;
|
||||
if(LLENPOVERAHBW > 1) begin
|
||||
logic [`AHBW-1:0] AHBWordSets [(LLENPOVERAHBW)-1:0];
|
||||
logic [AHBW-1:0] AHBWordSets [(LLENPOVERAHBW)-1:0];
|
||||
genvar index;
|
||||
for (index = 0; index < LLENPOVERAHBW; index++) begin:readdatalinesetsmux
|
||||
assign AHBWordSets[index] = CacheReadDataWordM[(index*`AHBW)+`AHBW-1: (index*`AHBW)];
|
||||
assign AHBWordSets[index] = CacheReadDataWordM[(index*AHBW)+AHBW-1: (index*AHBW)];
|
||||
end
|
||||
assign CacheReadDataWordAHB = AHBWordSets[BeatCount[$clog2(LLENPOVERAHBW)-1:0]];
|
||||
end else assign CacheReadDataWordAHB = CacheReadDataWordM[`AHBW-1:0];
|
||||
end else assign CacheReadDataWordAHB = CacheReadDataWordM[AHBW-1:0];
|
||||
|
||||
mux2 #(`AHBW) HWDATAMux(.d0(CacheReadDataWordAHB), .d1(WriteDataM[`AHBW-1:0]),
|
||||
mux2 #(AHBW) HWDATAMux(.d0(CacheReadDataWordAHB), .d1(WriteDataM[AHBW-1:0]),
|
||||
.s(~(CacheableOrFlushCacheM)), .y(PreHWDATA));
|
||||
flopen #(`AHBW) wdreg(HCLK, HREADY, PreHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec
|
||||
flopen #(AHBW) wdreg(HCLK, HREADY, PreHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec
|
||||
|
||||
// *** bummer need a second byte mask for bus as it is AHBW rather than LLEN.
|
||||
// probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0.
|
||||
swbytemask #(`AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(`AHBW/8)-1:0]), .ByteMask(BusByteMaskM));
|
||||
swbytemask #(AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(AHBW/8)-1:0]), .ByteMask(BusByteMaskM));
|
||||
|
||||
flopen #(`AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[`AHBW/8-1:0], HWSTRB);
|
||||
flopen #(AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[AHBW/8-1:0], HWSTRB);
|
||||
|
||||
buscachefsm #(BeatCountThreshold, AHBWLOGBWPL, READ_ONLY_CACHE) AHBBuscachefsm(
|
||||
.HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat,
|
||||
|
@ -27,9 +27,8 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module ahbinterface #(
|
||||
parameter XLEN,
|
||||
parameter LSU = 0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits
|
||||
)(
|
||||
input logic HCLK, HRESETn,
|
||||
@ -37,30 +36,30 @@ module ahbinterface #(
|
||||
input logic HREADY, // AHB peripheral ready
|
||||
output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
|
||||
output logic HWRITE, // AHB 0: Read operation 1: Write operation
|
||||
input logic [`XLEN-1:0] HRDATA, // AHB read data
|
||||
output logic [`XLEN-1:0] HWDATA, // AHB write data
|
||||
output logic [`XLEN/8-1:0] HWSTRB, // AHB byte mask
|
||||
input logic [XLEN-1:0] HRDATA, // AHB read data
|
||||
output logic [XLEN-1:0] HWDATA, // AHB write data
|
||||
output logic [XLEN/8-1:0] HWSTRB, // AHB byte mask
|
||||
|
||||
// lsu/ifu interface
|
||||
input logic Stall, // Core pipeline is stalled
|
||||
input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting
|
||||
input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write
|
||||
input logic [`XLEN/8-1:0] ByteMask, // Bytes enables within a word
|
||||
input logic [`XLEN-1:0] WriteData, // IEU write data for a store
|
||||
input logic [XLEN/8-1:0] ByteMask, // Bytes enables within a word
|
||||
input logic [XLEN-1:0] WriteData, // IEU write data for a store
|
||||
output logic BusStall, // Bus is busy with an in flight memory operation
|
||||
output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
|
||||
output logic [(LSU ? `XLEN : 32)-1:0] FetchBuffer // Register to hold HRDATA after arriving from the bus
|
||||
output logic [(LSU ? XLEN : 32)-1:0] FetchBuffer // Register to hold HRDATA after arriving from the bus
|
||||
);
|
||||
|
||||
logic CaptureEn;
|
||||
localparam LEN = (LSU ? `XLEN : 32); // 32 bits for IFU, XLEN for LSU
|
||||
localparam LEN = (LSU ? XLEN : 32); // 32 bits for IFU, XLEN for LSU
|
||||
|
||||
flopen #(LEN) fb(.clk(HCLK), .en(CaptureEn), .d(HRDATA[LEN-1:0]), .q(FetchBuffer));
|
||||
|
||||
if(LSU) begin
|
||||
// delay HWDATA by 1 cycle per spec; assumes AHBW = XLEN
|
||||
flop #(`XLEN) wdreg(HCLK, WriteData, HWDATA);
|
||||
flop #(`XLEN/8) HWSTRBReg(HCLK, ByteMask, HWSTRB);
|
||||
flop #(XLEN) wdreg(HCLK, WriteData, HWDATA);
|
||||
flop #(XLEN/8) HWSTRBReg(HCLK, ByteMask, HWSTRB);
|
||||
end else begin
|
||||
assign HWDATA = '0;
|
||||
assign HWSTRB = '0;
|
||||
|
@ -27,7 +27,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
`define BURST_EN 1 // Enables burst mode. Disable to show the lost performance.
|
||||
|
||||
// HCLK and clk must be the same clock!
|
||||
|
@ -27,8 +27,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// HCLK and clk must be the same clock!
|
||||
module busfsm (
|
||||
input logic HCLK,
|
||||
|
@ -31,9 +31,8 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module controllerinput #(
|
||||
parameter PA_BITS,
|
||||
parameter SAVE_ENABLED = 1 // 1: Save manager inputs if Save = 1, 0: Don't save inputs
|
||||
)(
|
||||
input logic HCLK,
|
||||
@ -47,14 +46,14 @@ module controllerinput #(
|
||||
input logic HWRITEIn, // Manager input. AHB 0: Read operation 1: Write operation
|
||||
input logic [2:0] HSIZEIn, // Manager input. AHB transaction width
|
||||
input logic [2:0] HBURSTIn, // Manager input. AHB burst length
|
||||
input logic [`PA_BITS-1:0] HADDRIn, // Manager input. AHB address
|
||||
input logic [PA_BITS-1:0] HADDRIn, // Manager input. AHB address
|
||||
output logic HREADYOut, // Indicate to manager the peripheral is not busy and another manager does not have priority
|
||||
// controller output
|
||||
output logic [1:0] HTRANSOut, // Arbitrated manager transaction. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ
|
||||
output logic HWRITEOut, // Arbitrated manager transaction. AHB 0: Read operation 1: Write operation
|
||||
output logic [2:0] HSIZEOut, // Arbitrated manager transaction. AHB transaction width
|
||||
output logic [2:0] HBURSTOut, // Arbitrated manager transaction. AHB burst length
|
||||
output logic [`PA_BITS-1:0] HADDROut, // Arbitrated manager transaction. AHB address
|
||||
output logic [PA_BITS-1:0] HADDROut, // Arbitrated manager transaction. AHB address
|
||||
input logic HREADYIn // Peripheral ready
|
||||
);
|
||||
|
||||
@ -62,13 +61,13 @@ module controllerinput #(
|
||||
logic [2:0] HSIZESave;
|
||||
logic [2:0] HBURSTSave;
|
||||
logic [1:0] HTRANSSave;
|
||||
logic [`PA_BITS-1:0] HADDRSave;
|
||||
logic [PA_BITS-1:0] HADDRSave;
|
||||
|
||||
if (SAVE_ENABLED) begin
|
||||
flopenr #(1+3+3+2+`PA_BITS) SaveReg(HCLK, ~HRESETn, Save,
|
||||
flopenr #(1+3+3+2+PA_BITS) SaveReg(HCLK, ~HRESETn, Save,
|
||||
{HWRITEIn, HSIZEIn, HBURSTIn, HTRANSIn, HADDRIn},
|
||||
{HWRITESave, HSIZESave, HBURSTSave, HTRANSSave, HADDRSave});
|
||||
mux2 #(1+3+3+2+`PA_BITS) RestorMux({HWRITEIn, HSIZEIn, HBURSTIn, HTRANSIn, HADDRIn},
|
||||
mux2 #(1+3+3+2+PA_BITS) RestorMux({HWRITEIn, HSIZEIn, HBURSTIn, HTRANSIn, HADDRIn},
|
||||
{HWRITESave, HSIZESave, HBURSTSave, HTRANSSave, HADDRSave},
|
||||
Restore,
|
||||
{HWRITEOut, HSIZEOut, HBURSTOut, HTRANSOut, HADDROut});
|
||||
|
@ -31,33 +31,31 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module ebu (
|
||||
module ebu #(parameter XLEN, PA_BITS, AHBW)(
|
||||
input logic clk, reset,
|
||||
// Signals from IFU
|
||||
input logic [1:0] IFUHTRANS, // IFU AHB transaction request
|
||||
input logic [2:0] IFUHSIZE, // IFU AHB transaction size
|
||||
input logic [2:0] IFUHBURST, // IFU AHB burst length
|
||||
input logic [`PA_BITS-1:0] IFUHADDR, // IFU AHB address
|
||||
input logic [PA_BITS-1:0] IFUHADDR, // IFU AHB address
|
||||
output logic IFUHREADY, // AHB peripheral ready gated by possible non-grant
|
||||
// Signals from LSU
|
||||
input logic [1:0] LSUHTRANS, // LSU AHB transaction request
|
||||
input logic LSUHWRITE, // LSU AHB transaction direction. 1: write, 0: read
|
||||
input logic [2:0] LSUHSIZE, // LSU AHB size
|
||||
input logic [2:0] LSUHBURST, // LSU AHB burst length
|
||||
input logic [`PA_BITS-1:0] LSUHADDR, // LSU AHB address
|
||||
input logic [`XLEN-1:0] LSUHWDATA, // initially support AHBW = XLEN
|
||||
input logic [`XLEN/8-1:0] LSUHWSTRB, // AHB byte mask
|
||||
input logic [PA_BITS-1:0] LSUHADDR, // LSU AHB address
|
||||
input logic [XLEN-1:0] LSUHWDATA, // initially support AHBW = XLEN
|
||||
input logic [XLEN/8-1:0] LSUHWSTRB, // AHB byte mask
|
||||
output logic LSUHREADY, // AHB peripheral. Never gated as LSU always has priority
|
||||
|
||||
// AHB-Lite external signals
|
||||
output logic HCLK, HRESETn,
|
||||
input logic HREADY, // AHB peripheral ready
|
||||
input logic HRESP, // AHB peripheral response. 0: OK 1: Error
|
||||
output logic [`PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration
|
||||
output logic [`AHBW-1:0] HWDATA, // AHB Write data after arbitration
|
||||
output logic [`XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration
|
||||
output logic [PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration
|
||||
output logic [AHBW-1:0] HWDATA, // AHB Write data after arbitration
|
||||
output logic [XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration
|
||||
output logic HWRITE, // AHB transaction direction after arbitration
|
||||
output logic [2:0] HSIZE, // AHB transaction size after arbitration
|
||||
output logic [2:0] HBURST, // AHB burst length after arbitration
|
||||
@ -73,13 +71,13 @@ module ebu (
|
||||
logic IFUDisable;
|
||||
logic IFUSelect;
|
||||
|
||||
logic [`PA_BITS-1:0] IFUHADDROut;
|
||||
logic [PA_BITS-1:0] IFUHADDROut;
|
||||
logic [1:0] IFUHTRANSOut;
|
||||
logic [2:0] IFUHBURSTOut;
|
||||
logic [2:0] IFUHSIZEOut;
|
||||
logic IFUHWRITEOut;
|
||||
|
||||
logic [`PA_BITS-1:0] LSUHADDROut;
|
||||
logic [PA_BITS-1:0] LSUHADDROut;
|
||||
logic [1:0] LSUHTRANSOut;
|
||||
logic [2:0] LSUHBURSTOut;
|
||||
logic [2:0] LSUHSIZEOut;
|
||||
@ -98,14 +96,14 @@ module ebu (
|
||||
// input stages and muxing for IFU and LSU
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
controllerinput IFUInput(.HCLK, .HRESETn, .Save(IFUSave), .Restore(IFURestore), .Disable(IFUDisable),
|
||||
controllerinput #(PA_BITS) IFUInput(.HCLK, .HRESETn, .Save(IFUSave), .Restore(IFURestore), .Disable(IFUDisable),
|
||||
.Request(IFUReq),
|
||||
.HWRITEIn(1'b0), .HSIZEIn(IFUHSIZE), .HBURSTIn(IFUHBURST), .HTRANSIn(IFUHTRANS), .HADDRIn(IFUHADDR),
|
||||
.HWRITEOut(IFUHWRITEOut), .HSIZEOut(IFUHSIZEOut), .HBURSTOut(IFUHBURSTOut), .HREADYOut(IFUHREADY),
|
||||
.HTRANSOut(IFUHTRANSOut), .HADDROut(IFUHADDROut), .HREADYIn(HREADY));
|
||||
|
||||
// LSU always has priority so there should never be a need to save and restore the address phase inputs.
|
||||
controllerinput #(0) LSUInput(.HCLK, .HRESETn, .Save(1'b0), .Restore(1'b0), .Disable(LSUDisable),
|
||||
controllerinput #(PA_BITS, 0) LSUInput(.HCLK, .HRESETn, .Save(1'b0), .Restore(1'b0), .Disable(LSUDisable),
|
||||
.Request(LSUReq),
|
||||
.HWRITEIn(LSUHWRITE), .HSIZEIn(LSUHSIZE), .HBURSTIn(LSUHBURST), .HTRANSIn(LSUHTRANS), .HADDRIn(LSUHADDR), .HREADYOut(LSUHREADY),
|
||||
.HWRITEOut(LSUHWRITEOut), .HSIZEOut(LSUHSIZEOut), .HBURSTOut(LSUHBURSTOut),
|
||||
|
@ -28,8 +28,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module ebufsmarb (
|
||||
input logic HCLK,
|
||||
input logic HRESETn,
|
||||
|
@ -25,16 +25,15 @@
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fclassify (
|
||||
module fclassify import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, // sign bit
|
||||
input logic XNaN, // is NaN
|
||||
input logic XSNaN, // is signaling NaN
|
||||
input logic XSubnorm, // is Subnormal
|
||||
input logic XZero, // is zero
|
||||
input logic XInf, // is infinity
|
||||
output logic [`XLEN-1:0] ClassRes // classify result
|
||||
output logic [P.XLEN-1:0] ClassRes // classify result
|
||||
);
|
||||
|
||||
logic PInf, PZero, PNorm, PSubnorm; // is the input a positive infinity/zero/normal/subnormal
|
||||
@ -63,6 +62,6 @@ module fclassify (
|
||||
// bit 7 - +Inf
|
||||
// bit 8 - signaling NaN
|
||||
// bit 9 - quiet NaN
|
||||
assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf};
|
||||
assign ClassRes = {{P.XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf};
|
||||
|
||||
endmodule
|
||||
|
@ -27,8 +27,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// OpCtrl values
|
||||
// 110 min
|
||||
// 101 max
|
||||
@ -36,23 +34,23 @@
|
||||
// 001 less than
|
||||
// 011 less than or equal
|
||||
|
||||
module fcmp (
|
||||
input logic [`FMTBITS-1:0] Fmt, // format of fp number
|
||||
module fcmp import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] Fmt, // format of fp number
|
||||
input logic [2:0] OpCtrl, // see above table
|
||||
input logic Xs, Ys, // input signs
|
||||
input logic [`NE-1:0] Xe, Ye, // input exponents
|
||||
input logic [`NF:0] Xm, Ym, // input mantissa
|
||||
input logic [P.NE-1:0] Xe, Ye, // input exponents
|
||||
input logic [P.NF:0] Xm, Ym, // input mantissa
|
||||
input logic XZero, YZero, // is zero
|
||||
input logic XNaN, YNaN, // is NaN
|
||||
input logic XSNaN, YSNaN, // is signaling NaN
|
||||
input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker)
|
||||
input logic [P.FLEN-1:0] X, Y, // original inputs (before unpacker)
|
||||
output logic CmpNV, // invalid flag
|
||||
output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result
|
||||
output logic [`XLEN-1:0] CmpIntRes // compare integer result
|
||||
output logic [P.FLEN-1:0] CmpFpRes, // compare floating-point result
|
||||
output logic [P.XLEN-1:0] CmpIntRes // compare integer result
|
||||
);
|
||||
|
||||
logic LTabs, LT, EQ; // is X < or > or = Y
|
||||
logic [`FLEN-1:0] NaNRes; // NaN result
|
||||
logic [P.FLEN-1:0] NaNRes; // NaN result
|
||||
logic BothZero; // are both inputs zero
|
||||
logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN
|
||||
|
||||
@ -85,44 +83,44 @@ module fcmp (
|
||||
// for RISC-V, return the canonical NaN
|
||||
|
||||
// select the NaN result
|
||||
if (`FPSIZES == 1)
|
||||
if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
|
||||
else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
if (P.FPSIZES == 1)
|
||||
if(P.IEEE754) assign NaNRes = {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
|
||||
else assign NaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
|
||||
else if (`FPSIZES == 2)
|
||||
if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
|
||||
else assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
else if (P.FPSIZES == 2)
|
||||
if(P.IEEE754) assign NaNRes = Fmt ? {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, Xs, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
|
||||
else assign NaNRes = Fmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
|
||||
else if (`FPSIZES == 3)
|
||||
else if (P.FPSIZES == 3)
|
||||
always_comb
|
||||
case (Fmt)
|
||||
`FMT:
|
||||
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
|
||||
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
`FMT1:
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
|
||||
else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
`FMT2:
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
|
||||
else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
default: NaNRes = {`FLEN{1'bx}};
|
||||
P.FMT:
|
||||
if(P.IEEE754) NaNRes = {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
|
||||
else NaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
P.FMT1:
|
||||
if(P.IEEE754) NaNRes = {{P.FLEN-P.LEN1{1'b1}}, Xs, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
|
||||
else NaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
P.FMT2:
|
||||
if(P.IEEE754) NaNRes = {{P.FLEN-P.LEN2{1'b1}}, Xs, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
|
||||
else NaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
|
||||
default: NaNRes = {P.FLEN{1'bx}};
|
||||
endcase
|
||||
|
||||
else if (`FPSIZES == 4)
|
||||
else if (P.FPSIZES == 4)
|
||||
always_comb
|
||||
case (Fmt)
|
||||
2'h3:
|
||||
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
|
||||
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
if(P.IEEE754) NaNRes = {Xs, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
|
||||
else NaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
2'h1:
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
|
||||
else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
if(P.IEEE754) NaNRes = {{P.FLEN-P.D_LEN{1'b1}}, Xs, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
|
||||
else NaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
|
||||
2'h0:
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
|
||||
else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
|
||||
if(P.IEEE754) NaNRes = {{P.FLEN-P.S_LEN{1'b1}}, Xs, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
|
||||
else NaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
|
||||
2'h2:
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
|
||||
else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
|
||||
if(P.IEEE754) NaNRes = {{P.FLEN-P.H_LEN{1'b1}}, Xs, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
|
||||
else NaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
|
||||
endcase
|
||||
|
||||
|
||||
@ -155,6 +153,6 @@ module fcmp (
|
||||
// - -0 = 0
|
||||
// - inf = inf and -inf = -inf
|
||||
// - return 0 if comparison with NaN (unordered)
|
||||
assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
|
||||
assign CmpIntRes = {(P.XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
|
||||
|
||||
endmodule
|
||||
|
@ -25,9 +25,8 @@
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fctrl (
|
||||
module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
// input control signals
|
||||
@ -49,7 +48,7 @@ module fctrl (
|
||||
// opperation mux selections
|
||||
output logic FCvtIntE, FCvtIntW, // convert to integer opperation
|
||||
output logic [2:0] FrmM, // FP rounding mode
|
||||
output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format
|
||||
output logic [P.FMTBITS-1:0] FmtE, FmtM, // FP format
|
||||
output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component
|
||||
output logic FpLoadStoreM, // FP load or store instruction
|
||||
output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit
|
||||
@ -74,7 +73,7 @@ module fctrl (
|
||||
logic [1:0] PostProcSelD; // select result in the post processing unit
|
||||
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
|
||||
logic [2:0] FrmD, FrmE; // FP rounding mode
|
||||
logic [`FMTBITS-1:0] FmtD; // FP format
|
||||
logic [P.FMTBITS-1:0] FmtD; // FP format
|
||||
logic [1:0] Fmt, Fmt2; // format - before possible reduction
|
||||
logic SupportedFmt; // is the format supported
|
||||
logic SupportedFmt2; // is the source format supported for fp -> fp
|
||||
@ -84,10 +83,10 @@ module fctrl (
|
||||
assign Fmt = Funct7D[1:0];
|
||||
assign Fmt2 = Rs2D[1:0]; // source format for fcvt fp->fp
|
||||
|
||||
assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & `D_SUPPORTED) |
|
||||
(Fmt == 2'b10 & `ZFH_SUPPORTED) | (Fmt == 2'b11 & `Q_SUPPORTED));
|
||||
assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & `D_SUPPORTED) |
|
||||
(Fmt2 == 2'b10 & `ZFH_SUPPORTED) | (Fmt2 == 2'b11 & `Q_SUPPORTED));
|
||||
assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & P.D_SUPPORTED) |
|
||||
(Fmt == 2'b10 & P.ZFH_SUPPORTED) | (Fmt == 2'b11 & P.Q_SUPPORTED));
|
||||
assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & P.D_SUPPORTED) |
|
||||
(Fmt2 == 2'b10 & P.ZFH_SUPPORTED) | (Fmt2 == 2'b11 & P.Q_SUPPORTED));
|
||||
|
||||
// decode the instruction
|
||||
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt
|
||||
@ -102,15 +101,15 @@ module fctrl (
|
||||
case(OpD)
|
||||
7'b0000111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flw
|
||||
3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld
|
||||
3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq
|
||||
3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh
|
||||
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld
|
||||
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq
|
||||
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh
|
||||
endcase
|
||||
7'b0100111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsw
|
||||
3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd
|
||||
3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq
|
||||
3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh
|
||||
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd
|
||||
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq
|
||||
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh
|
||||
endcase
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd
|
||||
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub
|
||||
@ -227,14 +226,14 @@ module fctrl (
|
||||
// 10 - half
|
||||
// 11 - quad
|
||||
|
||||
if (`FPSIZES == 1)
|
||||
if (P.FPSIZES == 1)
|
||||
assign FmtD = 0;
|
||||
else if (`FPSIZES == 2)begin
|
||||
else if (P.FPSIZES == 2)begin
|
||||
logic [1:0] FmtTmp;
|
||||
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
|
||||
assign FmtD = (`FMT == FmtTmp);
|
||||
assign FmtD = (P.FMT == FmtTmp);
|
||||
end
|
||||
else if (`FPSIZES == 3|`FPSIZES == 4)
|
||||
else if (P.FPSIZES == 3|P.FPSIZES == 4)
|
||||
assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
|
||||
// Enables indicate that a source register is used and may need stalls. Also indicate special cases for infinity or NaN.
|
||||
@ -313,7 +312,7 @@ module fctrl (
|
||||
assign Adr3D = InstrD[31:27];
|
||||
|
||||
// D/E pipleine register
|
||||
flopenrc #(13+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
flopenrc #(13+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD},
|
||||
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE});
|
||||
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E});
|
||||
@ -321,11 +320,11 @@ module fctrl (
|
||||
flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE});
|
||||
|
||||
// Integer division on FPU divider
|
||||
if (`M_SUPPORTED & `IDIV_ON_FPU) assign IDivStartE = IntDivE;
|
||||
if (P.M_SUPPORTED & P.IDIV_ON_FPU) assign IDivStartE = IntDivE;
|
||||
else assign IDivStartE = 0;
|
||||
|
||||
// E/M pipleine register
|
||||
flopenrc #(13+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
flopenrc #(13+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE},
|
||||
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM});
|
||||
|
||||
|
106
src/fpu/fcvt.sv
106
src/fpu/fcvt.sv
@ -27,23 +27,21 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fcvt (
|
||||
module fcvt import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, // input's sign
|
||||
input logic [`NE-1:0] Xe, // input's exponent
|
||||
input logic [`NF:0] Xm, // input's fraction
|
||||
input logic [`XLEN-1:0] Int, // integer input - from IEU
|
||||
input logic [P.NE-1:0] Xe, // input's exponent
|
||||
input logic [P.NF:0] Xm, // input's fraction
|
||||
input logic [P.XLEN-1:0] Int, // integer input - from IEU
|
||||
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
|
||||
input logic ToInt, // is fp->int (since it's writting to the integer register)
|
||||
input logic XZero, // is the input zero
|
||||
input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half)
|
||||
output logic [`NE:0] Ce, // the calculated expoent
|
||||
output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by
|
||||
input logic [P.FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half)
|
||||
output logic [P.NE:0] Ce, // the calculated expoent
|
||||
output logic [P.LOGCVTLEN-1:0] ShiftAmt, // how much to shift by
|
||||
output logic ResSubnormUf,// does the result underflow or is subnormal
|
||||
output logic Cs, // the result's sign
|
||||
output logic IntZero, // is the integer zero?
|
||||
output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder)
|
||||
output logic [P.CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder)
|
||||
);
|
||||
|
||||
// OpCtrls:
|
||||
@ -56,16 +54,16 @@ module fcvt (
|
||||
// bit 2 bit 1 bit 0
|
||||
// for example: signed long -> single floating point has the OpCode 101
|
||||
|
||||
logic [`FMTBITS-1:0] OutFmt; // format of the output
|
||||
logic [`XLEN-1:0] PosInt; // the positive integer input
|
||||
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
|
||||
logic [`NE-2:0] NewBias; // the bias of the final result
|
||||
logic [`NE-1:0] OldExp; // the old exponent
|
||||
logic [P.FMTBITS-1:0] OutFmt; // format of the output
|
||||
logic [P.XLEN-1:0] PosInt; // the positive integer input
|
||||
logic [P.XLEN-1:0] TrimInt; // integer trimmed to the correct size
|
||||
logic [P.NE-2:0] NewBias; // the bias of the final result
|
||||
logic [P.NE-1:0] OldExp; // the old exponent
|
||||
logic Signed; // is the opperation with a signed integer?
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
|
||||
logic [P.CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [P.LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
|
||||
|
||||
|
||||
// separate OpCtrl for code readability
|
||||
@ -76,9 +74,9 @@ module fcvt (
|
||||
// choose the output format depending on the operation
|
||||
// - fp -> fp: OpCtrl contains the precision of the output
|
||||
// - int -> fp: Fmt contains the precision of the output
|
||||
if (`FPSIZES == 2)
|
||||
assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT);
|
||||
else if (`FPSIZES == 3 | `FPSIZES == 4)
|
||||
if (P.FPSIZES == 2)
|
||||
assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == P.FMT);
|
||||
else if (P.FPSIZES == 3 | P.FPSIZES == 4)
|
||||
assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0];
|
||||
|
||||
|
||||
@ -89,7 +87,7 @@ module fcvt (
|
||||
// 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
|
||||
|
||||
assign PosInt = Cs ? -Int : Int;
|
||||
assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
|
||||
assign TrimInt = {{P.XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
|
||||
assign IntZero = ~|TrimInt;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@ -99,13 +97,13 @@ module fcvt (
|
||||
// choose the input to the leading zero counter i.e. priority encoder
|
||||
// int -> fp : | positive integer | 00000... (if needed) |
|
||||
// fp -> fp : | fraction | 00000... (if needed) |
|
||||
assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} :
|
||||
{Xm, {`CVTLEN-`NF{1'b0}}};
|
||||
assign LzcInFull = IntToFp ? {TrimInt, {P.CVTLEN-P.XLEN+1{1'b0}}} :
|
||||
{Xm, {P.CVTLEN-P.NF{1'b0}}};
|
||||
|
||||
// used as shifter input in postprocessor
|
||||
assign LzcIn = LzcInFull[`CVTLEN-1:0];
|
||||
assign LzcIn = LzcInFull[P.CVTLEN-1:0];
|
||||
|
||||
lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
|
||||
lzc #(P.CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// exp calculations
|
||||
@ -114,42 +112,42 @@ module fcvt (
|
||||
// Select the bias of the output
|
||||
// fp -> int : select 1
|
||||
// ??? -> fp : pick the new bias depending on the output format
|
||||
if (`FPSIZES == 1) begin
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS);
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign NewBias = ToInt ? (P.NE-1)'(1) : (P.NE-1)'(P.BIAS);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
logic [`NE-2:0] NewBiasToFp;
|
||||
assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
logic [P.NE-2:0] NewBiasToFp;
|
||||
assign NewBiasToFp = OutFmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
|
||||
assign NewBias = ToInt ? (P.NE-1)'(1) : NewBiasToFp;
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic [`NE-2:0] NewBiasToFp;
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
logic [P.NE-2:0] NewBiasToFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: NewBiasToFp = (`NE-1)'(`BIAS);
|
||||
`FMT1: NewBiasToFp = (`NE-1)'(`BIAS1);
|
||||
`FMT2: NewBiasToFp = (`NE-1)'(`BIAS2);
|
||||
default: NewBiasToFp = {`NE-1{1'bx}};
|
||||
P.FMT: NewBiasToFp = (P.NE-1)'(P.BIAS);
|
||||
P.FMT1: NewBiasToFp = (P.NE-1)'(P.BIAS1);
|
||||
P.FMT2: NewBiasToFp = (P.NE-1)'(P.BIAS2);
|
||||
default: NewBiasToFp = {P.NE-1{1'bx}};
|
||||
endcase
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
|
||||
assign NewBias = ToInt ? (P.NE-1)'(1) : NewBiasToFp;
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic [`NE-2:0] NewBiasToFp;
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
logic [P.NE-2:0] NewBiasToFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: NewBiasToFp = (`NE-1)'(`Q_BIAS);
|
||||
2'h1: NewBiasToFp = (`NE-1)'(`D_BIAS);
|
||||
2'h0: NewBiasToFp = (`NE-1)'(`S_BIAS);
|
||||
2'h2: NewBiasToFp = (`NE-1)'(`H_BIAS);
|
||||
2'h3: NewBiasToFp = (P.NE-1)'(P.Q_BIAS);
|
||||
2'h1: NewBiasToFp = (P.NE-1)'(P.D_BIAS);
|
||||
2'h0: NewBiasToFp = (P.NE-1)'(P.S_BIAS);
|
||||
2'h2: NewBiasToFp = (P.NE-1)'(P.H_BIAS);
|
||||
endcase
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
|
||||
assign NewBias = ToInt ? (P.NE-1)'(1) : NewBiasToFp;
|
||||
end
|
||||
|
||||
|
||||
// select the old exponent
|
||||
// int -> fp : largest bias + XLEN-1
|
||||
// fp -> ??? : XExp
|
||||
assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe;
|
||||
assign OldExp = IntToFp ? (P.NE)'(P.BIAS)+(P.NE)'(P.XLEN-1) : Xe;
|
||||
|
||||
// calculate CalcExp
|
||||
// fp -> fp :
|
||||
@ -159,13 +157,13 @@ module fcvt (
|
||||
// - correct the exponent when there is a normalization shift ( + LeadingZeros+1)
|
||||
// - the plus 1 is built into the leading zeros by counting the leading zeroes in the mantissa rather than the fraction
|
||||
// fp -> int : XExp - Largest Bias + 1 - (LeadingZeros+1)
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
|
||||
// | P.XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
|
||||
// process:
|
||||
// - start
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary |
|
||||
// | P.XLEN zeros | Mantissa | 0's if nessisary |
|
||||
//
|
||||
// - shift left 1 (1)
|
||||
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
|
||||
// | P.XLEN-1 zeros |bit| frac | 0's if nessisary |
|
||||
// . <- binary point
|
||||
//
|
||||
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
|
||||
@ -185,13 +183,13 @@ module fcvt (
|
||||
// - newBias to make the biased exponent
|
||||
//
|
||||
// oldexp - biasold - LeadingZeros + newbias
|
||||
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros)} + {2'b0, NewBias};
|
||||
assign Ce = {1'b0, OldExp} - (P.NE+1)'(P.BIAS) - {{P.NE-P.LOGCVTLEN+1{1'b0}}, (LeadingZeros)} + {2'b0, NewBias};
|
||||
|
||||
|
||||
// find if the result is subnormal or underflows
|
||||
// - if the calculated exponent is 0 or negative (and the input/result is not exactly 0)
|
||||
// - can't underflow an integer to Fp conversion
|
||||
assign ResSubnormUf = (~|Ce | Ce[`NE])&~XZero&~IntToFp;
|
||||
assign ResSubnormUf = (~|Ce | Ce[P.NE])&~XZero&~IntToFp;
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@ -211,8 +209,8 @@ module fcvt (
|
||||
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
|
||||
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
|
||||
always_comb
|
||||
if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}};
|
||||
else if (ResSubnormUf) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0];
|
||||
if(ToInt) ShiftAmt = Ce[P.LOGCVTLEN-1:0]&{P.LOGCVTLEN{~Ce[P.NE]}};
|
||||
else if (ResSubnormUf) ShiftAmt = (P.LOGCVTLEN)'(P.NF-1)+Ce[P.LOGCVTLEN-1:0];
|
||||
else ShiftAmt = LeadingZeros;
|
||||
|
||||
|
||||
@ -227,7 +225,7 @@ module fcvt (
|
||||
// - otherwise: the floating point input's sign
|
||||
always_comb
|
||||
if(IntToFp)
|
||||
if(Int64) Cs = Int[`XLEN-1]&Signed;
|
||||
if(Int64) Cs = Int[P.XLEN-1]&Signed;
|
||||
else Cs = Int[31]&Signed;
|
||||
else Cs = Xs;
|
||||
|
||||
|
@ -26,15 +26,13 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrt(
|
||||
module fdivsqrt import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [`FMTBITS-1:0] FmtE,
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic XsE,
|
||||
input logic [`NF:0] XmE, YmE,
|
||||
input logic [`NE-1:0] XeE, YeE,
|
||||
input logic [P.NF:0] XmE, YmE,
|
||||
input logic [P.NE-1:0] XeE, YeE,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
@ -42,39 +40,39 @@ module fdivsqrt(
|
||||
input logic StallM,
|
||||
input logic FlushE,
|
||||
input logic SqrtE, SqrtM,
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic [2:0] Funct3E, Funct3M,
|
||||
input logic IntDivE, W64E,
|
||||
output logic DivStickyM,
|
||||
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
||||
output logic [`NE+1:0] QeM,
|
||||
output logic [`DIVb:0] QmM,
|
||||
output logic [`XLEN-1:0] FIntDivResultM
|
||||
output logic [P.NE+1:0] QeM,
|
||||
output logic [P.DIVb:0] QmM,
|
||||
output logic [P.XLEN-1:0] FIntDivResultM
|
||||
);
|
||||
|
||||
// Floating-point division and square root module, with optional integer division and remainder
|
||||
// Computes X/Y, sqrt(X), A/B, or A%B
|
||||
|
||||
logic [`DIVb+3:0] WS, WC; // Partial remainder components
|
||||
logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend)
|
||||
logic [`DIVb+3:0] D; // Iterator Divisor
|
||||
logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values
|
||||
logic [`DIVb+1:0] FirstC; // Step tracker
|
||||
logic [P.DIVb+3:0] WS, WC; // Partial remainder components
|
||||
logic [P.DIVb+3:0] X; // Iterator Initial Value (from dividend)
|
||||
logic [P.DIVb+3:0] D; // Iterator Divisor
|
||||
logic [P.DIVb:0] FirstU, FirstUM; // Intermediate result values
|
||||
logic [P.DIVb+1:0] FirstC; // Step tracker
|
||||
logic Firstun; // Quotient selection
|
||||
logic WZeroE; // Early termination flag
|
||||
logic [`DURLEN-1:0] CyclesE; // FSM cycles
|
||||
logic [P.DURLEN-1:0] CyclesE; // FSM cycles
|
||||
logic SpecialCaseM; // Divide by zero, square root of negative, etc.
|
||||
logic DivStartE; // Enable signal for flops during stall
|
||||
|
||||
// Integer div/rem signals
|
||||
logic BZeroM; // Denominator is zero
|
||||
logic IntDivM; // Integer operation
|
||||
logic [`DIVBLEN:0] nM, mM; // Shift amounts
|
||||
logic [P.DIVBLEN:0] nM, mM; // Shift amounts
|
||||
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
|
||||
logic [`XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||
logic ISpecialCaseE; // Integer div/remainder special cases
|
||||
|
||||
fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor
|
||||
fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor
|
||||
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
||||
.FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
|
||||
// Int-specific
|
||||
@ -82,18 +80,18 @@ module fdivsqrt(
|
||||
.BZeroM, .nM, .mM, .AM,
|
||||
.IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);
|
||||
|
||||
fdivsqrtfsm fdivsqrtfsm( // FSM
|
||||
fdivsqrtfsm #(P) fdivsqrtfsm( // FSM
|
||||
.clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
|
||||
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
|
||||
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
|
||||
// Int-specific
|
||||
.IDivStartE, .ISpecialCaseE, .IntDivE);
|
||||
|
||||
fdivsqrtiter fdivsqrtiter( // CSA Iterator
|
||||
fdivsqrtiter #(P) fdivsqrtiter( // CSA Iterator
|
||||
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D,
|
||||
.FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
|
||||
|
||||
fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor
|
||||
fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor
|
||||
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
|
||||
.SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
|
||||
.QmM, .WZeroE, .DivStickyM,
|
||||
|
@ -26,51 +26,49 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtcycles(
|
||||
input logic [`FMTBITS-1:0] FmtE,
|
||||
module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic SqrtE,
|
||||
input logic IntDivE,
|
||||
input logic [`DIVBLEN:0] nE,
|
||||
output logic [`DURLEN-1:0] CyclesE
|
||||
input logic [P.DIVBLEN:0] nE,
|
||||
output logic [P.DURLEN-1:0] CyclesE
|
||||
);
|
||||
logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits
|
||||
// DIVN = `NF+3
|
||||
logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits
|
||||
// DIVN = P.NF+3
|
||||
// NS = NF + 1
|
||||
// N = NS or NS+2 for div/sqrt.
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
if (`FPSIZES == 1)
|
||||
assign Nf = `NF;
|
||||
else if (`FPSIZES == 2)
|
||||
if (P.FPSIZES == 1)
|
||||
assign Nf = P.NF;
|
||||
else if (P.FPSIZES == 2)
|
||||
always_comb
|
||||
case (FmtE)
|
||||
1'b0: Nf = `NF1;
|
||||
1'b1: Nf = `NF;
|
||||
1'b0: Nf = P.NF1;
|
||||
1'b1: Nf = P.NF;
|
||||
endcase
|
||||
else if (`FPSIZES == 3)
|
||||
else if (P.FPSIZES == 3)
|
||||
always_comb
|
||||
case (FmtE)
|
||||
`FMT: Nf = `NF;
|
||||
`FMT1: Nf = `NF1;
|
||||
`FMT2: Nf = `NF2;
|
||||
P.FMT: Nf = P.NF;
|
||||
P.FMT1: Nf = P.NF1;
|
||||
P.FMT2: Nf = P.NF2;
|
||||
endcase
|
||||
else if (`FPSIZES == 4)
|
||||
else if (P.FPSIZES == 4)
|
||||
always_comb
|
||||
case(FmtE)
|
||||
`S_FMT: Nf = `S_NF;
|
||||
`D_FMT: Nf = `D_NF;
|
||||
`H_FMT: Nf = `H_NF;
|
||||
`Q_FMT: Nf = `Q_NF;
|
||||
P.S_FMT: Nf = P.S_NF;
|
||||
P.D_FMT: Nf = P.D_NF;
|
||||
P.H_FMT: Nf = P.H_NF;
|
||||
P.Q_FMT: Nf = P.Q_NF;
|
||||
endcase
|
||||
|
||||
always_comb begin
|
||||
if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
|
||||
else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
|
||||
if (`IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
|
||||
else CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
|
||||
else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
|
||||
if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
|
||||
else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
|
||||
end
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -26,49 +26,47 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtexpcalc(
|
||||
input logic [`FMTBITS-1:0] Fmt,
|
||||
input logic [`NE-1:0] Xe, Ye,
|
||||
module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] Fmt,
|
||||
input logic [P.NE-1:0] Xe, Ye,
|
||||
input logic Sqrt,
|
||||
input logic XZero,
|
||||
input logic [`DIVBLEN:0] ell, m,
|
||||
output logic [`NE+1:0] Qe
|
||||
input logic [P.DIVBLEN:0] ell, m,
|
||||
output logic [P.NE+1:0] Qe
|
||||
);
|
||||
logic [`NE-2:0] Bias;
|
||||
logic [`NE+1:0] SXExp;
|
||||
logic [`NE+1:0] SExp;
|
||||
logic [`NE+1:0] DExp;
|
||||
logic [P.NE-2:0] Bias;
|
||||
logic [P.NE+1:0] SXExp;
|
||||
logic [P.NE+1:0] SExp;
|
||||
logic [P.NE+1:0] DExp;
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
assign Bias = (`NE-1)'(`BIAS);
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign Bias = (P.NE-1)'(P.BIAS);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign Bias = Fmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (Fmt)
|
||||
`FMT: Bias = (`NE-1)'(`BIAS);
|
||||
`FMT1: Bias = (`NE-1)'(`BIAS1);
|
||||
`FMT2: Bias = (`NE-1)'(`BIAS2);
|
||||
P.FMT: Bias = (P.NE-1)'(P.BIAS);
|
||||
P.FMT1: Bias = (P.NE-1)'(P.BIAS1);
|
||||
P.FMT2: Bias = (P.NE-1)'(P.BIAS2);
|
||||
default: Bias = 'x;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (Fmt)
|
||||
2'h3: Bias = (`NE-1)'(`Q_BIAS);
|
||||
2'h1: Bias = (`NE-1)'(`D_BIAS);
|
||||
2'h0: Bias = (`NE-1)'(`S_BIAS);
|
||||
2'h2: Bias = (`NE-1)'(`H_BIAS);
|
||||
2'h3: Bias = (P.NE-1)'(P.Q_BIAS);
|
||||
2'h1: Bias = (P.NE-1)'(P.D_BIAS);
|
||||
2'h0: Bias = (P.NE-1)'(P.S_BIAS);
|
||||
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
|
||||
endcase
|
||||
end
|
||||
assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS);
|
||||
assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
|
||||
assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
|
||||
assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
|
||||
|
||||
// correct exponent for subnormal input's normalization shifts
|
||||
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias});
|
||||
assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias});
|
||||
assign Qe = Sqrt ? SExp : DExp;
|
||||
endmodule
|
||||
|
@ -26,14 +26,12 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtfgen2 (
|
||||
module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic up, uz,
|
||||
input logic [`DIVb+3:0] C, U, UM,
|
||||
output logic [`DIVb+3:0] F
|
||||
input logic [P.DIVb+3:0] C, U, UM,
|
||||
output logic [P.DIVb+3:0] F
|
||||
);
|
||||
logic [`DIVb+3:0] FP, FN, FZ;
|
||||
logic [P.DIVb+3:0] FP, FN, FZ;
|
||||
|
||||
// Generate for both positive and negative bits
|
||||
assign FP = ~(U << 1) & C;
|
||||
|
@ -26,14 +26,12 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtfgen4 (
|
||||
module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [3:0] udigit,
|
||||
input logic [`DIVb+3:0] C, U, UM,
|
||||
output logic [`DIVb+3:0] F
|
||||
input logic [P.DIVb+3:0] C, U, UM,
|
||||
output logic [P.DIVb+3:0] F
|
||||
);
|
||||
logic [`DIVb+3:0] F2, F1, F0, FN1, FN2;
|
||||
logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2;
|
||||
|
||||
// Generate for both positive and negative bits
|
||||
assign F2 = (~U << 2) & (C << 2);
|
||||
@ -49,4 +47,4 @@ module fdivsqrtfgen4 (
|
||||
else if (udigit[1]) F = FN1;
|
||||
else if (udigit[0]) F = FN2;
|
||||
else F = F0;
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -26,9 +26,7 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtfsm(
|
||||
module fdivsqrtfsm import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk, reset,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
@ -39,7 +37,7 @@ module fdivsqrtfsm(
|
||||
input logic StallM, FlushE,
|
||||
input logic IntDivE,
|
||||
input logic ISpecialCaseE,
|
||||
input logic [`DURLEN-1:0] CyclesE,
|
||||
input logic [P.DURLEN-1:0] CyclesE,
|
||||
output logic IFDivStartE,
|
||||
output logic FDivBusyE, FDivDoneE,
|
||||
output logic SpecialCaseM
|
||||
@ -49,16 +47,16 @@ module fdivsqrtfsm(
|
||||
statetype state;
|
||||
|
||||
logic SpecialCaseE, FSpecialCaseE;
|
||||
logic [`DURLEN-1:0] step;
|
||||
logic [P.DURLEN-1:0] step;
|
||||
|
||||
// FDivStartE and IDivStartE come from fctrl, reflecting the start of floating-point and possibly integer division
|
||||
assign IFDivStartE = (FDivStartE | (IDivStartE & `IDIV_ON_FPU)) & (state == IDLE) & ~StallM;
|
||||
assign IFDivStartE = (FDivStartE | (IDivStartE & P.IDIV_ON_FPU)) & (state == IDLE) & ~StallM;
|
||||
assign FDivDoneE = (state == DONE);
|
||||
assign FDivBusyE = (state == BUSY) | IFDivStartE;
|
||||
|
||||
// terminate immediately on special cases
|
||||
assign FSpecialCaseE = XZeroE | | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE;
|
||||
if (`IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
|
||||
if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
|
||||
else assign SpecialCaseE = FSpecialCaseE;
|
||||
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
|
||||
|
||||
@ -78,4 +76,4 @@ module fdivsqrtfsm(
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -26,38 +26,36 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtiter(
|
||||
module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic IFDivStartE,
|
||||
input logic FDivBusyE,
|
||||
input logic SqrtE,
|
||||
input logic [`DIVb+3:0] X, D,
|
||||
output logic [`DIVb:0] FirstU, FirstUM,
|
||||
output logic [`DIVb+1:0] FirstC,
|
||||
input logic [P.DIVb+3:0] X, D,
|
||||
output logic [P.DIVb:0] FirstU, FirstUM,
|
||||
output logic [P.DIVb+1:0] FirstC,
|
||||
output logic Firstun,
|
||||
output logic [`DIVb+3:0] FirstWS, FirstWC
|
||||
output logic [P.DIVb+3:0] FirstWS, FirstWC
|
||||
);
|
||||
|
||||
/* verilator lint_off UNOPTFLAT */
|
||||
logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b
|
||||
logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b
|
||||
logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b
|
||||
logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b
|
||||
logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
|
||||
logic [`DIVb:0] UM[`DIVCOPIES:0]; // U1.b
|
||||
logic [`DIVb:0] UNext[`DIVCOPIES-1:0]; // U1.b
|
||||
logic [`DIVb:0] UMNext[`DIVCOPIES-1:0]; // U1.b
|
||||
logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
|
||||
logic [`DIVb+1:0] initC; // Q2.b
|
||||
logic [`DIVCOPIES-1:0] un;
|
||||
logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.b
|
||||
logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.b
|
||||
logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.b
|
||||
logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.b
|
||||
logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.b
|
||||
logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.b
|
||||
logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.b
|
||||
logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.b
|
||||
logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.b
|
||||
logic [P.DIVb+1:0] initC; // Q2.b
|
||||
logic [P.DIVCOPIES-1:0] un;
|
||||
|
||||
logic [`DIVb+3:0] WSN, WCN; // Q4.b
|
||||
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b
|
||||
logic [`DIVb+1:0] NextC;
|
||||
logic [`DIVb:0] UMux, UMMux;
|
||||
logic [`DIVb:0] initU, initUM;
|
||||
logic [P.DIVb+3:0] WSN, WCN; // Q4.b
|
||||
logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.b
|
||||
logic [P.DIVb+1:0] NextC;
|
||||
logic [P.DIVb:0] UMux, UMMux;
|
||||
logic [P.DIVb:0] initU, initUM;
|
||||
/* verilator lint_on UNOPTFLAT */
|
||||
|
||||
// Top Muxes and Registers
|
||||
@ -66,36 +64,36 @@ module fdivsqrtiter(
|
||||
// are fed back for the next iteration.
|
||||
|
||||
// Residual WS/WC registers/initialization mux
|
||||
mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN);
|
||||
mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN);
|
||||
flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
|
||||
flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
|
||||
mux2 #(P.DIVb+4) wsmux(WS[P.DIVCOPIES], X, IFDivStartE, WSN);
|
||||
mux2 #(P.DIVb+4) wcmux(WC[P.DIVCOPIES], '0, IFDivStartE, WCN);
|
||||
flopen #(P.DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
|
||||
flopen #(P.DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
|
||||
|
||||
// UOTFC Result U and UM registers/initialization mux
|
||||
// Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 otherwise
|
||||
assign initU = {SqrtE, {(`DIVb){1'b0}}};
|
||||
assign initUM = {~SqrtE, {(`DIVb){1'b0}}};
|
||||
mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
|
||||
mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
|
||||
flopen #(`DIVb+1) UReg(clk, FDivBusyE, UMux, U[0]);
|
||||
flopen #(`DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]);
|
||||
assign initU = {SqrtE, {(P.DIVb){1'b0}}};
|
||||
assign initUM = {~SqrtE, {(P.DIVb){1'b0}}};
|
||||
mux2 #(P.DIVb+1) Umux(UNext[P.DIVCOPIES-1], initU, IFDivStartE, UMux);
|
||||
mux2 #(P.DIVb+1) UMmux(UMNext[P.DIVCOPIES-1], initUM, IFDivStartE, UMMux);
|
||||
flopen #(P.DIVb+1) UReg(clk, FDivBusyE, UMux, U[0]);
|
||||
flopen #(P.DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]);
|
||||
|
||||
// C register/initialization mux
|
||||
// Initialize C to -1 for sqrt and -R for division
|
||||
logic [1:0] initCUpper;
|
||||
if(`RADIX == 4) begin
|
||||
if(P.RADIX == 4) begin
|
||||
mux2 #(2) cuppermux4(2'b00, 2'b11, SqrtE, initCUpper);
|
||||
end else begin
|
||||
mux2 #(2) cuppermux2(2'b10, 2'b11, SqrtE, initCUpper);
|
||||
end
|
||||
|
||||
assign initC = {initCUpper, {`DIVb{1'b0}}};
|
||||
mux2 #(`DIVb+2) cmux(C[`DIVCOPIES], initC, IFDivStartE, NextC);
|
||||
flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);
|
||||
assign initC = {initCUpper, {P.DIVb{1'b0}}};
|
||||
mux2 #(P.DIVb+2) cmux(C[P.DIVCOPIES], initC, IFDivStartE, NextC);
|
||||
flopen #(P.DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);
|
||||
|
||||
// Divisor Selections
|
||||
assign DBar = ~D; // for -D
|
||||
if(`RADIX == 4) begin : d2
|
||||
if(P.RADIX == 4) begin : d2
|
||||
assign D2 = D << 1; // for 2D, only used in R4
|
||||
assign DBar2 = ~D2; // for -2D, only used in R4
|
||||
end
|
||||
@ -103,15 +101,15 @@ module fdivsqrtiter(
|
||||
// k=DIVCOPIES of the recurrence logic
|
||||
genvar i;
|
||||
generate
|
||||
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : iterations
|
||||
if (`RADIX == 2) begin: stage
|
||||
fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtE,
|
||||
for(i=0; $unsigned(i)<P.DIVCOPIES; i++) begin : iterations
|
||||
if (P.RADIX == 2) begin: stage
|
||||
fdivsqrtstage2 #(P) fdivsqrtstage(.D, .DBar, .SqrtE,
|
||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||
end else begin: stage
|
||||
logic j1;
|
||||
assign j1 = (i == 0 & ~C[0][`DIVb-1]);
|
||||
fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
|
||||
assign j1 = (i == 0 & ~C[0][P.DIVb-1]);
|
||||
fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
|
||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||
end
|
||||
|
@ -26,51 +26,49 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtpostproc(
|
||||
module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk, reset,
|
||||
input logic StallM,
|
||||
input logic [`DIVb+3:0] WS, WC,
|
||||
input logic [`DIVb+3:0] D,
|
||||
input logic [`DIVb:0] FirstU, FirstUM,
|
||||
input logic [`DIVb+1:0] FirstC,
|
||||
input logic [P.DIVb+3:0] WS, WC,
|
||||
input logic [P.DIVb+3:0] D,
|
||||
input logic [P.DIVb:0] FirstU, FirstUM,
|
||||
input logic [P.DIVb+1:0] FirstC,
|
||||
input logic SqrtE,
|
||||
input logic Firstun, SqrtM, SpecialCaseM, NegQuotM,
|
||||
input logic [`XLEN-1:0] AM,
|
||||
input logic [P.XLEN-1:0] AM,
|
||||
input logic RemOpM, ALTBM, BZeroM, AsM, W64M,
|
||||
input logic [`DIVBLEN:0] nM, mM,
|
||||
output logic [`DIVb:0] QmM,
|
||||
input logic [P.DIVBLEN:0] nM, mM,
|
||||
output logic [P.DIVb:0] QmM,
|
||||
output logic WZeroE,
|
||||
output logic DivStickyM,
|
||||
output logic [`XLEN-1:0] FIntDivResultM
|
||||
output logic [P.XLEN-1:0] FIntDivResultM
|
||||
);
|
||||
|
||||
logic [`DIVb+3:0] W, Sum;
|
||||
logic [`DIVb:0] PreQmM;
|
||||
logic [P.DIVb+3:0] W, Sum;
|
||||
logic [P.DIVb:0] PreQmM;
|
||||
logic NegStickyM;
|
||||
logic weq0E, WZeroM;
|
||||
logic [`XLEN-1:0] IntDivResultM;
|
||||
logic [P.XLEN-1:0] IntDivResultM;
|
||||
|
||||
//////////////////////////
|
||||
// Execute Stage: Detect early termination for an exact result
|
||||
//////////////////////////
|
||||
|
||||
// check for early termination on an exact result.
|
||||
aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0E);
|
||||
aplusbeq0 #(P.DIVb+4) wspluswceq0(WS, WC, weq0E);
|
||||
|
||||
if (`RADIX == 2) begin: R2EarlyTerm
|
||||
logic [`DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;
|
||||
logic [`DIVb+2:0] FirstK;
|
||||
if (P.RADIX == 2) begin: R2EarlyTerm
|
||||
logic [P.DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;
|
||||
logic [P.DIVb+2:0] FirstK;
|
||||
logic wfeq0E;
|
||||
logic [`DIVb+3:0] WCF, WSF;
|
||||
logic [P.DIVb+3:0] WCF, WSF;
|
||||
|
||||
assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));
|
||||
assign FZeroSqrtE = {FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root
|
||||
assign FZeroSqrtE = {FirstUM[P.DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root
|
||||
assign FZeroDivE = D << 1; // F for divide
|
||||
mux2 #(`DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
|
||||
csa #(`DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
|
||||
aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
|
||||
mux2 #(P.DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
|
||||
csa #(P.DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
|
||||
aplusbeq0 #(P.DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
|
||||
assign WZeroE = weq0E|(wfeq0E & Firstun);
|
||||
end else begin
|
||||
assign WZeroE = weq0E;
|
||||
@ -91,27 +89,27 @@ module fdivsqrtpostproc(
|
||||
|
||||
// Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed.
|
||||
assign Sum = WC + WS;
|
||||
assign NegStickyM = Sum[`DIVb+3];
|
||||
mux2 #(`DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
|
||||
mux2 #(`DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
|
||||
assign NegStickyM = Sum[P.DIVb+3];
|
||||
mux2 #(P.DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
|
||||
mux2 #(P.DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
|
||||
|
||||
// Integer quotient or remainder correction, normalization, and special cases
|
||||
if (`IDIV_ON_FPU) begin:intpostproc // Int supported
|
||||
logic [`DIVBLEN:0] NormShiftM;
|
||||
logic [`DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
|
||||
logic signed [`DIVb+3:0] PreResultM, PreIntResultM;
|
||||
if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
|
||||
logic [P.DIVBLEN:0] NormShiftM;
|
||||
logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
|
||||
logic signed [P.DIVb+3:0] PreResultM, PreIntResultM;
|
||||
|
||||
assign W = $signed(Sum) >>> `LOGR;
|
||||
assign W = $signed(Sum) >>> P.LOGR;
|
||||
assign UnsignedQuotM = {3'b000, PreQmM};
|
||||
|
||||
// Integer remainder: sticky and sign correction muxes
|
||||
mux2 #(`DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM);
|
||||
mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
|
||||
mux2 #(`DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
|
||||
mux2 #(P.DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM);
|
||||
mux2 #(P.DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
|
||||
mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
|
||||
|
||||
// Select quotient or remainder and do normalization shift
|
||||
mux2 #(`DIVBLEN+1) normshiftmux(((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))), (mM + (`DIVBLEN+1)'(`DIVa)), RemOpM, NormShiftM);
|
||||
mux2 #(`DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
|
||||
mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(P.DIVa)), RemOpM, NormShiftM);
|
||||
mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
|
||||
assign PreIntResultM = $signed(PreResultM >>> NormShiftM);
|
||||
|
||||
// special case logic
|
||||
@ -119,18 +117,18 @@ module fdivsqrtpostproc(
|
||||
always_comb
|
||||
if (BZeroM) begin // Divide by zero
|
||||
if (RemOpM) IntDivResultM = AM;
|
||||
else IntDivResultM = {(`XLEN){1'b1}};
|
||||
else IntDivResultM = {(P.XLEN){1'b1}};
|
||||
end else if (ALTBM) begin // Numerator is zero
|
||||
if (RemOpM) IntDivResultM = AM;
|
||||
else IntDivResultM = '0;
|
||||
end else IntDivResultM = PreIntResultM[`XLEN-1:0];
|
||||
end else IntDivResultM = PreIntResultM[P.XLEN-1:0];
|
||||
|
||||
// sign extend result for W64
|
||||
if (`XLEN==64) begin
|
||||
mux2 #(64) resmux(IntDivResultM[`XLEN-1:0],
|
||||
{{(`XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
|
||||
if (P.XLEN==64) begin
|
||||
mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0],
|
||||
{{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
|
||||
W64M, FIntDivResultM);
|
||||
end else
|
||||
assign FIntDivResultM = IntDivResultM[`XLEN-1:0];
|
||||
assign FIntDivResultM = IntDivResultM[P.XLEN-1:0];
|
||||
end
|
||||
endmodule
|
||||
|
@ -26,56 +26,54 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtpreproc (
|
||||
module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic IFDivStartE,
|
||||
input logic [`NF:0] Xm, Ym,
|
||||
input logic [`NE-1:0] Xe, Ye,
|
||||
input logic [`FMTBITS-1:0] FmtE,
|
||||
input logic [P.NF:0] Xm, Ym,
|
||||
input logic [P.NE-1:0] Xe, Ye,
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic SqrtE,
|
||||
input logic XZeroE,
|
||||
input logic [2:0] Funct3E,
|
||||
output logic [`NE+1:0] QeM,
|
||||
output logic [`DIVb+3:0] X, D,
|
||||
output logic [P.NE+1:0] QeM,
|
||||
output logic [P.DIVb+3:0] X, D,
|
||||
// Int-specific
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic IntDivE, W64E,
|
||||
output logic ISpecialCaseE,
|
||||
output logic [`DURLEN-1:0] CyclesE,
|
||||
output logic [`DIVBLEN:0] nM, mM,
|
||||
output logic [P.DURLEN-1:0] CyclesE,
|
||||
output logic [P.DIVBLEN:0] nM, mM,
|
||||
output logic NegQuotM, ALTBM, IntDivM, W64M,
|
||||
output logic AsM, BZeroM,
|
||||
output logic [`XLEN-1:0] AM
|
||||
output logic [P.XLEN-1:0] AM
|
||||
);
|
||||
|
||||
logic [`DIVb-1:0] Xfract, Dfract;
|
||||
logic [`DIVb:0] PreSqrtX;
|
||||
logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
|
||||
logic [`NE+1:0] QeE; // Quotient Exponent (FP only)
|
||||
logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
|
||||
logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
|
||||
logic [P.DIVb-1:0] Xfract, Dfract;
|
||||
logic [P.DIVb:0] PreSqrtX;
|
||||
logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
|
||||
logic [P.NE+1:0] QeE; // Quotient Exponent (FP only)
|
||||
logic [P.DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
|
||||
logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
|
||||
logic NumerZeroE; // Numerator is zero (X or A)
|
||||
logic AZeroE, BZeroE; // A or B is Zero for integer division
|
||||
logic SignedDivE; // signed division
|
||||
logic NegQuotE; // Integer quotient is negative
|
||||
logic AsE, BsE; // Signs of integer inputs
|
||||
logic [`XLEN-1:0] AE; // input A after W64 adjustment
|
||||
logic [P.XLEN-1:0] AE; // input A after W64 adjustment
|
||||
logic ALTBE;
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Integer Preprocessing
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
if (`IDIV_ON_FPU) begin:intpreproc // Int Supported
|
||||
logic [`XLEN-1:0] BE, PosA, PosB;
|
||||
if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
|
||||
logic [P.XLEN-1:0] BE, PosA, PosB;
|
||||
|
||||
// Extract inputs, signs, zero, depending on W64 mode if applicable
|
||||
assign SignedDivE = ~Funct3E[0];
|
||||
|
||||
// Source handling
|
||||
if (`XLEN==64) begin // 64-bit, supports W64
|
||||
if (P.XLEN==64) begin // 64-bit, supports W64
|
||||
mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
|
||||
mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
|
||||
end else begin // 32 bits only
|
||||
@ -84,21 +82,21 @@ module fdivsqrtpreproc (
|
||||
end
|
||||
assign AZeroE = ~(|AE);
|
||||
assign BZeroE = ~(|BE);
|
||||
assign AsE = AE[`XLEN-1] & SignedDivE;
|
||||
assign BsE = BE[`XLEN-1] & SignedDivE;
|
||||
assign AsE = AE[P.XLEN-1] & SignedDivE;
|
||||
assign BsE = BE[P.XLEN-1] & SignedDivE;
|
||||
assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative
|
||||
|
||||
// Force integer inputs to be positive
|
||||
mux2 #(`XLEN) posamux(AE, -AE, AsE, PosA);
|
||||
mux2 #(`XLEN) posbmux(BE, -BE, BsE, PosB);
|
||||
mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
|
||||
mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
|
||||
|
||||
// Select integer or floating point inputs
|
||||
mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX);
|
||||
mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD);
|
||||
mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX);
|
||||
mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD);
|
||||
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
|
||||
end else begin // Int not supported
|
||||
assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}};
|
||||
assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}};
|
||||
assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}};
|
||||
assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}};
|
||||
assign NumerZeroE = XZeroE;
|
||||
end
|
||||
|
||||
@ -107,8 +105,8 @@ module fdivsqrtpreproc (
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
// count leading zeros for Subnorm FP and to normalize integer inputs
|
||||
lzc #(`DIVb) lzcX (IFX, ell);
|
||||
lzc #(`DIVb) lzcY (IFD, mE);
|
||||
lzc #(P.DIVb) lzcX (IFX, ell);
|
||||
lzc #(P.DIVb) lzcY (IFD, mE);
|
||||
|
||||
// Normalization shift: shift off leading one
|
||||
assign Xfract = (IFX << ell) << 1;
|
||||
@ -122,28 +120,28 @@ module fdivsqrtpreproc (
|
||||
// and nE (number of fractional digits)
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
if (`IDIV_ON_FPU) begin:intrightshift // Int Supported
|
||||
logic [`DIVBLEN:0] ZeroDiff, p;
|
||||
if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
|
||||
logic [P.DIVBLEN:0] ZeroDiff, p;
|
||||
|
||||
// calculate number of fractional bits p
|
||||
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
|
||||
assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B (A has more leading zeros)
|
||||
mux2 #(`DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
|
||||
assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros)
|
||||
mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
|
||||
|
||||
// Integer special cases (terminate immediately)
|
||||
assign ISpecialCaseE = BZeroE | ALTBE;
|
||||
|
||||
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
|
||||
|
||||
if (`LOGRK > 0) begin // more than 1 bit per cycle
|
||||
logic [`LOGRK-1:0] IntTrunc, RightShiftX;
|
||||
logic [`DIVBLEN:0] TotalIntBits, IntSteps;
|
||||
if (P.LOGRK > 0) begin // more than 1 bit per cycle
|
||||
logic [P.LOGRK-1:0] IntTrunc, RightShiftX;
|
||||
logic [P.DIVBLEN:0] TotalIntBits, IntSteps;
|
||||
/* verilator lint_off WIDTH */
|
||||
assign TotalIntBits = `LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
|
||||
assign IntTrunc = TotalIntBits % `RK; // Truncation check for ceiling operator
|
||||
assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div
|
||||
assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits
|
||||
assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount
|
||||
assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
|
||||
assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator
|
||||
assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div
|
||||
assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits
|
||||
assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount
|
||||
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
|
||||
/* verilator lint_on WIDTH */
|
||||
end else begin // radix 2 1 copy doesn't require shifting
|
||||
@ -167,42 +165,42 @@ module fdivsqrtpreproc (
|
||||
assign DivX = {3'b000, ~NumerZeroE, Xfract};
|
||||
|
||||
// Sqrt is initialized on step one as R(X-1), so depends on Radix
|
||||
mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
|
||||
if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
|
||||
mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
|
||||
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
|
||||
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
|
||||
mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
||||
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Select integer or floating-point operands
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
if (`IDIV_ON_FPU) begin
|
||||
mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
|
||||
if (P.IDIV_ON_FPU) begin
|
||||
mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
|
||||
end else begin
|
||||
assign X = PreShiftX;
|
||||
end
|
||||
|
||||
// Divisor register
|
||||
flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
|
||||
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
|
||||
|
||||
// Floating-point exponent
|
||||
fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
|
||||
flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
|
||||
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
|
||||
flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM);
|
||||
|
||||
// Number of FSM cycles (to FSM)
|
||||
fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
|
||||
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
|
||||
|
||||
if (`IDIV_ON_FPU) begin:intpipelineregs
|
||||
if (P.IDIV_ON_FPU) begin:intpipelineregs
|
||||
// pipeline registers
|
||||
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
|
||||
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
|
||||
flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
|
||||
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
|
||||
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
|
||||
flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
|
||||
flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
|
||||
flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM);
|
||||
if (`XLEN==64)
|
||||
flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
|
||||
flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
|
||||
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
|
||||
if (P.XLEN==64)
|
||||
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
|
||||
end
|
||||
|
||||
|
@ -26,8 +26,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtqsel2 (
|
||||
input logic [3:0] ps, pc,
|
||||
output logic up, uz, un
|
||||
|
@ -26,8 +26,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtqsel4 (
|
||||
input logic [2:0] Dmsbs,
|
||||
input logic [4:0] Smsbs,
|
||||
|
@ -26,8 +26,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtqsel4cmp (
|
||||
input logic [2:0] Dmsbs,
|
||||
input logic [4:0] Smsbs,
|
||||
|
@ -26,27 +26,26 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
/* verilator lint_off UNOPTFLAT */
|
||||
module fdivsqrtstage2 (
|
||||
input logic [`DIVb+3:0] D, DBar,
|
||||
input logic [`DIVb:0] U, UM,
|
||||
input logic [`DIVb+3:0] WS, WC,
|
||||
input logic [`DIVb+1:0] C,
|
||||
module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.DIVb+3:0] D, DBar,
|
||||
input logic [P.DIVb:0] U, UM,
|
||||
input logic [P.DIVb+3:0] WS, WC,
|
||||
input logic [P.DIVb+1:0] C,
|
||||
input logic SqrtE,
|
||||
output logic un,
|
||||
output logic [`DIVb+1:0] CNext,
|
||||
output logic [`DIVb:0] UNext, UMNext,
|
||||
output logic [`DIVb+3:0] WSNext, WCNext
|
||||
output logic [P.DIVb+1:0] CNext,
|
||||
output logic [P.DIVb:0] UNext, UMNext,
|
||||
output logic [P.DIVb+3:0] WSNext, WCNext
|
||||
);
|
||||
/* verilator lint_on UNOPTFLAT */
|
||||
|
||||
logic [`DIVb+3:0] Dsel;
|
||||
logic [P.DIVb+3:0] Dsel;
|
||||
logic up, uz;
|
||||
logic [`DIVb+3:0] F;
|
||||
logic [`DIVb+3:0] AddIn;
|
||||
logic [`DIVb+3:0] WSA, WCA;
|
||||
logic [P.DIVb+3:0] F;
|
||||
logic [P.DIVb+3:0] AddIn;
|
||||
logic [P.DIVb+3:0] WSA, WCA;
|
||||
|
||||
// Quotient Selection logic
|
||||
// Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
|
||||
@ -56,10 +55,10 @@ module fdivsqrtstage2 (
|
||||
// 0000 = 0
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un);
|
||||
fdivsqrtqsel2 qsel2(WS[P.DIVb+3:P.DIVb], WC[P.DIVb+3:P.DIVb], up, uz, un);
|
||||
|
||||
// Sqrt F generation. Extend C, U, UM to Q4.k
|
||||
fdivsqrtfgen2 fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
|
||||
fdivsqrtfgen2 #(P) fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
|
||||
|
||||
// Divisor multiple
|
||||
always_comb
|
||||
@ -69,16 +68,16 @@ module fdivsqrtstage2 (
|
||||
|
||||
// Partial Product Generation
|
||||
// WSA, WCA = WS + WC - qD
|
||||
mux2 #(`DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
|
||||
csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
|
||||
mux2 #(P.DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
|
||||
csa #(P.DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
|
||||
assign WSNext = WSA << 1;
|
||||
assign WCNext = WCA << 1;
|
||||
|
||||
// Shift thermometer code C
|
||||
assign CNext = {1'b1, C[`DIVb+1:1]};
|
||||
assign CNext = {1'b1, C[P.DIVb+1:1]};
|
||||
|
||||
// Unified On-The-Fly Converter to accumulate result
|
||||
fdivsqrtuotfc2 uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext);
|
||||
fdivsqrtuotfc2 #(P) uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext);
|
||||
endmodule
|
||||
|
||||
|
||||
|
@ -26,29 +26,27 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtstage4 (
|
||||
input logic [`DIVb+3:0] D, DBar, D2, DBar2,
|
||||
input logic [`DIVb:0] U,UM,
|
||||
input logic [`DIVb+3:0] WS, WC,
|
||||
input logic [`DIVb+1:0] C,
|
||||
module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.DIVb+3:0] D, DBar, D2, DBar2,
|
||||
input logic [P.DIVb:0] U,UM,
|
||||
input logic [P.DIVb+3:0] WS, WC,
|
||||
input logic [P.DIVb+1:0] C,
|
||||
input logic SqrtE, j1,
|
||||
output logic [`DIVb+1:0] CNext,
|
||||
output logic [P.DIVb+1:0] CNext,
|
||||
output logic un,
|
||||
output logic [`DIVb:0] UNext, UMNext,
|
||||
output logic [`DIVb+3:0] WSNext, WCNext
|
||||
output logic [P.DIVb:0] UNext, UMNext,
|
||||
output logic [P.DIVb+3:0] WSNext, WCNext
|
||||
);
|
||||
|
||||
logic [`DIVb+3:0] Dsel;
|
||||
logic [P.DIVb+3:0] Dsel;
|
||||
logic [3:0] udigit;
|
||||
logic [`DIVb+3:0] F;
|
||||
logic [`DIVb+3:0] AddIn;
|
||||
logic [P.DIVb+3:0] F;
|
||||
logic [P.DIVb+3:0] AddIn;
|
||||
logic [4:0] Smsbs;
|
||||
logic [2:0] Dmsbs;
|
||||
logic [7:0] WCmsbs, WSmsbs;
|
||||
logic CarryIn;
|
||||
logic [`DIVb+3:0] WSA, WCA;
|
||||
logic [P.DIVb+3:0] WSA, WCA;
|
||||
|
||||
// Digit Selection logic
|
||||
// u encoding:
|
||||
@ -57,16 +55,16 @@ module fdivsqrtstage4 (
|
||||
// 0000 = 0
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
assign Smsbs = U[`DIVb:`DIVb-4];
|
||||
assign Dmsbs = D[`DIVb-1:`DIVb-3];
|
||||
assign WCmsbs = WC[`DIVb+3:`DIVb-4];
|
||||
assign WSmsbs = WS[`DIVb+3:`DIVb-4];
|
||||
assign Smsbs = U[P.DIVb:P.DIVb-4];
|
||||
assign Dmsbs = D[P.DIVb-1:P.DIVb-3];
|
||||
assign WCmsbs = WC[P.DIVb+3:P.DIVb-4];
|
||||
assign WSmsbs = WS[P.DIVb+3:P.DIVb-4];
|
||||
|
||||
fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
|
||||
assign un = 1'b0; // unused for radix 4
|
||||
|
||||
// F generation logic
|
||||
fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
|
||||
fdivsqrtfgen4 #(P) fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
|
||||
|
||||
// Divisor multiple logic
|
||||
always_comb
|
||||
@ -83,15 +81,15 @@ module fdivsqrtstage4 (
|
||||
// {WS, WC}}Next = (WS + WC - qD or F) << 2
|
||||
assign AddIn = SqrtE ? F : Dsel;
|
||||
assign CarryIn = ~SqrtE & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D
|
||||
csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
|
||||
csa #(P.DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
|
||||
assign WSNext = WSA << 2;
|
||||
assign WCNext = WCA << 2;
|
||||
|
||||
// Shift thermometer code C
|
||||
assign CNext = {2'b11, C[`DIVb+1:2]};
|
||||
assign CNext = {2'b11, C[P.DIVb+1:2]};
|
||||
|
||||
// On-the-fly converter to accumulate result
|
||||
fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);
|
||||
fdivsqrtuotfc4 #(P) fdivsqrtuotfc4(.udigit, .C(CNext[P.DIVb:0]), .U, .UM, .UNext, .UMNext);
|
||||
endmodule
|
||||
|
||||
|
||||
|
@ -26,22 +26,20 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
///////////////////////////////
|
||||
// Unified OTFC, Radix 2 //
|
||||
///////////////////////////////
|
||||
module fdivsqrtuotfc2(
|
||||
module fdivsqrtuotfc2 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic up, un,
|
||||
input logic [`DIVb+1:0] C,
|
||||
input logic [`DIVb:0] U, UM,
|
||||
output logic [`DIVb:0] UNext, UMNext
|
||||
input logic [P.DIVb+1:0] C,
|
||||
input logic [P.DIVb:0] U, UM,
|
||||
output logic [P.DIVb:0] UNext, UMNext
|
||||
);
|
||||
// The on-the-fly converter transfers the divsqrt
|
||||
// bits to the quotient as they come.
|
||||
logic [`DIVb:0] K;
|
||||
logic [P.DIVb:0] K;
|
||||
|
||||
assign K = (C[`DIVb:0] & ~(C[`DIVb:0] << 1)); // Thermometer to one hot encoding
|
||||
assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding
|
||||
|
||||
always_comb begin
|
||||
if (up) begin
|
||||
|
@ -26,19 +26,17 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fdivsqrtuotfc4(
|
||||
module fdivsqrtuotfc4 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [3:0] udigit,
|
||||
input logic [`DIVb:0] U, UM,
|
||||
input logic [`DIVb:0] C,
|
||||
output logic [`DIVb:0] UNext, UMNext
|
||||
input logic [P.DIVb:0] U, UM,
|
||||
input logic [P.DIVb:0] C,
|
||||
output logic [P.DIVb:0] UNext, UMNext
|
||||
);
|
||||
// The on-the-fly converter transfers the square root
|
||||
// bits to the quotient as they come.
|
||||
// Use this otfc for division and square root.
|
||||
|
||||
logic [`DIVb:0] K1, K2, K3;
|
||||
logic [P.DIVb:0] K1, K2, K3;
|
||||
assign K1 = (C&~(C << 1)); // K
|
||||
assign K2 = ((C << 1)&~(C << 2)); // 2K
|
||||
assign K3 = (C & ~(C << 2)); // 3K
|
||||
|
@ -26,8 +26,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fhazard(
|
||||
input logic [4:0] Adr1D, Adr2D, Adr3D, // read data adresses
|
||||
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
|
||||
|
@ -26,22 +26,20 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fma(
|
||||
module fma import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, Ys, Zs, // input's signs
|
||||
input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
|
||||
input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format
|
||||
input logic [P.NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
|
||||
input logic [P.NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format
|
||||
input logic XZero, YZero, ZZero, // is the input zero
|
||||
input logic [2:0] OpCtrl, // operation control
|
||||
output logic ASticky, // sticky bit that is calculated during alignment
|
||||
output logic [3*`NF+3:0] Sm, // the positive sum's significand
|
||||
output logic [3*P.NF+3:0] Sm, // the positive sum's significand
|
||||
output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A)
|
||||
output logic As, // the aligned addend's sign (modified Z sign for other opperations)
|
||||
output logic Ps, // the product's sign
|
||||
output logic Ss, // the sum's sign
|
||||
output logic [`NE+1:0] Se, // the sum's exponent
|
||||
output logic [$clog2(3*`NF+5)-1:0] SCnt // normalization shift count
|
||||
output logic [P.NE+1:0] Se, // the sum's exponent
|
||||
output logic [$clog2(3*P.NF+5)-1:0] SCnt // normalization shift count
|
||||
);
|
||||
|
||||
// OpCtrl:
|
||||
@ -54,12 +52,12 @@ module fma(
|
||||
// 110 - add
|
||||
// 111 - sub
|
||||
|
||||
logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format
|
||||
logic [3*`NF+3:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF)
|
||||
logic [3*`NF+3:0] AmInv; // aligned addend's mantissa possibly inverted
|
||||
logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf)
|
||||
logic [2*P.NF+1:0] Pm; // the product's significand in U(2.2Nf) format
|
||||
logic [3*P.NF+3:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF)
|
||||
logic [3*P.NF+3:0] AmInv; // aligned addend's mantissa possibly inverted
|
||||
logic [2*P.NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf)
|
||||
logic KillProd; // set the product to zero before addition if the product is too small to matter
|
||||
logic [`NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
|
||||
logic [P.NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Calculate the product
|
||||
@ -71,10 +69,10 @@ module fma(
|
||||
|
||||
|
||||
// calculate the product's exponent
|
||||
fmaexpadd expadd(.Xe, .Ye, .XZero, .YZero, .Pe);
|
||||
fmaexpadd #(P) expadd(.Xe, .Ye, .XZero, .YZero, .Pe);
|
||||
|
||||
// multiplication of the mantissas
|
||||
fmamult mult(.Xm, .Ym, .Pm);
|
||||
fmamult #(P) mult(.Xm, .Ym, .Pm);
|
||||
|
||||
// calculate the signs and take the operation into account
|
||||
fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA);
|
||||
@ -82,15 +80,15 @@ module fma(
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Alignment shifter
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd);
|
||||
fmaalign #(P) align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd);
|
||||
|
||||
// ///////////////////////////////////////////////////////////////////////////////
|
||||
// // Addition/LZA
|
||||
// ///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);
|
||||
fmaadd #(P) add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);
|
||||
|
||||
fmalza #(3*`NF+4) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt);
|
||||
fmalza #(3*P.NF+4, P.NF) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt);
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -26,25 +26,23 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fmaadd(
|
||||
input logic [3*`NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
|
||||
input logic [`NE-1:0] Ze, // exponent of Z
|
||||
module fmaadd import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [3*P.NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
|
||||
input logic [P.NE-1:0] Ze, // exponent of Z
|
||||
input logic Ps, // the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
|
||||
input logic [`NE+1:0] Pe, // product's exponet
|
||||
input logic [2*`NF+1:0] Pm, // the product's mantissa
|
||||
input logic [P.NE+1:0] Pe, // product's exponet
|
||||
input logic [2*P.NF+1:0] Pm, // the product's mantissa
|
||||
input logic InvA, // invert the aligned addend
|
||||
input logic KillProd, // should the product be set to 0
|
||||
input logic ASticky, // Alighed addend's sticky bit
|
||||
output logic [3*`NF+3:0] AmInv, // aligned addend possibly inverted
|
||||
output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed
|
||||
output logic [3*P.NF+3:0] AmInv, // aligned addend possibly inverted
|
||||
output logic [2*P.NF+1:0] PmKilled, // the product's mantissa possibly killed
|
||||
output logic Ss, // sum's sign
|
||||
output logic [`NE+1:0] Se, // sum's exponent
|
||||
output logic [3*`NF+3:0] Sm // the positive sum
|
||||
output logic [P.NE+1:0] Se, // sum's exponent
|
||||
output logic [3*P.NF+3:0] Sm // the positive sum
|
||||
);
|
||||
|
||||
logic [3*`NF+3:0] PreSum, NegPreSum; // possibly negitive sum
|
||||
logic [3*P.NF+3:0] PreSum, NegPreSum; // possibly negitive sum
|
||||
logic NegSum; // was the sum negitive
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -52,9 +50,9 @@ module fmaadd(
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition
|
||||
assign AmInv = {3*`NF+4{InvA}}^Am;
|
||||
assign AmInv = {3*P.NF+4{InvA}}^Am;
|
||||
// Kill the product if the product is too small to affect the addition (determined in fma1.sv)
|
||||
assign PmKilled = {2*`NF+2{~KillProd}}&Pm;
|
||||
assign PmKilled = {2*P.NF+2{~KillProd}}&Pm;
|
||||
// Do the addition
|
||||
// - calculate a positive and negative sum in parallel
|
||||
// if there was a small negative number killed in the alignment stage one needs to be subtracted from the sum
|
||||
@ -63,8 +61,8 @@ module fmaadd(
|
||||
// addend - prod where product is killed (and not exactly zero) then don't add +1 from negation
|
||||
// ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA
|
||||
// in this case this result is only ever selected when InvA=1 so we can remove &InvA
|
||||
assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*`NF+4{1'b0}}, (~ASticky|KillProd)&InvA};
|
||||
assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*`NF+2)'(0), ~ASticky|~KillProd, 1'b0};
|
||||
assign {NegSum, PreSum} = {{P.NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*P.NF+4{1'b0}}, (~ASticky|KillProd)&InvA};
|
||||
assign NegPreSum = Am + {{P.NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*P.NF+2)'(0), ~ASticky|~KillProd, 1'b0};
|
||||
|
||||
// Choose the positive sum and accompanying LZA result.
|
||||
assign Sm = NegSum ? NegPreSum : PreSum;
|
||||
|
@ -27,20 +27,18 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fmaalign(
|
||||
input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
|
||||
input logic [`NF:0] Zm, // significand in U(0.NF) format]
|
||||
module fmaalign import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
|
||||
input logic [P.NF:0] Zm, // significand in U(0.NF) format]
|
||||
input logic XZero, YZero, ZZero,// is the input zero
|
||||
output logic [3*`NF+3:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
|
||||
output logic [3*P.NF+3:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
|
||||
output logic ASticky, // Sticky bit calculated from the aliged addend
|
||||
output logic KillProd // should the product be set to zero
|
||||
);
|
||||
|
||||
logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
|
||||
logic [4*`NF+3:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
|
||||
logic [4*`NF+3:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
|
||||
logic [P.NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
|
||||
logic [4*P.NF+3:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
|
||||
logic [4*P.NF+3:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
|
||||
logic KillZ; // should the addend be killed
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -51,16 +49,16 @@ module fmaalign(
|
||||
// - negative means Z is larger, so shift Z left
|
||||
// - positive means the product is larger, so shift Z right
|
||||
// This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
|
||||
assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+2) - {2'b0, Ze};
|
||||
assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)} + (P.NE+2)'(P.NF+2) - {2'b0, Ze};
|
||||
|
||||
// Default Addition with only initial left shift
|
||||
// | 53'b0 | 106'b(product) | 1'b0 |
|
||||
// | addend |
|
||||
|
||||
assign ZmPreshifted = {Zm,(3*`NF+3)'(0)};
|
||||
assign ZmPreshifted = {Zm,(3*P.NF+3)'(0)};
|
||||
|
||||
assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
|
||||
assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(3));
|
||||
assign KillProd = (ACnt[P.NE+1]&~ZZero)|XZero|YZero;
|
||||
assign KillZ = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(3));
|
||||
|
||||
always_comb begin
|
||||
// If the product is too small to affect the sum, kill the product
|
||||
@ -68,7 +66,7 @@ module fmaalign(
|
||||
// | 53'b0 | 106'b(product) | 1'b0 |
|
||||
// | addend |
|
||||
if (KillProd) begin
|
||||
ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+1)'(0)};
|
||||
ZmShifted = {(P.NF+2)'(0), Zm, (2*P.NF+1)'(0)};
|
||||
ASticky = ~(XZero|YZero);
|
||||
|
||||
// If the addend is too small to affect the addition
|
||||
@ -86,12 +84,12 @@ module fmaalign(
|
||||
// | addend |
|
||||
end else begin
|
||||
ZmShifted = ZmPreshifted >> ACnt;
|
||||
ASticky = |(ZmShifted[`NF-1:0]);
|
||||
ASticky = |(ZmShifted[P.NF-1:0]);
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
assign Am = ZmShifted[4*`NF+3:`NF];
|
||||
assign Am = ZmShifted[4*P.NF+3:P.NF];
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -26,18 +26,16 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fmaexpadd(
|
||||
input logic [`NE-1:0] Xe, Ye, // input's exponents
|
||||
module fmaexpadd import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.NE-1:0] Xe, Ye, // input's exponents
|
||||
input logic XZero, YZero, // are the inputs zero
|
||||
output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2
|
||||
output logic [P.NE+1:0] Pe // product's exponent B^(1023)NE+2
|
||||
);
|
||||
|
||||
logic PZero; // is the product zero?
|
||||
|
||||
// kill the exponent if the product is zero - either X or Y is 0
|
||||
assign PZero = XZero | YZero;
|
||||
assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)});
|
||||
assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)});
|
||||
|
||||
endmodule
|
||||
|
@ -27,11 +27,9 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fmalza #(WIDTH) (
|
||||
module fmalza #(WIDTH, NF) (
|
||||
input logic [WIDTH-1:0] A, // addend
|
||||
input logic [2*`NF+1:0] Pm, // product
|
||||
input logic [2*NF+1:0] Pm, // product
|
||||
input logic Cin, // carry in
|
||||
input logic sub, // subtraction
|
||||
output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result
|
||||
@ -42,7 +40,7 @@ module fmalza #(WIDTH) (
|
||||
logic [WIDTH-1:0] P, G, K; // propagate, generate, kill for each column
|
||||
logic [WIDTH-1:0] Pp1, Gm1, Km1; // propagate shifted right by 1, generate/kill shifted left 1
|
||||
|
||||
assign B = {{(`NF+1){1'b0}}, Pm, 1'b0}; // Zero extend product
|
||||
assign B = {{(NF+1){1'b0}}, Pm, 1'b0}; // Zero extend product
|
||||
|
||||
assign P = A^B;
|
||||
assign G = A&B;
|
||||
|
@ -26,11 +26,9 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fmamult(
|
||||
input logic [`NF:0] Xm, Ym, // x and y significand
|
||||
output logic [2*`NF+1:0] Pm // product's significand
|
||||
module fmamult import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.NF:0] Xm, Ym, // x and y significand
|
||||
output logic [2*P.NF+1:0] Pm // product's significand
|
||||
);
|
||||
|
||||
assign Pm = Xm * Ym;
|
||||
|
@ -26,8 +26,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fmasign(
|
||||
input logic [2:0] OpCtrl, // opperation contol
|
||||
input logic Xs, Ys, Zs, // sign of the inputs
|
||||
|
228
src/fpu/fpu.sv
228
src/fpu/fpu.sv
@ -26,9 +26,7 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fpu (
|
||||
module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
// Hazards
|
||||
@ -44,7 +42,7 @@ module fpu (
|
||||
// Execute stage
|
||||
input logic [2:0] Funct3E, // Funct fields of instruction specify type of operations
|
||||
input logic IntDivE, W64E, // Integer division on FPU
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU)
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU)
|
||||
input logic [4:0] RdE, // which FP register to write to (from IEU)
|
||||
output logic FWriteIntE, // integer register write enable (to IEU)
|
||||
output logic FCvtIntE, // Convert to int (to IEU)
|
||||
@ -53,16 +51,16 @@ module fpu (
|
||||
input logic [4:0] RdM, // which FP register to write to (from IEU)
|
||||
output logic FRegWriteM, // FP register write enable (to privileged unit)
|
||||
output logic FpLoadStoreM, // Fp load instruction? (to LSU)
|
||||
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU)
|
||||
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
|
||||
output logic [P.FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU)
|
||||
output logic [P.XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to IFU)
|
||||
output logic [4:0] SetFflagsM, // FPU flags (to privileged unit)
|
||||
// Writeback stage
|
||||
input logic [4:0] RdW, // which FP register to write to (from IEU)
|
||||
input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
|
||||
output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
|
||||
input logic [P.FLEN-1:0] ReadDataW, // Read data (from LSU)
|
||||
output logic [P.XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
|
||||
output logic FCvtIntW, // select FCvtIntRes (to IEU)
|
||||
output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU)
|
||||
output logic [P.XLEN-1:0] FIntDivResultW // Result from integer division (to IEU)
|
||||
);
|
||||
|
||||
// RISC-V FPU specifics:
|
||||
@ -72,7 +70,7 @@ module fpu (
|
||||
// control signals
|
||||
logic FRegWriteW; // FP register write enable
|
||||
logic [2:0] FrmM; // FP rounding mode
|
||||
logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
|
||||
logic [P.FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
|
||||
logic FDivStartE, IDivStartE; // Start division or squareroot
|
||||
logic FWriteIntM; // Write to integer register
|
||||
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
|
||||
@ -86,20 +84,20 @@ module fpu (
|
||||
logic FRegWriteE; // Write floating-point register
|
||||
|
||||
// regfile signals
|
||||
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
|
||||
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
|
||||
logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [P.FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [P.FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
|
||||
logic [P.XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [P.FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
|
||||
logic [P.FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
|
||||
|
||||
// unpacking signals
|
||||
logic XsE, YsE, ZsE; // input's sign - execute stage
|
||||
logic XsM, YsM; // input's sign - memory stage
|
||||
logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
|
||||
logic [`NE-1:0] ZeM; // input's exponent - memory stage
|
||||
logic [`NF:0] XmE, YmE, ZmE; // input's significand - execute stage
|
||||
logic [`NF:0] XmM, YmM, ZmM; // input's significand - memory stage
|
||||
logic [P.NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
|
||||
logic [P.NE-1:0] ZeM; // input's exponent - memory stage
|
||||
logic [P.NF:0] XmE, YmE, ZmE; // input's significand - execute stage
|
||||
logic [P.NF:0] XmM, YmM, ZmM; // input's significand - memory stage
|
||||
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
|
||||
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
|
||||
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
|
||||
@ -110,56 +108,56 @@ module fpu (
|
||||
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
|
||||
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
|
||||
logic XExpMaxE; // is the exponent all ones (max value)
|
||||
logic [`FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations
|
||||
logic [P.FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations
|
||||
|
||||
// Fma Signals
|
||||
logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting
|
||||
logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying
|
||||
logic [3*`NF+3:0] SmE, SmM; // Sum significand
|
||||
logic [3*P.NF+3:0] SmE, SmM; // Sum significand
|
||||
logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output
|
||||
logic [`NE+1:0] SeE,SeM; // Sum exponent
|
||||
logic [P.NE+1:0] SeE,SeM; // Sum exponent
|
||||
logic InvAE, InvAM; // Invert addend
|
||||
logic AsE, AsM; // Addend sign
|
||||
logic PsE, PsM; // Product sign
|
||||
logic SsE, SsM; // Sum sign
|
||||
logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count
|
||||
logic [$clog2(3*P.NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count
|
||||
|
||||
// Cvt Signals
|
||||
logic [`NE:0] CeE, CeM; // convert intermediate expoent
|
||||
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
|
||||
logic [P.NE:0] CeE, CeM; // convert intermediate expoent
|
||||
logic [P.LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
|
||||
logic CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal
|
||||
logic CsE, CsM; // convert result sign
|
||||
logic IntZeroE, IntZeroM; // is the integer zero?
|
||||
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
|
||||
logic [P.CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [P.XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
|
||||
|
||||
// divide signals
|
||||
logic [`DIVb:0] QmM; // fdivsqrt signifcand
|
||||
logic [`NE+1:0] QeM; // fdivsqrt exponent
|
||||
logic [P.DIVb:0] QmM; // fdivsqrt signifcand
|
||||
logic [P.NE+1:0] QeM; // fdivsqrt exponent
|
||||
logic DivStickyM; // fdivsqrt sticky bit
|
||||
logic FDivDoneE, IFDivStartE; // fdivsqrt control signals
|
||||
logic [`XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
|
||||
logic [P.XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
|
||||
|
||||
// result and flag signals
|
||||
logic [`XLEN-1:0] ClassResE; // classify result
|
||||
logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max)
|
||||
logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le)
|
||||
logic [P.XLEN-1:0] ClassResE; // classify result
|
||||
logic [P.FLEN-1:0] CmpFpResE; // compare result to FPU (min/max)
|
||||
logic [P.XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le)
|
||||
logic CmpNVE; // compare invalid flag (Not Valid)
|
||||
logic [`FLEN-1:0] SgnResE; // sign injection result
|
||||
logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
|
||||
logic [`FLEN-1:0] PostProcResM; // Postprocessor output
|
||||
logic [P.FLEN-1:0] SgnResE; // sign injection result
|
||||
logic [P.XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
|
||||
logic [P.FLEN-1:0] PostProcResM; // Postprocessor output
|
||||
logic [4:0] PostProcFlgM; // Postprocessor flags
|
||||
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
|
||||
logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result
|
||||
logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
|
||||
logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register
|
||||
logic [P.FLEN-1:0] FpResM, FpResW; // FPU preliminary result
|
||||
logic [P.FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
|
||||
logic [P.FLEN-1:0] FResultW; // final FP result being written to the FP register
|
||||
|
||||
// other signals
|
||||
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv
|
||||
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed
|
||||
logic [P.FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv
|
||||
logic [P.FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
logic [P.FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed
|
||||
logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt
|
||||
logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
|
||||
logic [P.FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
|
||||
logic mvsgn; // sign bit for extending move
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -167,7 +165,7 @@ module fpu (
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate FP control signals
|
||||
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
|
||||
fctrl #(P) fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
|
||||
.Funct3E, .IntDivE, .InstrD,
|
||||
.StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
|
||||
.reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
|
||||
@ -177,15 +175,15 @@ module fpu (
|
||||
.Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E);
|
||||
|
||||
// FP register file
|
||||
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
|
||||
fregfile #(P.FLEN) fregfile (.clk, .reset, .we4(FRegWriteW),
|
||||
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]),
|
||||
.a4(RdW), .wd4(FResultW),
|
||||
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
|
||||
|
||||
// D/E pipeline registers
|
||||
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
|
||||
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
|
||||
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
flopenrc #(P.FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
|
||||
flopenrc #(P.FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
|
||||
flopenrc #(P.FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Execute Stage: hazards, forwarding, unpacking, execution units
|
||||
@ -197,37 +195,37 @@ module fpu (
|
||||
.XEnD, .YEnD, .ZEnD, .FPUStallD, .ForwardXE, .ForwardYE, .ForwardZE);
|
||||
|
||||
// forwarding muxes
|
||||
mux3 #(`FLEN) fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE);
|
||||
mux3 #(`FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE);
|
||||
mux3 #(`FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE);
|
||||
mux3 #(P.FLEN) fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE);
|
||||
mux3 #(P.FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE);
|
||||
mux3 #(P.FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE);
|
||||
|
||||
// Select NAN-boxed value of Y = 1.0 in proper format for fma to add/subtract X*Y+Z
|
||||
if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)};
|
||||
else if(`FPSIZES == 2)
|
||||
mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
|
||||
else if(`FPSIZES == 3 | `FPSIZES == 4)
|
||||
mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)},
|
||||
{{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)},
|
||||
{{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)},
|
||||
{2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
|
||||
if(P.FPSIZES == 1) assign BoxedOneE = {2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)};
|
||||
else if(P.FPSIZES == 2)
|
||||
mux2 #(P.FLEN) fonemux ({{P.FLEN-P.LEN1{1'b1}}, 2'b0, {P.NE1-1{1'b1}}, (P.NF1)'(0)}, {2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
|
||||
else if(P.FPSIZES == 3 | P.FPSIZES == 4)
|
||||
mux4 #(P.FLEN) fonemux ({{P.FLEN-P.S_LEN{1'b1}}, 2'b0, {P.S_NE-1{1'b1}}, (P.S_NF)'(0)},
|
||||
{{P.FLEN-P.D_LEN{1'b1}}, 2'b0, {P.D_NE-1{1'b1}}, (P.D_NF)'(0)},
|
||||
{{P.FLEN-P.H_LEN{1'b1}}, 2'b0, {P.H_NE-1{1'b1}}, (P.H_NF)'(0)},
|
||||
{2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
|
||||
assign FmaAddSubE = OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10);
|
||||
mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract
|
||||
mux2 #(P.FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract
|
||||
|
||||
// Select NAN-boxed value of Z = 0.0 in proper format for FMA for multiply X*Y+Z
|
||||
// For add and subtract, Z comes from second source operand
|
||||
if(`FPSIZES == 1) assign BoxedZeroE = 0;
|
||||
else if(`FPSIZES == 2)
|
||||
mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
|
||||
else if(`FPSIZES == 3 | `FPSIZES == 4)
|
||||
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}},
|
||||
{{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}},
|
||||
{{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}},
|
||||
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
|
||||
if(P.FPSIZES == 1) assign BoxedZeroE = 0;
|
||||
else if(P.FPSIZES == 2)
|
||||
mux2 #(P.FLEN) fmulzeromux ({{P.FLEN-P.LEN1{1'b1}}, {P.LEN1{1'b0}}}, (P.FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
|
||||
else if(P.FPSIZES == 3 | P.FPSIZES == 4)
|
||||
mux4 #(P.FLEN) fmulzeromux ({{P.FLEN-P.S_LEN{1'b1}}, {P.S_LEN{1'b0}}},
|
||||
{{P.FLEN-P.D_LEN{1'b1}}, {P.D_LEN{1'b0}}},
|
||||
{{P.FLEN-P.H_LEN{1'b1}}, {P.H_LEN{1'b0}}},
|
||||
(P.FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
|
||||
assign FmaZSelE = {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]};
|
||||
mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE);
|
||||
mux3 #(P.FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE);
|
||||
|
||||
// unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infinity
|
||||
unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
|
||||
unpack #(P) unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
|
||||
.Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE),
|
||||
.XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE),
|
||||
.YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE),
|
||||
@ -235,99 +233,99 @@ module fpu (
|
||||
.ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE), .XPostBox(XPostBoxE));
|
||||
|
||||
// fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub
|
||||
fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE),
|
||||
fma #(P) fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE),
|
||||
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .OpCtrl(OpCtrlE),
|
||||
.As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE));
|
||||
|
||||
// divide and square root: fdiv, fsqrt, optionally integer division
|
||||
fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
|
||||
fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
|
||||
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
|
||||
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
|
||||
.QmM, .FIntDivResultM);
|
||||
|
||||
// compare: fmin/fmax, flt/fle/feq
|
||||
fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
|
||||
fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
|
||||
.Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE),
|
||||
.XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE),
|
||||
.CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));
|
||||
|
||||
// sign injection: fsgnj/fsgnjx/fsgnjn
|
||||
fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XPostBoxE), .Fmt(FmtE), .SgnRes(SgnResE));
|
||||
fsgninj #(P) fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XPostBoxE), .Fmt(FmtE), .SgnRes(SgnResE));
|
||||
|
||||
// classify: fclass
|
||||
fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE),
|
||||
fclassify #(P) fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE),
|
||||
.XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));
|
||||
|
||||
// convert: fcvt.*.*
|
||||
fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE),
|
||||
fcvt #(P) fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE),
|
||||
.ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE),
|
||||
.ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE));
|
||||
|
||||
|
||||
// NaN Box SrcA to convert integer to requested FP size
|
||||
if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE};
|
||||
else if(`FPSIZES == 2)
|
||||
mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
|
||||
else if(`FPSIZES == 3 | `FPSIZES == 4)
|
||||
mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ForwardedSrcAE[`S_LEN-1:0]},
|
||||
{{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]},
|
||||
{{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]},
|
||||
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
|
||||
if(P.FPSIZES == 1) assign AlignedSrcAE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE};
|
||||
else if(P.FPSIZES == 2)
|
||||
mux2 #(P.FLEN) SrcAMux ({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
|
||||
else if(P.FPSIZES == 3 | P.FPSIZES == 4)
|
||||
mux4 #(P.FLEN) SrcAMux ({{P.FLEN-P.S_LEN{1'b1}}, ForwardedSrcAE[P.S_LEN-1:0]},
|
||||
{{P.FLEN-P.D_LEN{1'b1}}, ForwardedSrcAE[P.D_LEN-1:0]},
|
||||
{{P.FLEN-P.H_LEN{1'b1}}, ForwardedSrcAE[P.H_LEN-1:0]},
|
||||
{{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
|
||||
|
||||
// select a result that may be written to the FP register
|
||||
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
|
||||
mux3 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
|
||||
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
|
||||
|
||||
// select the result that may be written to the integer register with fmv - to IEU
|
||||
if(`FPSIZES == 1) begin
|
||||
assign mvsgn = XE[`FLEN-1];
|
||||
if(P.FPSIZES == 1) begin
|
||||
assign mvsgn = XE[P.FLEN-1];
|
||||
assign SgnExtXE = XE;
|
||||
end else if(`FPSIZES == 2) begin
|
||||
mux2 #(1) sgnmux (XE[`LEN1-1], XE[`FLEN-1],FmtE, mvsgn);
|
||||
mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{mvsgn}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE);
|
||||
end else if(`FPSIZES == 3 | `FPSIZES == 4) begin
|
||||
mux4 #(1) sgnmux (XE[`H_LEN-1], XE[`S_LEN-1], XE[`D_LEN-1], XE[`LLEN-1], FmtE, mvsgn);
|
||||
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{mvsgn}}, XE[`H_LEN-1:0]},
|
||||
{{`FLEN-`S_LEN{mvsgn}}, XE[`S_LEN-1:0]},
|
||||
{{`FLEN-`D_LEN{mvsgn}}, XE[`D_LEN-1:0]},
|
||||
end else if(P.FPSIZES == 2) begin
|
||||
mux2 #(1) sgnmux (XE[P.LEN1-1], XE[P.FLEN-1],FmtE, mvsgn);
|
||||
mux2 #(P.FLEN) sgnextmux ({{P.FLEN-P.LEN1{mvsgn}}, XE[P.LEN1-1:0]}, XE, FmtE, SgnExtXE);
|
||||
end else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin
|
||||
mux4 #(1) sgnmux (XE[P.H_LEN-1], XE[P.S_LEN-1], XE[P.D_LEN-1], XE[P.LLEN-1], FmtE, mvsgn);
|
||||
mux4 #(P.FLEN) fmulzeromux ({{P.FLEN-P.H_LEN{mvsgn}}, XE[P.H_LEN-1:0]},
|
||||
{{P.FLEN-P.S_LEN{mvsgn}}, XE[P.S_LEN-1:0]},
|
||||
{{P.FLEN-P.D_LEN{mvsgn}}, XE[P.D_LEN-1:0]},
|
||||
XE, FmtE, SgnExtXE);
|
||||
end
|
||||
|
||||
if (`FLEN>`XLEN)
|
||||
assign IntSrcXE = SgnExtXE[`XLEN-1:0];
|
||||
if (P.FLEN>P.XLEN)
|
||||
assign IntSrcXE = SgnExtXE[P.XLEN-1:0];
|
||||
else
|
||||
assign IntSrcXE = {{`XLEN-`FLEN{mvsgn}}, SgnExtXE};
|
||||
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
|
||||
assign IntSrcXE = {{P.XLEN-P.FLEN{mvsgn}}, SgnExtXE};
|
||||
mux3 #(P.XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
|
||||
|
||||
// E/M pipe registers
|
||||
|
||||
// Need to stall during divsqrt iterations to avoid capturing bad flags from stale forwarded sources
|
||||
assign StallUnpackedM = StallM | (FDivBusyE & ~IFDivStartE | FDivDoneE);
|
||||
|
||||
flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
|
||||
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
|
||||
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
|
||||
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
|
||||
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
|
||||
flopenrc #(P.NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
|
||||
flopenrc #(P.NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
|
||||
flopenrc #(P.FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
|
||||
flopenrc #(P.XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
|
||||
flopenrc #(P.FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
|
||||
flopenr #(13) EMFpReg5 (clk, reset, ~StallUnpackedM,
|
||||
{XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
|
||||
{XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
|
||||
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
|
||||
flopenrc #(3*`NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
|
||||
flopenrc #($clog2(3*`NF+5)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
flopenrc #(3*P.NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
|
||||
flopenrc #($clog2(3*P.NF+5)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
|
||||
{FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM});
|
||||
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
|
||||
flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
|
||||
{CeE, CvtShiftAmtE, CvtResSubnormUfE, CsE, IntZeroE, CvtLzcInE},
|
||||
{CeM, CvtShiftAmtM, CvtResSubnormUfM, CsM, IntZeroM, CvtLzcInM});
|
||||
flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM);
|
||||
flopenrc #(P.FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Memory Stage: postprocessor and result muxes
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
|
||||
postprocess #(P) postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
|
||||
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
|
||||
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM),
|
||||
.FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
|
||||
@ -337,18 +335,18 @@ module fpu (
|
||||
|
||||
// FPU flag selection - to privileged
|
||||
mux2 #(5) FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
|
||||
mux2 #(`FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);
|
||||
mux2 #(P.FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);
|
||||
|
||||
// M/W pipe registers
|
||||
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
|
||||
flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
|
||||
flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW);
|
||||
flopenrc #(P.FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
|
||||
flopenrc #(P.XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
|
||||
flopenrc #(P.XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Writeback Stage: result mux
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// select the result to be written to the FP register
|
||||
mux2 #(`FLEN) FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW);
|
||||
mux2 #(P.FLEN) FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW);
|
||||
|
||||
endmodule // fpu
|
||||
|
@ -26,17 +26,15 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fregfile (
|
||||
module fregfile #(parameter FLEN) (
|
||||
input logic clk, reset,
|
||||
input logic we4, // write enable
|
||||
input logic [4:0] a1, a2, a3, a4, // adresses
|
||||
input logic [`FLEN-1:0] wd4, // write data
|
||||
output logic [`FLEN-1:0] rd1, rd2, rd3 // read data
|
||||
input logic [FLEN-1:0] wd4, // write data
|
||||
output logic [FLEN-1:0] rd1, rd2, rd3 // read data
|
||||
);
|
||||
|
||||
logic [`FLEN-1:0] rf[31:0];
|
||||
logic [FLEN-1:0] rf[31:0];
|
||||
integer i;
|
||||
|
||||
// three ported register file
|
||||
|
@ -26,14 +26,12 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fsgninj (
|
||||
module fsgninj import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, Ys, // X and Y sign bits
|
||||
input logic [`FLEN-1:0] X, // X
|
||||
input logic [`FMTBITS-1:0] Fmt, // format
|
||||
input logic [P.FLEN-1:0] X, // X
|
||||
input logic [P.FMTBITS-1:0] Fmt, // format
|
||||
input logic [1:0] OpCtrl, // operation control
|
||||
output logic [`FLEN-1:0] SgnRes // result
|
||||
output logic [P.FLEN-1:0] SgnRes // result
|
||||
);
|
||||
|
||||
logic ResSgn; // result sign
|
||||
@ -50,30 +48,30 @@ module fsgninj (
|
||||
// - uses NaN-blocking format
|
||||
// - if there are any unused bits the most significant bits are filled with 1s
|
||||
|
||||
if (`FPSIZES == 1)
|
||||
assign SgnRes = {ResSgn, X[`FLEN-2:0]};
|
||||
else if (`FPSIZES == 2)
|
||||
assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
|
||||
else if (`FPSIZES == 3) begin
|
||||
if (P.FPSIZES == 1)
|
||||
assign SgnRes = {ResSgn, X[P.FLEN-2:0]};
|
||||
else if (P.FPSIZES == 2)
|
||||
assign SgnRes = {~Fmt|ResSgn, X[P.FLEN-2:P.LEN1], Fmt ? X[P.LEN1-1] : ResSgn, X[P.LEN1-2:0]};
|
||||
else if (P.FPSIZES == 3) begin
|
||||
logic [2:0] SgnBits;
|
||||
always_comb
|
||||
case (Fmt)
|
||||
`FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
|
||||
`FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
|
||||
`FMT2: SgnBits = {2'b11, ResSgn};
|
||||
P.FMT: SgnBits = {ResSgn, X[P.LEN1-1], X[P.LEN2-1]};
|
||||
P.FMT1: SgnBits = {1'b1, ResSgn, X[P.LEN2-1]};
|
||||
P.FMT2: SgnBits = {2'b11, ResSgn};
|
||||
default: SgnBits = {3{1'bx}};
|
||||
endcase
|
||||
assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
|
||||
end else if (`FPSIZES == 4) begin
|
||||
assign SgnRes = {SgnBits[2], X[P.FLEN-2:P.LEN1], SgnBits[1], X[P.LEN1-2:P.LEN2], SgnBits[0], X[P.LEN2-2:0]};
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
logic [3:0] SgnBits;
|
||||
always_comb
|
||||
case (Fmt)
|
||||
`Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
|
||||
`D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]};
|
||||
`S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
|
||||
`H_FMT: SgnBits = {3'b111, ResSgn};
|
||||
P.Q_FMT: SgnBits = {ResSgn, X[P.D_LEN-1], X[P.S_LEN-1], X[P.H_LEN-1]};
|
||||
P.D_FMT: SgnBits = {1'b1, ResSgn, X[P.S_LEN-1], X[P.H_LEN-1]};
|
||||
P.S_FMT: SgnBits = {2'b11, ResSgn, X[P.H_LEN-1]};
|
||||
P.H_FMT: SgnBits = {3'b111, ResSgn};
|
||||
endcase
|
||||
assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
|
||||
assign SgnRes = {SgnBits[3], X[P.Q_LEN-2:P.D_LEN], SgnBits[2], X[P.D_LEN-2:P.S_LEN], SgnBits[1], X[P.S_LEN-2:P.H_LEN], SgnBits[0], X[P.H_LEN-2:0]};
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
@ -26,22 +26,20 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module cvtshiftcalc(
|
||||
module cvtshiftcalc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic XZero, // is the input zero?
|
||||
input logic ToInt, // to integer conversion?
|
||||
input logic IntToFp, // interger to floating point conversion?
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [`NE:0] CvtCe, // the calculated expoent
|
||||
input logic [`NF:0] Xm, // input mantissas
|
||||
input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb)
|
||||
input logic [P.FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [P.NE:0] CvtCe, // the calculated expoent
|
||||
input logic [P.NF:0] Xm, // input mantissas
|
||||
input logic [P.CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb)
|
||||
input logic CvtResSubnormUf, // is the conversion result subnormal or underlows
|
||||
output logic CvtResUf, // does the cvt result unerflow
|
||||
output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted
|
||||
output logic [P.CVTLEN+P.NF:0] CvtShiftIn // number to be shifted
|
||||
);
|
||||
|
||||
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
|
||||
logic [$clog2(P.NF):0] ResNegNF; // the result's fraction length negated (-NF)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// shifter
|
||||
@ -49,7 +47,7 @@ module cvtshiftcalc(
|
||||
|
||||
// select the input to the shifter
|
||||
// fp -> int:
|
||||
// | `XLEN zeros | mantissa | 0's if nessisary |
|
||||
// | P.XLEN zeros | mantissa | 0's if nessisary |
|
||||
// .
|
||||
// Other problems:
|
||||
// - if shifting to the right (neg CalcExp) then don't put a 1 in the round bit (to prevent an incorrect plus 1 later during rounding)
|
||||
@ -57,7 +55,7 @@ module cvtshiftcalc(
|
||||
// - ex: for the case 0010000.... (double)
|
||||
// ??? -> fp:
|
||||
// - if result is subnormal or underflowed then we want to shift right i.e. shift right then shift left:
|
||||
// | `NF-1 zeros | mantissa | 0's if nessisary |
|
||||
// | P.NF-1 zeros | mantissa | 0's if nessisary |
|
||||
// .
|
||||
// - otherwise:
|
||||
// | LzcInM | 0's if necessary |
|
||||
@ -67,33 +65,33 @@ module cvtshiftcalc(
|
||||
// get rid of round bit if needed
|
||||
// | add sticky bit if needed
|
||||
// | |
|
||||
if (ToInt) CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}};
|
||||
else if (CvtResSubnormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}};
|
||||
else CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}};
|
||||
if (ToInt) CvtShiftIn = {{P.XLEN{1'b0}}, Xm[P.NF]&~CvtCe[P.NE], Xm[P.NF-1]|(CvtCe[P.NE]&Xm[P.NF]), Xm[P.NF-2:0], {P.CVTLEN-P.XLEN{1'b0}}};
|
||||
else if (CvtResSubnormUf) CvtShiftIn = {{P.NF-1{1'b0}}, Xm, {P.CVTLEN-P.NF+1{1'b0}}};
|
||||
else CvtShiftIn = {CvtLzcIn, {P.NF+1{1'b0}}};
|
||||
|
||||
// choose the negative of the fraction size
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign ResNegNF = -($clog2(P.NF)+1)'(P.NF);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign ResNegNF = OutFmt ? -($clog2(P.NF)+1)'(P.NF) : -($clog2(P.NF)+1)'(P.NF1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
|
||||
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
|
||||
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
|
||||
P.FMT: ResNegNF = -($clog2(P.NF)+1)'(P.NF);
|
||||
P.FMT1: ResNegNF = -($clog2(P.NF)+1)'(P.NF1);
|
||||
P.FMT2: ResNegNF = -($clog2(P.NF)+1)'(P.NF2);
|
||||
default: ResNegNF = 1'bx;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
|
||||
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
|
||||
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
|
||||
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
|
||||
2'h3: ResNegNF = -($clog2(P.NF)+1)'(P.Q_NF);
|
||||
2'h1: ResNegNF = -($clog2(P.NF)+1)'(P.D_NF);
|
||||
2'h0: ResNegNF = -($clog2(P.NF)+1)'(P.S_NF);
|
||||
2'h2: ResNegNF = -($clog2(P.NF)+1)'(P.H_NF);
|
||||
endcase
|
||||
end
|
||||
|
||||
@ -102,6 +100,6 @@ module cvtshiftcalc(
|
||||
// determine if the result underflows ??? -> fp
|
||||
// - if the first 1 is shifted out of the result then the result underflows
|
||||
// - an integer to fp conversion can't underflow
|
||||
assign CvtResUf = ($signed(CvtCe) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp;
|
||||
assign CvtResUf = ($signed(CvtCe) < $signed({{P.NE-$clog2(P.NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp;
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -26,24 +26,22 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////`include "wally-config.vh"
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module divshiftcalc(
|
||||
input logic [`DIVb:0] DivQm, // divsqrt significand
|
||||
input logic [`NE+1:0] DivQe, // divsqrt exponent
|
||||
output logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount
|
||||
output logic [`NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input
|
||||
module divshiftcalc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.DIVb:0] DivQm, // divsqrt significand
|
||||
input logic [P.NE+1:0] DivQe, // divsqrt exponent
|
||||
output logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount
|
||||
output logic [P.NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input
|
||||
output logic DivResSubnorm, // is the divsqrt result subnormal
|
||||
output logic DivSubnormShiftPos // is the subnormal shift amount positive
|
||||
);
|
||||
|
||||
logic [`LOGNORMSHIFTSZ-1:0] NormShift; // normalized result shift amount
|
||||
logic [`LOGNORMSHIFTSZ-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negitive)
|
||||
logic [`NE+1:0] DivSubnormShift; // subnormal result shift amount
|
||||
logic [P.LOGNORMSHIFTSZ-1:0] NormShift; // normalized result shift amount
|
||||
logic [P.LOGNORMSHIFTSZ-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negitive)
|
||||
logic [P.NE+1:0] DivSubnormShift; // subnormal result shift amount
|
||||
|
||||
// is the result subnormal
|
||||
// if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes
|
||||
assign DivResSubnorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);
|
||||
assign DivResSubnorm = DivQe[P.NE+1]|(~|DivQe[P.NE+1:0]);
|
||||
|
||||
// if the result is subnormal
|
||||
// 00000000x.xxxxxx... Exp = DivQe
|
||||
@ -51,8 +49,8 @@ module divshiftcalc(
|
||||
// .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
|
||||
// .0000xxxxxxxxxxx... >> 1 Exp = 1
|
||||
// Left shift amount = DivQe+NF+1-1
|
||||
assign DivSubnormShift = (`NE+2)'(`NF)+DivQe;
|
||||
assign DivSubnormShiftPos = ~DivSubnormShift[`NE+1];
|
||||
assign DivSubnormShift = (P.NE+2)'(P.NF)+DivQe;
|
||||
assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1];
|
||||
|
||||
// if the result is normalized
|
||||
// 00000000x.xxxxxx... Exp = DivQe
|
||||
@ -62,13 +60,13 @@ module divshiftcalc(
|
||||
// 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
|
||||
// inital Left shift amount = NF
|
||||
// shift one more if it's a minimally redundant radix 4 - one entire cycle needed for integer bit
|
||||
assign NormShift = (`LOGNORMSHIFTSZ)'(`NF);
|
||||
assign NormShift = (P.LOGNORMSHIFTSZ)'(P.NF);
|
||||
|
||||
// if the shift amount is negative then don't shift (keep sticky bit)
|
||||
// need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES)
|
||||
assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[`LOGNORMSHIFTSZ-1:0] : '0;
|
||||
assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZ-1:0] : '0;
|
||||
assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
|
||||
|
||||
// pre-shift the divider result for normalization
|
||||
assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb-1-`NF{1'b0}}};
|
||||
assign DivShiftIn = {{P.NF{1'b0}}, DivQm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
|
||||
endmodule
|
||||
|
@ -25,18 +25,17 @@
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
|
||||
module flags(
|
||||
module flags import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, // X sign
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [P.FMTBITS-1:0] OutFmt, // output format
|
||||
input logic InfIn, // is a Inf input being used
|
||||
input logic XInf, YInf, ZInf, // inputs are infinity
|
||||
input logic NaNIn, // is a NaN input being used
|
||||
input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs
|
||||
input logic XZero, YZero, // inputs are zero
|
||||
input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
|
||||
input logic [`NE+1:0] Me, // exponent of the normalized sum
|
||||
input logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow
|
||||
input logic [P.NE+1:0] Me, // exponent of the normalized sum
|
||||
// rounding
|
||||
input logic Plus1, // do you add one for rounding
|
||||
input logic Round, Guard, Sticky, // bits used to determine rounding
|
||||
@ -47,7 +46,7 @@ module flags(
|
||||
input logic IntToFp, // convert integer to floating point
|
||||
input logic Int64, // convert to 64 bit integer
|
||||
input logic Signed, // convert to a signed integer
|
||||
input logic [`NE:0] CvtCe, // the calculated expoent - Cvt
|
||||
input logic [P.NE:0] CvtCe, // the calculated expoent - Cvt
|
||||
input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits
|
||||
// divsqrt
|
||||
input logic DivOp, // conversion opperation?
|
||||
@ -92,33 +91,33 @@ module flags(
|
||||
// - any of the bits after the most significant 1 is one
|
||||
// - the most significant in 65 or 33 is still a one in the number and
|
||||
// one of the later bits is one
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
|
||||
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
|
||||
assign ShiftGtIntSz = (|FullRe[P.NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
|
||||
|
||||
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
|
||||
end else if (`FPSIZES == 3) begin
|
||||
assign ShiftGtIntSz = (|FullRe[P.NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
|
||||
`FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
|
||||
`FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
|
||||
P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
|
||||
P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
|
||||
P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
|
||||
default: ResExpGteMax = 1'bx;
|
||||
endcase
|
||||
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
|
||||
assign ShiftGtIntSz = (|FullRe[P.NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
|
||||
`D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
|
||||
`S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
|
||||
`H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
|
||||
P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
|
||||
P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
|
||||
P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
|
||||
P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
|
||||
endcase
|
||||
assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
|
||||
assign ShiftGtIntSz = (|FullRe[P.Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
|
||||
end
|
||||
|
||||
|
||||
@ -127,7 +126,7 @@ module flags(
|
||||
// | and the exponent isn't negative
|
||||
// | | if the input isnt infinity or NaN
|
||||
// | | |
|
||||
assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
|
||||
assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Underflow
|
||||
@ -141,7 +140,7 @@ module flags(
|
||||
// | | | | and if the result is not exact
|
||||
// | | | | | and if the input isnt infinity or NaN
|
||||
// | | | | | |
|
||||
assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -156,7 +155,7 @@ module flags(
|
||||
// if the res is too small to be represented and not 0
|
||||
// | and if the res is not invalid (outside the integer bounds)
|
||||
// | |
|
||||
assign IntInexact = ((CvtCe[`NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid;
|
||||
assign IntInexact = ((CvtCe[P.NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid;
|
||||
|
||||
// select the inexact flag to output
|
||||
assign Inexact = ToInt ? IntInexact : FpInexact;
|
||||
@ -178,7 +177,7 @@ module flags(
|
||||
// | | | | or the res rounds up out of bounds
|
||||
// | | | | and the res didn't underflow
|
||||
// | | | | |
|
||||
assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
|
||||
assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[P.NE+1])|((Xs&~Signed)&(~((CvtCe[P.NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
|
||||
// |
|
||||
// or when the positive res rounds up out of range
|
||||
|
||||
|
@ -26,21 +26,19 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fmashiftcalc(
|
||||
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
|
||||
input logic [`NE+1:0] FmaSe, // sum's exponent
|
||||
input logic [3*`NF+3:0] FmaSm, // the positive sum
|
||||
input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // normalization shift count
|
||||
output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
|
||||
module fmashiftcalc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
|
||||
input logic [P.NE+1:0] FmaSe, // sum's exponent
|
||||
input logic [3*P.NF+3:0] FmaSm, // the positive sum
|
||||
input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // normalization shift count
|
||||
output logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
|
||||
output logic FmaSZero, // is the result subnormal - calculated before LZA corection
|
||||
output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
|
||||
output logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt, // normalization shift count
|
||||
output logic [3*`NF+5:0] FmaShiftIn // is the sum zero
|
||||
output logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt, // normalization shift count
|
||||
output logic [3*P.NF+5:0] FmaShiftIn // is the sum zero
|
||||
);
|
||||
logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias
|
||||
logic [`NE+1:0] BiasCorr; // correction for bias
|
||||
logic [P.NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the P.FLEN bias
|
||||
logic [P.NE+1:0] BiasCorr; // correction for bias
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
@ -50,75 +48,75 @@ module fmashiftcalc(
|
||||
assign FmaSZero = ~(|FmaSm);
|
||||
|
||||
// calculate the sum's exponent
|
||||
assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+5)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3);
|
||||
assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+5)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+3);
|
||||
|
||||
// convert the sum's exponent into the proper precision
|
||||
if (`FPSIZES == 1) begin
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign NormSumExp = PreNormSumExp;
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS);
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign BiasCorr = Fmt ? (P.NE+2)'(0) : (P.NE+2)'(P.BIAS1-P.BIAS);
|
||||
assign NormSumExp = PreNormSumExp+BiasCorr;
|
||||
end else if (`FPSIZES == 3) begin
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (Fmt)
|
||||
`FMT: BiasCorr = '0;
|
||||
`FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS);
|
||||
`FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS);
|
||||
P.FMT: BiasCorr = '0;
|
||||
P.FMT1: BiasCorr = (P.NE+2)'(P.BIAS1-P.BIAS);
|
||||
P.FMT2: BiasCorr = (P.NE+2)'(P.BIAS2-P.BIAS);
|
||||
default: BiasCorr = 'x;
|
||||
endcase
|
||||
end
|
||||
assign NormSumExp = PreNormSumExp+BiasCorr;
|
||||
end else if (`FPSIZES == 4) begin
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (Fmt)
|
||||
2'h3: BiasCorr = '0;
|
||||
2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS);
|
||||
2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS);
|
||||
2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS);
|
||||
2'h1: BiasCorr = (P.NE+2)'(P.D_BIAS-P.Q_BIAS);
|
||||
2'h0: BiasCorr = (P.NE+2)'(P.S_BIAS-P.Q_BIAS);
|
||||
2'h2: BiasCorr = (P.NE+2)'(P.H_BIAS-P.Q_BIAS);
|
||||
endcase
|
||||
end
|
||||
assign NormSumExp = PreNormSumExp+BiasCorr;
|
||||
end
|
||||
|
||||
// determine if the result is subnormal: (NormSumExp <= 0) & (NormSumExp >= -FracLen) & ~FmaSZero
|
||||
if (`FPSIZES == 1) begin
|
||||
if (P.FPSIZES == 1) begin
|
||||
logic Sum0LEZ, Sum0GEFL;
|
||||
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
|
||||
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
|
||||
assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
|
||||
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2));
|
||||
assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
|
||||
end else if (`FPSIZES == 2) begin
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
|
||||
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
|
||||
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
|
||||
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
|
||||
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
|
||||
assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
|
||||
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2));
|
||||
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS1));
|
||||
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF1-2+P.BIAS-P.BIAS1)) | ~|PreNormSumExp;
|
||||
assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
|
||||
end else if (`FPSIZES == 3) begin
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
|
||||
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
|
||||
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
|
||||
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
|
||||
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
|
||||
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2));
|
||||
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp;
|
||||
assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
|
||||
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2));
|
||||
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS1));
|
||||
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF1-2+P.BIAS-P.BIAS1)) | ~|PreNormSumExp;
|
||||
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS2));
|
||||
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF2-2+P.BIAS-P.BIAS2)) | ~|PreNormSumExp;
|
||||
always_comb begin
|
||||
case (Fmt)
|
||||
`FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
|
||||
`FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
|
||||
`FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
|
||||
P.FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
|
||||
P.FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
|
||||
P.FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
|
||||
default: FmaPreResultSubnorm = 1'bx;
|
||||
endcase
|
||||
end
|
||||
end else if (`FPSIZES == 4) begin
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
|
||||
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
|
||||
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
|
||||
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS));
|
||||
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp;
|
||||
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS));
|
||||
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp;
|
||||
assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS));
|
||||
assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp;
|
||||
assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
|
||||
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-2));
|
||||
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.D_BIAS));
|
||||
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.D_NF-2+P.BIAS-P.D_BIAS)) | ~|PreNormSumExp;
|
||||
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.S_BIAS));
|
||||
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.S_NF-2+P.BIAS-P.S_BIAS)) | ~|PreNormSumExp;
|
||||
assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.H_BIAS));
|
||||
assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.H_NF-2+P.BIAS-P.H_BIAS)) | ~|PreNormSumExp;
|
||||
always_comb begin
|
||||
case (Fmt)
|
||||
2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
|
||||
@ -132,6 +130,6 @@ module fmashiftcalc(
|
||||
// set and calculate the shift input and amount
|
||||
// - shift once if killing a product and the result is subnormal
|
||||
assign FmaShiftIn = {2'b0, FmaSm};
|
||||
if (`FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2): FmaSCnt+1;
|
||||
else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2)+BiasCorr[$clog2(3*`NF+5)-1:0]: FmaSCnt+1;
|
||||
if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+2): FmaSCnt+1;
|
||||
else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+2)+BiasCorr[$clog2(3*P.NF+5)-1:0]: FmaSCnt+1;
|
||||
endmodule
|
||||
|
@ -25,26 +25,25 @@
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
|
||||
module negateintres(
|
||||
module negateintres import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Signed, // is the integer input signed
|
||||
input logic Int64, // is the integer input 64-bits
|
||||
input logic Plus1, // should one be added for rounding?
|
||||
input logic Xs, // X sign
|
||||
input logic [`NORMSHIFTSZ-1:0] Shifted, // output from normalization shifter
|
||||
input logic [P.NORMSHIFTSZ-1:0] Shifted, // output from normalization shifter
|
||||
output logic [1:0] CvtNegResMsbs, // most signigficant bits of possibly negated result
|
||||
output logic [`XLEN+1:0] CvtNegRes // possibly negated integer result
|
||||
output logic [P.XLEN+1:0] CvtNegRes // possibly negated integer result
|
||||
);
|
||||
|
||||
logic [`XLEN+1:0] CvtPreRes; // integer result with rounding
|
||||
logic [P.XLEN+1:0] CvtPreRes; // integer result with rounding
|
||||
logic [2:0] CvtNegResMsbs3; // first three msbs of possibly negated result
|
||||
|
||||
// round and negate the positive res if needed
|
||||
assign CvtPreRes = {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
|
||||
mux2 #(`XLEN+2) resmux(CvtPreRes, -CvtPreRes, Xs, CvtNegRes);
|
||||
assign CvtPreRes = {2'b0, Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.XLEN]}+{{P.XLEN+1{1'b0}}, Plus1};
|
||||
mux2 #(P.XLEN+2) resmux(CvtPreRes, -CvtPreRes, Xs, CvtNegRes);
|
||||
|
||||
// select 2 most significant bits
|
||||
mux2 #(3) msb3mux(CvtNegRes[33:31], CvtNegRes[`XLEN+1:`XLEN-1], Int64, CvtNegResMsbs3);
|
||||
mux2 #(3) msb3mux(CvtNegRes[33:31], CvtNegRes[P.XLEN+1:P.XLEN-1], Int64, CvtNegResMsbs3);
|
||||
mux2 #(2) msb2mux(CvtNegResMsbs3[2:1], CvtNegResMsbs3[1:0], Signed, CvtNegResMsbs);
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -25,8 +25,6 @@
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
|
||||
|
||||
// convert shift
|
||||
// fp -> int: | `XLEN zeros | Mantissa | 0's if necessary | << CalcExp
|
||||
@ -72,11 +70,11 @@
|
||||
// | Nf 0's | Qm | << calculated shift amount
|
||||
// .
|
||||
|
||||
module normshift(
|
||||
input logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt, // shift amount
|
||||
input logic [`NORMSHIFTSZ-1:0] ShiftIn, // number to be shifted
|
||||
output logic [`NORMSHIFTSZ-1:0] Shifted // shifted result
|
||||
module normshift import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.LOGNORMSHIFTSZ-1:0] ShiftAmt, // shift amount
|
||||
input logic [P.NORMSHIFTSZ-1:0] ShiftIn, // number to be shifted
|
||||
output logic [P.NORMSHIFTSZ-1:0] Shifted // shifted result
|
||||
);
|
||||
|
||||
assign Shifted = ShiftIn << ShiftAmt;
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -26,14 +26,12 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module postprocess (
|
||||
module postprocess import cvw::*; #(parameter cvw_t P) (
|
||||
// general signals
|
||||
input logic Xs, Ys, // input signs
|
||||
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
|
||||
input logic [P.NF:0] Xm, Ym, Zm, // input mantissas
|
||||
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
|
||||
input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
|
||||
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
|
||||
input logic XZero, YZero, // inputs are zero
|
||||
input logic XInf, YInf, ZInf, // inputs are infinity
|
||||
@ -44,63 +42,63 @@ module postprocess (
|
||||
input logic FmaAs, // the modified Z sign - depends on instruction
|
||||
input logic FmaPs, // the product's sign
|
||||
input logic FmaSs, // Sum sign
|
||||
input logic [`NE+1:0] FmaSe, // the sum's exponent
|
||||
input logic [3*`NF+3:0] FmaSm, // the positive sum
|
||||
input logic [P.NE+1:0] FmaSe, // the sum's exponent
|
||||
input logic [3*P.NF+3:0] FmaSm, // the positive sum
|
||||
input logic FmaASticky, // sticky bit that is calculated during alignment
|
||||
input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // the normalization shift count
|
||||
input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // the normalization shift count
|
||||
//divide signals
|
||||
input logic DivSticky, // divider sticky bit
|
||||
input logic [`NE+1:0] DivQe, // divsqrt exponent
|
||||
input logic [`DIVb:0] DivQm, // divsqrt significand
|
||||
input logic [P.NE+1:0] DivQe, // divsqrt exponent
|
||||
input logic [P.DIVb:0] DivQm, // divsqrt significand
|
||||
// conversion signals
|
||||
input logic CvtCs, // the result's sign
|
||||
input logic [`NE:0] CvtCe, // the calculated expoent
|
||||
input logic [P.NE:0] CvtCe, // the calculated expoent
|
||||
input logic CvtResSubnormUf, // the convert result is subnormal or underflows
|
||||
input logic [`LOGCVTLEN-1:0] CvtShiftAmt,// how much to shift by
|
||||
input logic [P.LOGCVTLEN-1:0] CvtShiftAmt,// how much to shift by
|
||||
input logic ToInt, // is fp->int (since it's writting to the integer register)
|
||||
input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb)
|
||||
input logic [P.CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb)
|
||||
input logic IntZero, // is the integer input zero
|
||||
// final results
|
||||
output logic [`FLEN-1:0] PostProcRes,// postprocessor final result
|
||||
output logic [P.FLEN-1:0] PostProcRes,// postprocessor final result
|
||||
output logic [4:0] PostProcFlg,// postprocesser flags
|
||||
output logic [`XLEN-1:0] FCvtIntRes // the integer conversion result
|
||||
output logic [P.XLEN-1:0] FCvtIntRes // the integer conversion result
|
||||
);
|
||||
|
||||
// general signals
|
||||
logic Rs; // result sign
|
||||
logic [`NF-1:0] Rf; // Result fraction
|
||||
logic [`NE-1:0] Re; // Result exponent
|
||||
logic [P.NF-1:0] Rf; // Result fraction
|
||||
logic [P.NE-1:0] Re; // Result exponent
|
||||
logic Ms; // norMalized sign
|
||||
logic [`CORRSHIFTSZ-1:0] Mf; // norMalized fraction
|
||||
logic [`NE+1:0] Me; // normalized exponent
|
||||
logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow
|
||||
logic [P.CORRSHIFTSZ-1:0] Mf; // norMalized fraction
|
||||
logic [P.NE+1:0] Me; // normalized exponent
|
||||
logic [P.NE+1:0] FullRe; // Re with bits to determine sign and overflow
|
||||
logic UfPlus1; // do you add one (for determining underflow flag)
|
||||
logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt; // normalization shift amount
|
||||
logic [`NORMSHIFTSZ-1:0] ShiftIn; // input to normalization shift
|
||||
logic [`NORMSHIFTSZ-1:0] Shifted; // the ouput of the normalized shifter (before shift correction)
|
||||
logic [P.LOGNORMSHIFTSZ-1:0] ShiftAmt; // normalization shift amount
|
||||
logic [P.NORMSHIFTSZ-1:0] ShiftIn; // input to normalization shift
|
||||
logic [P.NORMSHIFTSZ-1:0] Shifted; // the ouput of the normalized shifter (before shift correction)
|
||||
logic Plus1; // add one to the final result?
|
||||
logic Overflow; // overflow flag used to select results
|
||||
logic Invalid; // invalid flag used to select results
|
||||
logic Guard, Round, Sticky; // bits needed to determine rounding
|
||||
logic [`FMTBITS-1:0] OutFmt; // output format
|
||||
logic [P.FMTBITS-1:0] OutFmt; // output format
|
||||
// fma signals
|
||||
logic [`NE+1:0] FmaMe; // exponent of the normalized sum
|
||||
logic [P.NE+1:0] FmaMe; // exponent of the normalized sum
|
||||
logic FmaSZero; // is the sum zero
|
||||
logic [3*`NF+5:0] FmaShiftIn; // fma shift input
|
||||
logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results
|
||||
logic [3*P.NF+5:0] FmaShiftIn; // fma shift input
|
||||
logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results
|
||||
logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection
|
||||
logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma
|
||||
logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma
|
||||
// division signals
|
||||
logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount
|
||||
logic [`NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
|
||||
logic [`NE+1:0] Qe; // divsqrt corrected exponent after corretion shift
|
||||
logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount
|
||||
logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
|
||||
logic [P.NE+1:0] Qe; // divsqrt corrected exponent after corretion shift
|
||||
logic DivByZero; // divide by zero flag
|
||||
logic DivResSubnorm; // is the divsqrt result subnormal
|
||||
logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed)
|
||||
// conversion signals
|
||||
logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted for converter
|
||||
logic [P.CVTLEN+P.NF:0] CvtShiftIn; // number to be shifted for converter
|
||||
logic [1:0] CvtNegResMsbs; // most significant bits of possibly negated int result
|
||||
logic [`XLEN+1:0] CvtNegRes; // possibly negated integer result
|
||||
logic [P.XLEN+1:0] CvtNegRes; // possibly negated integer result
|
||||
logic CvtResUf; // did the convert result underflow
|
||||
logic IntInvalid; // invalid integer flag
|
||||
// readability signals
|
||||
@ -132,9 +130,9 @@ module postprocess (
|
||||
// choose the output format depending on the operation
|
||||
// - fp -> fp: OpCtrl contains the precision of the output
|
||||
// - otherwise: Fmt contains the precision of the output
|
||||
if (`FPSIZES == 2)
|
||||
assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT);
|
||||
else if (`FPSIZES == 3 | `FPSIZES == 4)
|
||||
if (P.FPSIZES == 2)
|
||||
assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == P.FMT);
|
||||
else if (P.FPSIZES == 3 | P.FPSIZES == 4)
|
||||
assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -142,40 +140,40 @@ module postprocess (
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// final calculations before shifting
|
||||
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,
|
||||
cvtshiftcalc #(P) cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,
|
||||
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
|
||||
|
||||
fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
|
||||
fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
|
||||
.FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);
|
||||
|
||||
divshiftcalc divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
|
||||
divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
|
||||
|
||||
// select which unit's output to shift
|
||||
always_comb
|
||||
case(PostProcSel)
|
||||
2'b10: begin // fma
|
||||
ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt};
|
||||
ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+6){1'b0}}};
|
||||
ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(3*P.NF+5){1'b0}}, FmaShiftAmt};
|
||||
ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+6){1'b0}}};
|
||||
end
|
||||
2'b00: begin // cvt
|
||||
ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt};
|
||||
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
|
||||
ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt};
|
||||
ShiftIn = {CvtShiftIn, {P.NORMSHIFTSZ-P.CVTLEN-P.NF-1{1'b0}}};
|
||||
end
|
||||
2'b01: begin //divsqrt
|
||||
ShiftAmt = DivShiftAmt;
|
||||
ShiftIn = DivShiftIn;
|
||||
end
|
||||
default: begin
|
||||
ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}};
|
||||
ShiftIn = {`NORMSHIFTSZ{1'bx}};
|
||||
ShiftAmt = {P.LOGNORMSHIFTSZ{1'bx}};
|
||||
ShiftIn = {P.NORMSHIFTSZ{1'bx}};
|
||||
end
|
||||
endcase
|
||||
|
||||
// main normalization shift
|
||||
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
|
||||
normshift #(P) normshift (.ShiftIn, .ShiftAmt, .Shifted);
|
||||
|
||||
// correct for LZA/divsqrt error
|
||||
shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
|
||||
shiftcorrection #(P) shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
|
||||
.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -191,7 +189,7 @@ module postprocess (
|
||||
// calculate result sign used in rounding unit
|
||||
roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
|
||||
|
||||
round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
|
||||
round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
|
||||
.Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf,
|
||||
.DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
|
||||
|
||||
@ -206,7 +204,7 @@ module postprocess (
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
|
||||
flags #(P) flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
|
||||
.Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
|
||||
.NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero,
|
||||
.Guard, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
|
||||
@ -216,9 +214,9 @@ module postprocess (
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
|
||||
negateintres #(P) negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
|
||||
|
||||
specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
|
||||
specialcase #(P) specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
|
||||
.IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf,
|
||||
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
|
||||
.XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes);
|
||||
|
@ -26,8 +26,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module resultsign(
|
||||
input logic [2:0] Frm, // rounding mode
|
||||
input logic FmaOp, // is the operation an Fma
|
||||
@ -77,4 +75,4 @@ module resultsign(
|
||||
else if(FmaSZero&FmaOp) Rs = Zeros;
|
||||
else Rs = Ms;
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -26,42 +26,32 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// what position is XLEN in?
|
||||
// options:
|
||||
// 1: XLEN > NF > NF1
|
||||
// 2: NF > XLEN > NF1
|
||||
// 3: NF > NF1 > XLEN
|
||||
// single and double will always be smaller than XLEN
|
||||
`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
|
||||
|
||||
module round(
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
module round import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [2:0] Frm, // rounding mode
|
||||
input logic [1:0] PostProcSel, // select the postprocessor output
|
||||
input logic Ms, // normalized sign
|
||||
input logic [`CORRSHIFTSZ-1:0] Mf, // normalized fraction
|
||||
input logic [P.CORRSHIFTSZ-1:0] Mf, // normalized fraction
|
||||
// fma
|
||||
input logic FmaOp, // is an fma opperation being done?
|
||||
input logic [`NE+1:0] FmaMe, // exponent of the normalized sum for fma
|
||||
input logic [P.NE+1:0] FmaMe, // exponent of the normalized sum for fma
|
||||
input logic FmaASticky, // addend's sticky bit
|
||||
// divsqrt
|
||||
input logic DivOp, // is a division opperation being done
|
||||
input logic DivSticky, // divsqrt sticky bit
|
||||
input logic [`NE+1:0] Qe, // the divsqrt calculated expoent
|
||||
input logic [P.NE+1:0] Qe, // the divsqrt calculated expoent
|
||||
// cvt
|
||||
input logic CvtOp, // is a convert opperation being done
|
||||
input logic ToInt, // is the cvt op a cvt to integer
|
||||
input logic CvtResSubnormUf, // is the cvt result subnormal or underflow
|
||||
input logic CvtResUf, // does the cvt result underflow
|
||||
input logic [`NE:0] CvtCe, // the cvt calculated expoent
|
||||
input logic [P.NE:0] CvtCe, // the cvt calculated expoent
|
||||
// outputs
|
||||
output logic [`NE+1:0] Me, // normalied fraction
|
||||
output logic [P.NE+1:0] Me, // normalied fraction
|
||||
output logic UfPlus1, // do you add one to the result if given an unbounded exponent
|
||||
output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
|
||||
output logic [`NE-1:0] Re, // Result exponent
|
||||
output logic [`NF-1:0] Rf, // Result fractionNormS
|
||||
output logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow
|
||||
output logic [P.NE-1:0] Re, // Result exponent
|
||||
output logic [P.NF-1:0] Rf, // Result fractionNormS
|
||||
output logic Sticky, // sticky bit
|
||||
output logic Plus1, // do you add one to the final result
|
||||
output logic Round, Guard // bits needed to calculate rounding
|
||||
@ -69,7 +59,7 @@ module round(
|
||||
|
||||
logic UfCalcPlus1; // calculated plus one for unbounded exponent
|
||||
logic NormSticky; // normalized sum's sticky bit
|
||||
logic [`NF-1:0] RoundFrac; // rounded fraction
|
||||
logic [P.NF-1:0] RoundFrac; // rounded fraction
|
||||
logic FpRes; // is the result a floating point
|
||||
logic IntRes; // is the result an integer
|
||||
logic FpGuard, FpRound; // floating point round/guard bits
|
||||
@ -77,8 +67,17 @@ module round(
|
||||
logic LsbRes; // lsb of result
|
||||
logic CalcPlus1; // calculated plus1
|
||||
logic FpPlus1; // do you add one to the fp result
|
||||
logic [`FLEN:0] RoundAdd; // how much to add to the result
|
||||
logic [P.FLEN:0] RoundAdd; // how much to add to the result
|
||||
|
||||
// what position is XLEN in?
|
||||
// options:
|
||||
// 1: XLEN > NF > NF1
|
||||
// 2: NF > XLEN > NF1
|
||||
// 3: NF > NF1 > XLEN
|
||||
// single and double will always be smaller than XLEN
|
||||
//`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
|
||||
localparam XLENPOS = P.XLEN > P.NF ? 1 : P.XLEN > P.NF1 ? 2 : 3;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -115,68 +114,68 @@ module round(
|
||||
assign FpRes = ~IntRes;
|
||||
|
||||
// sticky bit calculation
|
||||
if (`FPSIZES == 1) begin
|
||||
if (P.FPSIZES == 1) begin
|
||||
|
||||
// 1: XLEN > NF
|
||||
// | XLEN |
|
||||
// | NF |1|1|
|
||||
// ^ ^ if floating point result
|
||||
// ^ if not an FMA result
|
||||
if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
|
||||
// 2: NF > XLEN
|
||||
if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
|
||||
if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&IntRes) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
// XLEN is either 64 or 32
|
||||
// so half and single are always smaller than XLEN
|
||||
|
||||
// 1: XLEN > NF > NF1
|
||||
if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~OutFmt) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
|
||||
// 2: NF > XLEN > NF1
|
||||
if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
|
||||
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
|
||||
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&IntRes) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
// 1: XLEN > NF > NF1
|
||||
if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
|
||||
// 2: NF > XLEN > NF1
|
||||
if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
|
||||
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT1)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
|
||||
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
|
||||
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT1)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT1)|IntRes)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
// Quad precision will always be greater than XLEN
|
||||
// 2: NF > XLEN > NF1
|
||||
if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
|
||||
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
|
||||
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
|
||||
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
|
||||
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
// The extra XLEN bit will be ORed later when calculating the final sticky bit - the ufplus1 not needed for integer
|
||||
if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
|
||||
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
|
||||
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
|
||||
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
|
||||
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
|
||||
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
|
||||
|
||||
end
|
||||
|
||||
@ -184,40 +183,40 @@ module round(
|
||||
|
||||
// only add the Addend sticky if doing an FMA operation
|
||||
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
|
||||
assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivSticky&DivOp;
|
||||
assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[P.NE+1]&FmaOp | DivSticky&DivOp;
|
||||
|
||||
|
||||
|
||||
|
||||
// determine round and LSB of the rounded value
|
||||
// - underflow round bit is used to determine the underflow flag
|
||||
if (`FPSIZES == 1) begin
|
||||
assign FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
|
||||
assign FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
|
||||
assign FpRound = Mf[`CORRSHIFTSZ-`NF-2];
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
|
||||
assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
|
||||
assign FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign FpGuard = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
|
||||
assign FpLsbRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
|
||||
assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign FpGuard = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1];
|
||||
assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1];
|
||||
assign FpRound = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2];
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
|
||||
FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`NF-2];
|
||||
P.FMT: begin
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
|
||||
end
|
||||
`FMT1: begin
|
||||
FpGuard = Mf[`CORRSHIFTSZ-`NF1-1];
|
||||
FpLsbRes = Mf[`CORRSHIFTSZ-`NF1];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`NF1-2];
|
||||
P.FMT1: begin
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.NF1-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.NF1-2];
|
||||
end
|
||||
`FMT2: begin
|
||||
FpGuard = Mf[`CORRSHIFTSZ-`NF2-1];
|
||||
FpLsbRes = Mf[`CORRSHIFTSZ-`NF2];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`NF2-2];
|
||||
P.FMT2: begin
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.NF2-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.NF2-2];
|
||||
end
|
||||
default: begin
|
||||
FpGuard = 1'bx;
|
||||
@ -225,35 +224,35 @@ module round(
|
||||
FpRound = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end else if (`FPSIZES == 4) begin
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
FpGuard = Mf[`CORRSHIFTSZ-`Q_NF-1];
|
||||
FpLsbRes = Mf[`CORRSHIFTSZ-`Q_NF];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.Q_NF-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.Q_NF-2];
|
||||
end
|
||||
2'h1: begin
|
||||
FpGuard = Mf[`CORRSHIFTSZ-`D_NF-1];
|
||||
FpLsbRes = Mf[`CORRSHIFTSZ-`D_NF];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`D_NF-2];
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.D_NF-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.D_NF-2];
|
||||
end
|
||||
2'h0: begin
|
||||
FpGuard = Mf[`CORRSHIFTSZ-`S_NF-1];
|
||||
FpLsbRes = Mf[`CORRSHIFTSZ-`S_NF];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`S_NF-2];
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.S_NF-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.S_NF-2];
|
||||
end
|
||||
2'h2: begin
|
||||
FpGuard = Mf[`CORRSHIFTSZ-`H_NF-1];
|
||||
FpLsbRes = Mf[`CORRSHIFTSZ-`H_NF];
|
||||
FpRound = Mf[`CORRSHIFTSZ-`H_NF-2];
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.H_NF-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.H_NF-2];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign Guard = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpGuard;
|
||||
assign LsbRes = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLsbRes;
|
||||
assign Round = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpRound;
|
||||
assign Guard = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN-1] : FpGuard;
|
||||
assign LsbRes = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN] : FpLsbRes;
|
||||
assign Round = ToInt&CvtOp ? Mf[P.CORRSHIFTSZ-P.XLEN-2] : FpRound;
|
||||
|
||||
|
||||
always_comb begin
|
||||
@ -287,26 +286,26 @@ module round(
|
||||
|
||||
|
||||
// place Plus1 into the proper position for the format
|
||||
if (`FPSIZES == 1) begin
|
||||
assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
// \/FLEN+1
|
||||
// | NE+2 | NF |
|
||||
// '-NE+2-^----NF1----^
|
||||
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
|
||||
assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
|
||||
// P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
|
||||
assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};
|
||||
|
||||
end else if (`FPSIZES == 4)
|
||||
assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
|
||||
end else if (P.FPSIZES == 4)
|
||||
assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
|
||||
|
||||
|
||||
|
||||
// trim unneeded bits from fraction
|
||||
assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
|
||||
assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF];
|
||||
|
||||
|
||||
|
||||
@ -314,7 +313,7 @@ module round(
|
||||
always_comb
|
||||
case(PostProcSel)
|
||||
2'b10: Me = FmaMe; // fma
|
||||
2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
|
||||
2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
|
||||
// 2'b01: Me = DivDone ? Qe : '0; // divide
|
||||
2'b01: Me = Qe; // divide
|
||||
default: Me = '0;
|
||||
@ -325,7 +324,7 @@ module round(
|
||||
// round the result
|
||||
// - if the fraction overflows one should be added to the exponent
|
||||
assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
|
||||
assign Re = FullRe[`NE-1:0];
|
||||
assign Re = FullRe[P.NE-1:0];
|
||||
|
||||
|
||||
endmodule
|
||||
|
@ -25,7 +25,6 @@
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
|
||||
module roundsign(
|
||||
input logic Xs, // x sign
|
||||
@ -47,4 +46,4 @@ module roundsign(
|
||||
// Select sign for rounding calulation
|
||||
assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -26,53 +26,51 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module shiftcorrection(
|
||||
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
|
||||
module shiftcorrection import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
|
||||
// divsqrt
|
||||
input logic DivOp, // is it a divsqrt opperation
|
||||
input logic DivResSubnorm, // is the divsqrt result subnormal
|
||||
input logic [`NE+1:0] DivQe, // the divsqrt result's exponent
|
||||
input logic [P.NE+1:0] DivQe, // the divsqrt result's exponent
|
||||
input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
|
||||
//fma
|
||||
input logic FmaOp, // is it an fma opperation
|
||||
input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
|
||||
input logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
|
||||
input logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
|
||||
input logic FmaSZero,
|
||||
// output
|
||||
output logic [`NE+1:0] FmaMe, // exponent of the normalized sum
|
||||
output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
|
||||
output logic [`NE+1:0] Qe // corrected exponent for divider
|
||||
output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum
|
||||
output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
|
||||
output logic [P.NE+1:0] Qe // corrected exponent for divider
|
||||
);
|
||||
|
||||
logic [3*`NF+3:0] CorrSumShifted; // the shifted sum after LZA correction
|
||||
logic [`CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted
|
||||
logic [`CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift
|
||||
logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction
|
||||
logic [P.CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted
|
||||
logic [P.CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift
|
||||
logic ResSubnorm; // is the result Subnormal
|
||||
logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction
|
||||
logic LeftShiftQm; // should the divsqrt result be shifted one to the left
|
||||
|
||||
// LZA correction
|
||||
assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1];
|
||||
assign LZAPlus1 = Shifted[P.NORMSHIFTSZ-1];
|
||||
|
||||
// correct the shifting error caused by the LZA
|
||||
// - the only possible mantissa for a plus two is all zeroes
|
||||
// - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
|
||||
mux2 #(`NORMSHIFTSZ-2) lzacorrmux(Shifted[`NORMSHIFTSZ-3:0], Shifted[`NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);
|
||||
mux2 #(P.NORMSHIFTSZ-2) lzacorrmux(Shifted[P.NORMSHIFTSZ-3:0], Shifted[P.NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);
|
||||
|
||||
// correct the shifting of the divsqrt caused by producing a result in (2, .5] range
|
||||
// condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm)
|
||||
assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
|
||||
assign CorrQm0 = Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
|
||||
assign CorrQm1 = Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1];
|
||||
mux2 #(`CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
|
||||
assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2];
|
||||
assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1];
|
||||
mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
|
||||
|
||||
// if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
|
||||
always_comb
|
||||
if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}};
|
||||
if(FmaOp) Mf = {CorrSumShifted, {P.CORRSHIFTSZ-(3*P.NF+4){1'b0}}};
|
||||
else if (DivOp&~DivResSubnorm) Mf = CorrQmShifted;
|
||||
else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
|
||||
else Mf = Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.CORRSHIFTSZ];
|
||||
|
||||
// Determine sum's exponent
|
||||
// main exponent issues:
|
||||
@ -82,12 +80,12 @@ module shiftcorrection(
|
||||
// - if the result was calulated to be subnorm but it's norm and the LZA was off by 2
|
||||
// if plus1 If plus2 kill if the result Zero or actually subnormal
|
||||
// | | |
|
||||
assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, FmaPreResultSubnorm}) & {`NE+2{~(FmaSZero|ResSubnorm)}};
|
||||
assign FmaMe = (NormSumExp+{{P.NE+1{1'b0}}, LZAPlus1} +{{P.NE+1{1'b0}}, FmaPreResultSubnorm}) & {P.NE+2{~(FmaSZero|ResSubnorm)}};
|
||||
|
||||
// recalculate if the result is subnormal after LZA correction
|
||||
assign ResSubnorm = FmaPreResultSubnorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1];
|
||||
assign ResSubnorm = FmaPreResultSubnorm&~Shifted[P.NORMSHIFTSZ-2]&~Shifted[P.NORMSHIFTSZ-1];
|
||||
|
||||
// the quotent is in the range [.5,2) if there is no early termination
|
||||
// if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift
|
||||
assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(`NE+1)'(0), ~LZAPlus1};
|
||||
endmodule
|
||||
assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1};
|
||||
endmodule
|
||||
|
@ -26,14 +26,12 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module specialcase(
|
||||
module specialcase import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, // X sign
|
||||
input logic [`NF:0] Xm, Ym, Zm, // input significand's
|
||||
input logic [P.NF:0] Xm, Ym, Zm, // input significand's
|
||||
input logic XNaN, YNaN, ZNaN, // are the inputs NaN
|
||||
input logic [2:0] Frm, // rounding mode
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [P.FMTBITS-1:0] OutFmt, // output format
|
||||
input logic InfIn, // are any inputs infinity
|
||||
input logic NaNIn, // are any input NaNs
|
||||
input logic XInf, YInf, // are X or Y inifnity
|
||||
@ -41,9 +39,9 @@ module specialcase(
|
||||
input logic Plus1, // do you add one for rounding
|
||||
input logic Rs, // the result's sign
|
||||
input logic Invalid, Overflow, // flags to choose the result
|
||||
input logic [`NE-1:0] Re, // Result exponent
|
||||
input logic [`NE+1:0] FullRe, // Result full exponent
|
||||
input logic [`NF-1:0] Rf, // Result fraction
|
||||
input logic [P.NE-1:0] Re, // Result exponent
|
||||
input logic [P.NE+1:0] FullRe, // Result full exponent
|
||||
input logic [P.NF-1:0] Rf, // Result fraction
|
||||
// fma
|
||||
input logic FmaOp, // is it a fma opperation
|
||||
// divsqrt
|
||||
@ -55,23 +53,23 @@ module specialcase(
|
||||
input logic IntToFp, // is cvt int -> fp opperation
|
||||
input logic Int64, // is the integer 64 bits
|
||||
input logic Signed, // is the integer signed
|
||||
input logic [`NE:0] CvtCe, // the calculated expoent for cvt
|
||||
input logic [P.NE:0] CvtCe, // the calculated expoent for cvt
|
||||
input logic IntInvalid, // integer invalid flag to choose the result
|
||||
input logic CvtResUf, // does the convert result underflow
|
||||
input logic [`XLEN+1:0] CvtNegRes, // the possibly negated of the integer result
|
||||
input logic [P.XLEN+1:0] CvtNegRes, // the possibly negated of the integer result
|
||||
// outputs
|
||||
output logic [`FLEN-1:0] PostProcRes,// final result
|
||||
output logic [`XLEN-1:0] FCvtIntRes // final integer result
|
||||
output logic [P.FLEN-1:0] PostProcRes,// final result
|
||||
output logic [P.XLEN-1:0] FCvtIntRes // final integer result
|
||||
);
|
||||
|
||||
logic [`FLEN-1:0] XNaNRes; // X is NaN result
|
||||
logic [`FLEN-1:0] YNaNRes; // Y is NaN result
|
||||
logic [`FLEN-1:0] ZNaNRes; // Z is NaN result
|
||||
logic [`FLEN-1:0] InvalidRes; // Invalid result result
|
||||
logic [`FLEN-1:0] UfRes; // underflowed result result
|
||||
logic [`FLEN-1:0] OfRes; // overflowed result result
|
||||
logic [`FLEN-1:0] NormRes; // normal result
|
||||
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
|
||||
logic [P.FLEN-1:0] XNaNRes; // X is NaN result
|
||||
logic [P.FLEN-1:0] YNaNRes; // Y is NaN result
|
||||
logic [P.FLEN-1:0] ZNaNRes; // Z is NaN result
|
||||
logic [P.FLEN-1:0] InvalidRes; // Invalid result result
|
||||
logic [P.FLEN-1:0] UfRes; // underflowed result result
|
||||
logic [P.FLEN-1:0] OfRes; // overflowed result result
|
||||
logic [P.FLEN-1:0] NormRes; // normal result
|
||||
logic [P.XLEN-1:0] OfIntRes; // the overflow result for integer output
|
||||
logic OfResMax; // does the of result output maximum norm fp number
|
||||
logic KillRes; // kill the result for underflow
|
||||
logic SelOfRes; // should the overflow result be selected
|
||||
@ -82,158 +80,158 @@ module specialcase(
|
||||
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
|
||||
|
||||
// select correct outputs for special cases
|
||||
if (`FPSIZES == 1) begin
|
||||
if (P.FPSIZES == 1) begin
|
||||
//NaN res selection depending on standard
|
||||
if(`IEEE754) begin
|
||||
assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
|
||||
assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
|
||||
assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
|
||||
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
if(P.IEEE754) begin
|
||||
assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
|
||||
assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
|
||||
assign ZNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]};
|
||||
assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end else begin
|
||||
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
assign OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
assign UfRes = {Rs, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
assign OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
|
||||
assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
assign NormRes = {Rs, Re, Rf};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
if(`IEEE754) begin
|
||||
assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
|
||||
assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
|
||||
assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
|
||||
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
if(P.IEEE754) begin
|
||||
assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
|
||||
assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
|
||||
assign ZNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.NF1]};
|
||||
assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
end else begin
|
||||
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
end
|
||||
|
||||
always_comb
|
||||
if(OutFmt)
|
||||
if(OfResMax) OfRes = {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}};
|
||||
else OfRes = {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
if(OfResMax) OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
|
||||
else OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
|
||||
else
|
||||
if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}};
|
||||
else OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
assign UfRes = OutFmt ? {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
assign NormRes = OutFmt ? {Rs, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
|
||||
if(OfResMax) OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
|
||||
else OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
|
||||
assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
|
||||
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
|
||||
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
|
||||
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
P.FMT: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
|
||||
YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
|
||||
ZNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]};
|
||||
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end else begin
|
||||
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
|
||||
UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {Rs, Re, Rf};
|
||||
end
|
||||
`FMT1: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
|
||||
YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
|
||||
ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
|
||||
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
P.FMT1: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
|
||||
YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
|
||||
ZNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.NF1]};
|
||||
InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
end
|
||||
OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
UfRes = {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
|
||||
OfRes = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
|
||||
UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
|
||||
end
|
||||
`FMT2: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
|
||||
YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
|
||||
ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]};
|
||||
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
P.FMT2: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
|
||||
YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
|
||||
ZNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.NF2]};
|
||||
InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
UfRes = {{`FLEN-`LEN2{1'b1}}, Rs, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{`FLEN-`LEN2{1'b1}}, Rs, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]};
|
||||
OfRes = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
|
||||
UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
|
||||
end
|
||||
default: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = (`FLEN)'(0);
|
||||
YNaNRes = (`FLEN)'(0);
|
||||
ZNaNRes = (`FLEN)'(0);
|
||||
InvalidRes = (`FLEN)'(0);
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = (P.FLEN)'(0);
|
||||
YNaNRes = (P.FLEN)'(0);
|
||||
ZNaNRes = (P.FLEN)'(0);
|
||||
InvalidRes = (P.FLEN)'(0);
|
||||
end else begin
|
||||
InvalidRes = (`FLEN)'(0);
|
||||
InvalidRes = (P.FLEN)'(0);
|
||||
end
|
||||
OfRes = (`FLEN)'(0);
|
||||
UfRes = (`FLEN)'(0);
|
||||
NormRes = (`FLEN)'(0);
|
||||
OfRes = (P.FLEN)'(0);
|
||||
UfRes = (P.FLEN)'(0);
|
||||
NormRes = (P.FLEN)'(0);
|
||||
end
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
|
||||
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
|
||||
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
|
||||
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
|
||||
YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
|
||||
ZNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Zm[P.NF-2:0]};
|
||||
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end else begin
|
||||
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
|
||||
UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {Rs, Re, Rf};
|
||||
end
|
||||
2'h1: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
|
||||
YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
|
||||
ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]};
|
||||
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
|
||||
YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
|
||||
ZNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.D_NF]};
|
||||
InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
|
||||
end
|
||||
OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
UfRes = {{`FLEN-`D_LEN{1'b1}}, Rs, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{`FLEN-`D_LEN{1'b1}}, Rs, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]};
|
||||
OfRes = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
|
||||
UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
|
||||
end
|
||||
2'h0: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
|
||||
YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
|
||||
ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]};
|
||||
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
|
||||
YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
|
||||
ZNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.S_NF]};
|
||||
InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
|
||||
InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
UfRes = {{`FLEN-`S_LEN{1'b1}}, Rs, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{`FLEN-`S_LEN{1'b1}}, Rs, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]};
|
||||
OfRes = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
|
||||
UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
|
||||
end
|
||||
2'h2: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
|
||||
YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
|
||||
ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]};
|
||||
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
|
||||
YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
|
||||
ZNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Zm[P.NF-2:P.NF-P.H_NF]};
|
||||
InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
|
||||
InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
OfRes = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};
|
||||
// zero is exact if dividing by infinity so don't add 1
|
||||
UfRes = {{`FLEN-`H_LEN{1'b1}}, Rs, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{`FLEN-`H_LEN{1'b1}}, Rs, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]};
|
||||
UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
@ -242,13 +240,13 @@ module specialcase(
|
||||
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
|
||||
// - dont set to zero if fp input is zero but not using the fp input
|
||||
// - dont set to zero if int input is zero but not using the int input
|
||||
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
|
||||
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
|
||||
|
||||
// calculate if the overflow result should be selected
|
||||
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
|
||||
|
||||
// output infinity with result sign if divide by zero
|
||||
if(`IEEE754)
|
||||
if(P.IEEE754)
|
||||
always_comb
|
||||
if(XNaN&~(IntToFp&CvtOp)) PostProcRes = XNaNRes;
|
||||
else if(YNaN&~CvtOp) PostProcRes = YNaNRes;
|
||||
@ -283,14 +281,14 @@ module specialcase(
|
||||
always_comb
|
||||
if(Signed)
|
||||
if(Xs&~NaNIn) // signed negitive
|
||||
if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}};
|
||||
else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
|
||||
if(Int64) OfIntRes = {1'b1, {P.XLEN-1{1'b0}}};
|
||||
else OfIntRes = {{P.XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
|
||||
else // signed positive
|
||||
if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}};
|
||||
else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
|
||||
if(Int64) OfIntRes = {1'b0, {P.XLEN-1{1'b1}}};
|
||||
else OfIntRes = {{P.XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
|
||||
else
|
||||
if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive
|
||||
else OfIntRes = {`XLEN{1'b1}}; // unsigned positive
|
||||
if(Xs&~NaNIn) OfIntRes = {P.XLEN{1'b0}}; // unsigned negitive
|
||||
else OfIntRes = {P.XLEN{1'b1}}; // unsigned positive
|
||||
|
||||
|
||||
// select the integer output
|
||||
@ -301,9 +299,9 @@ module specialcase(
|
||||
// - otherwise output the normal res (trmined and sign extended if nessisary)
|
||||
always_comb
|
||||
if(IntInvalid) FCvtIntRes = OfIntRes;
|
||||
else if(CvtCe[`NE])
|
||||
if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}};
|
||||
else FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1};
|
||||
else if(Int64) FCvtIntRes = CvtNegRes[`XLEN-1:0];
|
||||
else FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
|
||||
endmodule
|
||||
else if(CvtCe[P.NE])
|
||||
if(Xs&Signed&Plus1) FCvtIntRes = {{P.XLEN{1'b1}}};
|
||||
else FCvtIntRes = {{P.XLEN-1{1'b0}}, Plus1};
|
||||
else if(Int64) FCvtIntRes = CvtNegRes[P.XLEN-1:0];
|
||||
else FCvtIntRes = {{P.XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
|
||||
endmodule
|
||||
|
@ -25,41 +25,40 @@
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
|
||||
module unpack (
|
||||
input logic [`FLEN-1:0] X, Y, Z, // inputs from register file
|
||||
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
|
||||
module unpack import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FLEN-1:0] X, Y, Z, // inputs from register file
|
||||
input logic [P.FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
|
||||
input logic XEn, YEn, ZEn, // input enables
|
||||
output logic Xs, Ys, Zs, // sign bits of XYZ
|
||||
output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
|
||||
output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
|
||||
output logic [P.NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
|
||||
output logic [P.NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
|
||||
output logic XNaN, YNaN, ZNaN, // is XYZ a NaN
|
||||
output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
|
||||
output logic XSubnorm, // is X subnormal
|
||||
output logic XZero, YZero, ZZero, // is XYZ zero
|
||||
output logic XInf, YInf, ZInf, // is XYZ infinity
|
||||
output logic XExpMax, // does X have the maximum exponent (NaN or Inf)
|
||||
output logic [`FLEN-1:0] XPostBox // X after being properly NaN-boxed
|
||||
output logic [P.FLEN-1:0] XPostBox // X after being properly NaN-boxed
|
||||
);
|
||||
|
||||
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
|
||||
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
|
||||
logic YExpMax, ZExpMax; // is the exponent all 1s
|
||||
|
||||
unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn),
|
||||
unpackinput #(P) unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn),
|
||||
.NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
|
||||
.Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero),
|
||||
.Subnorm(XSubnorm), .PostBox(XPostBox));
|
||||
|
||||
unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn),
|
||||
unpackinput #(P) unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn),
|
||||
.NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
|
||||
.Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero),
|
||||
.Subnorm(), .PostBox());
|
||||
|
||||
unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn),
|
||||
unpackinput #(P) unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn),
|
||||
.NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
|
||||
.Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero),
|
||||
.Subnorm(), .PostBox());
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -25,15 +25,14 @@
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
|
||||
module unpackinput (
|
||||
input logic [`FLEN-1:0] In, // inputs from register file
|
||||
module unpackinput import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FLEN-1:0] In, // inputs from register file
|
||||
input logic En, // enable the input
|
||||
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
|
||||
input logic [P.FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
|
||||
output logic Sgn, // sign bits of the number
|
||||
output logic [`NE-1:0] Exp, // exponent of the number (converted to largest supported precision)
|
||||
output logic [`NF:0] Man, // mantissa of the number (converted to largest supported precision)
|
||||
output logic [P.NE-1:0] Exp, // exponent of the number (converted to largest supported precision)
|
||||
output logic [P.NF:0] Man, // mantissa of the number (converted to largest supported precision)
|
||||
output logic NaN, // is the number a NaN
|
||||
output logic SNaN, // is the number a signaling NaN
|
||||
output logic Zero, // is the number zero
|
||||
@ -42,29 +41,29 @@ module unpackinput (
|
||||
output logic FracZero, // is the fraction zero
|
||||
output logic ExpMax, // does In have the maximum exponent (NaN or Inf)
|
||||
output logic Subnorm, // is the number subnormal
|
||||
output logic [`FLEN-1:0] PostBox // Number reboxed correctly as a NaN
|
||||
output logic [P.FLEN-1:0] PostBox // Number reboxed correctly as a NaN
|
||||
);
|
||||
|
||||
logic [`NF-1:0] Frac; // Fraction of XYZ
|
||||
logic [P.NF-1:0] Frac; // Fraction of XYZ
|
||||
logic BadNaNBox; // incorrectly NaN Boxed
|
||||
|
||||
if (`FPSIZES == 1) begin // if there is only one floating point format supported
|
||||
if (P.FPSIZES == 1) begin // if there is only one floating point format supported
|
||||
assign BadNaNBox = 0;
|
||||
assign Sgn = In[`FLEN-1]; // sign bit
|
||||
assign Frac = In[`NF-1:0]; // fraction (no assumed 1)
|
||||
assign ExpNonZero = |In[`FLEN-2:`NF]; // is the exponent non-zero
|
||||
assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; // exponent. subnormal numbers have effective biased exponent of 1
|
||||
assign ExpMax = &In[`FLEN-2:`NF]; // is the exponent all 1's
|
||||
assign Sgn = In[P.FLEN-1]; // sign bit
|
||||
assign Frac = In[P.NF-1:0]; // fraction (no assumed 1)
|
||||
assign ExpNonZero = |In[P.FLEN-2:P.NF]; // is the exponent non-zero
|
||||
assign Exp = {In[P.FLEN-2:P.NF+1], In[P.NF]|~ExpNonZero}; // exponent. subnormal numbers have effective biased exponent of 1
|
||||
assign ExpMax = &In[P.FLEN-2:P.NF]; // is the exponent all 1's
|
||||
assign PostBox = In;
|
||||
|
||||
end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported
|
||||
end else if (P.FPSIZES == 2) begin // if there are 2 floating point formats supported
|
||||
// largest format | smaller format
|
||||
//----------------------------------
|
||||
// `FLEN | `LEN1 length of floating point number
|
||||
// `NE | `NE1 length of exponent
|
||||
// `NF | `NF1 length of fraction
|
||||
// `BIAS | `BIAS1 exponent's bias value
|
||||
// `FMT | `FMT1 precision's format value - Q=11 D=01 Sticky=00 H=10
|
||||
// P.FLEN | P.LEN1 length of floating point number
|
||||
// P.NE | P.NE1 length of exponent
|
||||
// P.NF | P.NF1 length of fraction
|
||||
// P.BIAS | P.BIAS1 exponent's bias value
|
||||
// P.FMT | P.FMT1 precision's format value - Q=11 D=01 Sticky=00 H=10
|
||||
|
||||
// Possible combinations specified by spec:
|
||||
// double and single
|
||||
@ -76,22 +75,22 @@ module unpackinput (
|
||||
// quad and half
|
||||
// double and half
|
||||
|
||||
assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
|
||||
assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing
|
||||
always_comb
|
||||
if (BadNaNBox) begin
|
||||
// PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, In[`LEN1-`NE1-3:0]};
|
||||
PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, {(`LEN1-`NE1-2){1'b0}}};
|
||||
// PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
|
||||
PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
|
||||
end else
|
||||
PostBox = In;
|
||||
|
||||
// choose sign bit depending on format - 1=larger precision 0=smaller precision
|
||||
assign Sgn = Fmt ? In[`FLEN-1] : (BadNaNBox ? 0 : In[`LEN1-1]); // improperly boxed NaNs are treated as positive
|
||||
assign Sgn = Fmt ? In[P.FLEN-1] : (BadNaNBox ? 0 : In[P.LEN1-1]); // improperly boxed NaNs are treated as positive
|
||||
|
||||
// extract the fraction, add trailing zeroes to the mantissa if necessary
|
||||
assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
assign Frac = Fmt ? In[P.NF-1:0] : {In[P.NF1-1:0], (P.NF-P.NF1)'(0)};
|
||||
|
||||
// is the exponent non-zero
|
||||
assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];
|
||||
assign ExpNonZero = Fmt ? |In[P.FLEN-2:P.NF] : |In[P.LEN1-2:P.NF1];
|
||||
|
||||
// example double to single conversion:
|
||||
// 1023 = 0011 1111 1111
|
||||
@ -103,21 +102,21 @@ module unpackinput (
|
||||
|
||||
// extract the exponent, converting the smaller exponent into the larger precision if necessary
|
||||
// - if the original precision had a Subnormal number convert the exponent value 1
|
||||
assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
|
||||
assign Exp = Fmt ? {In[P.FLEN-2:P.NF+1], In[P.NF]|~ExpNonZero} : {In[P.LEN1-2], {P.NE-P.NE1{~In[P.LEN1-2]}}, In[P.LEN1-3:P.NF1+1], In[P.NF1]|~ExpNonZero};
|
||||
|
||||
// is the exponent all 1's
|
||||
assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
|
||||
assign ExpMax = Fmt ? &In[P.FLEN-2:P.NF] : &In[P.LEN1-2:P.NF1];
|
||||
|
||||
|
||||
end else if (`FPSIZES == 3) begin // three floating point precsions supported
|
||||
end else if (P.FPSIZES == 3) begin // three floating point precsions supported
|
||||
|
||||
// largest format | larger format | smallest format
|
||||
//---------------------------------------------------
|
||||
// `FLEN | `LEN1 | `LEN2 length of floating point number
|
||||
// `NE | `NE1 | `NE2 length of exponent
|
||||
// `NF | `NF1 | `NF2 length of fraction
|
||||
// `BIAS | `BIAS1 | `BIAS2 exponent's bias value
|
||||
// `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 Sticky=00 H=10
|
||||
// P.FLEN | P.LEN1 | P.LEN2 length of floating point number
|
||||
// P.NE | P.NE1 | P.NE2 length of exponent
|
||||
// P.NF | P.NF1 | P.NF2 length of fraction
|
||||
// P.BIAS | P.BIAS1 | P.BIAS2 exponent's bias value
|
||||
// P.FMT | P.FMT1 | P.FMT2 precision's format value - Q=11 D=01 Sticky=00 H=10
|
||||
|
||||
// Possible combinations specified by spec:
|
||||
// quad and double and single
|
||||
@ -130,20 +129,20 @@ module unpackinput (
|
||||
// Check NaN boxing
|
||||
always_comb
|
||||
case (Fmt)
|
||||
`FMT: BadNaNBox = 0;
|
||||
`FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
|
||||
`FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
|
||||
P.FMT: BadNaNBox = 0;
|
||||
P.FMT1: BadNaNBox = ~&In[P.FLEN-1:P.LEN1];
|
||||
P.FMT2: BadNaNBox = ~&In[P.FLEN-1:P.LEN2];
|
||||
default: BadNaNBox = 1'bx;
|
||||
endcase
|
||||
|
||||
always_comb
|
||||
if (BadNaNBox) begin
|
||||
case (Fmt)
|
||||
`FMT: PostBox = In;
|
||||
// `FMT1: PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, In[`LEN1-`NE1-3:0]};
|
||||
// `FMT2: PostBox = {{(`FLEN-`LEN2){1'b1}}, 1'b1, {(`NE2+1){1'b1}}, In[`LEN2-`NE2-3:0]};
|
||||
`FMT1: PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, {(`LEN1-`NE1-2){1'b0}}};
|
||||
`FMT2: PostBox = {{(`FLEN-`LEN2){1'b1}}, 1'b1, {(`NE2+1){1'b1}}, {(`LEN2-`NE2-2){1'b0}}};
|
||||
P.FMT: PostBox = In;
|
||||
// P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
|
||||
// P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, In[P.LEN2-P.NE2-3:0]};
|
||||
P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
|
||||
P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}};
|
||||
default: PostBox = 'x;
|
||||
endcase
|
||||
end else
|
||||
@ -154,27 +153,27 @@ module unpackinput (
|
||||
if (BadNaNBox) Sgn = 0; // improperly boxed NaNs are treated as positive
|
||||
else
|
||||
case (Fmt)
|
||||
`FMT: Sgn = In[`FLEN-1];
|
||||
`FMT1: Sgn = In[`LEN1-1];
|
||||
`FMT2: Sgn = In[`LEN2-1];
|
||||
P.FMT: Sgn = In[P.FLEN-1];
|
||||
P.FMT1: Sgn = In[P.LEN1-1];
|
||||
P.FMT2: Sgn = In[P.LEN2-1];
|
||||
default: Sgn = 1'bx;
|
||||
endcase
|
||||
|
||||
// extract the fraction
|
||||
always_comb
|
||||
case (Fmt)
|
||||
`FMT: Frac = In[`NF-1:0];
|
||||
`FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
`FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
|
||||
default: Frac = {`NF{1'bx}};
|
||||
P.FMT: Frac = In[P.NF-1:0];
|
||||
P.FMT1: Frac = {In[P.NF1-1:0], (P.NF-P.NF1)'(0)};
|
||||
P.FMT2: Frac = {In[P.NF2-1:0], (P.NF-P.NF2)'(0)};
|
||||
default: Frac = {P.NF{1'bx}};
|
||||
endcase
|
||||
|
||||
// is the exponent non-zero
|
||||
always_comb
|
||||
case (Fmt)
|
||||
`FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double)
|
||||
`FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single)
|
||||
`FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
|
||||
P.FMT: ExpNonZero = |In[P.FLEN-2:P.NF]; // if input is largest precision (P.FLEN - ie quad or double)
|
||||
P.FMT1: ExpNonZero = |In[P.LEN1-2:P.NF1]; // if input is larger precsion (P.LEN1 - double or single)
|
||||
P.FMT2: ExpNonZero = |In[P.LEN2-2:P.NF2]; // if input is smallest precsion (P.LEN2 - single or half)
|
||||
default: ExpNonZero = 1'bx;
|
||||
endcase
|
||||
|
||||
@ -189,50 +188,50 @@ module unpackinput (
|
||||
// convert the larger precision's exponent to use the largest precision's bias
|
||||
always_comb
|
||||
case (Fmt)
|
||||
`FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
|
||||
`FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
|
||||
`FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero};
|
||||
default: Exp = {`NE{1'bx}};
|
||||
P.FMT: Exp = {In[P.FLEN-2:P.NF+1], In[P.NF]|~ExpNonZero};
|
||||
P.FMT1: Exp = {In[P.LEN1-2], {P.NE-P.NE1{~In[P.LEN1-2]}}, In[P.LEN1-3:P.NF1+1], In[P.NF1]|~ExpNonZero};
|
||||
P.FMT2: Exp = {In[P.LEN2-2], {P.NE-P.NE2{~In[P.LEN2-2]}}, In[P.LEN2-3:P.NF2+1], In[P.NF2]|~ExpNonZero};
|
||||
default: Exp = {P.NE{1'bx}};
|
||||
endcase
|
||||
|
||||
// is the exponent all 1's
|
||||
always_comb
|
||||
case (Fmt)
|
||||
`FMT: ExpMax = &In[`FLEN-2:`NF];
|
||||
`FMT1: ExpMax = &In[`LEN1-2:`NF1];
|
||||
`FMT2: ExpMax = &In[`LEN2-2:`NF2];
|
||||
P.FMT: ExpMax = &In[P.FLEN-2:P.NF];
|
||||
P.FMT1: ExpMax = &In[P.LEN1-2:P.NF1];
|
||||
P.FMT2: ExpMax = &In[P.LEN2-2:P.NF2];
|
||||
default: ExpMax = 1'bx;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin // if all precsisons are supported - quad, double, single, and half
|
||||
end else if (P.FPSIZES == 4) begin // if all precsisons are supported - quad, double, single, and half
|
||||
|
||||
// quad | double | single | half
|
||||
//-------------------------------------------------------------------
|
||||
// `Q_LEN | `D_LEN | `S_LEN | `H_LEN length of floating point number
|
||||
// `Q_NE | `D_NE | `S_NE | `H_NE length of exponent
|
||||
// `Q_NF | `D_NF | `S_NF | `H_NF length of fraction
|
||||
// `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value
|
||||
// `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 Sticky=00 H=10
|
||||
// P.Q_LEN | P.D_LEN | P.S_LEN | P.H_LEN length of floating point number
|
||||
// P.Q_NE | P.D_NE | P.S_NE | P.H_NE length of exponent
|
||||
// P.Q_NF | P.D_NF | P.S_NF | P.H_NF length of fraction
|
||||
// P.Q_BIAS | P.D_BIAS | P.S_BIAS | P.H_BIAS exponent's bias value
|
||||
// P.Q_FMT | P.D_FMT | P.S_FMT | P.H_FMT precision's format value - Q=11 D=01 Sticky=00 H=10
|
||||
|
||||
// Check NaN boxing
|
||||
always_comb
|
||||
case (Fmt)
|
||||
2'b11: BadNaNBox = 0;
|
||||
2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
|
||||
2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
|
||||
2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN];
|
||||
2'b01: BadNaNBox = ~&In[P.Q_LEN-1:P.D_LEN];
|
||||
2'b00: BadNaNBox = ~&In[P.Q_LEN-1:P.S_LEN];
|
||||
2'b10: BadNaNBox = ~&In[P.Q_LEN-1:P.H_LEN];
|
||||
endcase
|
||||
|
||||
always_comb
|
||||
if (BadNaNBox) begin
|
||||
case (Fmt)
|
||||
2'b11: PostBox = In;
|
||||
// 2'b01: PostBox = {{(`Q_LEN-`D_LEN){1'b1}}, 1'b1, {(`D_NE+1){1'b1}}, In[`D_LEN-`D_NE-3:0]};
|
||||
// 2'b00: PostBox = {{(`Q_LEN-`S_LEN){1'b1}}, 1'b1, {(`S_NE+1){1'b1}}, In[`S_LEN-`S_NE-3:0]};
|
||||
// 2'b10: PostBox = {{(`Q_LEN-`H_LEN){1'b1}}, 1'b1, {(`H_NE+1){1'b1}}, In[`H_LEN-`H_NE-3:0]};
|
||||
2'b01: PostBox = {{(`Q_LEN-`D_LEN){1'b1}}, 1'b1, {(`D_NE+1){1'b1}}, {(`D_LEN-`D_NE-2){1'b0}}};
|
||||
2'b00: PostBox = {{(`Q_LEN-`S_LEN){1'b1}}, 1'b1, {(`S_NE+1){1'b1}}, {(`S_LEN-`S_NE-2){1'b0}}};
|
||||
2'b10: PostBox = {{(`Q_LEN-`H_LEN){1'b1}}, 1'b1, {(`H_NE+1){1'b1}}, {(`H_LEN-`H_NE-2){1'b0}}};
|
||||
// 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, In[P.D_LEN-P.D_NE-3:0]};
|
||||
// 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, In[P.S_LEN-P.S_NE-3:0]};
|
||||
// 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, In[P.H_LEN-P.H_NE-3:0]};
|
||||
2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}};
|
||||
2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}};
|
||||
2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}};
|
||||
endcase
|
||||
end else
|
||||
PostBox = In;
|
||||
@ -242,29 +241,29 @@ module unpackinput (
|
||||
if (BadNaNBox) Sgn = 0; // improperly boxed NaNs are treated as positive
|
||||
else
|
||||
case (Fmt)
|
||||
2'b11: Sgn = In[`Q_LEN-1];
|
||||
2'b01: Sgn = In[`D_LEN-1];
|
||||
2'b00: Sgn = In[`S_LEN-1];
|
||||
2'b10: Sgn = In[`H_LEN-1];
|
||||
2'b11: Sgn = In[P.Q_LEN-1];
|
||||
2'b01: Sgn = In[P.D_LEN-1];
|
||||
2'b00: Sgn = In[P.S_LEN-1];
|
||||
2'b10: Sgn = In[P.H_LEN-1];
|
||||
endcase
|
||||
|
||||
|
||||
// extract the fraction
|
||||
always_comb
|
||||
case (Fmt)
|
||||
2'b11: Frac = In[`Q_NF-1:0];
|
||||
2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
|
||||
2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
|
||||
2'b10: Frac = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
|
||||
2'b11: Frac = In[P.Q_NF-1:0];
|
||||
2'b01: Frac = {In[P.D_NF-1:0], (P.Q_NF-P.D_NF)'(0)};
|
||||
2'b00: Frac = {In[P.S_NF-1:0], (P.Q_NF-P.S_NF)'(0)};
|
||||
2'b10: Frac = {In[P.H_NF-1:0], (P.Q_NF-P.H_NF)'(0)};
|
||||
endcase
|
||||
|
||||
// is the exponent non-zero
|
||||
always_comb
|
||||
case (Fmt)
|
||||
2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
|
||||
2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
|
||||
2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF];
|
||||
2'b10: ExpNonZero = |In[`H_LEN-2:`H_NF];
|
||||
2'b11: ExpNonZero = |In[P.Q_LEN-2:P.Q_NF];
|
||||
2'b01: ExpNonZero = |In[P.D_LEN-2:P.D_NF];
|
||||
2'b00: ExpNonZero = |In[P.S_LEN-2:P.S_NF];
|
||||
2'b10: ExpNonZero = |In[P.H_LEN-2:P.H_NF];
|
||||
endcase
|
||||
|
||||
|
||||
@ -280,20 +279,20 @@ module unpackinput (
|
||||
// 1 is added to the exponent if the input is zero or subnormal
|
||||
always_comb
|
||||
case (Fmt)
|
||||
2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
|
||||
2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
|
||||
2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
|
||||
2'b10: Exp = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero};
|
||||
2'b11: Exp = {In[P.Q_LEN-2:P.Q_NF+1], In[P.Q_NF]|~ExpNonZero};
|
||||
2'b01: Exp = {In[P.D_LEN-2], {P.Q_NE-P.D_NE{~In[P.D_LEN-2]}}, In[P.D_LEN-3:P.D_NF+1], In[P.D_NF]|~ExpNonZero};
|
||||
2'b00: Exp = {In[P.S_LEN-2], {P.Q_NE-P.S_NE{~In[P.S_LEN-2]}}, In[P.S_LEN-3:P.S_NF+1], In[P.S_NF]|~ExpNonZero};
|
||||
2'b10: Exp = {In[P.H_LEN-2], {P.Q_NE-P.H_NE{~In[P.H_LEN-2]}}, In[P.H_LEN-3:P.H_NF+1], In[P.H_NF]|~ExpNonZero};
|
||||
endcase
|
||||
|
||||
|
||||
// is the exponent all 1's
|
||||
always_comb
|
||||
case (Fmt)
|
||||
2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
|
||||
2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
|
||||
2'b00: ExpMax = &In[`S_LEN-2:`S_NF];
|
||||
2'b10: ExpMax = &In[`H_LEN-2:`H_NF];
|
||||
2'b11: ExpMax = &In[P.Q_LEN-2:P.Q_NF];
|
||||
2'b01: ExpMax = &In[P.D_LEN-2:P.D_NF];
|
||||
2'b00: ExpMax = &In[P.S_LEN-2:P.S_NF];
|
||||
2'b10: ExpMax = &In[P.H_LEN-2:P.H_NF];
|
||||
endcase
|
||||
|
||||
end
|
||||
@ -302,9 +301,9 @@ module unpackinput (
|
||||
assign FracZero = ~|Frac & ~BadNaNBox; // is the fraction zero?
|
||||
assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if Subnormal or zero) to create the significand
|
||||
assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN?
|
||||
assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a signaling NaN?
|
||||
assign SNaN = NaN&~Frac[P.NF-1]&~BadNaNBox; // is the input a signaling NaN?
|
||||
assign Inf = ExpMax & FracZero & En; // is the input infinity?
|
||||
assign Zero = ~ExpNonZero & FracZero; // is the input zero?
|
||||
assign Subnorm = ~ExpNonZero & ~FracZero & ~BadNaNBox; // is the input subnormal
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -26,8 +26,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module hazard (
|
||||
// Detect hazards
|
||||
input logic BPWrongE, CSRWriteFenceM, RetM, TrapM,
|
||||
|
@ -27,9 +27,7 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module bmuctrl(
|
||||
module bmuctrl import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk, reset,
|
||||
// Decode stage control signals
|
||||
input logic StallD, FlushD, // Stall, flush Decode stage
|
||||
@ -76,13 +74,13 @@ module bmuctrl(
|
||||
always_comb begin
|
||||
// BALUSelect_BSelect_ZBBSelect_BRegWrite_BALUSrcB_BW64_BALUOp_BSubArithD_RotateD_MaskD_PreShiftD_IllegalBitmanipInstrD
|
||||
BMUControlsD = `BMUCTRLW'b000_00_000_0_0_0_0_0_0_0_0_1; // default: Illegal bmu instruction;
|
||||
if (`ZBA_SUPPORTED) begin
|
||||
if (P.ZBA_SUPPORTED) begin
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0110011_0010000_010: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh1add
|
||||
17'b0110011_0010000_100: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh2add
|
||||
17'b0110011_0010000_110: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_0_1_0_0_0_1_0; // sh3add
|
||||
endcase
|
||||
if (`XLEN==64)
|
||||
if (P.XLEN==64)
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0111011_0010000_010: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0; // sh1add.uw
|
||||
17'b0111011_0010000_100: BMUControlsD = `BMUCTRLW'b000_01_000_1_0_1_1_0_0_0_1_0; // sh2add.uw
|
||||
@ -91,7 +89,7 @@ module bmuctrl(
|
||||
17'b0011011_000010?_001: BMUControlsD = `BMUCTRLW'b001_01_000_1_1_1_1_0_0_0_0_0; // slli.uw
|
||||
endcase
|
||||
end
|
||||
if (`ZBB_SUPPORTED) begin
|
||||
if (P.ZBB_SUPPORTED) begin
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0110011_0110000_001: BMUControlsD = `BMUCTRLW'b001_01_111_1_0_0_1_0_1_0_0_0; // rol
|
||||
17'b0110011_0110000_101: BMUControlsD = `BMUCTRLW'b001_01_111_1_0_0_1_0_1_0_0_0; // ror
|
||||
@ -100,13 +98,13 @@ module bmuctrl(
|
||||
else if ((Rs2D[4:2]==3'b000) & ~(Rs2D[1] & Rs2D[0]))
|
||||
BMUControlsD = `BMUCTRLW'b000_10_000_1_1_0_1_0_0_0_0_0; // count instruction
|
||||
// // coverage off: This case can't occur in RV64
|
||||
// 17'b0110011_0000100_100: if (`XLEN == 32)
|
||||
// 17'b0110011_0000100_100: if (P.XLEN == 32)
|
||||
// BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0; // zexth (rv32)
|
||||
// // coverage on
|
||||
17'b0110011_0100000_111: BMUControlsD = `BMUCTRLW'b111_01_111_1_0_0_1_1_0_0_0_0; // andn
|
||||
17'b0110011_0100000_110: BMUControlsD = `BMUCTRLW'b110_01_111_1_0_0_1_1_0_0_0_0; // orn
|
||||
17'b0110011_0100000_100: BMUControlsD = `BMUCTRLW'b100_01_111_1_0_0_1_1_0_0_0_0; // xnor
|
||||
17'b0010011_011010?_101: if ((`XLEN == 32 ^ Funct7D[0]) & (Rs2D == 5'b11000))
|
||||
17'b0010011_011010?_101: if ((P.XLEN == 32 ^ Funct7D[0]) & (Rs2D == 5'b11000))
|
||||
BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // rev8
|
||||
17'b0010011_0010100_101: if (Rs2D[4:0] == 5'b00111)
|
||||
BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0; // orc.b
|
||||
@ -115,12 +113,12 @@ module bmuctrl(
|
||||
17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // min
|
||||
17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0; // minu
|
||||
endcase
|
||||
if (`XLEN==32)
|
||||
if (P.XLEN==32)
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0110011_0000100_100: BMUControlsD = `BMUCTRLW'b000_10_001_1_1_0_1_0_0_0_0_0; // zexth (rv32)
|
||||
17'b0010011_0110000_101: BMUControlsD = `BMUCTRLW'b001_00_111_1_1_0_1_0_1_0_0_0; // rori (rv32)
|
||||
endcase
|
||||
else if (`XLEN==64)
|
||||
else if (P.XLEN==64)
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0111011_0000100_100: BMUControlsD = `BMUCTRLW'b000_10_001_1_0_0_1_0_0_0_0_0; // zexth (rv64)
|
||||
17'b0111011_0110000_001: BMUControlsD = `BMUCTRLW'b001_00_111_1_0_1_1_0_1_0_0_0; // rolw
|
||||
@ -131,25 +129,25 @@ module bmuctrl(
|
||||
BMUControlsD = `BMUCTRLW'b000_10_000_1_1_1_1_0_0_0_0_0; // count word instruction
|
||||
endcase
|
||||
end
|
||||
if (`ZBC_SUPPORTED)
|
||||
if (P.ZBC_SUPPORTED)
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0110011_0000101_0??: BMUControlsD = `BMUCTRLW'b000_11_000_1_0_0_1_0_0_0_0_0; // ZBC instruction
|
||||
endcase
|
||||
if (`ZBS_SUPPORTED) begin // ZBS
|
||||
if (P.ZBS_SUPPORTED) begin // ZBS
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0110011_0100100_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_0_0_1_1_0_1_0_0; // bclr
|
||||
17'b0110011_0100100_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_0_0_1_1_0_1_0_0; // bext
|
||||
17'b0110011_0110100_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_0_0_1_0_0_1_0_0; // binv
|
||||
17'b0110011_0010100_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_0_0_1_0_0_1_0_0; // bset
|
||||
endcase
|
||||
if (`XLEN==32) // ZBS 32-bit
|
||||
if (P.XLEN==32) // ZBS 32-bit
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0010011_0100100_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_1_0_1_1_0_1_0_0; // bclri
|
||||
17'b0010011_0100100_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_1_0_1_1_0_1_0_0; // bexti
|
||||
17'b0010011_0110100_001: BMUControlsD = `BMUCTRLW'b100_01_000_1_1_0_1_0_0_1_0_0; // binvi
|
||||
17'b0010011_0010100_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_1_0_1_0_0_1_0_0; // bseti
|
||||
endcase
|
||||
else if (`XLEN==64) // ZBS 64-bit
|
||||
else if (P.XLEN==64) // ZBS 64-bit
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0010011_010010?_001: BMUControlsD = `BMUCTRLW'b111_01_000_1_1_0_1_1_0_1_0_0; // bclri (rv64)
|
||||
17'b0010011_010010?_101: BMUControlsD = `BMUCTRLW'b101_01_000_1_1_0_1_1_0_1_0_0; // bexti (rv64)
|
||||
@ -157,7 +155,7 @@ module bmuctrl(
|
||||
17'b0010011_001010?_001: BMUControlsD = `BMUCTRLW'b110_01_000_1_1_0_1_0_0_1_0_0; // bseti (rv64)
|
||||
endcase
|
||||
end
|
||||
if (`ZBB_SUPPORTED | `ZBS_SUPPORTED) // rv32i/64i shift instructions need BMU ALUSelect when BMU shifter is used
|
||||
if (P.ZBB_SUPPORTED | P.ZBS_SUPPORTED) // rv32i/64i shift instructions need BMU ALUSelect when BMU shifter is used
|
||||
casez({OpD, Funct7D, Funct3D})
|
||||
17'b0110011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_0_0_1_0_0_0_0_0; // sra, srl, sll
|
||||
17'b0010011_0?0000?_?01: BMUControlsD = `BMUCTRLW'b001_00_000_1_1_0_1_0_0_0_0_0; // srai, srli, slli
|
||||
@ -176,5 +174,5 @@ module bmuctrl(
|
||||
assign ALUSelectD = BALUOpD ? BALUSelectD : (ALUOpD ? Funct3D : 3'b000);
|
||||
|
||||
// BMU Execute stage pipeline control register
|
||||
flopenrc#(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE});
|
||||
flopenrc #(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE});
|
||||
endmodule
|
||||
|
@ -27,10 +27,7 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
|
||||
module controller(
|
||||
module controller import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk, reset,
|
||||
// Decode stage control signals
|
||||
input logic StallD, FlushD, // Stall, flush Decode stage
|
||||
@ -142,30 +139,30 @@ module controller(
|
||||
// Be rigorous about detecting illegal instructions if CSRs or bit manipulation is supported
|
||||
// otherwise be cheap
|
||||
|
||||
if (`ZICSR_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED | `ZBC_SUPPORTED | `ZBS_SUPPORTED) begin:legalcheck // Exact integer decoding
|
||||
if (P.ZICSR_SUPPORTED | P.ZBA_SUPPORTED | P.ZBB_SUPPORTED | P.ZBC_SUPPORTED | P.ZBS_SUPPORTED) begin:legalcheck // Exact integer decoding
|
||||
logic Funct7ZeroD, Funct7b5D, IShiftD, INoShiftD;
|
||||
logic Funct7ShiftZeroD, Funct7Shiftb5D;
|
||||
|
||||
assign Funct7ZeroD = (Funct7D == 7'b0000000); // most R-type instructions
|
||||
assign Funct7b5D = (Funct7D == 7'b0100000); // srai, sub
|
||||
assign Funct7ShiftZeroD = (`XLEN==64) ? (Funct7D[6:1] == 6'b000000) : Funct7ZeroD;
|
||||
assign Funct7Shiftb5D = (`XLEN==64) ? (Funct7D[6:1] == 6'b010000) : Funct7b5D;
|
||||
assign Funct7ShiftZeroD = (P.XLEN==64) ? (Funct7D[6:1] == 6'b000000) : Funct7ZeroD;
|
||||
assign Funct7Shiftb5D = (P.XLEN==64) ? (Funct7D[6:1] == 6'b010000) : Funct7b5D;
|
||||
assign IShiftD = (Funct3D == 3'b001 & Funct7ShiftZeroD) | (Funct3D == 3'b101 & (Funct7ShiftZeroD | Funct7Shiftb5D)); // slli, srli, srai, or w forms
|
||||
assign INoShiftD = ((Funct3D != 3'b001) & (Funct3D != 3'b101));
|
||||
assign IFunctD = IShiftD | INoShiftD;
|
||||
assign RFunctD = ((Funct3D == 3'b000 | Funct3D == 3'b101) & Funct7b5D) | Funct7ZeroD;
|
||||
assign MFunctD = (Funct7D == 7'b0000001) & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
|
||||
assign MFunctD = (Funct7D == 7'b0000001) & (P.M_SUPPORTED | (P.ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
|
||||
assign LFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 | Funct3D == 3'b100 | Funct3D == 3'b101 |
|
||||
((`XLEN == 64) & (Funct3D == 3'b011 | Funct3D == 3'b110));
|
||||
((P.XLEN == 64) & (Funct3D == 3'b011 | Funct3D == 3'b110));
|
||||
assign SFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 |
|
||||
((`XLEN == 64) & (Funct3D == 3'b011));
|
||||
((P.XLEN == 64) & (Funct3D == 3'b011));
|
||||
assign BFunctD = (Funct3D[2:1] != 2'b01); // legal branches
|
||||
assign JFunctD = (Funct3D == 3'b000);
|
||||
assign IWValidFunct3D = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b101;
|
||||
end else begin:legalcheck2
|
||||
assign IFunctD = 1; // Don't bother to separate out shift decoding
|
||||
assign RFunctD = ~Funct7D[0]; // Not a multiply
|
||||
assign MFunctD = Funct7D[0] & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
|
||||
assign MFunctD = Funct7D[0] & (P.M_SUPPORTED | (P.ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv
|
||||
assign LFunctD = 1; // don't bother to check Funct3 for loads
|
||||
assign SFunctD = 1; // don't bother to check Funct3 for stores
|
||||
assign BFunctD = 1; // don't bother to check Funct3 for branches
|
||||
@ -182,19 +179,19 @@ module controller(
|
||||
7'b0000011: if (LFunctD)
|
||||
ControlsD = `CTRLW'b1_000_01_10_001_0_0_0_0_0_0_0_0_0_00_0; // loads
|
||||
7'b0000111: ControlsD = `CTRLW'b0_000_01_10_001_0_0_0_0_0_0_0_0_0_00_1; // flw - only legal if FP supported
|
||||
7'b0001111: if (`ZIFENCEI_SUPPORTED)
|
||||
7'b0001111: if (P.ZIFENCEI_SUPPORTED)
|
||||
ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_1_0_00_0; // fence
|
||||
else
|
||||
ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_0; // fence treated as nop
|
||||
7'b0010011: if (IFunctD)
|
||||
ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_0_0_0_0_0_00_0; // I-type ALU
|
||||
7'b0010111: ControlsD = `CTRLW'b1_100_11_00_000_0_0_0_0_0_0_0_0_0_00_0; // auipc
|
||||
7'b0011011: if (IFunctD & IWValidFunct3D & `XLEN == 64)
|
||||
7'b0011011: if (IFunctD & IWValidFunct3D & P.XLEN == 64)
|
||||
ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_1_0_0_0_0_00_0; // IW-type ALU for RV64i
|
||||
7'b0100011: if (SFunctD)
|
||||
ControlsD = `CTRLW'b0_001_01_01_000_0_0_0_0_0_0_0_0_0_00_0; // stores
|
||||
7'b0100111: ControlsD = `CTRLW'b0_001_01_01_000_0_0_0_0_0_0_0_0_0_00_1; // fsw - only legal if FP supported
|
||||
7'b0101111: if (`A_SUPPORTED) begin
|
||||
7'b0101111: if (P.A_SUPPORTED) begin
|
||||
if (InstrD[31:27] == 5'b00010)
|
||||
ControlsD = `CTRLW'b1_000_00_10_001_0_0_0_0_0_0_0_0_0_01_0; // lr
|
||||
else if (InstrD[31:27] == 5'b00011)
|
||||
@ -207,16 +204,16 @@ module controller(
|
||||
else if (MFunctD)
|
||||
ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_0_0_0_0_1_00_0; // Multiply/divide
|
||||
7'b0110111: ControlsD = `CTRLW'b1_100_01_00_000_0_0_0_1_0_0_0_0_0_00_0; // lui
|
||||
7'b0111011: if (RFunctD & (`XLEN == 64))
|
||||
7'b0111011: if (RFunctD & (P.XLEN == 64))
|
||||
ControlsD = `CTRLW'b1_000_00_00_000_0_1_0_0_1_0_0_0_0_00_0; // R-type W instructions for RV64i
|
||||
else if (MFunctD & (`XLEN == 64))
|
||||
else if (MFunctD & (P.XLEN == 64))
|
||||
ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_1_0_0_0_1_00_0; // W-type Multiply/Divide
|
||||
7'b1100011: if (BFunctD)
|
||||
ControlsD = `CTRLW'b0_010_11_00_000_1_0_0_0_0_0_0_0_0_00_0; // branches
|
||||
7'b1100111: if (JFunctD)
|
||||
ControlsD = `CTRLW'b1_000_01_00_000_0_0_1_1_0_0_0_0_0_00_0; // jalr
|
||||
7'b1101111: ControlsD = `CTRLW'b1_011_11_00_000_0_0_1_1_0_0_0_0_0_00_0; // jal
|
||||
7'b1110011: if (`ZICSR_SUPPORTED) begin
|
||||
7'b1110011: if (P.ZICSR_SUPPORTED) begin
|
||||
if (Funct3D == 3'b000)
|
||||
ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_1_0_0_00_0; // privileged; decoded further in privileged modules
|
||||
else
|
||||
@ -229,7 +226,7 @@ module controller(
|
||||
// Unswizzle control bits
|
||||
// Squash control signals if coming from an illegal compressed instruction
|
||||
// On RV32E, can't write to upper 16 registers. Checking reads to upper 16 is more costly so disregard them.
|
||||
assign IllegalERegAdrD = `E_SUPPORTED & `ZICSR_SUPPORTED & ControlsD[`CTRLW-1] & InstrD[11];
|
||||
assign IllegalERegAdrD = P.E_SUPPORTED & P.ZICSR_SUPPORTED & ControlsD[`CTRLW-1] & InstrD[11];
|
||||
//assign IllegalBaseInstrD = 1'b0;
|
||||
assign {BaseRegWriteD, ImmSrcD, ALUSrcAD, BaseALUSrcBD, MemRWD,
|
||||
ResultSrcD, BranchD, ALUOpD, JumpD, ALUResultSrcD, BaseW64D, CSRReadD,
|
||||
@ -247,17 +244,17 @@ module controller(
|
||||
assign BaseSubArithD = ALUOpD & (subD | sraD | sltD | sltuD);
|
||||
|
||||
// bit manipulation Configuration Block
|
||||
if (`ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED | `ZBC_SUPPORTED) begin: bitmanipi //change the conditional expression to OR any Z supported flags
|
||||
if (P.ZBS_SUPPORTED | P.ZBA_SUPPORTED | P.ZBB_SUPPORTED | P.ZBC_SUPPORTED) begin: bitmanipi //change the conditional expression to OR any Z supported flags
|
||||
logic IllegalBitmanipInstrD; // Unrecognized B instruction
|
||||
logic BRegWriteD; // Indicates if it is a R type BMU instruction in decode stage
|
||||
logic BW64D; // Indicates if it is a W type BMU instruction in decode stage
|
||||
logic BSubArithD; // TRUE for BMU ext, clr, andn, orn, xnor
|
||||
logic BALUSrcBD; // BMU alu src select signal
|
||||
|
||||
bmuctrl bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD,
|
||||
bmuctrl #(P) bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD,
|
||||
.BRegWriteD, .BALUSrcBD, .BW64D, .BSubArithD, .IllegalBitmanipInstrD, .StallE, .FlushE,
|
||||
.ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BALUControlE);
|
||||
if (`ZBA_SUPPORTED) begin
|
||||
if (P.ZBA_SUPPORTED) begin
|
||||
// ALU Decoding is more comprehensive when ZBA is supported. slt and slti conflicts with sh1add, sh1add.uw
|
||||
assign sltD = (Funct3D == 3'b010 & (~(Funct7D[4]) | ~OpD[5])) ;
|
||||
end else assign sltD = (Funct3D == 3'b010);
|
||||
@ -290,7 +287,7 @@ module controller(
|
||||
// Fences
|
||||
// Ordinary fence is presently a nop
|
||||
// fence.i flushes the D$ and invalidates the I$ if Zifencei is supported and I$ is implemented
|
||||
if (`ZIFENCEI_SUPPORTED & `ICACHE_SUPPORTED) begin:fencei
|
||||
if (P.ZIFENCEI_SUPPORTED & P.ICACHE_SUPPORTED) begin:fencei
|
||||
logic FenceID;
|
||||
assign FenceID = FenceXD & (Funct3D == 3'b001); // is it a FENCE.I instruction?
|
||||
assign InvalidateICacheD = FenceID;
|
||||
@ -338,5 +335,5 @@ module controller(
|
||||
|
||||
// the synchronous DTIM cannot read immediately after write
|
||||
// a cache cannot read or write immediately after a write
|
||||
assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & `DCACHE_SUPPORTED)) | (|AtomicD));
|
||||
assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED)) | (|AtomicD));
|
||||
endmodule
|
||||
|
@ -27,16 +27,14 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module datapath (
|
||||
module datapath import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk, reset,
|
||||
// Decode stage signals
|
||||
input logic [2:0] ImmSrcD, // Selects type of immediate extension
|
||||
input logic [31:0] InstrD, // Instruction in Decode stage
|
||||
// Execute stage signals
|
||||
input logic [`XLEN-1:0] PCE, // PC in Execute stage
|
||||
input logic [`XLEN-1:0] PCLinkE, // PC + 4 (of instruction in Execute stage)
|
||||
input logic [P.XLEN-1:0] PCE, // PC in Execute stage
|
||||
input logic [P.XLEN-1:0] PCLinkE, // PC + 4 (of instruction in Execute stage)
|
||||
input logic [2:0] Funct3E, // Funct3 field of instruction in Execute stage
|
||||
input logic StallE, FlushE, // Stall, flush Execute stage
|
||||
input logic [1:0] ForwardAE, ForwardBE, // Forward ALU operands from later stages
|
||||
@ -51,24 +49,24 @@ module datapath (
|
||||
input logic [2:0] ZBBSelectE, // ZBB mux select signal
|
||||
input logic [2:0] BALUControlE, // ALU Control signals for B instructions in Execute Stage
|
||||
output logic [1:0] FlagsE, // Comparison flags ({eq, lt})
|
||||
output logic [`XLEN-1:0] IEUAdrE, // Address computed by ALU
|
||||
output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B
|
||||
output logic [P.XLEN-1:0] IEUAdrE, // Address computed by ALU
|
||||
output logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU sources before the mux chooses between them and PCE to put in srcA/B
|
||||
// Memory stage signals
|
||||
input logic StallM, FlushM, // Stall, flush Memory stage
|
||||
input logic FWriteIntM, FCvtIntW, // FPU writes integer register file, FPU converts float to int
|
||||
input logic [`XLEN-1:0] FIntResM, // FPU integer result
|
||||
output logic [`XLEN-1:0] SrcAM, // ALU's Source A in Memory stage to privilege unit for CSR writes
|
||||
output logic [`XLEN-1:0] WriteDataM, // Write data in Memory stage
|
||||
input logic [P.XLEN-1:0] FIntResM, // FPU integer result
|
||||
output logic [P.XLEN-1:0] SrcAM, // ALU's Source A in Memory stage to privilege unit for CSR writes
|
||||
output logic [P.XLEN-1:0] WriteDataM, // Write data in Memory stage
|
||||
// Writeback stage signals
|
||||
input logic StallW, FlushW, // Stall, flush Writeback stage
|
||||
input logic RegWriteW, IntDivW, // Write register file, integer divide instruction
|
||||
input logic SquashSCW, // Squash a store conditional when a conflict arose
|
||||
input logic [2:0] ResultSrcW, // Select source of result to write back to register file
|
||||
input logic [`XLEN-1:0] FCvtIntResW, // FPU convert fp to integer result
|
||||
input logic [`XLEN-1:0] ReadDataW, // Read data from LSU
|
||||
input logic [`XLEN-1:0] CSRReadValW, // CSR read result
|
||||
input logic [`XLEN-1:0] MDUResultW, // MDU (Multiply/divide unit) result
|
||||
input logic [`XLEN-1:0] FIntDivResultW, // FPU's integer divide result
|
||||
input logic [P.XLEN-1:0] FCvtIntResW, // FPU convert fp to integer result
|
||||
input logic [P.XLEN-1:0] ReadDataW, // Read data from LSU
|
||||
input logic [P.XLEN-1:0] CSRReadValW, // CSR read result
|
||||
input logic [P.XLEN-1:0] MDUResultW, // MDU (Multiply/divide unit) result
|
||||
input logic [P.XLEN-1:0] FIntDivResultW, // FPU's integer divide result
|
||||
// Hazard Unit signals
|
||||
output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, // Register sources to read in Decode or Execute stage
|
||||
output logic [4:0] RdE, RdM, RdW // Register destinations in Execute, Memory, or Writeback stage
|
||||
@ -76,64 +74,64 @@ module datapath (
|
||||
|
||||
// Fetch stage signals
|
||||
// Decode stage signals
|
||||
logic [`XLEN-1:0] R1D, R2D; // Read data from Rs1 (RD1), Rs2 (RD2)
|
||||
logic [`XLEN-1:0] ImmExtD; // Extended immediate in Decode stage
|
||||
logic [P.XLEN-1:0] R1D, R2D; // Read data from Rs1 (RD1), Rs2 (RD2)
|
||||
logic [P.XLEN-1:0] ImmExtD; // Extended immediate in Decode stage
|
||||
logic [4:0] RdD; // Destination register in Decode stage
|
||||
// Execute stage signals
|
||||
logic [`XLEN-1:0] R1E, R2E; // Source operands read from register file
|
||||
logic [`XLEN-1:0] ImmExtE; // Extended immediate in Execute stage
|
||||
logic [`XLEN-1:0] SrcAE, SrcBE; // ALU operands
|
||||
logic [`XLEN-1:0] ALUResultE, AltResultE, IEUResultE; // ALU result, Alternative result (ImmExtE or PC+4), result of execution stage
|
||||
logic [P.XLEN-1:0] R1E, R2E; // Source operands read from register file
|
||||
logic [P.XLEN-1:0] ImmExtE; // Extended immediate in Execute stage
|
||||
logic [P.XLEN-1:0] SrcAE, SrcBE; // ALU operands
|
||||
logic [P.XLEN-1:0] ALUResultE, AltResultE, IEUResultE; // ALU result, Alternative result (ImmExtE or PC+4), result of execution stage
|
||||
// Memory stage signals
|
||||
logic [`XLEN-1:0] IEUResultM; // Result from execution stage
|
||||
logic [`XLEN-1:0] IFResultM; // Result from either IEU or single-cycle FPU op writing an integer register
|
||||
logic [P.XLEN-1:0] IEUResultM; // Result from execution stage
|
||||
logic [P.XLEN-1:0] IFResultM; // Result from either IEU or single-cycle FPU op writing an integer register
|
||||
// Writeback stage signals
|
||||
logic [`XLEN-1:0] SCResultW; // Store Conditional result
|
||||
logic [`XLEN-1:0] ResultW; // Result to write to register file
|
||||
logic [`XLEN-1:0] IFResultW; // Result from either IEU or single-cycle FPU op writing an integer register
|
||||
logic [`XLEN-1:0] IFCvtResultW; // Result from IEU, single-cycle FPU op, or 2-cycle FCVT float to int
|
||||
logic [`XLEN-1:0] MulDivResultW; // Multiply always comes from MDU. Divide could come from MDU or FPU (when using fdivsqrt for integer division)
|
||||
logic [P.XLEN-1:0] SCResultW; // Store Conditional result
|
||||
logic [P.XLEN-1:0] ResultW; // Result to write to register file
|
||||
logic [P.XLEN-1:0] IFResultW; // Result from either IEU or single-cycle FPU op writing an integer register
|
||||
logic [P.XLEN-1:0] IFCvtResultW; // Result from IEU, single-cycle FPU op, or 2-cycle FCVT float to int
|
||||
logic [P.XLEN-1:0] MulDivResultW; // Multiply always comes from MDU. Divide could come from MDU or FPU (when using fdivsqrt for integer division)
|
||||
|
||||
// Decode stage
|
||||
assign Rs1D = InstrD[19:15];
|
||||
assign Rs2D = InstrD[24:20];
|
||||
assign RdD = InstrD[11:7];
|
||||
regfile regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, R1D, R2D);
|
||||
extend ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ImmExtD);
|
||||
regfile #(P.XLEN, P.E_SUPPORTED) regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, R1D, R2D);
|
||||
extend #(P.XLEN, P.A_SUPPORTED) ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ImmExtD);
|
||||
|
||||
// Execute stage pipeline register and logic
|
||||
flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E);
|
||||
flopenrc #(`XLEN) RD2EReg(clk, reset, FlushE, ~StallE, R2D, R2E);
|
||||
flopenrc #(`XLEN) ImmExtEReg(clk, reset, FlushE, ~StallE, ImmExtD, ImmExtE);
|
||||
flopenrc #(P.XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E);
|
||||
flopenrc #(P.XLEN) RD2EReg(clk, reset, FlushE, ~StallE, R2D, R2E);
|
||||
flopenrc #(P.XLEN) ImmExtEReg(clk, reset, FlushE, ~StallE, ImmExtD, ImmExtE);
|
||||
flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E);
|
||||
flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E);
|
||||
flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE);
|
||||
|
||||
mux3 #(`XLEN) faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE);
|
||||
mux3 #(`XLEN) fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE);
|
||||
comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE);
|
||||
mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE);
|
||||
mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE);
|
||||
alu #(`XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE);
|
||||
mux2 #(`XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE);
|
||||
mux2 #(`XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE);
|
||||
mux3 #(P.XLEN) faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE);
|
||||
mux3 #(P.XLEN) fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE);
|
||||
comparator #(P.XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE);
|
||||
mux2 #(P.XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE);
|
||||
mux2 #(P.XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE);
|
||||
alu #(P.XLEN) alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE);
|
||||
mux2 #(P.XLEN) altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE);
|
||||
mux2 #(P.XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE);
|
||||
|
||||
// Memory stage pipeline register
|
||||
flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM);
|
||||
flopenrc #(`XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM);
|
||||
flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM);
|
||||
flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM);
|
||||
flopenrc #(P.XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM);
|
||||
flopenrc #(P.XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM);
|
||||
flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM);
|
||||
flopenrc #(P.XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM);
|
||||
|
||||
// Writeback stage pipeline register and logic
|
||||
flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
|
||||
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
|
||||
flopenrc #(P.XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
|
||||
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
|
||||
|
||||
// floating point inputs: FIntResM comes from fclass, fcmp, fmv; FCvtIntResW comes from fcvt
|
||||
if (`F_SUPPORTED) begin:fpmux
|
||||
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
|
||||
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
|
||||
if (`IDIV_ON_FPU) begin
|
||||
mux2 #(`XLEN) divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW);
|
||||
if (P.F_SUPPORTED) begin:fpmux
|
||||
mux2 #(P.XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
|
||||
mux2 #(P.XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
|
||||
if (P.IDIV_ON_FPU) begin
|
||||
mux2 #(P.XLEN) divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW);
|
||||
end else begin
|
||||
assign MulDivResultW = MDUResultW;
|
||||
end
|
||||
@ -142,9 +140,9 @@ module datapath (
|
||||
assign IFCvtResultW = IFResultW;
|
||||
assign MulDivResultW = MDUResultW;
|
||||
end
|
||||
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW);
|
||||
mux5 #(P.XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW);
|
||||
|
||||
// handle Store Conditional result if atomic extension supported
|
||||
if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW};
|
||||
if (P.A_SUPPORTED) assign SCResultW = {{(P.XLEN-1){1'b0}}, SquashSCW};
|
||||
else assign SCResultW = 0;
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -27,29 +27,27 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module extend (
|
||||
module extend #(parameter XLEN, A_SUPPORTED) (
|
||||
input logic [31:7] InstrD, // All instruction bits except opcode (lower 7 bits)
|
||||
input logic [2:0] ImmSrcD, // Select what kind of extension to perform
|
||||
output logic [`XLEN-1:0 ] ImmExtD); // Extended immediate
|
||||
output logic [XLEN-1:0 ] ImmExtD); // Extended immediate
|
||||
|
||||
localparam [`XLEN-1:0] undefined = {(`XLEN){1'bx}}; // could change to 0 after debug
|
||||
localparam [XLEN-1:0] undefined = {(XLEN){1'bx}}; // could change to 0 after debug
|
||||
|
||||
always_comb
|
||||
case(ImmSrcD)
|
||||
// I-type
|
||||
3'b000: ImmExtD = {{(`XLEN-12){InstrD[31]}}, InstrD[31:20]};
|
||||
3'b000: ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[31:20]};
|
||||
// S-type (stores)
|
||||
3'b001: ImmExtD = {{(`XLEN-12){InstrD[31]}}, InstrD[31:25], InstrD[11:7]};
|
||||
3'b001: ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[31:25], InstrD[11:7]};
|
||||
// B-type (branches)
|
||||
3'b010: ImmExtD = {{(`XLEN-12){InstrD[31]}}, InstrD[7], InstrD[30:25], InstrD[11:8], 1'b0};
|
||||
3'b010: ImmExtD = {{(XLEN-12){InstrD[31]}}, InstrD[7], InstrD[30:25], InstrD[11:8], 1'b0};
|
||||
// J-type (jal)
|
||||
3'b011: ImmExtD = {{(`XLEN-20){InstrD[31]}}, InstrD[19:12], InstrD[20], InstrD[30:21], 1'b0};
|
||||
3'b011: ImmExtD = {{(XLEN-20){InstrD[31]}}, InstrD[19:12], InstrD[20], InstrD[30:21], 1'b0};
|
||||
// U-type (lui, auipc)
|
||||
3'b100: ImmExtD = {{(`XLEN-31){InstrD[31]}}, InstrD[30:12], 12'b0};
|
||||
3'b100: ImmExtD = {{(XLEN-31){InstrD[31]}}, InstrD[30:12], 12'b0};
|
||||
// Store Conditional: zero offset
|
||||
3'b101: if (`A_SUPPORTED) ImmExtD = 0;
|
||||
3'b101: if (A_SUPPORTED) ImmExtD = 0;
|
||||
else ImmExtD = undefined;
|
||||
default: ImmExtD = undefined; // undefined
|
||||
endcase
|
||||
|
@ -27,8 +27,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module forward(
|
||||
// Detect hazards
|
||||
input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, // Source and destination registers
|
||||
|
@ -26,45 +26,44 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module ieu (
|
||||
module ieu import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk, reset,
|
||||
// Decode stage signals
|
||||
input logic [31:0] InstrD, // Instruction
|
||||
input logic IllegalIEUFPUInstrD, // Illegal instruction
|
||||
output logic IllegalBaseInstrD, // Illegal I-type instruction, or illegal RV32 access to upper 16 registers
|
||||
// Execute stage signals
|
||||
input logic [`XLEN-1:0] PCE, // PC
|
||||
input logic [`XLEN-1:0] PCLinkE, // PC + 4
|
||||
input logic [P.XLEN-1:0] PCE, // PC
|
||||
input logic [P.XLEN-1:0] PCLinkE, // PC + 4
|
||||
output logic PCSrcE, // Select next PC (between PC+4 and IEUAdrE)
|
||||
input logic FWriteIntE, FCvtIntE, // FPU writes to integer register file, FPU converts float to int
|
||||
output logic [`XLEN-1:0] IEUAdrE, // Memory address
|
||||
output logic [P.XLEN-1:0] IEUAdrE, // Memory address
|
||||
output logic IntDivE, W64E, // Integer divide, RV64 W-type instruction
|
||||
output logic [2:0] Funct3E, // Funct3 instruction field
|
||||
output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU src inputs before the mux choosing between them and PCE to put in srcA/B
|
||||
output logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU src inputs before the mux choosing between them and PCE to put in srcA/B
|
||||
output logic [4:0] RdE, // Destination register
|
||||
// Memory stage signals
|
||||
input logic SquashSCW, // Squash store conditional, from LSU
|
||||
output logic [1:0] MemRWM, // Read/write control goes to LSU
|
||||
output logic [1:0] AtomicM, // Atomic control goes to LSU
|
||||
output logic [`XLEN-1:0] WriteDataM, // Write data to LSU
|
||||
output logic [P.XLEN-1:0] WriteDataM, // Write data to LSU
|
||||
output logic [2:0] Funct3M, // Funct3 (size and signedness) to LSU
|
||||
output logic [`XLEN-1:0] SrcAM, // ALU SrcA to Privileged unit and FPU
|
||||
output logic [P.XLEN-1:0] SrcAM, // ALU SrcA to Privileged unit and FPU
|
||||
output logic [4:0] RdM, // Destination register
|
||||
input logic [`XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp)
|
||||
input logic [P.XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp)
|
||||
output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$
|
||||
output logic InstrValidD, InstrValidE, InstrValidM,// Instruction is valid
|
||||
output logic BranchD, BranchE,
|
||||
output logic JumpD, JumpE,
|
||||
// Writeback stage signals
|
||||
input logic [`XLEN-1:0] FIntDivResultW, // Integer divide result from FPU (fdivsqrt)
|
||||
input logic [`XLEN-1:0] CSRReadValW, // CSR read value,
|
||||
input logic [`XLEN-1:0] MDUResultW, // multiply/divide unit result
|
||||
input logic [`XLEN-1:0] FCvtIntResW, // FPU's float to int conversion result
|
||||
input logic [P.XLEN-1:0] FIntDivResultW, // Integer divide result from FPU (fdivsqrt)
|
||||
input logic [P.XLEN-1:0] CSRReadValW, // CSR read value,
|
||||
input logic [P.XLEN-1:0] MDUResultW, // multiply/divide unit result
|
||||
input logic [P.XLEN-1:0] FCvtIntResW, // FPU's float to int conversion result
|
||||
input logic FCvtIntW, // FPU converts float to int
|
||||
output logic [4:0] RdW, // Destination register
|
||||
input logic [`XLEN-1:0] ReadDataW, // LSU's read data
|
||||
input logic [P.XLEN-1:0] ReadDataW, // LSU's read data
|
||||
// Hazard unit signals
|
||||
input logic StallD, StallE, StallM, StallW, // Stall signals from hazard unit
|
||||
input logic FlushD, FlushE, FlushM, FlushW, // Flush signals
|
||||
@ -96,7 +95,7 @@ module ieu (
|
||||
logic BranchSignedE; // Branch does signed comparison on operands
|
||||
logic MDUE; // Multiply/divide instruction
|
||||
|
||||
controller c(
|
||||
controller #(P) c(
|
||||
.clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD,
|
||||
.IllegalIEUFPUInstrD, .IllegalBaseInstrD, .StallE, .FlushE, .FlagsE, .FWriteIntE,
|
||||
.PCSrcE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .MemReadE, .CSRReadE,
|
||||
@ -105,7 +104,7 @@ controller c(
|
||||
.RegWriteM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM,
|
||||
.StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD);
|
||||
|
||||
datapath dp(
|
||||
datapath #(P) dp(
|
||||
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, .W64E, .SubArithE,
|
||||
.Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .JumpE, .BranchSignedE,
|
||||
.PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, .BSelectE, .ZBBSelectE, .BALUControlE,
|
||||
|
@ -27,18 +27,16 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module regfile (
|
||||
module regfile #(parameter XLEN, E_SUPPORTED) (
|
||||
input logic clk, reset,
|
||||
input logic we3, // Write enable
|
||||
input logic [4:0] a1, a2, a3, // Source registers to read (a1, a2), destination register to write (a3)
|
||||
input logic [`XLEN-1:0] wd3, // Write data for port 3
|
||||
output logic [`XLEN-1:0] rd1, rd2); // Read data for ports 1, 2
|
||||
input logic [XLEN-1:0] wd3, // Write data for port 3
|
||||
output logic [XLEN-1:0] rd1, rd2); // Read data for ports 1, 2
|
||||
|
||||
localparam NUMREGS = `E_SUPPORTED ? 16 : 32; // only 16 registers in E mode
|
||||
localparam NUMREGS = E_SUPPORTED ? 16 : 32; // only 16 registers in E mode
|
||||
|
||||
logic [`XLEN-1:0] rf[NUMREGS-1:1];
|
||||
logic [XLEN-1:0] rf[NUMREGS-1:1];
|
||||
integer i;
|
||||
|
||||
// Three ported register file
|
||||
|
@ -27,9 +27,7 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module RASPredictor #(parameter int StackSize = 16 )(
|
||||
module RASPredictor import cvw::*; #(parameter cvw_t P, StackSize = 16 )(
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM,
|
||||
@ -37,15 +35,15 @@ module RASPredictor #(parameter int StackSize = 16 )(
|
||||
input logic ReturnD,
|
||||
input logic ReturnE, CallE, // Instr class
|
||||
input logic BPReturnF,
|
||||
input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a call
|
||||
output logic [`XLEN-1:0] RASPCF // Top of the stack
|
||||
input logic [P.XLEN-1:0] PCLinkE, // PC of instruction after a call
|
||||
output logic [P.XLEN-1:0] RASPCF // Top of the stack
|
||||
);
|
||||
|
||||
logic CounterEn;
|
||||
localparam Depth = $clog2(StackSize);
|
||||
|
||||
logic [Depth-1:0] NextPtr, Ptr, P1, M1, IncDecPtr;
|
||||
logic [StackSize-1:0] [`XLEN-1:0] memory;
|
||||
logic [StackSize-1:0] [P.XLEN-1:0] memory;
|
||||
integer index;
|
||||
|
||||
logic PopF;
|
||||
@ -85,7 +83,7 @@ module RASPredictor #(parameter int StackSize = 16 )(
|
||||
always_ff @ (posedge clk) begin
|
||||
if(reset) begin
|
||||
for(index=0; index<StackSize; index++)
|
||||
memory[index] <= {`XLEN{1'b0}};
|
||||
memory[index] <= {P.XLEN{1'b0}};
|
||||
end else if(PushE) begin
|
||||
memory[NextPtr] <= #1 PCLinkE;
|
||||
end
|
||||
|
@ -26,27 +26,25 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
`define INSTR_CLASS_PRED 1
|
||||
|
||||
module bpred (
|
||||
module bpred import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk, reset,
|
||||
input logic StallF, StallD, StallE, StallM, StallW,
|
||||
input logic FlushD, FlushE, FlushM, FlushW,
|
||||
// Fetch stage
|
||||
// the prediction
|
||||
input logic [31:0] InstrD, // Decompressed decode stage instruction. Used to decode instruction class
|
||||
input logic [`XLEN-1:0] PCNextF, // Next Fetch Address
|
||||
input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4
|
||||
output logic [`XLEN-1:0] PC1NextF, // Branch Predictor predicted or corrected fetch address on miss prediction
|
||||
output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage
|
||||
input logic [P.XLEN-1:0] PCNextF, // Next Fetch Address
|
||||
input logic [P.XLEN-1:0] PCPlus2or4F, // PCF+2/4
|
||||
output logic [P.XLEN-1:0] PC1NextF, // Branch Predictor predicted or corrected fetch address on miss prediction
|
||||
output logic [P.XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage
|
||||
|
||||
// Update Predictor
|
||||
input logic [`XLEN-1:0] PCF, // Fetch stage instruction address
|
||||
input logic [`XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took
|
||||
input logic [`XLEN-1:0] PCE, // Execution stage instruction address
|
||||
input logic [`XLEN-1:0] PCM, // Memory stage instruction address
|
||||
input logic [P.XLEN-1:0] PCF, // Fetch stage instruction address
|
||||
input logic [P.XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took
|
||||
input logic [P.XLEN-1:0] PCE, // Execution stage instruction address
|
||||
input logic [P.XLEN-1:0] PCM, // Memory stage instruction address
|
||||
|
||||
input logic [31:0] PostSpillInstrRawF, // Instruction
|
||||
|
||||
@ -55,9 +53,9 @@ module bpred (
|
||||
input logic BranchD, BranchE,
|
||||
input logic JumpD, JumpE,
|
||||
input logic PCSrcE, // Execution stage branch is taken
|
||||
input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address
|
||||
input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address
|
||||
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
|
||||
input logic [P.XLEN-1:0] IEUAdrE, // The branch/jump target address
|
||||
input logic [P.XLEN-1:0] IEUAdrM, // The branch/jump target address
|
||||
input logic [P.XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
|
||||
output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as call, return, jr (not return), j, br
|
||||
|
||||
// Report branch prediction status
|
||||
@ -71,21 +69,21 @@ module bpred (
|
||||
|
||||
logic [1:0] BPDirPredF;
|
||||
|
||||
logic [`XLEN-1:0] BPBTAF, RASPCF;
|
||||
logic [P.XLEN-1:0] BPBTAF, RASPCF;
|
||||
logic BPPCWrongE;
|
||||
logic IClassWrongE;
|
||||
logic BPDirPredWrongE;
|
||||
|
||||
logic BPPCSrcF;
|
||||
logic [`XLEN-1:0] BPPCF;
|
||||
logic [`XLEN-1:0] PC0NextF;
|
||||
logic [`XLEN-1:0] PCCorrectE;
|
||||
logic [P.XLEN-1:0] BPPCF;
|
||||
logic [P.XLEN-1:0] PC0NextF;
|
||||
logic [P.XLEN-1:0] PCCorrectE;
|
||||
logic [3:0] WrongPredInstrClassD;
|
||||
|
||||
logic BTBTargetWrongE;
|
||||
logic RASTargetWrongE;
|
||||
|
||||
logic [`XLEN-1:0] BPBTAD;
|
||||
logic [P.XLEN-1:0] BPBTAD;
|
||||
|
||||
logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF;
|
||||
logic BPBranchF, BPJumpF, BPReturnF, BPCallF;
|
||||
@ -95,57 +93,58 @@ module bpred (
|
||||
logic BranchM, JumpM, ReturnM, CallM;
|
||||
logic BranchW, JumpW, ReturnW, CallW;
|
||||
logic BPReturnWrongD;
|
||||
logic [`XLEN-1:0] BPBTAE;
|
||||
logic [P.XLEN-1:0] BPBTAE;
|
||||
|
||||
// Part 1 branch direction prediction
|
||||
// look into the 2 port Sram model. something is wrong.
|
||||
if (`BPRED_TYPE == "BP_TWOBIT") begin:Predictor
|
||||
twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW,
|
||||
if (P.BPRED_TYPE == BP_TWOBIT) begin:Predictor
|
||||
twoBitPredictor #(P.XLEN, P.BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW,
|
||||
.FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
|
||||
.BranchE, .BranchM, .PCSrcE);
|
||||
|
||||
end else if (`BPRED_TYPE == "BP_GSHARE") begin:Predictor
|
||||
gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
end else if (P.BPRED_TYPE == BP_GSHARE) begin:Predictor
|
||||
gshare #(P.XLEN, P.BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PCNextF, .PCF, .PCD, .PCE, .PCM, .BPDirPredF, .BPDirPredWrongE,
|
||||
.BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW,
|
||||
.PCSrcE);
|
||||
|
||||
end else if (`BPRED_TYPE == "BP_GLOBAL") begin:Predictor
|
||||
gshare #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
end else if (P.BPRED_TYPE == BP_GLOBAL) begin:Predictor
|
||||
gshare #(P.XLEN, P.BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PCNextF, .PCF, .PCD, .PCE, .PCM, .BPDirPredF, .BPDirPredWrongE,
|
||||
.BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW,
|
||||
.PCSrcE);
|
||||
|
||||
end else if (`BPRED_TYPE == "BP_GSHARE_BASIC") begin:Predictor
|
||||
gsharebasic #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
end else if (P.BPRED_TYPE == BP_GSHARE_BASIC) begin:Predictor
|
||||
gsharebasic #(P.XLEN, P.BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
|
||||
.BranchE, .BranchM, .PCSrcE);
|
||||
|
||||
end else if (`BPRED_TYPE == "BP_GLOBAL_BASIC") begin:Predictor
|
||||
gsharebasic #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
end else if (P.BPRED_TYPE == BP_GLOBAL_BASIC) begin:Predictor
|
||||
gsharebasic #(P.XLEN, P.BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
|
||||
.BranchE, .BranchM, .PCSrcE);
|
||||
|
||||
end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor
|
||||
// *** Fix me
|
||||
/* -----\/----- EXCLUDED -----\/-----
|
||||
localHistoryPredictor DirPredictor(.clk,
|
||||
.reset, .StallF, .StallE,
|
||||
.LookUpPC(PCNextF),
|
||||
.Prediction(BPDirPredF),
|
||||
// update
|
||||
.UpdatePC(PCE),
|
||||
.UpdateEN(InstrClassE[0] & ~StallE),
|
||||
.PCSrcE,
|
||||
.UpdatePrediction(InstrClassE[0]));
|
||||
-----/\----- EXCLUDED -----/\----- */
|
||||
end else if (P.BPRED_TYPE == BP_LOCAL_BASIC) begin:Predictor
|
||||
localbpbasic #(P.XLEN, P.BPRED_NUM_LHR, P.BPRED_SIZE) DirPredictor(.clk, .reset,
|
||||
.StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE,
|
||||
.BranchE, .BranchM, .PCSrcE);
|
||||
end else if (P.BPRED_TYPE == BP_LOCAL_AHEAD) begin:Predictor
|
||||
localaheadbp #(P.XLEN, P.BPRED_NUM_LHR, P.BPRED_SIZE) DirPredictor(.clk, .reset,
|
||||
.StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PCNextF, .PCM, .BPDirPredD(BPDirPredF), .BPDirPredWrongE,
|
||||
.BranchE, .BranchM, .PCSrcE);
|
||||
end else if (P.BPRED_TYPE == BP_LOCAL_REPAIR) begin:Predictor
|
||||
localrepairbp #(P.XLEN, P.BPRED_NUM_LHR, P.BPRED_SIZE) DirPredictor(.clk, .reset,
|
||||
.StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PCNextF, .PCE, .PCM, .BPDirPredD(BPDirPredF), .BPDirPredWrongE,
|
||||
.BranchD, .BranchE, .BranchM, .PCSrcE);
|
||||
end
|
||||
|
||||
// Part 2 Branch target address prediction
|
||||
// BTB contains target address for all CFI
|
||||
|
||||
btb #(`BTB_SIZE)
|
||||
btb #(P, P.BTB_SIZE)
|
||||
TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PCNextF, .PCF, .PCD, .PCE, .PCM,
|
||||
.BPBTAF, .BPBTAD, .BPBTAE,
|
||||
@ -157,13 +156,13 @@ module bpred (
|
||||
.InstrClassM({CallM, ReturnM, JumpM, BranchM}),
|
||||
.InstrClassW({CallW, ReturnW, JumpW, BranchW}));
|
||||
|
||||
icpred #(`INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
icpred #(P, `INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW,
|
||||
.CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF,
|
||||
.BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .IClassWrongM, .IClassWrongE, .BPReturnWrongD);
|
||||
|
||||
// Part 3 RAS
|
||||
RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
|
||||
RASPredictor #(P) RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
|
||||
.BPReturnF, .ReturnD, .ReturnE, .CallE,
|
||||
.BPReturnWrongD, .RASPCF, .PCLinkE);
|
||||
|
||||
@ -179,21 +178,21 @@ module bpred (
|
||||
|
||||
// Output the predicted PC or corrected PC on miss-predict.
|
||||
assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF;
|
||||
mux2 #(`XLEN) pcmuxbp(BPBTAF, RASPCF, BPReturnF, BPPCF);
|
||||
mux2 #(P.XLEN) pcmuxbp(BPBTAF, RASPCF, BPReturnF, BPPCF);
|
||||
// Selects the BP or PC+2/4.
|
||||
mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PC0NextF);
|
||||
mux2 #(P.XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PC0NextF);
|
||||
// If the prediction is wrong select the correct address.
|
||||
mux2 #(`XLEN) pcmux1(PC0NextF, PCCorrectE, BPWrongE, PC1NextF);
|
||||
mux2 #(P.XLEN) pcmux1(PC0NextF, PCCorrectE, BPWrongE, PC1NextF);
|
||||
// Correct branch/jump target.
|
||||
mux2 #(`XLEN) pccorrectemux(PCLinkE, IEUAdrE, PCSrcE, PCCorrectE);
|
||||
mux2 #(P.XLEN) pccorrectemux(PCLinkE, IEUAdrE, PCSrcE, PCCorrectE);
|
||||
|
||||
// If the fence/csrw was predicted as a taken branch then we select PCF, rather than PCE.
|
||||
// Effectively this is PCM+4 or the nonexistent PCLinkM
|
||||
if(`INSTR_CLASS_PRED) mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPWrongM, NextValidPCE);
|
||||
if(`INSTR_CLASS_PRED) mux2 #(P.XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPWrongM, NextValidPCE);
|
||||
else assign NextValidPCE = PCE;
|
||||
|
||||
if(`ZICOUNTERS_SUPPORTED) begin
|
||||
logic [`XLEN-1:0] RASPCD, RASPCE;
|
||||
if(P.ZICOUNTERS_SUPPORTED) begin
|
||||
logic [P.XLEN-1:0] RASPCD, RASPCE;
|
||||
logic BTAWrongE, RASPredPCWrongE;
|
||||
// performance counters
|
||||
// 1. class (class wrong / minstret) (IClassWrongM / csr) // Correct now
|
||||
@ -209,8 +208,8 @@ module bpred (
|
||||
assign BTAWrongE = (BPBTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE;
|
||||
assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE;
|
||||
|
||||
flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
|
||||
flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
|
||||
flopenrc #(P.XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
|
||||
flopenrc #(P.XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
|
||||
flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM,
|
||||
{BPDirPredWrongE, BTAWrongE, RASPredPCWrongE},
|
||||
{BPDirPredWrongM, BTAWrongM, RASPredPCWrongM});
|
||||
|
@ -28,22 +28,20 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module btb #(parameter Depth = 10 ) (
|
||||
module btb import cvw::*; #(parameter cvw_t P, Depth = 10 ) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW,
|
||||
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, // PC at various stages
|
||||
output logic [`XLEN-1:0] BPBTAF, // BTB's guess at PC
|
||||
output logic [`XLEN-1:0] BPBTAD,
|
||||
output logic [`XLEN-1:0] BPBTAE,
|
||||
input logic [P.XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, // PC at various stages
|
||||
output logic [P.XLEN-1:0] BPBTAF, // BTB's guess at PC
|
||||
output logic [P.XLEN-1:0] BPBTAD,
|
||||
output logic [P.XLEN-1:0] BPBTAE,
|
||||
output logic [3:0] BTBIClassF, // BTB's guess at instruction class
|
||||
// update
|
||||
input logic IClassWrongM, // BTB's instruction class guess was wrong
|
||||
input logic IClassWrongE,
|
||||
input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb
|
||||
input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb
|
||||
input logic [P.XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb
|
||||
input logic [P.XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb
|
||||
input logic [3:0] InstrClassD, // Instruction class to insert into btb
|
||||
input logic [3:0] InstrClassE, // Instruction class to insert into btb
|
||||
input logic [3:0] InstrClassM, // Instruction class to insert into btb
|
||||
@ -51,12 +49,12 @@ module btb #(parameter Depth = 10 ) (
|
||||
);
|
||||
|
||||
logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex;
|
||||
logic [`XLEN-1:0] ResetPC;
|
||||
logic [P.XLEN-1:0] ResetPC;
|
||||
logic MatchD, MatchE, MatchM, MatchW, MatchX;
|
||||
logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF;
|
||||
logic [`XLEN+3:0] TableBTBPredF;
|
||||
logic [`XLEN-1:0] IEUAdrW;
|
||||
logic [`XLEN-1:0] PCW;
|
||||
logic [P.XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF;
|
||||
logic [P.XLEN+3:0] TableBTBPredF;
|
||||
logic [P.XLEN-1:0] IEUAdrW;
|
||||
logic [P.XLEN-1:0] PCW;
|
||||
logic BTBWrongE, BPBTAWrongE;
|
||||
logic BTBWrongM, BPBTAWrongM;
|
||||
|
||||
@ -75,7 +73,7 @@ module btb #(parameter Depth = 10 ) (
|
||||
// during reset. The BTB must produce a non X PC1NextF to allow the simulation to run.
|
||||
// While the mux could be included in IFU it is not necessary for the IROM/I$/bus.
|
||||
// For now it is optimal to leave it here.
|
||||
assign ResetPC = `RESET_VECTOR;
|
||||
assign ResetPC = P.RESET_VECTOR[P.XLEN-1:0];
|
||||
assign PCNextFIndex = reset ? ResetPC[Depth+1:2] : {PCNextF[Depth+1] ^ PCNextF[1], PCNextF[Depth:2]};
|
||||
|
||||
assign MatchD = PCFIndex == PCDIndex;
|
||||
@ -93,22 +91,22 @@ module btb #(parameter Depth = 10 ) (
|
||||
|
||||
|
||||
// An optimization may be using a PC relative address.
|
||||
ram2p1r1wbe #(2**Depth, `XLEN+4) memory(
|
||||
ram2p1r1wbe #(2**Depth, P.XLEN+4) memory(
|
||||
.clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF),
|
||||
.ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1));
|
||||
|
||||
flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BPBTAF, BPBTAD);
|
||||
flopenrc #(P.XLEN) BTBD(clk, reset, FlushD, ~StallD, BPBTAF, BPBTAD);
|
||||
|
||||
// BPBTAE is not strictly necessary. However it is used by two parts of wally.
|
||||
// 1. It gates updates to the BTB when the prediction does not change. This saves power.
|
||||
// 2. BPBTAWrongE is used by the performance counters to track when the BTB's BPBTA or instruction class is wrong.
|
||||
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BPBTAD, BPBTAE);
|
||||
flopenrc #(P.XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BPBTAD, BPBTAE);
|
||||
assign BPBTAWrongE = (BPBTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]);
|
||||
|
||||
flopenrc #(1) BPBTAWrongMReg(clk, reset, FlushM, ~StallM, BPBTAWrongE, BPBTAWrongM);
|
||||
assign BTBWrongM = BPBTAWrongM | IClassWrongM;
|
||||
|
||||
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
|
||||
flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);
|
||||
flopenr #(P.XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
|
||||
flopenr #(P.XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);
|
||||
|
||||
endmodule
|
||||
|
@ -27,9 +27,9 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module gshare #(parameter k = 10,
|
||||
module gshare #(parameter XLEN,
|
||||
parameter k = 10,
|
||||
parameter integer TYPE = 1) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
@ -38,7 +38,7 @@ module gshare #(parameter k = 10,
|
||||
output logic [1:0] BPDirPredF,
|
||||
output logic BPDirPredWrongE,
|
||||
// update
|
||||
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,
|
||||
input logic [XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,
|
||||
input logic BPBranchF, BranchD, BranchE, BranchM, BranchW, PCSrcE
|
||||
);
|
||||
|
||||
|
@ -27,9 +27,8 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module gsharebasic #(parameter k = 10,
|
||||
module gsharebasic #(parameter XLEN,
|
||||
parameter k = 10,
|
||||
parameter TYPE = 1) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
@ -38,7 +37,7 @@ module gsharebasic #(parameter k = 10,
|
||||
output logic [1:0] BPDirPredF,
|
||||
output logic BPDirPredWrongE,
|
||||
// update
|
||||
input logic [`XLEN-1:0] PCNextF, PCM,
|
||||
input logic [XLEN-1:0] PCNextF, PCM,
|
||||
input logic BranchE, BranchM, PCSrcE
|
||||
);
|
||||
|
||||
|
@ -26,10 +26,7 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
|
||||
module icpred #(parameter INSTR_CLASS_PRED = 1)(
|
||||
module icpred import cvw::*; #(parameter cvw_t P, INSTR_CLASS_PRED = 1)(
|
||||
input logic clk, reset,
|
||||
input logic StallF, StallD, StallE, StallM, StallW,
|
||||
input logic FlushD, FlushE, FlushM, FlushW,
|
||||
@ -56,10 +53,10 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)(
|
||||
logic ccall, cj, cjr, ccallr, CJumpF, CBranchF;
|
||||
logic NCJumpF, NCBranchF;
|
||||
|
||||
if(`C_SUPPORTED) begin
|
||||
if(P.C_SUPPORTED) begin
|
||||
logic [4:0] CompressedOpcF;
|
||||
assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]};
|
||||
assign ccall = CompressedOpcF == 5'h09 & `XLEN == 32;
|
||||
assign ccall = CompressedOpcF == 5'h09 & P.XLEN == 32;
|
||||
assign cj = CompressedOpcF == 5'h0d;
|
||||
assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
|
||||
assign ccallr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
|
||||
@ -72,13 +69,13 @@ module icpred #(parameter INSTR_CLASS_PRED = 1)(
|
||||
assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F;
|
||||
assign NCBranchF = PostSpillInstrRawF[6:0] == 7'h63;
|
||||
|
||||
assign BPBranchF = NCBranchF | (`C_SUPPORTED & CBranchF);
|
||||
assign BPJumpF = NCJumpF | (`C_SUPPORTED & (CJumpF));
|
||||
assign BPBranchF = NCBranchF | (P.C_SUPPORTED & CBranchF);
|
||||
assign BPJumpF = NCJumpF | (P.C_SUPPORTED & (CJumpF));
|
||||
assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // returnurn must returnurn to ra or r5
|
||||
(`C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
|
||||
(P.C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
|
||||
|
||||
assign BPCallF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // call(r) must link to ra or x5
|
||||
(`C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
|
||||
(P.C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
|
||||
|
||||
end else begin
|
||||
// This section connects the BTB's instruction class prediction.
|
||||
|
@ -1,130 +0,0 @@
|
||||
///////////////////////////////////////////
|
||||
// localHistoryPredictor.sv
|
||||
//
|
||||
// Written: Shreya Sanghai
|
||||
// Email: ssanghai@hmc.edu
|
||||
// Created: March 16, 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Local history branch predictor with parameterized local history registers
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module localHistoryPredictor #(parameter m = 6,   // 2^m local history registers
                               k = 10) (          // k history bits kept per register
  input  logic             clk,
  input  logic             reset,
  input  logic             StallF, StallE,        // StallE is not referenced in this module
  input  logic [`XLEN-1:0] LookUpPC,              // PC used to select the history register to read
  output logic [1:0]       Prediction,            // 2-bit prediction state for LookUpPC
  // update
  input  logic [`XLEN-1:0] UpdatePC,              // PC used to select the history register to write
  input  logic             UpdateEN, PCSrcE,      // write enable; branch outcome shifted into the history
  input  logic [1:0]       UpdatePrediction       // value written into the PHT on an update
);

  // Local history: a table of 2^m registers, k bits each, plus its two index hashes.
  logic [m-1:0]           LookUpPCIndex, UpdatePCIndex;
  logic [2**m-1:0][k-1:0] LHRNextF;
  logic [k-1:0]           LHRF, ForwardLHRNext, LHRFNext;
  // Pattern history table read data and the registered update value.
  logic [1:0]             PredictionMemory;
  logic [1:0]             UpdatePredictionF;
  // Forwarding controls (DoForwardingF is declared but never driven or read).
  logic                   DoForwarding, DoForwardingF, DoForwardingPHT, DoForwardingPHTF;

  // Index hash: PC[m:2] with the top index bit XORed with PC[1].
  assign LookUpPCIndex = {LookUpPC[m+1] ^ LookUpPC[1], LookUpPC[m:2]};
  assign UpdatePCIndex = {UpdatePC[m+1] ^ UpdatePC[1], UpdatePC[m:2]};

  // Next history value: shift right, inserting the branch outcome at the MSB.
  assign LHRFNext = {PCSrcE, LHRF[k-1:1]};

  // One enabled flop bank per history register; only the entry selected by
  // UpdatePCIndex captures LHRFNext when UpdateEN is asserted.
  genvar index;
  for (index = 0; index < 2**m; index = index + 1) begin:localhist
    flopenr #(k) LocalHistoryRegister(.clk, .reset,
                                      .en(UpdateEN & (index == UpdatePCIndex)),
                                      .d(LHRFNext),
                                      .q(LHRNextF[index]));
  end

  // When the lookup and update select the same entry, bypass the register table
  // and use the value that is about to be written.
  assign DoForwarding   = (LookUpPCIndex == UpdatePCIndex);
  assign ForwardLHRNext = DoForwarding ? LHRFNext : LHRNextF[LookUpPCIndex];

  // Pattern history table: read at the (possibly forwarded) history, written at
  // the updated history with UpdatePrediction.
  ram2p1r1wb #(k, 2) PHT(.clk,
                         .reset,
                         .ra1(ForwardLHRNext),
                         .rd1(PredictionMemory),
                         .ren1(~StallF),
                         .wa2(LHRFNext),
                         .wd2(UpdatePrediction),
                         .wen2(UpdateEN),
                         .bwe2(2'b11));

  // The PHT read and write addresses match: remember that fact and the written
  // value for one cycle so the output can be taken from the written value.
  // These two registers have no enable, so they ignore stalls.
  assign DoForwardingPHT = (LHRFNext == ForwardLHRNext);

  flopr #(1) DoForwardingReg(.clk, .reset,
                             .d(DoForwardingPHT),
                             .q(DoForwardingPHTF));

  flopr #(2) UpdatePredictionReg(.clk, .reset,
                                 .d(UpdatePrediction),
                                 .q(UpdatePredictionF));

  assign Prediction = DoForwardingPHTF ? UpdatePredictionF : PredictionMemory;

  // History register captured for the fetch stage; never cleared (clear tied low).
  flopenrc #(k) LHRFReg(.clk, .reset,
                        .en(~StallF),
                        .clear(1'b0),
                        .d(ForwardLHRNext),
                        .q(LHRF));

endmodule
|
114
src/ifu/bpred/localaheadbp.sv
Normal file
114
src/ifu/bpred/localaheadbp.sv
Normal file
@ -0,0 +1,114 @@
|
||||
///////////////////////////////////////////
|
||||
// localaheadbp
|
||||
//
|
||||
// Written: Ross Thompson
|
||||
// Email: ross1728@gmail.com
|
||||
// Created: 16 March 2021
|
||||
//
|
||||
// Purpose: local history branch predictor with ahead pipelining and SRAM memories.
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// localaheadbp
//   Local-history direction predictor built from two synchronous RAMs:
//     BHT: 2^m entries x k bits, read with an index hashed from PCNextF -> LHRF
//     PHT: 2^k entries x 2 bits, read with LHRF                         -> BPDirPredD
//   The history read for a fetch is carried down the pipeline (LHRD..LHRW) and is
//   used both as the PHT write address and as the base for the BHT write-back value.
//   There is no forwarding logic in this module.
module localaheadbp #(parameter XLEN,
                      parameter m = 6,    // 2^m = number of local history branches
                      parameter k = 10) ( // number of past branches stored
  input  logic             clk,
  input  logic             reset,
  input  logic             StallF, StallD, StallE, StallM, StallW,
  input  logic             FlushD, FlushE, FlushM, FlushW,
  output logic [1:0]       BPDirPredD,      // PHT read data (2-bit counter state)
  output logic             BPDirPredWrongE, // direction mispredicted on a branch
  // update
  input  logic [XLEN-1:0]  PCNextF, PCM,    // PCNextF indexes the BHT read; PCM (registered to PCW) the BHT write
  input  logic             BranchE, BranchM, PCSrcE
);

  logic [k-1:0]            IndexM;                       // PHT write address
  logic [1:0]              BPDirPredE;
  logic [1:0]              BPDirPredM;
  logic [1:0]              NewBPDirPredM, NewBPDirPredW; // updated counter state
  logic [k-1:0]            LHRF, LHRD, LHRE, LHRM, LHRW; // history as it moves down the pipeline
  logic [k-1:0]            LHRNextW;                     // value written back to the BHT
  logic                    PCSrcM;
  logic [m-1:0]            IndexLHRNextF, IndexLHRM;     // BHT read / write indices
  logic [XLEN-1:0]         PCW;

  assign IndexM = LHRW;

  // Pattern history table. Read address is the history returned by the BHT.
  // NOTE(review): we2 is BranchM while the write address and data (LHRW,
  // NewBPDirPredW) come from W-stage registers -- confirm the intended stage.
  ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
    .ce1(~StallD), .ce2(~StallW & ~FlushW),
    .ra1(LHRF),
    .rd1(BPDirPredD),
    .wa2(IndexM),
    .wd2(NewBPDirPredW),
    .we2(BranchM),
    .bwe2(1'b1));

  // Carry the prediction along with its instruction.
  flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE);
  flopenrc #(2) PredictionRegM(clk, reset, FlushM, ~StallM, BPDirPredE, BPDirPredM);

  // Saturating-counter update of the old prediction state with the branch outcome.
  // NOTE(review): the outcome is PCSrcE but the old state is BPDirPredM -- confirm
  // this stage pairing is intended.
  satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredM), .NewState(NewBPDirPredM));
  flopenrc #(2) NewPredictionRegW(clk, reset, FlushW, ~StallW, NewBPDirPredM, NewBPDirPredW);

  // Wrong when the outcome differs from the predicted-taken bit, qualified by BranchE.
  assign BPDirPredWrongE = PCSrcE != BPDirPredM[1] & BranchE;

  // Local history update: shift the outcome into the MSB of the history that was
  // read for this branch; otherwise the history is unchanged.
  assign LHRNextW = BranchM ? {PCSrcM, LHRW[k-1:1]} : LHRW;

  // BHT index hash: PC[m:2] with the top index bit XORed with PC[1].
  assign IndexLHRM     = {PCW[m+1] ^ PCW[1], PCW[m:2]};
  assign IndexLHRNextF = {PCNextF[m+1] ^ PCNextF[1], PCNextF[m:2]};

  // Branch history table holding the local history registers.
  ram2p1r1wbe #(2**m, k) BHT(.clk(clk),
    .ce1(~StallF), .ce2(~StallW & ~FlushW),
    .ra1(IndexLHRNextF),
    .rd1(LHRF),
    .wa2(IndexLHRM),
    .wd2(LHRNextW),
    .we2(BranchM),
    .bwe2('1));

  flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);

  // History pipeline. LHRF comes straight from the BHT read port.
  flopenrc #(k) LHRDReg(clk, reset, FlushD, ~StallD, LHRF, LHRD);
  flopenrc #(k) LHREReg(clk, reset, FlushE, ~StallE, LHRD, LHRE);
  flopenrc #(k) LHRMReg(clk, reset, FlushM, ~StallM, LHRE, LHRM);
  flopenrc #(k) LHRWReg(clk, reset, FlushW, ~StallW, LHRM, LHRW);

  // PC of the updating branch, delayed to line up with LHRW for the BHT write index.
  flopenr #(XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);

endmodule
|
105
src/ifu/bpred/localbpbasic.sv
Normal file
105
src/ifu/bpred/localbpbasic.sv
Normal file
@ -0,0 +1,105 @@
|
||||
///////////////////////////////////////////
|
||||
// localbpbasic
|
||||
//
|
||||
// Written: Ross Thompson
|
||||
// Email: ross1728@gmail.com
|
||||
// Created: 16 March 2021
|
||||
//
|
||||
// Purpose: Local history branch predictor. Basic implementation without any repair; the local history registers are built from flops.
|
||||
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// localbpbasic
//   Local-history direction predictor. The 2^m history registers are an array of
//   enabled flops (read combinationally); the 2^k-entry pattern history table is a
//   synchronous RAM addressed by the selected history.
module localbpbasic #(parameter XLEN,
                      parameter m = 6,    // 2^m = number of local history branches
                      parameter k = 10) ( // number of past branches stored
  input  logic             clk,
  input  logic             reset,
  input  logic             StallF, StallD, StallE, StallM, StallW,
  input  logic             FlushD, FlushE, FlushM, FlushW,
  output logic [1:0]       BPDirPredF,      // PHT read data (2-bit counter state)
  output logic             BPDirPredWrongE, // direction mispredicted on a branch in E
  // update
  input  logic [XLEN-1:0]  PCNextF, PCM,    // lookup PC and update PC
  input  logic             BranchE, BranchM, PCSrcE
);

  // Local history registers and their read/write indices.
  logic [m-1:0]            IndexLHRNextF, IndexLHRM;
  logic [2**m-1:0][k-1:0]  LHRArray;
  logic [k-1:0]            LHR, LHRF, LHRD, LHRE, LHRM;
  logic [k-1:0]            LHRNextW;
  logic                    UpdateM;
  logic                    PCSrcM;
  // Pattern history table addresses and counter states.
  logic [k-1:0]            IndexNextF, IndexM;
  logic [1:0]              BPDirPredD, BPDirPredE;
  logic [1:0]              NewBPDirPredE, NewBPDirPredM;

  genvar index;

  // ---------------------------------------------------------------------------
  // Local history table
  // ---------------------------------------------------------------------------
  // Index hash: PC[m:2] with the top index bit XORed with PC[1].
  assign IndexLHRNextF = {PCNextF[m+1] ^ PCNextF[1], PCNextF[m:2]};
  assign IndexLHRM     = {PCM[m+1] ^ PCM[1], PCM[m:2]};

  // Updated history: shift the outcome into the MSB on a branch, else hold.
  assign LHRNextW = BranchM ? {PCSrcM, LHRM[k-1:1]} : LHRM;

  // A history register is written only for a branch in M that is neither
  // stalled nor flushed in W.
  assign UpdateM = BranchM & ~(StallW | FlushW);

  for (index = 0; index < 2**m; index = index + 1) begin:localhist
    flopenr #(k) LocalHistoryRegister(.clk, .reset,
                                      .en(UpdateM & (index == IndexLHRM)),
                                      .d(LHRNextW),
                                      .q(LHRArray[index]));
  end

  // Combinational read of the history selected by the next fetch PC.
  assign LHR = LHRArray[IndexLHRNextF];

  // Carry the history that was read with its instruction down to M, where it is
  // used as the PHT write address and as the base of the updated history.
  flopenrc #(k) LHRFReg(.clk, .reset, .clear(FlushD), .en(~StallF), .d(LHR),  .q(LHRF));
  flopenrc #(k) LHRDReg(.clk, .reset, .clear(FlushD), .en(~StallD), .d(LHRF), .q(LHRD));
  flopenrc #(k) LHREReg(.clk, .reset, .clear(FlushE), .en(~StallE), .d(LHRD), .q(LHRE));
  flopenrc #(k) LHRMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LHRE), .q(LHRM));

  flopenrc #(1) PCSrcMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(PCSrcE), .q(PCSrcM));

  // ---------------------------------------------------------------------------
  // Pattern history table
  // ---------------------------------------------------------------------------
  assign IndexNextF = LHR;
  assign IndexM     = LHRM;

  ram2p1r1wbe #(2**k, 2) PHT(.clk,
                             .ce1(~StallF),
                             .ra1(IndexNextF),
                             .rd1(BPDirPredF),
                             .ce2(~(StallW | FlushW)),
                             .wa2(IndexM),
                             .wd2(NewBPDirPredM),
                             .we2(BranchM),
                             .bwe2(1'b1));

  // Prediction travels with the instruction to E, where it is checked and updated.
  flopenrc #(2) PredictionRegD(.clk, .reset, .clear(FlushD), .en(~StallD), .d(BPDirPredF), .q(BPDirPredD));
  flopenrc #(2) PredictionRegE(.clk, .reset, .clear(FlushE), .en(~StallE), .d(BPDirPredD), .q(BPDirPredE));

  satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE));
  flopenrc #(2) NewPredictionRegM(.clk, .reset, .clear(FlushM), .en(~StallM), .d(NewBPDirPredE), .q(NewBPDirPredM));

  // Wrong when the outcome differs from the predicted-taken bit of a branch in E.
  assign BPDirPredWrongE = (PCSrcE != BPDirPredE[1]) & BranchE;

endmodule
|
135
src/ifu/bpred/localrepairbp.sv
Normal file
135
src/ifu/bpred/localrepairbp.sv
Normal file
@ -0,0 +1,135 @@
|
||||
///////////////////////////////////////////
|
||||
// localrepairbp
|
||||
//
|
||||
// Written: Ross Thompson
|
||||
// Email: ross1728@gmail.com
|
||||
// Created: 15 April 2023
|
||||
//
|
||||
// Purpose: Local history branch predictor with speculation and repair using CBH.
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// localrepairbp
//   Local-history direction predictor with a speculative history table.
//     BHT: committed local histories (2^m x k), written from the W-stage registers.
//     SHB: speculative local histories (2^m x k), written earlier in the pipeline
//          with the predicted direction shifted in.
//     FlushedBits: one bit per SHB entry; a set bit means the entry must not be used.
//     PHT: 2^k x 2-bit counters, read with the selected history (LHRF).
//   For each lookup LHRMux picks the committed history when the entry's flushed
//   bit is set and the speculative history otherwise.
module localrepairbp #(parameter XLEN,
                       parameter m = 6,    // 2^m = number of local history branches
                       parameter k = 10) ( // number of past branches stored
  input  logic             clk,
  input  logic             reset,
  input  logic             StallF, StallD, StallE, StallM, StallW,
  input  logic             FlushD, FlushE, FlushM, FlushW,
  output logic [1:0]       BPDirPredD,      // PHT read data (2-bit counter state)
  output logic             BPDirPredWrongE, // direction mispredicted on a branch
  // update
  input  logic [XLEN-1:0]  PCNextF, PCE, PCM,
  input  logic             BranchD, BranchE, BranchM, PCSrcE
);

  logic [1:0]              BPDirPredE;
  logic [1:0]              BPDirPredM;
  logic [1:0]              NewBPDirPredM, NewBPDirPredW;   // updated counter state
  logic [k-1:0]            LHRF, LHRD, LHRE, LHRM, LHRW;   // history as it moves down the pipeline
  logic [k-1:0]            LHRNextW;                       // committed history write-back value
  logic                    PCSrcM;
  logic [m-1:0]            IndexLHRNextF, IndexLHRM;       // BHT/SHB read index, BHT write index
  logic [XLEN-1:0]         PCW;
  logic [k-1:0]            LHRCommittedF, LHRSpeculativeF; // BHT and SHB read data
  logic [m-1:0]            IndexLHRD;                      // SHB write index
  logic [k-1:0]            LHRNextE;                       // SHB write data
  logic                    SpeculativeFlushedF;            // registered flushed bit for the lookup
  logic [2**m-1:0]         FlushedBits;

  // Pattern history table. Read address is the selected local history.
  // NOTE(review): we2 is BranchM while the write address and data (LHRW,
  // NewBPDirPredW) come from W-stage registers -- confirm the intended stage.
  ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
    .ce1(~StallD), .ce2(~StallW & ~FlushW),
    .ra1(LHRF),
    .rd1(BPDirPredD),
    .wa2(LHRW),
    .wd2(NewBPDirPredW),
    .we2(BranchM),
    .bwe2(1'b1));

  // Carry the prediction along with its instruction.
  flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE);
  flopenrc #(2) PredictionRegM(clk, reset, FlushM, ~StallM, BPDirPredE, BPDirPredM);

  // Saturating-counter update of the old prediction state with the branch outcome.
  // NOTE(review): the outcome is PCSrcE but the old state is BPDirPredM -- confirm
  // this stage pairing is intended.
  satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredM), .NewState(NewBPDirPredM));
  flopenrc #(2) NewPredictionRegW(clk, reset, FlushW, ~StallW, NewBPDirPredM, NewBPDirPredW);

  // Wrong when the outcome differs from the predicted-taken bit, qualified by BranchE.
  assign BPDirPredWrongE = PCSrcE != BPDirPredM[1] & BranchE;

  // Committed history update: shift the actual outcome into the MSB of the
  // history carried with the branch; otherwise the history is unchanged.
  assign LHRNextW = BranchM ? {PCSrcM, LHRW[k-1:1]} : LHRW;

  // Index hash: PC[m:2] with the top index bit XORed with PC[1].
  assign IndexLHRM     = {PCW[m+1] ^ PCW[1], PCW[m:2]};
  assign IndexLHRNextF = {PCNextF[m+1] ^ PCNextF[1], PCNextF[m:2]};

  // Committed local histories.
  ram2p1r1wbe #(2**m, k) BHT(.clk(clk),
    .ce1(~StallF), .ce2(~StallW & ~FlushW),
    .ra1(IndexLHRNextF),
    .rd1(LHRCommittedF),
    .wa2(IndexLHRM),
    .wd2(LHRNextW),
    .we2(BranchM),
    .bwe2('1));

  // Speculative history update: shift the predicted-taken bit into the MSB.
  // NOTE(review): the index is hashed from PCE and the base history is LHRE,
  // while the enable is BranchD -- confirm the stage naming.
  assign IndexLHRD = {PCE[m+1] ^ PCE[1], PCE[m:2]};
  assign LHRNextE  = BranchD ? {BPDirPredD[1], LHRE[k-1:1]} : LHRE;

  // Speculative local histories.
  // *** replace with a small CAM
  ram2p1r1wbe #(2**m, k) SHB(.clk(clk),
    .ce1(~StallF), .ce2(~StallE & ~FlushE),
    .ra1(IndexLHRNextF),
    .rd1(LHRSpeculativeF),
    .wa2(IndexLHRD),
    .wd2(LHRNextE),
    .we2(BranchD),
    .bwe2('1));

  // Flushed-bit array for the SHB.
  //   - reset or FlushD sets every bit;
  //   - a bit is cleared under the same condition that writes its SHB entry;
  //     when both happen in one cycle the later assignment wins and that one
  //     bit ends up cleared;
  //   - the bit for the current lookup index is registered every cycle so it
  //     lines up with the synchronous SHB/BHT read data.
  // **** replace with small CAM
  always_ff @(posedge clk) begin
    SpeculativeFlushedF <= #1 FlushedBits[IndexLHRNextF];
    if (reset | FlushD) FlushedBits <= #1 '1;
    if(BranchD & ~StallE & ~FlushE) begin
      FlushedBits[IndexLHRD] <= #1 '0;
    end
  end

  // Select the committed history when the flushed bit is set, else the speculative one.
  mux2 #(k) LHRMux(LHRSpeculativeF, LHRCommittedF, SpeculativeFlushedF, LHRF);

  flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);

  // History pipeline. LHRF comes straight from LHRMux.
  flopenrc #(k) LHRDReg(clk, reset, FlushD, ~StallD, LHRF, LHRD);
  flopenrc #(k) LHREReg(clk, reset, FlushE, ~StallE, LHRD, LHRE);
  flopenrc #(k) LHRMReg(clk, reset, FlushM, ~StallM, LHRE, LHRM);
  flopenrc #(k) LHRWReg(clk, reset, FlushW, ~StallW, LHRM, LHRW);

  // PC of the updating branch, delayed to line up with LHRW for the BHT write index.
  flopenr #(XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);

endmodule
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user