This commit is contained in:
Madeleine Masser-Frye 2022-08-02 16:25:25 +00:00
commit 658f2626f4
185 changed files with 6856 additions and 5978 deletions

5
.gitignore vendored
View File

@ -7,12 +7,8 @@ __pycache__/
.vscode/ .vscode/
#External repos #External repos
addins
addins/riscv-arch-test/Makefile.include addins/riscv-arch-test/Makefile.include
addins/riscv-tests/target addins/riscv-tests/target
addins/coremark/work/*
addins/embench/bd_speed/*
addins/embench/bd_size/*
benchmarks/embench/wally*.json benchmarks/embench/wally*.json
#vsim work files to ignore #vsim work files to ignore
@ -34,6 +30,7 @@ testsBP/*/*.a
tests/wally-riscv-arch-test/riscv-test-suite/*/I/*/* tests/wally-riscv-arch-test/riscv-test-suite/*/I/*/*
tests/riscof/riscof_work/ tests/riscof/riscof_work/
tests/riscof/config32.ini tests/riscof/config32.ini
tests/riscof/config32e.ini
tests/riscof/config64.ini tests/riscof/config64.ini
tests/linux-testgen/linux-testvectors/* tests/linux-testgen/linux-testvectors/*
!tests/linux-testgen/linux-testvectors/tvCopier.py !tests/linux-testgen/linux-testvectors/tvCopier.py

1
.gitmodules vendored
View File

@ -17,6 +17,7 @@
[submodule "addins/embench-iot"] [submodule "addins/embench-iot"]
path = addins/embench-iot path = addins/embench-iot
url = https://github.com/embench/embench-iot url = https://github.com/embench/embench-iot
branch = embench-1.0-branch
[submodule "addins/coremark"] [submodule "addins/coremark"]
path = addins/coremark path = addins/coremark
url = https://github.com/eembc/coremark url = https://github.com/eembc/coremark

@ -1 +1 @@
Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 Subproject commit e5020bf7b345f8efb96c6c939de3162525b7f545

View File

@ -1,6 +1,8 @@
# Wally Coremark Makefile
# Daniel Torres & David Harris 28 July 2022
PORT_DIR = $(CURDIR)/riscv64-baremetal PORT_DIR = $(CURDIR)/riscv64-baremetal
cmbase=../../addins/coremark cmbase=../../addins/coremark
# cmbase= ../riscv-coremark/coremark
work_dir= ../../benchmarks/coremark/work work_dir= ../../benchmarks/coremark/work
XLEN ?=64 XLEN ?=64
sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \
@ -8,27 +10,18 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \
$(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \
$(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c
ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32)
PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=rv$(XLEN)im -static -falign-functions=16 \ ARCH := rv$(XLEN)im
-mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -funroll-all-loops -finline-functions -falign-jumps=4 \ PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \
-mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -finline-functions -falign-jumps=4 \
-fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \
-funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \
-nostdlib -nostartfiles -ffreestanding -mstrict-align \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \
-DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=10 -DXLEN=$(XLEN)
# flags that cause build errors mcmodel=medlow
# -static -mcmodel=medlow -mtune=sifive-7-series \
# -O3 -falign-functions=16 -funroll-all-loops -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET
# -finline-functions -falign-jumps=4 \
# -nostdlib -nostartfiles -ffreestanding -mstrict-align \
# -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \
# -DPERFORMANCE_RUN=1
# "-march=rv$(XLEN)im -mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta "
all: $(work_dir)/coremark.bare.riscv.elf.memfile all: $(work_dir)/coremark.bare.riscv.elf.memfile
run: run:
(cd ../../pipelined/regression && (vsim -c -do "do wally-pipelined-batch.do rv$(XLEN)gc coremark" 2>&1 | tee $(work_dir)/coremark.sim.log)) (cd ../../pipelined/regression && (time vsim -c -do "do wally-pipelined-batch.do rv$(XLEN)gc coremark" 2>&1 | tee $(work_dir)/coremark.sim.log))
cd ../../benchmarks/coremark/ cd ../../benchmarks/coremark/
$(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv
@ -37,9 +30,7 @@ $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv
extractFunctionRadix.sh $<.elf.objdump extractFunctionRadix.sh $<.elf.objdump
$(work_dir)/coremark.bare.riscv: $(sources) Makefile $(work_dir)/coremark.bare.riscv: $(sources) Makefile
# These flags were used by WD on CoreMark
make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="$(PORT_CFLAGS)" make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="$(PORT_CFLAGS)"
# -fno-toplevel-reorder --param=max-inline-insns-size=128 " # adding this bit caused a compiler error
mkdir -p $(work_dir) mkdir -p $(work_dir)
mv $(cmbase)/coremark.bare.riscv $(work_dir) mv $(cmbase)/coremark.bare.riscv $(work_dir)
@ -49,14 +40,3 @@ clean:
rm -f $(work_dir)/* rm -f $(work_dir)/*
# # PORT_CFLAGS = -g -march=$(XLEN)im -mabi=$(ABI) -static -mcmodel=medlow -mtune=sifive-3-series \
# # -O3 -falign-functions=16 -funroll-all-loops \
# # -finline-functions -falign-jumps=4 \
# # -nostdlib -nostartfiles -ffreestanding -mstrict-align \
# # -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \
# # -DPERFORMANCE_RUN=1
# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv$(XLEN)im -mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta "
# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta"
# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta "

View File

@ -114,7 +114,12 @@ void portable_free(void *p) {
#define read_csr(reg) ({ unsigned long __tmp; \ #define read_csr(reg) ({ unsigned long __tmp; \
asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \
__tmp; }) __tmp; })
#define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8) // #if (XLEN==64)
// typedef unsigned long long ee_ptr_int;
// #else
// typedef unsigned long ee_ptr_int;
// #endif
#define GETMYTIME(_t) (_t = *(volatile ee_ptr_int*)0x0200BFF8)
#define MYTIMEDIFF(fin,ini) ((fin)-(ini)) #define MYTIMEDIFF(fin,ini) ((fin)-(ini))
// Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms) // Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms)
#define TIMER_RES_DIVIDER 10000 #define TIMER_RES_DIVIDER 10000
@ -196,10 +201,13 @@ void stop_time(void) {
CORE_TICKS get_time(void) { CORE_TICKS get_time(void) {
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
unsigned long instructions = minstretDiff(); unsigned long instructions = minstretDiff();
ee_printf(" Called get_time\n"); ee_ptr_int cm100 = 1000000000 / elapsed; // coremark score * 100
ee_ptr_int cpi100 = elapsed*100/instructions; // CPI * 100
ee_printf(" WALLY CoreMark Results (from get_time)\n");
ee_printf(" Elapsed MTIME: %u\n", elapsed); ee_printf(" Elapsed MTIME: %u\n", elapsed);
ee_printf(" Elapsed MINSTRET: %lu\n", instructions); ee_printf(" Elapsed MINSTRET: %lu\n", instructions);
ee_printf(" CPI: %lu / %lu\n", elapsed, instructions); ee_printf(" COREMARK/MHz Score: 10,000,000 / %lu = %d.%02d \n", elapsed, cm100/100, cm100%100);
ee_printf(" CPI: %lu / %lu = %d.%02d\n", elapsed, instructions, cpi100/100, cpi100%100);
return elapsed; return elapsed;
} }
/* Function: time_in_secs /* Function: time_in_secs

View File

@ -66,14 +66,19 @@ typedef size_t CORE_TICKS;
#elif HAS_TIME_H #elif HAS_TIME_H
#include <time.h> #include <time.h>
typedef clock_t CORE_TICKS; typedef clock_t CORE_TICKS;
#else // #elif (XLEN==32)
// #include <sys/types.h>
// typedef ee_u32 CORE_TICKS;
/* Configuration: size_t and clock_t /* Configuration: size_t and clock_t
Note these need to match the size of the clock output and the xLen the processor supports Note these need to match the size of the clock output and the xLen the processor supports
*/ */
#elif (XLEN==64)
typedef unsigned long int size_t; typedef unsigned long int size_t;
typedef unsigned long int clock_t; typedef unsigned long int clock_t;
typedef clock_t CORE_TICKS; #else
#include <sys/types.h>
#endif #endif
typedef clock_t CORE_TICKS;
/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION /* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
Initialize these strings per platform Initialize these strings per platform
@ -89,7 +94,7 @@ typedef clock_t CORE_TICKS;
#define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
#endif #endif
#ifndef MEM_LOCATION #ifndef MEM_LOCATION
#define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)" #define MEM_LOCATION "Code and Data in external RAM"
#define MEM_LOCATION_UNSPEC 1 #define MEM_LOCATION_UNSPEC 1
#endif #endif
@ -105,11 +110,16 @@ typedef signed int ee_s32;
typedef double ee_f32; typedef double ee_f32;
typedef unsigned char ee_u8; typedef unsigned char ee_u8;
typedef unsigned int ee_u32; typedef unsigned int ee_u32;
typedef unsigned long long ee_ptr_int; #if (XLEN==64)
typedef unsigned long long ee_ptr_int;
#else
typedef ee_u32 ee_ptr_int;
#endif
typedef size_t ee_size_t; typedef size_t ee_size_t;
/* align an offset to point to a 32b value */ /* align an offset to point to a 32b value */
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) #define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3))
/* Configuration: SEED_METHOD /* Configuration: SEED_METHOD
Defines method to get seed values that cannot be computed at compile time. Defines method to get seed values that cannot be computed at compile time.

View File

@ -33,13 +33,13 @@ CC = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc
# Flag: CFLAGS # Flag: CFLAGS
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags" # Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
#PORT_CFLAGS = -O2 -static -std=gnu99 #PORT_CFLAGS = -O2 -static -std=gnu99
PORT_CFLAGS = -O2 -mcmodel=medany -static -fno-tree-loop-distribute-patterns -std=gnu99 -fno-common -nostartfiles -lm -lgcc -T $(PORT_DIR)/link.ld PORT_CFLAGS = -mcmodel=medany -fno-tree-loop-distribute-patterns -fno-common -lm -lgcc -T $(PORT_DIR)/link.ld
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\" CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
#Flag: LFLAGS_END #Flag: LFLAGS_END
# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts). # Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt. # Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
LFLAGS_END += LFLAGS_END += -static-libgcc -lgcc
# Flag: PORT_SRCS # Flag: PORT_SRCS
# Port specific source files can be added here # Port specific source files can be added here
PORT_SRCS = $(PORT_DIR)/core_portme.c $(PORT_DIR)/syscalls.c $(PORT_DIR)/crt.S PORT_SRCS = $(PORT_DIR)/core_portme.c $(PORT_DIR)/syscalls.c $(PORT_DIR)/crt.S

View File

@ -4,7 +4,8 @@
embench_dir = ../../addins/embench-iot embench_dir = ../../addins/embench-iot
all: build sim size all: build
run: size sim
allClean: clean all allClean: clean all

View File

@ -130,5 +130,4 @@
`define BPRED_SIZE 10 `define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 1 `define HPTW_WRITES_SUPPORTED 1

View File

@ -141,5 +141,4 @@
`define BPRED_SIZE 10 `define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 1 `define HPTW_WRITES_SUPPORTED 1

View File

@ -135,5 +135,4 @@
`define TESTSBP 0 `define TESTSBP 0
`define BPRED_SIZE 10 `define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0 `define HPTW_WRITES_SUPPORTED 0

View File

@ -133,5 +133,4 @@
`define TESTSBP 0 `define TESTSBP 0
`define BPRED_SIZE 10 `define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0 `define HPTW_WRITES_SUPPORTED 0

View File

@ -135,6 +135,4 @@
`define TESTSBP 0 `define TESTSBP 0
`define BPRED_SIZE 10 `define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0 `define HPTW_WRITES_SUPPORTED 0

View File

@ -133,6 +133,4 @@
`define TESTSBP 0 `define TESTSBP 0
`define BPRED_SIZE 10 `define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0 `define HPTW_WRITES_SUPPORTED 0

View File

@ -136,6 +136,4 @@
`define TESTSBP 1 `define TESTSBP 1
`define BPRED_SIZE 10 `define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0 `define HPTW_WRITES_SUPPORTED 0

View File

@ -32,14 +32,14 @@
`define DESIGN_COMPILER 0 `define DESIGN_COMPILER 0
// RV32 or RV64: XLEN = 32 or 64 // RV32 or RV64: XLEN = 32 or 64
`define XLEN 64 `define XLEN 32
// IEEE 754 compliance // IEEE 754 compliance
`define IEEE754 0 `define IEEE754 0
// MISA RISC-V configuration per specification // MISA RISC-V configuration per specification
// ZYXWVUTSRQPONMLKJIHGFEDCBA // ZYXWVUTSRQPONMLKJIHGFEDCBA
`define MISA 32'b0000000000101000001000100100101 `define MISA 32'b0000000000101000001000100101101
`define ZICSR_SUPPORTED 1 `define ZICSR_SUPPORTED 1
`define ZIFENCEI_SUPPORTED 1 `define ZIFENCEI_SUPPORTED 1
`define COUNTERS 32 `define COUNTERS 32
@ -137,5 +137,4 @@
`define TESTSBP 0 `define TESTSBP 0
`define BPRED_SIZE 10 `define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0 `define HPTW_WRITES_SUPPORTED 0

View File

@ -136,5 +136,4 @@
`define TESTSBP 0 `define TESTSBP 0
`define BPRED_SIZE 10 `define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0 `define HPTW_WRITES_SUPPORTED 0

View File

@ -136,5 +136,4 @@
`define TESTSBP 0 `define TESTSBP 0
`define BPRED_SIZE 10 `define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0 `define HPTW_WRITES_SUPPORTED 0

View File

@ -136,5 +136,4 @@
`define TESTSBP 0 `define TESTSBP 0
`define BPRED_SIZE 10 `define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0 `define HPTW_WRITES_SUPPORTED 0

View File

@ -136,5 +136,4 @@
`define TESTSBP 0 `define TESTSBP 0
`define BPRED_SIZE 10 `define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0 `define HPTW_WRITES_SUPPORTED 0

View File

@ -95,11 +95,27 @@
// largest length in IEU/FPU // largest length in IEU/FPU
`define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
`define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9)) `define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+9))
`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6)) `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6))
// division constants
`define RADIX 32'h4
`define DIVCOPIES 32'h4
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
`define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3)
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3))
`define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN)
`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
// one interation is required for the integer bit for minimally redundent radix-4
`define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4))
`define DURLEN ($clog2(`FPDUR+1))
`define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
`define DIVb (`FPDUR*`LOGR*`DIVCOPIES)-1
`define USE_SRAM 0 `define USE_SRAM 0

View File

@ -1,4 +1,4 @@
all: archtests wallytests memfiles all: riscoftests memfiles
# *** Build old tests/imperas-riscv-tests for now; # *** Build old tests/imperas-riscv-tests for now;
# Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test # Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test
# DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired # DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired
@ -19,18 +19,15 @@ allclean: clean all
clean: clean:
make clean -C ../../tests/riscof make clean -C ../../tests/riscof
make clean -C ../../tests/wally-riscv-arch-test # make clean -C ../../tests/wally-riscv-arch-test
# make allclean -C ../../tests/imperas-riscv-tests # make allclean -C ../../tests/imperas-riscv-tests
archtests: riscoftests:
# Build riscv-arch-test 64 and 32-bit versions # Builds riscv-arch-test 64 and 32-bit versions and builds wally-riscv-arch-test 64 and 32-bit versions
make -C ../../tests/riscof/ --jobs make -C ../../tests/riscof/
make -C ../../tests/riscof/ XLEN=32 --jobs # make -C ../../tests/riscof/ XLEN=32
# make -C ../../tests/riscof/ XLEN=32 build_rv32e
wallytests: # make -C ../../tests/riscof/ XLEN=64
# Build wally-riscv-arch-test
make -C ../../tests/wally-riscv-arch-test/ --jobs
make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs
memfiles: memfiles:
make -f makefile-memfile wally-sim-files --jobs make -f makefile-memfile wally-sim-files --jobs

View File

@ -11,11 +11,9 @@ add wave -noupdate -expand -group Testbench /testbench/interruptEpcVal
add wave -noupdate -expand -group Testbench /testbench/interruptTVal add wave -noupdate -expand -group Testbench /testbench/interruptTVal
add wave -noupdate -expand -group Testbench /testbench/interruptDesc add wave -noupdate -expand -group Testbench /testbench/interruptDesc
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/CSRWritePendingDEM
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM
add wave -noupdate -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM add wave -noupdate -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/StoreStallD
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LSUStallM add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LSUStallM
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/DivBusyE add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/DivBusyE
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/ExceptionM add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/ExceptionM
@ -56,26 +54,25 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE
add wave -noupdate -group {Execution Stage} /testbench/ExpectedPCE add wave -noupdate -group {Execution Stage} /testbench/ExpectedPCE
add wave -noupdate -group {Execution Stage} /testbench/MepcExpected
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE
add wave -noupdate -group {Execution Stage} /testbench/InstrEName add wave -noupdate -group {Execution Stage} /testbench/InstrEName
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE
add wave -noupdate -group {Execution Stage} /testbench/textE add wave -noupdate -group {Execution Stage} /testbench/textE
add wave -noupdate -group {Execution Stage} -color {Cornflower Blue} /testbench/FunctionName/FunctionName add wave -noupdate -group {Execution Stage} -color {Cornflower Blue} /testbench/FunctionName/FunctionName
add wave -noupdate -group {Memory Stage} /testbench/checkInstrM add wave -noupdate -expand -group {Memory Stage} /testbench/checkInstrM
add wave -noupdate -group {Memory Stage} /testbench/dut/core/PCM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM
add wave -noupdate -group {Memory Stage} /testbench/ExpectedPCM add wave -noupdate -expand -group {Memory Stage} /testbench/ExpectedPCM
add wave -noupdate -group {Memory Stage} /testbench/dut/core/InstrM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM
add wave -noupdate -group {Memory Stage} /testbench/InstrMName add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName
add wave -noupdate -group {Memory Stage} /testbench/textM add wave -noupdate -expand -group {Memory Stage} /testbench/textM
add wave -noupdate -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -group {WriteBack stage} /testbench/checkInstrW add wave -noupdate -expand -group {WriteBack stage} /testbench/checkInstrW
add wave -noupdate -group {WriteBack stage} /testbench/InstrValidW add wave -noupdate -expand -group {WriteBack stage} /testbench/InstrValidW
add wave -noupdate -group {WriteBack stage} /testbench/PCW add wave -noupdate -expand -group {WriteBack stage} /testbench/PCW
add wave -noupdate -group {WriteBack stage} /testbench/ExpectedPCW add wave -noupdate -expand -group {WriteBack stage} /testbench/ExpectedPCW
add wave -noupdate -group {WriteBack stage} /testbench/InstrW add wave -noupdate -expand -group {WriteBack stage} /testbench/InstrW
add wave -noupdate -group {WriteBack stage} /testbench/InstrWName add wave -noupdate -expand -group {WriteBack stage} /testbench/InstrWName
add wave -noupdate -group {WriteBack stage} /testbench/textW add wave -noupdate -expand -group {WriteBack stage} /testbench/textW
add wave -noupdate -group Bpred -color Orange /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHR add wave -noupdate -group Bpred -color Orange /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHR
add wave -noupdate -group Bpred -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF add wave -noupdate -group Bpred -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF
add wave -noupdate -group Bpred -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]} add wave -noupdate -group Bpred -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]}
@ -196,203 +193,202 @@ add wave -noupdate -group ifu -expand -group icache -expand -group memory /testb
add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite
add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/ITLBMissF add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/ITLBMissF
add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress
add wave -noupdate -group lsu /testbench/dut/core/lsu/IEUAdrM add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -group lsu /testbench/dut/core/lsu/LSUPAdrM add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/LSUPAdrM
add wave -noupdate -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState add wave -noupdate -expand -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState
add wave -noupdate -group lsu /testbench/dut/core/lsu/SelHPTW add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/SelHPTW
add wave -noupdate -group lsu /testbench/dut/core/lsu/InterlockStall add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/InterlockStall
add wave -noupdate -group lsu /testbench/dut/core/lsu/LSUStallM add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/LSUStallM
add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM
add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataM add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataM
add wave -noupdate -group lsu /testbench/dut/core/lsu/WriteDataM add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/WriteDataM
add wave -noupdate -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr
add wave -noupdate -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState add wave -noupdate -expand -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/BusStall add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/BusStall
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA
add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr
add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD} add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD}
add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay} add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay}
add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData[69]} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/SetValid
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/SetValid add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM add wave -noupdate -expand -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit
add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_D
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_D add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_A
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_A add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_U
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_U add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_X
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_X add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_W
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_W add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_R
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_R add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_V
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_V add wave -noupdate -expand -group lsu -group dtlb -expand -group Status -color Maroon /testbench/dut/core/lsu/dmmu/dmmu/DAPageFault
add wave -noupdate -group lsu -group dtlb -expand -group Status -color Maroon /testbench/dut/core/lsu/dmmu/dmmu/DAPageFault add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/ImproperPrivilege
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/ImproperPrivilege add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/UpperBitsUnequalPageFault
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/UpperBitsUnequalPageFault add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Misaligned
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Misaligned add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/dtlb/InvalidRead
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/dtlb/InvalidRead add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/dtlb/InvalidWrite
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/dtlb/InvalidWrite add wave -noupdate -expand -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault
add wave -noupdate -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault add wave -noupdate -expand -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM
add wave -noupdate -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM add wave -noupdate -expand -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM
add wave -noupdate -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr add wave -noupdate -expand -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE
add wave -noupdate -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE add wave -noupdate -expand -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal
add wave -noupdate -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal add wave -noupdate -expand -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite
add wave -noupdate -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM add wave -noupdate -expand -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState
add wave -noupdate -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/ITLBMissF
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/ITLBMissF add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/ITLBMissOrDAFaultF
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/ITLBMissOrDAFaultF add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM
add wave -noupdate -group AHB -color Gold /testbench/dut/core/ebu/BusState add wave -noupdate -group AHB -color Gold /testbench/dut/core/ebu/BusState
add wave -noupdate -group AHB /testbench/dut/core/ebu/NextBusState add wave -noupdate -group AHB /testbench/dut/core/ebu/NextBusState
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/AtomicMaskedM add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/AtomicMaskedM
@ -424,55 +420,14 @@ add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite
add wave -noupdate -group itlb /testbench/dut/core/ifu/ITLBMissF add wave -noupdate -group itlb /testbench/dut/core/ifu/ITLBMissF
add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress
add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/immu/PMAInstrAccessFaultF add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/immu/PMAInstrAccessFaultF
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HCLK
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HSELPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HADDR
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWRITE
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADY
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HTRANS
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWDATA
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HRESPPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADYPLIC
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HCLK
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HSELGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HADDR
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWDATA
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWRITE
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADY
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HTRANS
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HRESPGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADYGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsIn add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsIn
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsOut add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsOut
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsEn add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsEn
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOIntr add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOIntr
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HCLK
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HSELCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HADDR
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWRITE
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWDATA
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADY
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HTRANS
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HRESPCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADYCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/TimerIntM
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/SwIntM
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HCLK
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HRESETn
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HSELUART
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HADDR
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HWRITE
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HWDATA
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HREADUART
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HRESPUART
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HREADYUART
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/SIN add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/SIN
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DSRb add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DSRb
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DCDb add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DCDb
@ -486,11 +441,6 @@ add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/INTR add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/INTR
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/TXRDYb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/TXRDYb
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/RXRDYb add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/RXRDYb
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HCLK
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HSELUART
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HADDR
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWRITE
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWDATA
add wave -noupdate -group {debug trace} -expand -group mem -color Yellow /testbench/dut/core/FlushW add wave -noupdate -group {debug trace} -expand -group mem -color Yellow /testbench/dut/core/FlushW
add wave -noupdate -group {debug trace} -expand -group mem /testbench/checkInstrM add wave -noupdate -group {debug trace} -expand -group mem /testbench/checkInstrM
add wave -noupdate -group {debug trace} -expand -group mem /testbench/dut/core/PCM add wave -noupdate -group {debug trace} -expand -group mem /testbench/dut/core/PCM
@ -517,7 +467,7 @@ add wave -noupdate -group {Performance Counters} -expand -group ICACHE -label {I
add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]} add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]}
add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]} add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]}
TreeUpdate [SetDefaultTree] TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 4} {2240751 ns} 0} WaveRestoreCursors {{Cursor 4} {87475 ns} 0}
quietly wave cursor active 1 quietly wave cursor active 1
configure wave -namecolwidth 250 configure wave -namecolwidth 250
configure wave -valuecolwidth 314 configure wave -valuecolwidth 314
@ -533,4 +483,4 @@ configure wave -griddelta 40
configure wave -timeline 0 configure wave -timeline 0
configure wave -timelineunits ns configure wave -timelineunits ns
update update
WaveRestoreZoom {2240730 ns} {2240764 ns} WaveRestoreZoom {87386 ns} {87672 ns}

View File

@ -64,7 +64,7 @@ tc = TestCase(
grepstr="400100000 instructions") grepstr="400100000 instructions")
configs.append(tc) configs.append(tc)
tests64gc = ["arch64i", "arch64priv", "arch64c", "arch64m", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64m", "wally64a", "imperas64c", "wally64periph", "wally64priv"] # , "imperas64mmu" "wally64i", #, "testsBP64"] tests64gc = ["arch64i", "arch64priv", "arch64c", "arch64m", "arch64f", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64m", "wally64a", "imperas64c", "wally64periph", "wally64priv"] # , "imperas64mmu" "wally64i", #, "testsBP64"]
for test in tests64gc: for test in tests64gc:
tc = TestCase( tc = TestCase(
name=test, name=test,
@ -73,7 +73,7 @@ for test in tests64gc:
grepstr="All tests ran without failures") grepstr="All tests ran without failures")
configs.append(tc) configs.append(tc)
tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv", "wally32periph"] #, "imperas32mmu""wally32i", tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "arch32d", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv", "wally32periph"] #, "imperas32mmu""wally32i",
for test in tests32gc: for test in tests32gc:
tc = TestCase( tc = TestCase(
name=test, name=test,

View File

@ -6,7 +6,7 @@
# fma - test fma # fma - test fma
# sub - test subtraction # sub - test subtraction
# div - test division # div - test division
# sqrt - test square ro # sqrt - test square root
# all - test everything # all - test everything
vsim -do "do testfloat.do rv64fp mul" vsim -do "do testfloat.do rv64fp $1"

View File

@ -1,7 +1,9 @@
# cvtint - test integer conversion unit (fcvtint) # cvtint - test integer conversion unit (fcvtint)
# cvtfp - test floating-point conversion unit (fcvtfp) # cvtfp - test floating-point conversion unit (fcvtfp)
# cmp - test comparison unit's LT, LE, EQ opperations (fcmp) # cmp - test comparison unit's LT, LE, EQ opperations (fcmp)
# add - test addition # add - test addition
# fma - test fma
# sub - test subtraction # sub - test subtraction
# div - test division # div - test division
# sqrt - test square root # sqrt - test square root

View File

@ -1,2 +1,2 @@
vsim -do "do wally-pipelined.do rv32gc arch32i" vsim -do "do wally-pipelined.do rv32gc wally32periph"

View File

@ -1 +1 @@
vsim -c -do "do wally-pipelined-batch.do rv64gc imperas64f" vsim -c -do "do wally-pipelined-batch.do rv64gc arch64d"

View File

@ -38,7 +38,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
vsim -lib work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3829 -fatal 7 vsim -lib work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3829 -fatal 7
#-- Run the Simulation #-- Run the Simulation
run -all #run -all
add log -recursive /* add log -recursive /*
do linux-wave.do do linux-wave.do
run -all run -all

View File

@ -9,22 +9,32 @@ add wave -noupdate /testbenchfp/Res
add wave -noupdate /testbenchfp/Ans add wave -noupdate /testbenchfp/Ans
add wave -noupdate /testbenchfp/DivStart add wave -noupdate /testbenchfp/DivStart
add wave -noupdate /testbenchfp/DivBusy add wave -noupdate /testbenchfp/DivBusy
add wave -noupdate /testbenchfp/srtfsm/state add wave -noupdate /testbenchfp/divsqrt/srtfsm/state
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WC
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WS
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WCA
add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WSA
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/Q
add wave -group {Divide} -noupdate /testbenchfp/srtfsm/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QM
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QNext
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QMNext
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/*
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/*
# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/*
# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/*
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/genblk1/qsel4/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/expcalc/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/*
add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/*
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*

View File

@ -5,38 +5,38 @@ add wave -noupdate /testbench/reset
add wave -noupdate /testbench/reset_ext add wave -noupdate /testbench/reset_ext
add wave -noupdate /testbench/memfilename add wave -noupdate /testbench/memfilename
add wave -noupdate /testbench/dut/core/SATP_REGW add wave -noupdate /testbench/dut/core/SATP_REGW
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/BPPredWrongE add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/CSRWriteFencePendingDEM add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/CSRWriteFencePendingDEM
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/RetM add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM
add wave -noupdate -group HDU -group hazards -color Pink /testbench/dut/core/hzu/TrapM add wave -noupdate -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/LoadStallD add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/StoreStallD add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/ifu/IFUStallF
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/LSUStallM add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LSUStallM
add wave -noupdate -group HDU -group hazards /testbench/dut/core/MDUStallD add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/MDUStallD
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/DivBusyE add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/DivBusyE
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InstrMisalignedFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrMisalignedFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InstrAccessFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrAccessFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/IllegalInstrFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/IllegalInstrFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/BreakpointFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/BreakpointFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/LoadMisalignedFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadMisalignedFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/StoreAmoMisalignedFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoMisalignedFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/LoadAccessFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadAccessFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/StoreAmoAccessFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoAccessFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/EcallFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/EcallFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InstrPageFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrPageFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/LoadPageFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadPageFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/StoreAmoPageFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoPageFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InterruptM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InterruptM
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/hzu/FlushF add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/hzu/FlushF
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallF add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallF
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallD add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallD
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallE add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallE
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallM add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallM
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallW add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallW
add wave -noupdate -group {instruction pipeline} /testbench/InstrFName add wave -noupdate -group {instruction pipeline} /testbench/InstrFName
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/FinalInstrRawF add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/FinalInstrRawF
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD
@ -55,10 +55,10 @@ add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE
add wave -noupdate -group {Execution Stage} /testbench/InstrEName add wave -noupdate -group {Execution Stage} /testbench/InstrEName
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE
add wave -noupdate -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName add wave -noupdate -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName
add wave -noupdate -group {Memory Stage} /testbench/dut/core/PCM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM
add wave -noupdate -group {Memory Stage} /testbench/dut/core/InstrM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM
add wave -noupdate -group {Memory Stage} /testbench/InstrMName add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName
add wave -noupdate -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -group {WriteBack stage} /testbench/PCW add wave -noupdate -group {WriteBack stage} /testbench/PCW
add wave -noupdate -group {WriteBack stage} /testbench/InstrW add wave -noupdate -group {WriteBack stage} /testbench/InstrW
add wave -noupdate -group {WriteBack stage} /testbench/InstrWName add wave -noupdate -group {WriteBack stage} /testbench/InstrWName
@ -190,195 +190,179 @@ add wave -noupdate -group AHB /testbench/dut/core/ebu/HMASTLOCK
add wave -noupdate -group AHB /testbench/dut/core/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/core/ebu/HADDRD
add wave -noupdate -group AHB /testbench/dut/core/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/core/ebu/HSIZED
add wave -noupdate -group AHB /testbench/dut/core/ebu/HWRITED add wave -noupdate -group AHB /testbench/dut/core/ebu/HWRITED
add wave -noupdate -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState add wave -noupdate -expand -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState
add wave -noupdate -group lsu /testbench/dut/core/lsu/SelHPTW add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/SelHPTW
add wave -noupdate -group lsu /testbench/dut/core/lsu/InterlockStall add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/InterlockStall
add wave -noupdate -group lsu /testbench/dut/core/lsu/LSUStallM add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/LSUStallM
add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM
add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataM add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataM
add wave -noupdate -group lsu /testbench/dut/core/lsu/WriteDataM add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/WriteDataM
add wave -noupdate -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr
add wave -noupdate -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState add wave -noupdate -expand -group lsu -expand -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/BusStall add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/BusStall
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusRead
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusWrite
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusAdr
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusAck
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusHRDATA
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusHWDATA
add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr
add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD} add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD}
add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay} add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay}
add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid} add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty} add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag} add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay} add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid} add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty} add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag} add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW add wave -noupdate -expand -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord
add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay
add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal
add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite
add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress
add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable
add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent
add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed
add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM add wave -noupdate -expand -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM add wave -noupdate -expand -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF
add wave -noupdate -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState add wave -noupdate -expand -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE
add wave -noupdate -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF
add wave -noupdate -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HCLK
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HSELPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HADDR
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWRITE
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADY
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HTRANS
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWDATA
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HRESPPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADYPLIC
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intClaim add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intClaim
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intEn add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intEn
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intInProgress add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intInProgress
@ -392,41 +376,12 @@ add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/pl
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/max_priority_with_irqs add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/max_priority_with_irqs
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/irqs_at_max_priority add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/irqs_at_max_priority
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/threshMask add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/threshMask
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HCLK
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HSELGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HADDR
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWDATA
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWRITE
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADY
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HTRANS
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HRESPGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADYGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsIn add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsIn
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsOut add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsOut
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsEn add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsEn
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOIntr add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOIntr
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HCLK
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HSELCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HADDR
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWRITE
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWDATA
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADY
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HTRANS
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HRESPCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADYCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HCLK
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HRESETn
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HSELUART
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HADDR
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HWRITE
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HWDATA
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HREADUART
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HRESPUART
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HREADYUART
add wave -noupdate -group uart -expand -group Registers -expand /testbench/dut/uncore/uart/uart/u/LSR add wave -noupdate -group uart -expand -group Registers -expand /testbench/dut/uncore/uart/uart/u/LSR
add wave -noupdate -group uart -expand -group Registers /testbench/dut/uncore/uart/uart/u/MCR add wave -noupdate -group uart -expand -group Registers /testbench/dut/uncore/uart/uart/u/MCR
add wave -noupdate -group uart -expand -group Registers /testbench/dut/uncore/uart/uart/u/MSR add wave -noupdate -group uart -expand -group Registers /testbench/dut/uncore/uart/uart/u/MSR
@ -489,6 +444,57 @@ add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and
add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr
add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck
add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusWriteData add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusWriteData
add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/VictimWay
add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SetDirtyWay
add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SetValidWay
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite
add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/ITLBMissF add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/ITLBMissF
add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress
@ -506,11 +512,9 @@ add wave -noupdate -group {Performance Counters} -expand -group ICACHE -label {I
add wave -noupdate -group {Performance Counters} -expand -group ICACHE -label {ICACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]} add wave -noupdate -group {Performance Counters} -expand -group ICACHE -label {ICACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]}
add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]} add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]}
add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]} add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]}
add wave -noupdate /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/NextPTE
add wave -noupdate /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/UpdatePTE
TreeUpdate [SetDefaultTree] TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 5} {0 ns} 0} WaveRestoreCursors {{Cursor 2} {989221 ns} 1} {{Cursor 3} {999815 ns} 1} {{Cursor 4} {311315 ns} 0}
quietly wave cursor active 1 quietly wave cursor active 3
configure wave -namecolwidth 250 configure wave -namecolwidth 250
configure wave -valuecolwidth 314 configure wave -valuecolwidth 314
configure wave -justifyvalue left configure wave -justifyvalue left
@ -525,4 +529,4 @@ configure wave -griddelta 40
configure wave -timeline 0 configure wave -timeline 0
configure wave -timelineunits ns configure wave -timelineunits ns
update update
WaveRestoreZoom {0 ns} {208 ns} WaveRestoreZoom {311178 ns} {311464 ns}

View File

@ -30,7 +30,7 @@
`include "wally-config.vh" `include "wally-config.vh"
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTERVAL, DCACHE) ( module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) (
input logic clk, input logic clk,
input logic reset, input logic reset,
// cpu side // cpu side
@ -38,14 +38,11 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
input logic [1:0] RW, input logic [1:0] RW,
input logic [1:0] Atomic, input logic [1:0] Atomic,
input logic FlushCache, input logic FlushCache,
input logic InvalidateCacheM, input logic InvalidateCache,
input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits.
input logic [`PA_BITS-1:0] PAdr, // physical address input logic [`PA_BITS-1:0] PAdr, // physical address
input logic [(`XLEN-1)/8:0] ByteMask, input logic [(WORDLEN-1)/8:0] ByteMask,
input logic [`XLEN-1:0] FinalWriteData, input logic [WORDLEN-1:0] FinalWriteData,
input logic [`FLEN-1:0] FWriteDataM,
input logic FLoad2,
input logic FpLoadStoreM,
output logic CacheCommitted, output logic CacheCommitted,
output logic CacheStall, output logic CacheStall,
// to performance counters to cpu // to performance counters to cpu
@ -60,7 +57,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
output logic CacheFetchLine, output logic CacheFetchLine,
output logic CacheWriteLine, output logic CacheWriteLine,
input logic CacheBusAck, input logic CacheBusAck,
input logic [LOGWPL-1:0] WordCount, input logic [LOGBWPL-1:0] WordCount,
input logic LSUBusWriteCrit, input logic LSUBusWriteCrit,
output logic [`PA_BITS-1:0] CacheBusAdr, output logic [`PA_BITS-1:0] CacheBusAdr,
input logic [LINELEN-1:0] CacheBusWriteData, input logic [LINELEN-1:0] CacheBusWriteData,
@ -72,7 +69,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
localparam SETLEN = $clog2(NUMLINES); localparam SETLEN = $clog2(NUMLINES);
localparam SETTOP = SETLEN+OFFSETLEN; localparam SETTOP = SETLEN+OFFSETLEN;
localparam TAGLEN = `PA_BITS - SETTOP; localparam TAGLEN = `PA_BITS - SETTOP;
localparam WORDSPERLINE = LINELEN/`XLEN; localparam WORDSPERLINE = LINELEN/WORDLEN;
localparam FlushAdrThreshold = NUMLINES - 1; localparam FlushAdrThreshold = NUMLINES - 1;
logic SelAdr; logic SelAdr;
@ -81,7 +78,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
logic ClearValid; logic ClearValid;
logic ClearDirty; logic ClearDirty;
logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0];
logic [NUMWAYS-1:0] HitWay, HitWaySaved, HitWayFinal; logic [NUMWAYS-1:0] HitWay;
logic CacheHit; logic CacheHit;
logic SetDirty; logic SetDirty;
logic SetValid; logic SetValid;
@ -107,9 +104,17 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
logic [NUMWAYS-1:0] SelectedWay; logic [NUMWAYS-1:0] SelectedWay;
logic [NUMWAYS-1:0] SetValidWay, ClearValidWay, SetDirtyWay, ClearDirtyWay; logic [NUMWAYS-1:0] SetValidWay, ClearValidWay, SetDirtyWay, ClearDirtyWay;
logic [1:0] CacheRW, CacheAtomic; logic [1:0] CacheRW, CacheAtomic;
logic [LINELEN-1:0] ReadDataLine; logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache;
logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;
logic save, restore; logic SelBusBuffer;
logic SRAMEnable;
localparam LOGLLENBYTES = $clog2(WORDLEN/8);
localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN;
localparam LOGCWPL = $clog2(CACHEWORDSPERLINE);
logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded;
logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, LineByteMux;
genvar index;
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Read Path // Read Path
@ -123,51 +128,56 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
// Array of cache ways, along with victim, hit, dirty, and read merging logic // Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN)
CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2, CacheWays[NUMWAYS-1:0](.clk, .reset, .ce(SRAMEnable), .RAdr, .PAdr, .CacheWriteData, .LineByteMask,
.SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay, .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay,
.Invalidate(InvalidateCacheM)); .Invalidate(InvalidateCache));
if(NUMWAYS > 1) begin:vict if(NUMWAYS > 1) begin:vict
cachereplacementpolicy #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cachereplacementpolicy( cachereplacementpolicy #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cachereplacementpolicy(
.clk, .reset, .HitWay(HitWayFinal), .VictimWay, .RAdr, .LRUWriteEn); .clk, .reset, .HitWay, .VictimWay, .RAdr, .LRUWriteEn);
end else assign VictimWay = 1'b1; // one hot. end else assign VictimWay = 1'b1; // one hot.
assign CacheHit = | HitWay; assign CacheHit = | HitWay;
assign VictimDirty = | VictimDirtyWay; assign VictimDirty = | VictimDirtyWay;
// ReadDataLineWay is a 2d array of cache line len by number of ways. // ReadDataLineWay is a 2d array of cache line len by number of ways.
// Need to OR together each way in a bitwise manner. // Need to OR together each way in a bitwise manner.
// Final part of the AO Mux. First is the AND in the cacheway. // Final part of the AO Mux. First is the AND in the cacheway.
or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLine)); or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineCache));
or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag)); or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag));
// Because of the sram clocked read when the ieu is stalled the read data maybe lost.
// There are two ways to resolve. 1. We can replay the read of the sram or we can save
// the data. Replay is eaiser but creates a longer critical path.
// save/restore only wayhit and readdata.
if(!`REPLAY) begin
flopenr #(NUMWAYS) wayhitsavereg(clk, save, reset, HitWay, HitWaySaved);
mux2 #(NUMWAYS) saverestoremux(HitWay, HitWaySaved, restore, HitWayFinal);
end else assign HitWayFinal = HitWay;
// like to fix this. // like to fix this.
if(DCACHE) if(DCACHE)
mux2 #(LOGWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]), mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]),
.d1(WordCount), .s(LSUBusWriteCrit), .d1(WordCount), .s(LSUBusWriteCrit),
.y(WordOffsetAddr)); .y(WordOffsetAddr));
else assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]; else assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)];
subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL, LOGWPL) subcachelineread( mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, CacheBusWriteData, SelBusBuffer, ReadDataLine);
.clk, .reset, .PAdr(WordOffsetAddr), .save, .restore,
subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL) subcachelineread(
.PAdr(WordOffsetAddr),
.ReadDataLine, .ReadDataWord); .ReadDataLine, .ReadDataWord);
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Write Path: Write data and address. Muxes between writes from bus and writes from CPU. // Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
if (`LLEN>`XLEN) logic [LINELEN-1:0] FinalWriteDataDup;
mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), assign FinalWriteDataDup = {WORDSPERLINE{FinalWriteData}};
.d1({WORDSPERLINE/2{FWriteDataM}}), .d2(CacheBusWriteData), .s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData));
else onehotdecoder #(LOGCWPL) adrdec(
mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), .bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded));
.d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); for(index = 0; index < 2**LOGCWPL; index++) begin
assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0;
end
assign LineByteMux = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1.
assign LineByteMask = ~SetValid & ~SetDirty ? '0 : ~SetValid & SetDirty ? DemuxedByteMask : '1; // if store hit only enable the word and subword bytes, else write all bytes.
for(index = 0; index < LINELEN/8; index++) begin
mux2 #(8) WriteDataMux(.d0(FinalWriteDataDup[8*index+7:8*index]),
.d1(CacheBusWriteData[8*index+7:8*index]), .s(LineByteMux[index]), .y(CacheWriteData[8*index+7:8*index]));
end
//mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
// .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData));
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d2({VictimTag, FlushAdr, {OFFSETLEN{1'b0}}}), .d2({VictimTag, FlushAdr, {OFFSETLEN{1'b0}}}),
@ -191,7 +201,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Write Path: Write Enables // Write Path: Write Enables
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
mux3 #(NUMWAYS) selectwaymux(HitWayFinal, VictimWay, FlushWay, mux3 #(NUMWAYS) selectwaymux(HitWay, VictimWay, FlushWay,
{SelFlush, SetValid}, SelectedWay); {SelFlush, SetValid}, SelectedWay);
assign SetValidWay = SetValid ? SelectedWay : '0; assign SetValidWay = SetValid ? SelectedWay : '0;
assign ClearValidWay = ClearValid ? SelectedWay : '0; assign ClearValidWay = ClearValid ? SelectedWay : '0;
@ -210,7 +220,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
.ClearValid, .ClearDirty, .SetDirty, .ClearValid, .ClearDirty, .SetDirty,
.SetValid, .SelEvict, .SelFlush, .SetValid, .SelEvict, .SelFlush,
.FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst, .FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst,
.FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelBusBuffer,
.save, .restore, .InvalidateCache,
.SRAMEnable,
.LRUWriteEn); .LRUWriteEn);
endmodule endmodule

View File

@ -32,50 +32,51 @@
module cachefsm module cachefsm
(input logic clk, (input logic clk,
input logic reset, input logic reset,
// inputs from IEU // inputs from IEU
input logic [1:0] CacheRW, input logic [1:0] CacheRW,
input logic [1:0] CacheAtomic, input logic [1:0] CacheAtomic,
input logic FlushCache, input logic FlushCache,
input logic InvalidateCache,
// hazard inputs // hazard inputs
input logic CPUBusy, input logic CPUBusy,
// interlock fsm // interlock fsm
input logic IgnoreRequestTLB, input logic IgnoreRequestTLB,
input logic IgnoreRequestTrapM, input logic IgnoreRequestTrapM,
input logic TrapM, input logic TrapM,
// Bus inputs // Bus inputs
input logic CacheBusAck, input logic CacheBusAck,
// dcache internals // dcache internals
input logic CacheHit, input logic CacheHit,
input logic VictimDirty, input logic VictimDirty,
input logic FlushAdrFlag, input logic FlushAdrFlag,
input logic FlushWayFlag, input logic FlushWayFlag,
// hazard outputs // hazard outputs
output logic CacheStall, output logic CacheStall,
// counter outputs // counter outputs
output logic CacheMiss, output logic CacheMiss,
output logic CacheAccess, output logic CacheAccess,
// Bus outputs // Bus outputs
output logic CacheCommitted, output logic CacheCommitted,
output logic CacheWriteLine, output logic CacheWriteLine,
output logic CacheFetchLine, output logic CacheFetchLine,
// dcache internals // dcache internals
output logic SelAdr, output logic SelAdr,
output logic ClearValid, output logic ClearValid,
output logic ClearDirty, output logic ClearDirty,
output logic SetDirty, output logic SetDirty,
output logic SetValid, output logic SetValid,
output logic SelEvict, output logic SelEvict,
output logic LRUWriteEn, output logic LRUWriteEn,
output logic SelFlush, output logic SelFlush,
output logic FlushAdrCntEn, output logic FlushAdrCntEn,
output logic FlushWayCntEn, output logic FlushWayCntEn,
output logic FlushAdrCntRst, output logic FlushAdrCntRst,
output logic FlushWayCntRst, output logic FlushWayCntRst,
output logic save, output logic SelBusBuffer,
output logic restore); output logic SRAMEnable);
logic resetDelay; logic resetDelay;
logic AMO; logic AMO;
@ -87,20 +88,13 @@ module cachefsm
typedef enum logic [3:0] {STATE_READY, // hit states typedef enum logic [3:0] {STATE_READY, // hit states
// miss states // miss states
STATE_MISS_FETCH_WDV, STATE_MISS_FETCH_WDV,
STATE_MISS_FETCH_DONE,
STATE_MISS_EVICT_DIRTY, STATE_MISS_EVICT_DIRTY,
STATE_MISS_WRITE_CACHE_LINE, STATE_MISS_WRITE_CACHE_LINE,
STATE_MISS_READ_WORD,
STATE_MISS_READ_WORD_DELAY,
STATE_MISS_WRITE_WORD,
// cpu stalled replay/restore state
STATE_CPU_BUSY,
// flush cache // flush cache
STATE_FLUSH, STATE_FLUSH,
STATE_FLUSH_CHECK, STATE_FLUSH_CHECK,
STATE_FLUSH_INCR, STATE_FLUSH_INCR,
STATE_FLUSH_WRITE_BACK, STATE_FLUSH_WRITE_BACK} statetype;
STATE_FLUSH_CLEAR_DIRTY} statetype;
(* mark_debug = "true" *) statetype CurrState, NextState; (* mark_debug = "true" *) statetype CurrState, NextState;
logic IgnoreRequest; logic IgnoreRequest;
@ -115,7 +109,7 @@ module cachefsm
assign DoRead = CacheRW[1] & ~IgnoreRequest; assign DoRead = CacheRW[1] & ~IgnoreRequest;
assign DoWrite = CacheRW[0] & ~IgnoreRequest; assign DoWrite = CacheRW[0] & ~IgnoreRequest;
assign DoAnyMiss = (DoAMO | DoRead | DoWrite) & ~CacheHit; assign DoAnyMiss = (DoAMO | DoRead | DoWrite) & ~CacheHit & ~InvalidateCache;
assign DoAnyUpdateHit = (DoAMO | DoWrite) & CacheHit; assign DoAnyUpdateHit = (DoAMO | DoWrite) & CacheHit;
assign DoAnyHit = DoAnyUpdateHit | (DoRead & CacheHit); assign DoAnyHit = DoAnyUpdateHit | (DoRead & CacheHit);
assign FlushFlag = FlushAdrFlag & FlushWayFlag; assign FlushFlag = FlushAdrFlag & FlushWayFlag;
@ -136,38 +130,31 @@ module cachefsm
always_comb begin always_comb begin
NextState = STATE_READY; NextState = STATE_READY;
case (CurrState) case (CurrState)
STATE_READY: if(IgnoreRequest) NextState = STATE_READY; STATE_READY: if(IgnoreRequest | InvalidateCache) NextState = STATE_READY;
else if(DoFlush) NextState = STATE_FLUSH; else if(DoFlush) NextState = STATE_FLUSH;
else if(DoAnyHit & CPUBusy) NextState = STATE_CPU_BUSY; // Delayed LRU update. Cannot check if victim line is dirty on this cycle.
else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; // change // To optimize do the fetch first, then eviction if necessary.
else NextState = STATE_READY; else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV;
STATE_MISS_FETCH_WDV: if(CacheBusAck) NextState = STATE_MISS_FETCH_DONE; else NextState = STATE_READY;
else NextState = STATE_MISS_FETCH_WDV; STATE_MISS_FETCH_WDV: if(CacheBusAck & ~VictimDirty) NextState = STATE_MISS_WRITE_CACHE_LINE;
STATE_MISS_FETCH_DONE: if(VictimDirty) NextState = STATE_MISS_EVICT_DIRTY; else if(CacheBusAck & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY;
else NextState = STATE_MISS_WRITE_CACHE_LINE; else NextState = STATE_MISS_FETCH_WDV;
STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_MISS_READ_WORD; STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY;
STATE_MISS_READ_WORD: if(CacheRW[0] & ~AMO) NextState = STATE_MISS_WRITE_WORD; STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE;
else NextState = STATE_MISS_READ_WORD_DELAY; else NextState = STATE_MISS_EVICT_DIRTY;
STATE_MISS_READ_WORD_DELAY: if(CPUBusy) NextState = STATE_CPU_BUSY; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack.
else NextState = STATE_READY; STATE_FLUSH: NextState = STATE_FLUSH_CHECK;
STATE_MISS_WRITE_WORD: if(CPUBusy) NextState = STATE_CPU_BUSY; STATE_FLUSH_CHECK: if(VictimDirty) NextState = STATE_FLUSH_WRITE_BACK;
else NextState = STATE_READY; else if(FlushFlag) NextState = STATE_READY;
STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; else if(FlushWayFlag) NextState = STATE_FLUSH_INCR;
else NextState = STATE_MISS_EVICT_DIRTY; else NextState = STATE_FLUSH_CHECK;
STATE_CPU_BUSY: if(CPUBusy) NextState = STATE_CPU_BUSY; STATE_FLUSH_INCR: NextState = STATE_FLUSH_CHECK;
else NextState = STATE_READY; STATE_FLUSH_WRITE_BACK: if(CacheBusAck) begin
STATE_FLUSH: NextState = STATE_FLUSH_CHECK; if(FlushFlag) NextState = STATE_READY;
STATE_FLUSH_CHECK: if(VictimDirty) NextState = STATE_FLUSH_WRITE_BACK; else if(FlushWayFlag) NextState = STATE_FLUSH_INCR;
else if(FlushFlag) NextState = STATE_READY; else NextState = STATE_FLUSH_CHECK;
else if(FlushWayFlag) NextState = STATE_FLUSH_INCR; end else NextState = STATE_FLUSH_WRITE_BACK;
else NextState = STATE_FLUSH_CHECK; default: NextState = STATE_READY;
STATE_FLUSH_INCR: NextState = STATE_FLUSH_CHECK;
STATE_FLUSH_WRITE_BACK: if(CacheBusAck) NextState = STATE_FLUSH_CLEAR_DIRTY;
else NextState = STATE_FLUSH_WRITE_BACK;
STATE_FLUSH_CLEAR_DIRTY: if(FlushFlag) NextState = STATE_READY;
else if(FlushWayFlag) NextState = STATE_FLUSH_INCR;
else NextState = STATE_FLUSH_CHECK;
default: NextState = STATE_READY;
endcase endcase
end end
@ -175,63 +162,48 @@ module cachefsm
assign CacheCommitted = CurrState != STATE_READY; assign CacheCommitted = CurrState != STATE_READY;
assign CacheStall = (CurrState == STATE_READY & (DoFlush | DoAnyMiss)) | assign CacheStall = (CurrState == STATE_READY & (DoFlush | DoAnyMiss)) |
(CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_FETCH_WDV) |
(CurrState == STATE_MISS_FETCH_DONE) |
(CurrState == STATE_MISS_EVICT_DIRTY) | (CurrState == STATE_MISS_EVICT_DIRTY) |
(CurrState == STATE_MISS_WRITE_CACHE_LINE) | (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write.
(CurrState == STATE_MISS_READ_WORD) |
(CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH) |
(CurrState == STATE_FLUSH_CHECK & ~(FlushFlag)) | (CurrState == STATE_FLUSH_CHECK & ~(FlushFlag)) |
(CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_INCR) |
(CurrState == STATE_FLUSH_WRITE_BACK) | (CurrState == STATE_FLUSH_WRITE_BACK & ~(FlushFlag) & CacheBusAck);
(CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushFlag));
// write enables internal to cache // write enables internal to cache
assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE;
assign SetDirty = (CurrState == STATE_READY & DoAnyUpdateHit) | assign SetDirty = (CurrState == STATE_READY & DoAnyUpdateHit) |
(CurrState == STATE_MISS_READ_WORD_DELAY & AMO) | (CurrState == STATE_MISS_WRITE_CACHE_LINE & (AMO | CacheRW[0]));
(CurrState == STATE_MISS_WRITE_WORD);
assign ClearValid = '0; assign ClearValid = '0;
assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE) | assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) |
(CurrState == STATE_FLUSH_CLEAR_DIRTY); (CurrState == STATE_FLUSH_WRITE_BACK & CacheBusAck);
assign LRUWriteEn = (CurrState == STATE_READY & DoAnyHit) | assign LRUWriteEn = (CurrState == STATE_READY & DoAnyHit) |
(CurrState == STATE_MISS_READ_WORD_DELAY) | (CurrState == STATE_MISS_WRITE_CACHE_LINE);
(CurrState == STATE_MISS_WRITE_WORD);
// Flush and eviction controls // Flush and eviction controls
assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY); assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY) |
(CurrState == STATE_MISS_FETCH_WDV & CacheBusAck & VictimDirty);
assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) |
(CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK);
(CurrState == STATE_FLUSH_CLEAR_DIRTY);
assign FlushWayAndNotAdrFlag = FlushWayFlag & ~FlushAdrFlag; assign FlushWayAndNotAdrFlag = FlushWayFlag & ~FlushAdrFlag;
assign FlushAdrCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & FlushWayAndNotAdrFlag) | assign FlushAdrCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & FlushWayAndNotAdrFlag) |
(CurrState == STATE_FLUSH_CLEAR_DIRTY & FlushWayAndNotAdrFlag); (CurrState == STATE_FLUSH_WRITE_BACK & FlushWayAndNotAdrFlag & CacheBusAck);
assign FlushWayCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & ~(FlushFlag)) | assign FlushWayCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & ~(FlushFlag)) |
(CurrState == STATE_FLUSH_CLEAR_DIRTY & ~FlushFlag); (CurrState == STATE_FLUSH_WRITE_BACK & ~FlushFlag & CacheBusAck);
assign FlushAdrCntRst = (CurrState == STATE_READY); assign FlushAdrCntRst = (CurrState == STATE_READY);
assign FlushWayCntRst = (CurrState == STATE_READY) | (CurrState == STATE_FLUSH_INCR); assign FlushWayCntRst = (CurrState == STATE_READY) | (CurrState == STATE_FLUSH_INCR);
// Bus interface controls // Bus interface controls
assign CacheFetchLine = (CurrState == STATE_READY & DoAnyMiss); assign CacheFetchLine = (CurrState == STATE_READY & DoAnyMiss);
assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_DONE & VictimDirty) | assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_WDV & CacheBusAck & VictimDirty) |
(CurrState == STATE_FLUSH_CHECK & VictimDirty); (CurrState == STATE_FLUSH_CHECK & VictimDirty);
// handle cpu stall.
assign restore = ((CurrState == STATE_CPU_BUSY)) & ~`REPLAY;
assign save = ((CurrState == STATE_READY & DoAnyHit & CPUBusy) |
(CurrState == STATE_MISS_READ_WORD_DELAY & (AMO | CacheRW[1]) & CPUBusy) |
(CurrState == STATE_MISS_WRITE_WORD & DoWrite & CPUBusy)) & ~`REPLAY;
// **** can this be simplified? // **** can this be simplified?
assign SelAdr = (CurrState == STATE_READY & (IgnoreRequestTLB & ~TrapM)) | // Ignore Request is needed on TLB miss. assign SelAdr = (CurrState == STATE_READY & (IgnoreRequestTLB & ~TrapM)) | // Ignore Request is needed on TLB miss.
// use the raw requests as we don't want IgnoreRequestTrapM in the critical path // use the raw requests as we don't want IgnoreRequestTrapM in the critical path
(CurrState == STATE_READY & ((AMO | CacheRW[0]) & CacheHit)) | // changes if store delay hazard removed (CurrState == STATE_READY & ((AMO | CacheRW[0]) & CacheHit)) | // changes if store delay hazard removed
(CurrState == STATE_READY & (CacheRW[1] & CacheHit) & (CPUBusy & `REPLAY)) | (CurrState == STATE_READY & (DoAnyMiss)) |
(CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_FETCH_WDV) |
(CurrState == STATE_MISS_FETCH_DONE) |
(CurrState == STATE_MISS_EVICT_DIRTY) | (CurrState == STATE_MISS_EVICT_DIRTY) |
(CurrState == STATE_MISS_WRITE_CACHE_LINE) | (CurrState == STATE_MISS_WRITE_CACHE_LINE) |
(CurrState == STATE_MISS_READ_WORD) |
(CurrState == STATE_MISS_READ_WORD_DELAY & (AMO | (CPUBusy & `REPLAY))) |
(CurrState == STATE_MISS_WRITE_WORD) |
(CurrState == STATE_CPU_BUSY & (CPUBusy & `REPLAY)) |
resetDelay; resetDelay;
assign SelBusBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE;
assign SRAMEnable = (CurrState == STATE_READY & ~CPUBusy | CacheStall) | (CurrState != STATE_READY) | reset;
endmodule // cachefsm endmodule // cachefsm

View File

@ -33,12 +33,12 @@
module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
parameter OFFSETLEN = 5, parameter INDEXLEN = 9, parameter DIRTY_BITS = 1) ( parameter OFFSETLEN = 5, parameter INDEXLEN = 9, parameter DIRTY_BITS = 1) (
input logic clk, input logic clk,
input logic ce,
input logic reset, input logic reset,
input logic [$clog2(NUMLINES)-1:0] RAdr, input logic [$clog2(NUMLINES)-1:0] RAdr,
input logic [`PA_BITS-1:0] PAdr, input logic [`PA_BITS-1:0] PAdr,
input logic [LINELEN-1:0] CacheWriteData, input logic [LINELEN-1:0] CacheWriteData,
input logic FLoad2,
input logic SetValidWay, input logic SetValidWay,
input logic ClearValidWay, input logic ClearValidWay,
input logic SetDirtyWay, input logic SetDirtyWay,
@ -48,16 +48,19 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
input logic VictimWay, input logic VictimWay,
input logic FlushWay, input logic FlushWay,
input logic Invalidate, input logic Invalidate,
input logic [(`XLEN-1)/8:0] ByteMask, // input logic [(`XLEN-1)/8:0] ByteMask,
input logic [LINELEN/8-1:0] LineByteMask,
output logic [LINELEN-1:0] ReadDataLineWay, output logic [LINELEN-1:0] ReadDataLineWay,
output logic HitWay, output logic HitWay,
output logic VictimDirtyWay, output logic VictimDirtyWay,
output logic [TAGLEN-1:0] VictimTagWay); output logic [TAGLEN-1:0] VictimTagWay);
localparam WORDSPERLINE = LINELEN/`XLEN; localparam integer WORDSPERLINE = LINELEN/`XLEN;
localparam integer BYTESPERLINE = LINELEN/8;
localparam LOGWPL = $clog2(WORDSPERLINE); localparam LOGWPL = $clog2(WORDSPERLINE);
localparam LOGXLENBYTES = $clog2(`XLEN/8); localparam LOGXLENBYTES = $clog2(`XLEN/8);
localparam integer BYTESPERWORD = `XLEN/8;
logic [NUMLINES-1:0] ValidBits; logic [NUMLINES-1:0] ValidBits;
logic [NUMLINES-1:0] DirtyBits; logic [NUMLINES-1:0] DirtyBits;
@ -69,29 +72,23 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
logic SelTag; logic SelTag;
logic [$clog2(NUMLINES)-1:0] RAdrD; logic [$clog2(NUMLINES)-1:0] RAdrD;
logic [2**LOGWPL-1:0] MemPAdrDecoded; logic [2**LOGWPL-1:0] MemPAdrDecoded;
logic [LINELEN/`XLEN-1:0] SelectedWriteWordEn; logic SelectedWriteWordEn;
logic [(`XLEN-1)/8:0] FinalByteMask; // logic [WORDSPERLINE-1:0] SelectedWriteWordEn;
// logic [(`XLEN-1)/8:0] FinalByteMask;
logic [LINELEN/8-1:0] FinalByteMask;
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Write Enable demux // Write Enable demux
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
if(`LLEN>`XLEN)begin
logic [2**LOGWPL-1:0] MemPAdrDecodedtmp;
onehotdecoder #(LOGWPL) adrdec(
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp));
assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0};
end else
onehotdecoder #(LOGWPL) adrdec(
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
// If writing the whole line set all write enables to 1, else only set the correct word. // If writing the whole line set all write enables to 1, else only set the correct word.
assign SelectedWriteWordEn = SetValidWay ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND assign SelectedWriteWordEn = SetValidWay | SetDirtyWay;// ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND
assign FinalByteMask = SetValidWay ? '1 : ByteMask; // OR assign FinalByteMask = SetValidWay ? '1 : LineByteMask; // OR
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Tag Array // Tag Array
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce,
.Adr(RAdr), .ReadData(ReadTag), .ByteMask('1), .Adr(RAdr), .ReadData(ReadTag), .ByteMask('1),
.CacheWriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(SetValidWay)); .CacheWriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(SetValidWay));
@ -106,11 +103,18 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
genvar words; genvar words;
for(words = 0; words < LINELEN/`XLEN; words++) begin: word
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk, .Adr(RAdr), localparam integer SRAMLEN = 128;
.ReadData(ReadDataLine[(words+1)*`XLEN-1:words*`XLEN] ), localparam integer NUMSRAM = LINELEN/SRAMLEN;
.CacheWriteData(CacheWriteData[(words+1)*`XLEN-1:words*`XLEN]), localparam integer SRAMLENINBYTES = SRAMLEN/8;
.WriteEnable(SelectedWriteWordEn[words]), .ByteMask(FinalByteMask)); localparam integer LOGNUMSRAM = $clog2(NUMSRAM);
for(words = 0; words < NUMSRAM; words++) begin: word
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce, .Adr(RAdr),
.ReadData(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]),
.CacheWriteData(CacheWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]),
//.WriteEnable(1'b1), .ByteMask(SRAMLineByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words]));
.WriteEnable(SelectedWriteWordEn), .ByteMask(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words]));
end end
// AND portion of distributed read multiplexers // AND portion of distributed read multiplexers
@ -126,7 +130,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
else if (SetValidWay) ValidBits[RAdr] <= #1 1'b1; else if (SetValidWay) ValidBits[RAdr] <= #1 1'b1;
else if (ClearValidWay) ValidBits[RAdr] <= #1 1'b0; else if (ClearValidWay) ValidBits[RAdr] <= #1 1'b0;
end end
flop #($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD); flopen #($clog2(NUMLINES)) RAdrDelayReg(clk, ce, RAdr, RAdrD);
assign Valid = ValidBits[RAdrD]; assign Valid = ValidBits[RAdrD];
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -37,6 +37,7 @@
module sram1p1rw #(parameter DEPTH=128, WIDTH=256) ( module sram1p1rw #(parameter DEPTH=128, WIDTH=256) (
input logic clk, input logic clk,
input logic ce,
input logic [$clog2(DEPTH)-1:0] Adr, input logic [$clog2(DEPTH)-1:0] Adr,
input logic [WIDTH-1:0] CacheWriteData, input logic [WIDTH-1:0] CacheWriteData,
input logic WriteEnable, input logic WriteEnable,
@ -46,13 +47,14 @@ module sram1p1rw #(parameter DEPTH=128, WIDTH=256) (
logic [WIDTH-1:0] StoredData[DEPTH-1:0]; logic [WIDTH-1:0] StoredData[DEPTH-1:0];
logic [$clog2(DEPTH)-1:0] AdrD; logic [$clog2(DEPTH)-1:0] AdrD;
always_ff @(posedge clk) AdrD <= Adr; always_ff @(posedge clk) if(ce) AdrD <= Adr;
genvar index; genvar index;
if (`USE_SRAM == 1) begin if (`USE_SRAM == 1) begin
// 64 x 128-bit SRAM // 64 x 128-bit SRAM
// check if the size is ok, complain if not***
logic [WIDTH-1:0] BitWriteMask; logic [WIDTH-1:0] BitWriteMask;
for (index=0; index < WIDTH; index++) for (index=0; index < WIDTH; index++)
assign BitWriteMask[index] = ByteMask[index/8]; assign BitWriteMask[index] = ByteMask[index/8];
@ -65,13 +67,13 @@ module sram1p1rw #(parameter DEPTH=128, WIDTH=256) (
end else begin end else begin
if (WIDTH%8 != 0) // handle msbs if not a multiple of 8 if (WIDTH%8 != 0) // handle msbs if not a multiple of 8
always_ff @(posedge clk) always_ff @(posedge clk)
if (WriteEnable & ByteMask[WIDTH/8]) if (ce & WriteEnable & ByteMask[WIDTH/8])
StoredData[Adr][WIDTH-1:WIDTH-WIDTH%8] <= #1 StoredData[Adr][WIDTH-1:WIDTH-WIDTH%8] <= #1
CacheWriteData[WIDTH-1:WIDTH-WIDTH%8]; CacheWriteData[WIDTH-1:WIDTH-WIDTH%8];
for(index = 0; index < WIDTH/8; index++) for(index = 0; index < WIDTH/8; index++)
always_ff @(posedge clk) always_ff @(posedge clk)
if(WriteEnable & ByteMask[index]) if(ce & WriteEnable & ByteMask[index])
StoredData[Adr][index*8 +: 8] <= #1 CacheWriteData[index*8 +: 8]; StoredData[Adr][index*8 +: 8] <= #1 CacheWriteData[index*8 +: 8];
assign ReadData = StoredData[AdrD]; assign ReadData = StoredData[AdrD];

View File

@ -30,11 +30,8 @@
`include "wally-config.vh" `include "wally-config.vh"
module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL, LOGWPL)( module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL)(
input logic clk,
input logic reset,
input logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1 : 0] PAdr, input logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1 : 0] PAdr,
input logic save, restore,
input logic [LINELEN-1:0] ReadDataLine, input logic [LINELEN-1:0] ReadDataLine,
output logic [WORDLEN-1:0] ReadDataWord); output logic [WORDLEN-1:0] ReadDataWord);
@ -43,7 +40,6 @@ module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL, LOGWPL)(
localparam PADLEN = WORDLEN-MUXINTERVAL; localparam PADLEN = WORDLEN-MUXINTERVAL;
logic [LINELEN+(WORDLEN-MUXINTERVAL)-1:0] ReadDataLinePad; logic [LINELEN+(WORDLEN-MUXINTERVAL)-1:0] ReadDataLinePad;
logic [WORDLEN-1:0] ReadDataLineSets [(LINELEN/MUXINTERVAL)-1:0]; logic [WORDLEN-1:0] ReadDataLineSets [(LINELEN/MUXINTERVAL)-1:0];
logic [WORDLEN-1:0] ReadDataWordRaw, ReadDataWordSaved;
if (PADLEN > 0) begin if (PADLEN > 0) begin
logic [PADLEN-1:0] Pad; logic [PADLEN-1:0] Pad;
@ -56,11 +52,5 @@ module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL, LOGWPL)(
assign ReadDataLineSets[index] = ReadDataLinePad[(index*MUXINTERVAL)+WORDLEN-1: (index*MUXINTERVAL)]; assign ReadDataLineSets[index] = ReadDataLinePad[(index*MUXINTERVAL)+WORDLEN-1: (index*MUXINTERVAL)];
end end
// variable input mux // variable input mux
// *** maybe remove REPLAY config later after deciding which way is best assign ReadDataWord = ReadDataLineSets[PAdr];
assign ReadDataWordRaw = ReadDataLineSets[PAdr];
if(!`REPLAY) begin
flopen #(WORDLEN) cachereaddatasavereg(clk, save, ReadDataWordRaw, ReadDataWordSaved);
mux2 #(WORDLEN) readdatasaverestoremux(ReadDataWordRaw, ReadDataWordSaved,
restore, ReadDataWord);
end else assign ReadDataWord = ReadDataWordRaw;
endmodule endmodule

View File

@ -49,7 +49,6 @@ module ahblite (
input logic [2:0] IFUBurstType, input logic [2:0] IFUBurstType,
input logic [1:0] IFUTransType, input logic [1:0] IFUTransType,
input logic IFUTransComplete, input logic IFUTransComplete,
input logic [(`XLEN-1)/8:0] ByteMaskM,
// Signals from Data Cache // Signals from Data Cache
input logic [`PA_BITS-1:0] LSUBusAdr, input logic [`PA_BITS-1:0] LSUBusAdr,
@ -157,8 +156,7 @@ module ahblite (
assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise
assign HMASTLOCK = 0; // no locking supported assign HMASTLOCK = 0; // no locking supported
assign HWRITE = (NextBusState == MEMWRITE); assign HWRITE = (NextBusState == MEMWRITE);
//assign HWSTRB = ByteMaskM; // Byte mask for HWSTRB
// Byte mask for HWSTRB
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(HWSTRB)); swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(HWSTRB));
// delay write data by one cycle for // delay write data by one cycle for

View File

@ -60,10 +60,11 @@ module cvtshiftcalc(
// - otherwise: // - otherwise:
// | LzcInM | 0's if nessisary | // | LzcInM | 0's if nessisary |
// change to int shift to the left one // change to int shift to the left one
assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} :
CvtResDenormUf ? {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}} : always_comb
{CvtLzcIn, {`NF+1{1'b0}}}; if (ToInt) CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}};
else if (CvtResDenormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}};
else CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}};
// choose the negative of the fraction size // choose the negative of the fraction size
if (`FPSIZES == 1) begin if (`FPSIZES == 1) begin

View File

@ -1,10 +1,11 @@
`include "wally-config.vh" `include "wally-config.vh"
module divshiftcalc( module divshiftcalc(
input logic [`DIVLEN+2:0] Quot, input logic [`DIVb-(`RADIX/4):0] DivQm,
input logic [`FMTBITS-1:0] Fmt, input logic [`FMTBITS-1:0] Fmt,
input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2, input logic Sqrt,
input logic [`NE+1:0] DivCalcExp, input logic [`DURLEN-1:0] DivEarlyTermShift,
input logic [`NE+1:0] DivQe,
output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
output logic [`NORMSHIFTSZ-1:0] DivShiftIn, output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
output logic DivResDenorm, output logic DivResDenorm,
@ -14,27 +15,28 @@ module divshiftcalc(
// is the result denromalized // is the result denromalized
// if the exponent is 1 then the result needs to be normalized then the result is denormalizes // if the exponent is 1 then the result needs to be normalized then the result is denormalizes
assign DivResDenorm = DivCalcExp[`NE+1]|(~|DivCalcExp[`NE+1:0]); assign DivResDenorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);
// if the result is denormalized // if the result is denormalized
// 00000000x.xxxxxx... Exp = DivCalcExp // 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1 // .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1 // .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivCalcExp+NF+1-1 // Left shift amount = DivQe+NF+1-1
assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExp; assign DivDenormShift = (`NE+2)'(`NF)+DivQe;
// if the result is normalized // if the result is normalized
// 00000000x.xxxxxx... Exp = DivCalcExp // 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// 00000000.xxxxxxx... << NF Exp = DivCalcExp+1 // 00000000.xxxxxxx... << NF Exp = DivQe+1
// 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards) // 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after) // 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
// inital Left shift amount = NF // inital Left shift amount = NF
// shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit
assign NormShift = (`NE+2)'(`NF); assign NormShift = (`NE+2)'(`NF);
// if the shift amount is negitive then dont shift (keep sticky bit) // if the shift amount is negitive then dont shift (keep sticky bit)
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, DivEarlyTermShiftDiv2&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0}; // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES)
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~(DivDenormShift[`NE+1]|Sqrt)}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}};
// *** may be able to reduce shifter size assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb+1+(`RADIX/4)-`NF{1'b0}}};
assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
endmodule endmodule

View File

@ -34,35 +34,37 @@ module divsqrt(
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic [`FMTBITS-1:0] FmtE, input logic [`FMTBITS-1:0] FmtE,
input logic [`NF:0] XManE, YManE, input logic XsE,
input logic [`NE-1:0] XExpE, YExpE, input logic [`NF:0] XmE, YmE,
input logic [`NE-1:0] XeE, YeE,
input logic XInfE, YInfE, input logic XInfE, YInfE,
input logic XZeroE, YZeroE, input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE, input logic XNaNE, YNaNE,
input logic DivStartE, input logic DivStartE,
input logic StallM, input logic StallM,
input logic StallE, input logic StallE,
output logic DivStickyM, input logic SqrtE, SqrtM,
output logic DivNegStickyM, output logic DivSM,
output logic DivBusy, output logic DivBusy,
output logic DivDone, output logic DivDone,
output logic [`NE+1:0] DivCalcExpM, output logic [`NE+1:0] QeM,
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M, output logic [`DURLEN-1:0] EarlyTermShiftM,
output logic [`DIVLEN+2:0] QuotM output logic [`DIVb-(`RADIX/4):0] QmM
// output logic [`XLEN-1:0] RemM, // output logic [`XLEN-1:0] RemM,
); );
logic [`DIVLEN+3:0] WSN, WCN; logic [`DIVb+3:0] NextWSN, NextWCN;
logic [`DIVLEN+3:0] WS, WC; logic [`DIVb+3:0] WS, WC;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DIVb+3:0] StickyWSA;
logic [`DIVLEN-1:0] X; logic [`DIVb:0] X;
logic [`DIVLEN-1:0] Dpreproc; logic [`DIVN-2:0] Dpreproc;
logic [$clog2(`DIVLEN/2+3)-1:0] Dur; logic [`DURLEN-1:0] Dur;
logic NegSticky;
srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc);
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, srtfsm srtfsm(.reset, .XsE, .SqrtE, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE,
.XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M)); .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, srt srt(.clk, .Sqrt(SqrtM), .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
.DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); .StickyWSA, .DivBusy, .Qm(QmM));
endmodule endmodule

View File

@ -29,29 +29,29 @@
`include "wally-config.vh" `include "wally-config.vh"
module fclassify ( module fclassify (
input logic XSgnE, // sign bit input logic Xs, // sign bit
input logic XNaNE, // is NaN input logic XNaN, // is NaN
input logic XSNaNE, // is signaling NaN input logic XSNaN, // is signaling NaN
input logic XDenormE, // is denormal input logic XDenorm,// is denormal
input logic XZeroE, // is zero input logic XZero, // is zero
input logic XInfE, // is infinity input logic XInf, // is infinity
output logic [`XLEN-1:0] ClassResE // classify result output logic [`XLEN-1:0] ClassRes// classify result
); );
logic PInf, PZero, PNorm, PDenorm; logic PInf, PZero, PNorm, PDenorm;
logic NInf, NZero, NNorm, NDenorm; logic NInf, NZero, NNorm, NDenorm;
logic XNormE; logic XNorm;
// determine the sub categories // determine the sub categories
assign XNormE = ~(XNaNE | XInfE | XDenormE | XZeroE); assign XNorm= ~(XNaN | XInf| XDenorm| XZero);
assign PInf = ~XSgnE&XInfE; assign PInf = ~Xs&XInf;
assign NInf = XSgnE&XInfE; assign NInf = Xs&XInf;
assign PNorm = ~XSgnE&XNormE; assign PNorm = ~Xs&XNorm;
assign NNorm = XSgnE&XNormE; assign NNorm = Xs&XNorm;
assign PDenorm = ~XSgnE&XDenormE; assign PDenorm = ~Xs&XDenorm;
assign NDenorm = XSgnE&XDenormE; assign NDenorm = Xs&XDenorm;
assign PZero = ~XSgnE&XZeroE; assign PZero = ~Xs&XZero;
assign NZero = XSgnE&XZeroE; assign NZero = Xs&XZero;
// determine sub category and combine into the result // determine sub category and combine into the result
// bit 0 - -Inf // bit 0 - -Inf
@ -64,6 +64,6 @@ module fclassify (
// bit 7 - +Inf // bit 7 - +Inf
// bit 8 - signaling NaN // bit 8 - signaling NaN
// bit 9 - quiet NaN // bit 9 - quiet NaN
assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf}; assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
endmodule endmodule

View File

@ -27,9 +27,10 @@
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE. // OR OTHER DEALINGS IN THE SOFTWARE.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" `include "wally-config.vh"
// FOpCtrlE values // OpCtrl values
// 110 min // 110 min
// 101 max // 101 max
// 010 equal // 010 equal
@ -37,36 +38,32 @@
// 011 less than or equal // 011 less than or equal
module fcmp ( module fcmp (
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single input logic [`FMTBITS-1:0] Fmt, // format of fp number
input logic [2:0] FOpCtrlE, // see above table input logic [2:0] OpCtrl, // see above table
input logic XSgnE, YSgnE, // input signs input logic Xs, Ys, // input signs
input logic [`NE-1:0] XExpE, YExpE, // input exponents input logic [`NE-1:0] Xe, Ye, // input exponents
input logic [`NF:0] XManE, YManE, // input mantissa input logic [`NF:0] Xm, Ym, // input mantissa
input logic XZeroE, YZeroE, // is zero input logic XZero, YZero, // is zero
input logic XNaNE, YNaNE, // is NaN input logic XNaN, YNaN, // is NaN
input logic XSNaNE, YSNaNE, // is signaling NaN input logic XSNaN, YSNaN, // is signaling NaN
input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker)
output logic CmpNVE, // invalid flag output logic CmpNV, // invalid flag
output logic [`FLEN-1:0] CmpFpResE, // compare resilt output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result
output logic [`XLEN-1:0] CmpIntResE // compare resilt output logic [`XLEN-1:0] CmpIntRes // compare integer result
); );
logic LTabs, LT, EQ; // is X < or > or = Y logic LTabs, LT, EQ; // is X < or > or = Y
logic [`FLEN-1:0] NaNRes; logic [`FLEN-1:0] NaNRes; // NaN result
logic BothZero, EitherNaN, EitherSNaN; logic BothZero; // are both inputs zero
logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN
assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers
assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs); assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison
// assign LT = {~XSgnE, XExpE, XManE[`NF-1:0]} < {~YSgnE, YExpE, YManE[`NF-1:0]}; // *** James look at whether we can simplify to this, but it fails regression assign EQ = (X == Y);
//assign LT = $signed({XSgnE, XExpE, XManE[`NF-1:0]}) < $signed({YSgnE, YExpE, YManE[`NF-1:0]}); assign BothZero = XZero&YZero;
//assign LT = XInt < YInt; assign EitherNaN = XNaN|YNaN;
// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE; assign EitherSNaN = XSNaN|YSNaN;
assign EQ = (FSrcXE == FSrcYE);
assign BothZero = XZeroE&YZeroE;
assign EitherNaN = XNaNE|YNaNE;
assign EitherSNaN = XSNaNE|YSNaNE;
// flags // flags
@ -74,78 +71,91 @@ module fcmp (
// LT/LE - signaling - sets invalid if NaN input // LT/LE - signaling - sets invalid if NaN input
// EQ - quiet - sets invalid if signaling NaN input // EQ - quiet - sets invalid if signaling NaN input
always_comb begin always_comb begin
case (FOpCtrlE[2:0]) case (OpCtrl[2:0])
3'b110: CmpNVE = EitherSNaN;//min 3'b110: CmpNV = EitherSNaN;//min
3'b101: CmpNVE = EitherSNaN;//max 3'b101: CmpNV = EitherSNaN;//max
3'b010: CmpNVE = EitherSNaN;//equal 3'b010: CmpNV = EitherSNaN;//equal
3'b001: CmpNVE = EitherNaN;//less than 3'b001: CmpNV = EitherNaN;//less than
3'b011: CmpNVE = EitherNaN;//less than or equal 3'b011: CmpNV = EitherNaN;//less than or equal
default: CmpNVE = 1'bx; default: CmpNV = 1'bx;
endcase endcase
end end
// Min/Max
// - outputs the min/max of X and Y
// - -0 < 0
// - if both are NaN return quiet X
// - if one is a NaN output the non-NaN
// LT/LE/EQ
// - -0 = 0
// - inf = inf and -inf = -inf
// - return 0 if comparison with NaN (unordered)
// fmin/fmax of two NaNs returns a quiet NaN of the appropriate size // fmin/fmax of two NaNs returns a quiet NaN of the appropriate size
// for IEEE, return the payload of X // for IEEE, return the payload of X
// for RISC-V, return the canonical NaN // for RISC-V, return the canonical NaN
// select the NaN result
if (`FPSIZES == 1) if (`FPSIZES == 1)
if(`IEEE754) assign NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]}; if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
else if (`FPSIZES == 2) else if (`FPSIZES == 2)
if(`IEEE754) assign NaNRes = FmtE ? {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]}; if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else assign NaNRes = FmtE ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; else assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
else if (`FPSIZES == 3) else if (`FPSIZES == 3)
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: `FMT:
if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]}; if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
`FMT1: `FMT1:
if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]}; if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
`FMT2: `FMT2:
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, XSgnE, {`NE2{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF2]}; if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
default: NaNRes = {`FLEN{1'bx}}; default: NaNRes = {`FLEN{1'bx}};
endcase endcase
else if (`FPSIZES == 4) else if (`FPSIZES == 4)
always_comb always_comb
case (FmtE) case (Fmt)
2'h3: 2'h3:
if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]}; if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
2'h1: 2'h1:
if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, XSgnE, {`D_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`D_NF]}; if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
2'h0: 2'h0:
if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, XSgnE, {`S_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`S_NF]}; if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
2'h2: 2'h2:
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, XSgnE, {`H_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`H_NF]}; if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
endcase endcase
// when one input is a NaN -output the non-NaN
assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE :
XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN}; // Min/Max
// - outputs the min/max of X and Y
// - -0 < 0
// - if both are NaN return quiet X
// - if one is a NaN output the non-NaN
always_comb
if(OpCtrl[0]) // MAX
if(XNaN)
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
else CmpFpRes = Y; // X = NaN Y != NaN
else
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
else // X,Y != NaN
if(LT) CmpFpRes = Y; // X < Y
else CmpFpRes = X; // X > Y
else // MIN
if(XNaN)
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
else CmpFpRes = Y; // X = NaN Y != NaN
else
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
else // X,Y != NaN
if(LT) CmpFpRes = X; // X < Y
else CmpFpRes = Y; // X > Y
// LT/LE/EQ
// - -0 = 0
// - inf = inf and -inf = -inf
// - return 0 if comparison with NaN (unordered)
assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
endmodule endmodule

View File

@ -29,25 +29,44 @@
`include "wally-config.vh" `include "wally-config.vh"
module fctrl ( module fctrl (
input logic clk,
input logic reset,
input logic StallE, StallM, StallW, // stall signals
input logic FlushE, FlushM, FlushW, // flush signals
input logic [31:0] InstrD,
input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision
input logic [6:0] OpD, // bits 6:0 of instruction input logic [6:0] OpD, // bits 6:0 of instruction
input logic [4:0] Rs2D, // bits 24:20 of instruction input logic [4:0] Rs2D, // bits 24:20 of instruction
input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode
input logic [2:0] FRM_REGW, // rounding mode from CSR input logic [2:0] FRM_REGW, // rounding mode from CSR
input logic [1:0] STATUS_FS, // is FPU enabled? input logic [1:0] STATUS_FS, // is FPU enabled?
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction input logic FDivBusyE, // is the divider busy
output logic FRegWriteD, // FP register write enable output logic IllegalFPUInstrD, IllegalFPUInstrM, // Is the instruction an illegal fpu instruction
output logic FDivStartD, // Start division or squareroot output logic FRegWriteM, FRegWriteW, // FP register write enable
output logic [1:0] FResSelD, // select result to be written to fp register output logic [2:0] FrmM, // FP rounding mode
output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format
output logic [1:0] PostProcSelD, output logic DivStartE, // Start division or squareroot
output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1 output logic XEnE, YEnE, ZEnE,
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude output logic YEnForwardE, ZEnForwardE,
output logic FWriteIntD // is the result written to the integer register output logic FWriteIntE, FWriteIntM, // Write to integer register
output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component
output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage
output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit
output logic [4:0] Adr1E, Adr2E, Adr3E // adresses of each input
); );
`define FCTRLW 11 `define FCTRLW 11
logic [`FCTRLW-1:0] ControlsD; logic [`FCTRLW-1:0] ControlsD;
logic IllegalFPUInstrE;
logic FRegWriteD; // FP register write enable
logic DivStartD; // integer register write enable
logic FWriteIntD; // integer register write enable
logic FRegWriteE; // FP register write enable
logic [2:0] OpCtrlD; // Select which opperation to do in each component
logic [1:0] PostProcSelD; // select result in the post processing unit
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
logic [2:0] FrmD, FrmE; // FP rounding mode
logic [`FMTBITS-1:0] FmtD; // FP format
//*** will putting x for don't cares reduce area in synthisis??? //*** will putting x for don't cares reduce area in synthisis???
// FPU Instruction Decoder // FPU Instruction Decoder
always_comb always_comb
@ -130,7 +149,7 @@ module fctrl (
endcase endcase
// unswizzle control bits // unswizzle control bits
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD; assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, DivStartD, IllegalFPUInstrD} = ControlsD;
// rounding modes: // rounding modes:
// 000 - round to nearest, ties to even // 000 - round to nearest, ties to even
@ -155,6 +174,20 @@ module fctrl (
else if (`FPSIZES == 3|`FPSIZES == 4) else if (`FPSIZES == 3|`FPSIZES == 4)
assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0]; assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
// enables:
// X - all except int->fp, store, load, mv int->fp
// Y - all except cvt, mv, load, class, sqrt
// Z - fma ops only
// load/store mv int->fp cvt int->fp
assign XEnE = ~(((FResSelE==2'b10)&~FWriteIntE)|((FResSelE==2'b11)&FRegWriteE)|((FResSelE==2'b01)&(PostProcSelE==2'b00)&OpCtrlE[2]));
// load/class mv cvt
assign YEnE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&((PostProcSelE==2'b00)|((PostProcSelE==2'b01)&OpCtrlE[0]))));
assign ZEnE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&(~OpCtrlE[2]|OpCtrlE[1]);
assign YEnForwardE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&((PostProcSelE==2'b00)|((PostProcSelE==2'b01)&OpCtrlE[0]))));
assign ZEnForwardE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&~OpCtrlE[2];
// Final Res Sel: // Final Res Sel:
// fp int // fp int
// 00 other cmp // 00 other cmp
@ -168,7 +201,7 @@ module fctrl (
// 10 fma // 10 fma
// Other Sel: // Other Sel:
// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]} // Ctrl signal = {OpCtrl[2], &FOpctrl[1:0]}
// 000 - sign 00 // 000 - sign 00
// 001 - negate sign 00 // 001 - negate sign 00
// 010 - xor sign 00 // 010 - xor sign 00
@ -186,8 +219,8 @@ module fctrl (
// 110 - add // 110 - add
// 111 - sub // 111 - sub
// Div: // Div:
// 0 - ??? // 0 - div
// 1 - ??? // 1 - sqrt
// Cvt Int: {Int to Fp?, 64 bit int?, signed int?} // Cvt Int: {Int to Fp?, 64 bit int?, signed int?}
// Cvt Fp: output format // Cvt Fp: output format
// 10 - to half // 10 - to half
@ -205,5 +238,24 @@ module fctrl (
// 01 - negate sign // 01 - negate sign
// 10 - xor sign // 10 - xor sign
// D/E pipleine register
flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE});
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE);
if(`FLEN>`XLEN)
flopenrc #(1) DEIllegalReg(clk, reset, FlushE, ~StallE, IllegalFPUInstrD, IllegalFPUInstrE);
// E/M pipleine register
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE},
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM});
if(`FLEN>`XLEN)
flopenrc #(1) EMIllegalReg(clk, reset, FlushM, ~StallM, IllegalFPUInstrE, IllegalFPUInstrM);
// M/W pipleine register
flopenrc #(3) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResSelM},
{FRegWriteW, FResSelW});
endmodule endmodule

View File

@ -35,7 +35,7 @@ module fcvt (
input logic [`NE-1:0] Xe, // input's exponent input logic [`NE-1:0] Xe, // input's exponent
input logic [`NF:0] Xm, // input's fraction input logic [`NF:0] Xm, // input's fraction
input logic [`XLEN-1:0] Int, // integer input - from IEU input logic [`XLEN-1:0] Int, // integer input - from IEU
input logic [2:0] FOpCtrl, // choose which opperation (look below for values) input logic [2:0] OpCtrl, // choose which opperation (look below for values)
input logic ToInt, // is fp->int (since it's writting to the integer register) input logic ToInt, // is fp->int (since it's writting to the integer register)
input logic XZero, // is the input zero input logic XZero, // is the input zero
input logic XDenorm, // is the input denormalized input logic XDenorm, // is the input denormalized
@ -68,21 +68,22 @@ module fcvt (
logic Signed; // is the opperation with a signed integer? logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits? logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion? logic IntToFp; // is the opperation an int->fp conversion?
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder)
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
// seperate OpCtrl for code readability // seperate OpCtrl for code readability
assign Signed = FOpCtrl[0]; assign Signed = OpCtrl[0];
assign Int64 = FOpCtrl[1]; assign Int64 = OpCtrl[1];
assign IntToFp = FOpCtrl[2]; assign IntToFp = OpCtrl[2];
// choose the ouptut format depending on the opperation // choose the ouptut format depending on the opperation
// - fp -> fp: OpCtrl contains the percision of the output // - fp -> fp: OpCtrl contains the percision of the output
// - int -> fp: Fmt contains the percision of the output // - int -> fp: Fmt contains the percision of the output
if (`FPSIZES == 2) if (`FPSIZES == 2)
assign OutFmt = IntToFp ? Fmt : (FOpCtrl[1:0] == `FMT); assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4) else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp ? Fmt : FOpCtrl[1:0]; assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0];
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@ -102,10 +103,11 @@ module fcvt (
// choose the input to the leading zero counter i.e. priority encoder // choose the input to the leading zero counter i.e. priority encoder
// int -> fp : | positive integer | 00000... (if needed) | // int -> fp : | positive integer | 00000... (if needed) |
// fp -> fp : | fraction | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) |
assign LzcIn = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} : assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} :
{Xm[`NF-1:0], {`CVTLEN-`NF{1'b0}}}; {Xm, {`CVTLEN-`NF{1'b0}}};
assign LzcIn = LzcInFull[`CVTLEN-1:0];
lzc #(`CVTLEN) lzc (.num(LzcIn), .ZeroCnt(LeadingZeros)); lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// shifter // shifter
@ -119,13 +121,14 @@ module fcvt (
// denormalized/undeflowed result fp -> fp: // denormalized/undeflowed result fp -> fp:
// - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0 // - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0
// ??? -> fp: // ??? -> fp:
// - shift left by LeadingZeros+1 - to shift till the result is normalized // - shift left by LeadingZeros - to shift till the result is normalized
// - only shift fp -> fp if the intital value is denormalized // - only shift fp -> fp if the intital value is denormalized
// - this is a problem because the input to the lzc was the fraction rather than the mantissa // - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} : always_comb
ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}};
(LeadingZeros+1)&{`LOGCVTLEN{XDenorm|IntToFp}}; else if (ResDenormUf&~IntToFp) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0];
else ShiftAmt = LeadingZeros;
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// exp calculations // exp calculations
@ -148,7 +151,9 @@ module fcvt (
assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS); assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS);
end else if (`FPSIZES == 2) begin end else if (`FPSIZES == 2) begin
assign NewBias = ToInt ? (`NE-1)'(1) : OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); logic [`NE-2:0] NewBiasToFp;
assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
end else if (`FPSIZES == 3) begin end else if (`FPSIZES == 3) begin
logic [`NE-2:0] NewBiasToFp; logic [`NE-2:0] NewBiasToFp;
@ -175,7 +180,7 @@ module fcvt (
// select the old exponent // select the old exponent
// int -> fp : largest bias + XLEN // int -> fp : largest bias + XLEN
// fp -> ??? : XExp // fp -> ??? : XExp
assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN) : Xe; assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe;
// calculate CalcExp // calculate CalcExp
// fp -> fp : // fp -> fp :
@ -197,14 +202,14 @@ module fcvt (
// | 0's | Mantissa | 0's if nessisary | // | 0's | Mantissa | 0's if nessisary |
// | keep | // | keep |
// //
// - if the input is denormalized then we dont shift... so the "- (LeadingZeros+1)" is just leftovers from other options // - if the input is denormalized then we dont shift... so the "- LeadingZeros" is just leftovers from other options
// int -> fp : largest bias + XLEN - Largest bias + new bias - 1 - LeadingZeros = XLEN + NewBias - 1 - LeadingZeros // int -> fp : largest bias + XLEN - Largest bias + new bias - LeadingZeros = XLEN + NewBias - LeadingZeros
// Process: // Process:
// - shifted right by XLEN (XLEN) // - shifted right by XLEN (XLEN)
// - shift left to normilize (-1-LeadingZeros) // - shift left to normilize (-LeadingZeros)
// - newBias to make the biased exponent // - newBias to make the biased exponent
// oldexp - biasold +newbias - (LeadingZeros+1)&(XDenorm|IntToFp) // oldexp - biasold +newbias - LeadingZeros&(XDenorm|IntToFp)
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenorm|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})}; assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
// find if the result is dnormal or underflows // find if the result is dnormal or underflows
// - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
// - can't underflow an integer to Fp conversion // - can't underflow an integer to Fp conversion
@ -220,7 +225,11 @@ module fcvt (
// - if 64-bit : check the msb of the 64-bit integer input and if it's signed // - if 64-bit : check the msb of the 64-bit integer input and if it's signed
// - if 32-bit : check the msb of the 32-bit integer input and if it's signed // - if 32-bit : check the msb of the 32-bit integer input and if it's signed
// - otherwise: the floating point input's sign // - otherwise: the floating point input's sign
assign Cs = IntToFp ? Int64 ? Int[`XLEN-1]&Signed : Int[31]&Signed : Xs; always_comb
if(IntToFp)
if(Int64) Cs = Int[`XLEN-1]&Signed;
else Cs = Int[31]&Signed;
else Cs = Xs;
endmodule endmodule

View File

@ -31,49 +31,51 @@
`include "wally-config.vh" `include "wally-config.vh"
module fhazard( module fhazard(
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
input logic FRegWriteM, FRegWriteW, // is the fp register being written to input logic FRegWriteM, FRegWriteW, // is the fp register being written to
input logic [4:0] RdM, RdW, // the adress being written to input logic [4:0] RdM, RdW, // the adress being written to
input logic [1:0] FResSelM, // the result being selected input logic [1:0] FResSelM, // the result being selected
input logic XEnE, YEnE, ZEnE,
output logic FStallD, // stall the decode stage output logic FStallD, // stall the decode stage
output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value
); );
always_comb begin always_comb begin
// set defaults // set defaults
FForwardXE = 2'b00; // choose FRD1E ForwardXE = 2'b00; // choose FRD1E
FForwardYE = 2'b00; // choose FRD2E ForwardYE = 2'b00; // choose FRD2E
FForwardZE = 2'b00; // choose FRD3E ForwardZE = 2'b00; // choose FRD3E
FStallD = 0; FStallD = 0;
//*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait
// if the needed value is in the memory stage - input 1 // if the needed value is in the memory stage - input 1
if ((Adr1E == RdM) & FRegWriteM) if(XEnE)
// if the result will be FResM (can be taken from the memory stage) if ((Adr1E == RdM) & FRegWriteM)
if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM // if the result will be FResM (can be taken from the memory stage)
else FStallD = 1; // otherwise stall if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM
// if the needed value is in the writeback stage else FStallD = 1; // otherwise stall
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W // if the needed value is in the writeback stage
else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W
// if the needed value is in the memory stage - input 2 // if the needed value is in the memory stage - input 2
if ((Adr2E == RdM) & FRegWriteM) if(YEnE)
// if the result will be FResM (can be taken from the memory stage) if ((Adr2E == RdM) & FRegWriteM)
if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM // if the result will be FResM (can be taken from the memory stage)
else FStallD = 1; // otherwise stall if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM
// if the needed value is in the writeback stage else FStallD = 1; // otherwise stall
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W // if the needed value is in the writeback stage
else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W
// if the needed value is in the memory stage - input 3 // if the needed value is in the memory stage - input 3
if ((Adr3E == RdM) & FRegWriteM) if(ZEnE)
// if the result will be FResM (can be taken from the memory stage) if ((Adr3E == RdM) & FRegWriteM)
if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM // if the result will be FResM (can be taken from the memory stage)
else FStallD = 1; // otherwise stall if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM
// if the needed value is in the writeback stage else FStallD = 1; // otherwise stall
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W // if the needed value is in the writeback stage
else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W
end end

View File

@ -34,24 +34,23 @@ module flags(
input logic XInf, YInf, ZInf, // inputs are infinity input logic XInf, YInf, ZInf, // inputs are infinity
input logic Plus1, input logic Plus1,
input logic InfIn, // is a Inf input being used input logic InfIn, // is a Inf input being used
input logic XZero, YZero, // inputs are zero
input logic XNaN, YNaN, // inputs are NaN
input logic NaNIn, // is a NaN input being used input logic NaNIn, // is a NaN input being used
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic XZero, YZero, // inputs are zero
input logic Sqrt, // Sqrt? input logic Sqrt, // Sqrt?
input logic ToInt, // convert to integer input logic ToInt, // convert to integer
input logic IntToFp, // convert integer to floating point input logic IntToFp, // convert integer to floating point
input logic Int64, // convert to 64 bit integer input logic Int64, // convert to 64 bit integer
input logic Signed, // convert to a signed integer input logic Signed, // convert to a signed integer
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [`NE:0] CvtCe, // the calculated expoent - Cvt input logic [`NE:0] CvtCe, // the calculated expoent - Cvt
input logic CvtOp, // conversion opperation? input logic CvtOp, // conversion opperation?
input logic DivOp, // conversion opperation? input logic DivOp, // conversion opperation?
input logic FmaOp, // Fma opperation? input logic FmaOp, // Fma opperation?
input logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
input logic [`NE+1:0] Nexp, // exponent of the normalized sum input logic [`NE+1:0] Me, // exponent of the normalized sum
input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits
input logic FmaAs, FmaPs, // the product and modified Z signs input logic FmaAs, FmaPs, // the product and modified Z signs
input logic R, UfLSBRes, S, UfPlus1, // bits used to determine rounding input logic R, UfL, S, UfPlus1, // bits used to determine rounding
output logic DivByZero, output logic DivByZero,
output logic IntInvalid, Invalid, Overflow, // flags used to select the res output logic IntInvalid, Invalid, Overflow, // flags used to select the res
output logic [4:0] PostProcFlg // flags output logic [4:0] PostProcFlg // flags
@ -73,30 +72,30 @@ module flags(
if (`FPSIZES == 1) begin if (`FPSIZES == 1) begin
assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE]; assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 2) begin end else if (`FPSIZES == 2) begin
assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]); assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 3) begin end else if (`FPSIZES == 3) begin
always_comb always_comb
case (OutFmt) case (OutFmt)
`FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE]; `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
`FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]); `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
`FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]); `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
default: ResExpGteMax = 1'bx; default: ResExpGteMax = 1'bx;
endcase endcase
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 4) begin end else if (`FPSIZES == 4) begin
always_comb always_comb
case (OutFmt) case (OutFmt)
`Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE]; `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
`D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]); `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
`S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]); `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
`H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]); `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
endcase endcase
// a left shift of intlen+1 is still in range but any more than that is an overflow // a left shift of intlen+1 is still in range but any more than that is an overflow
// inital: | 64 0's | XLEN | // inital: | 64 0's | XLEN |
@ -110,14 +109,14 @@ module flags(
// - any of the bits after the most significan 1 is one // - any of the bits after the most significan 1 is one
// - the most signifcant in 65 or 33 is still a one in the number and // - the most signifcant in 65 or 33 is still a one in the number and
// one of the later bits is one // one of the later bits is one
assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end end
// if the result is greater than or equal to the max exponent(not taking into account sign) // if the result is greater than or equal to the max exponent(not taking into account sign)
// | and the exponent isn't negitive // | and the exponent isn't negitive
// | | if the input isnt infinity or NaN // | | if the input isnt infinity or NaN
// | | | // | | |
assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero); assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
// detecting tininess after rounding // detecting tininess after rounding
// the exponent is negitive // the exponent is negitive
@ -127,11 +126,11 @@ module flags(
// | | | | and if the result is not exact // | | | | and if the result is not exact
// | | | | | and if the input isnt infinity or NaN // | | | | | and if the input isnt infinity or NaN
// | | | | | | // | | | | | |
assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero); assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero|Invalid);
// Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
// - Don't set the underflow flag if an underflowed res isn't outputed // - Don't set the underflow flag if an underflowed res isn't outputed
assign FpInexact = (S|Overflow|R|Underflow)&~(InfIn|NaNIn|DivByZero); assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid);
// if the res is too small to be represented and not 0 // if the res is too small to be represented and not 0
// | and if the res is not invalid (outside the integer bounds) // | and if the res is not invalid (outside the integer bounds)
@ -153,18 +152,18 @@ module flags(
// | | | | or the res rounds up out of bounds // | | | | or the res rounds up out of bounds
// | | | | and the res didn't underflow // | | | | and the res didn't underflow
// | | | | | // | | | | |
assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
// | // |
// or when the positive res rounds up out of range // or when the positive res rounds up out of range
assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp); assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp);
assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~XNaN & ~YNaN) | (XZero & YInf) | (YZero & XInf); assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~NaNIn) | (XZero & YInf) | (YZero & XInf);
assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt); assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt);
assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp); assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
// if dividing by zero and not 0/0 // if dividing by zero and not 0/0
// - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator) // - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
assign DivByZero = YZero&DivOp&~(XZero|NaNIn|InfIn); assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);
// Combine flags // Combine flags
// - to integer results do not set the underflow or overflow flags // - to integer results do not set the underflow or overflow flags

View File

@ -1,7 +1,7 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// //
// Written: me@KatherineParry.com, David Harris // Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified: 6/23/2021 // Modified:
// //
// Purpose: Floating point multiply-accumulate of configurable size // Purpose: Floating point multiply-accumulate of configurable size
// //
@ -34,7 +34,7 @@ module fma(
input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format
input logic XZero, YZero, ZZero, // is the input zero input logic XZero, YZero, ZZero, // is the input zero
input logic [2:0] FOpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic [2:0] OpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad
output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
output logic ZmSticky, // sticky bit that is calculated during alignment output logic ZmSticky, // sticky bit that is calculated during alignment
@ -44,14 +44,15 @@ module fma(
output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A)
output logic As, // the aligned addend's sign (modified Z sign for other opperations) output logic As, // the aligned addend's sign (modified Z sign for other opperations)
output logic Ps, // the product's sign output logic Ps, // the product's sign
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count output logic Ss, // the sum's sign
output logic [`NE+1:0] Se,
output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count
); );
logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format
logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1) logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1)
logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted logic [3*`NF+5:0] AmInv; // aligned addend's mantissa possibly inverted
logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed
logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Calculate the product // Calculate the product
// - When multipliying two fp numbers, add the exponents // - When multipliying two fp numbers, add the exponents
@ -62,234 +63,29 @@ module fma(
// calculate the product's exponent // calculate the product's exponent
expadd expadd(.Fmt, .Xe, .Ye, .XZero, .YZero, .Pe); fmaexpadd expadd(.Fmt, .Xe, .Ye, .XZero, .YZero, .Pe);
// multiplication of the mantissa's // multiplication of the mantissa's
mult mult(.Xm, .Ym, .Pm); fmamult mult(.Xm, .Ym, .Pm);
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Alignment shifter // Alignment shifter
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
.Am, .ZmSticky, .KillProd);
// calculate the signs and take the opperation into account // calculate the signs and take the opperation into account
sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As); fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA);
fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
.Am, .ZmSticky, .KillProd);
// /////////////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////////////
// // Addition/LZA // // Addition/LZA
// /////////////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////////////
add add(.Am, .Pm, .Ps, .As, .KillProd, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm); fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss);
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .NCnt); fmalza #(3*`NF+6) lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt);
endmodule endmodule
module expadd(
input logic [`FMTBITS-1:0] Fmt, // format of the output: single double half quad
input logic [`NE-1:0] Xe, Ye, // input's exponents
input logic XZero, YZero, // are the inputs zero
output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2
);
// kill the exponent if the product is zero - either X or Y is 0
assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZero|YZero)}};
endmodule
module mult(
input logic [`NF:0] Xm, Ym,
output logic [2*`NF+1:0] Pm
);
assign Pm = Xm * Ym;
endmodule
module sign(
input logic [2:0] FOpCtrl, // opperation contol
input logic Xs, Ys, Zs, // sign of the inputs
output logic Ps, // the product's sign - takes opperation into account
output logic As // aligned addend sign used in fma - takes opperation into account
);
// Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB
// flip is negation opperation
assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]);
// flip if subtraction
assign As = Zs^FOpCtrl[0];
endmodule
module align(
input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
input logic [`NF:0] Zm, // significand in U(0.NF) format]
input logic XZero, YZero, ZZero, // is the input zero
output logic [3*`NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
output logic ZmSticky, // Sticky bit calculated from the aliged addend
output logic KillProd // should the product be set to zero
);
logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*`NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*`NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
logic KillZ;
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
// determine the shift count for alignment
// - negitive means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze};
// Defualt Addition without shifting
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
// the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
assign ZmPreshifted = {Zm,(3*`NF+5)'(0)};
assign KillProd = ACnt[`NE+1]|XZero|YZero;
assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5));
always_comb
begin
// If the product is too small to effect the sum, kill the product
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
if (KillProd) begin
ZmShifted = ZmPreshifted;
ZmSticky = ~(XZero|YZero);
// If the addend is too small to effect the addition
// - The addend has to shift two past the end of the addend to be considered too small
// - The 2 extra bits are needed for rounding
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
end else if (KillZ) begin
ZmShifted = 0;
ZmSticky = ~ZZero;
// If the Addend is shifted right
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
end else begin
ZmShifted = ZmPreshifted >> ACnt;
ZmSticky = |(ZmShifted[`NF-1:0]);
end
end
assign Am = ZmShifted[4*`NF+5:`NF];
endmodule
module add(
input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
input logic [2*`NF+1:0] Pm, // the product's mantissa
input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
input logic KillProd, // should the product be set to 0
input logic XZero, YZero, // is the input zero
output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted
output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed
output logic NegSum, // was the sum negitive
output logic InvA, // do you invert the aligned addend
output logic [3*`NF+5:0] Sm, // the positive sum
output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum
);
///////////////////////////////////////////////////////////////////////////////
// Addition
///////////////////////////////////////////////////////////////////////////////
// Negate Z when doing one of the following opperations:
// -prod + Z
// prod - Z
assign InvA = As ^ Ps;
// Choose an inverted or non-inverted addend - the one has to be added now for the LZA
assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am};
// Kill the product if the product is too small to effect the addition (determined in fma1.sv)
assign PmKilled = Pm&{2*`NF+2{~KillProd}};
// Do the addition
// - calculate a positive and negitive sum in parallel
assign PreSum = {{`NF+3{1'b0}}, PmKilled, 2'b0} + AmInv + {{3*`NF+6{1'b0}}, InvA};
assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+7)'(4)};
// Is the sum negitive
assign NegSum = PreSum[3*`NF+6];
// Choose the positive sum and accompanying LZA result.
assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
endmodule
module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
input logic [3*`NF+6:0] A, // addend
input logic [2*`NF+1:0] P, // product
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result
);
logic [3*`NF+6:0] T;
logic [3*`NF+6:0] G;
logic [3*`NF+6:0] Z;
logic [3*`NF+6:0] f;
assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4];
assign G[3*`NF+6:2*`NF+4] = 0;
assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4];
assign T[2*`NF+3:2] = A[2*`NF+3:2]^P;
assign G[2*`NF+3:2] = A[2*`NF+3:2]&P;
assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P;
assign T[1:0] = A[1:0];
assign G[1:0] = 0;
assign Z[1:0] = ~A[1:0];
// Apply function to determine Leading pattern
// - note: the paper linked above uses the numbering system where 0 is the most significant bit
//f[n] = ~T[n]&T[n-1] note: n is the MSB
//f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1]))
assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5];
assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1}));
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt));
endmodule

View File

@ -0,0 +1,75 @@
///////////////////////////////////////////
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: FMA significand adder
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmaadd(
input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
input logic [2*`NF+1:0] Pm, // the product's mantissa
input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
input logic InvA, // invert the aligned addend
input logic KillProd, // should the product be set to 0
input logic ZmSticky,
input logic [`NE-1:0] Ze,
input logic [`NE+1:0] Pe,
output logic [3*`NF+5:0] AmInv, // aligned addend possibly inverted
output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed
output logic NegSum, // was the sum negitive
output logic Ss,
output logic [`NE+1:0] Se,
output logic [3*`NF+5:0] Sm // the positive sum
);
logic [3*`NF+5:0] PreSum, NegPreSum; // possibly negitive sum
///////////////////////////////////////////////////////////////////////////////
// Addition
///////////////////////////////////////////////////////////////////////////////
// Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition
assign AmInv = InvA ? ~Am : Am;
// Kill the product if the product is too small to effect the addition (determined in fma1.sv)
assign PmKilled = KillProd ? '0 : Pm;
// Do the addition
// - calculate a positive and negitive sum in parallel
// Zsticky Psticky
// PreSum -1 = don't add 1 +1 = add 2
// NegPreSum +1 = add 2 -1 = don't add 1
// for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0
assign {NegSum, PreSum} = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + {InvA, AmInv} + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))};
assign NegPreSum = Am + {{`NF+2{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+4)'(0), ZmSticky&~KillProd, ~(ZmSticky)};
// Choose the positive sum and accompanying LZA result.
assign Sm = NegSum ? NegPreSum : PreSum;
// is the result negitive
// if p - z is the Sum negitive
// if -p + z is the Sum positive
// if -p - z then the Sum is negitive
assign Ss = NegSum^Ps;
assign Se = KillProd ? {2'b0, Ze} : Pe;
endmodule

View File

@ -0,0 +1,101 @@
///////////////////////////////////////////
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: FMA alginment shift
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmaalign(
input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
input logic [`NF:0] Zm, // significand in U(0.NF) format]
input logic XZero, YZero, ZZero, // is the input zero
output logic [3*`NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
output logic ZmSticky, // Sticky bit calculated from the aliged addend
output logic KillProd // should the product be set to zero
);
logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*`NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*`NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
logic KillZ;
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
// determine the shift count for alignment
// - negitive means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze};
// Defualt Addition without shifting
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
// the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
assign ZmPreshifted = {Zm,(3*`NF+5)'(0)};
assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5));
always_comb
begin
// If the product is too small to effect the sum, kill the product
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
if (KillProd) begin
ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)};
ZmSticky = ~(XZero|YZero);
// If the addend is too small to effect the addition
// - The addend has to shift two past the end of the addend to be considered too small
// - The 2 extra bits are needed for rounding
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
end else if (KillZ) begin
ZmShifted = 0;
ZmSticky = ~ZZero;
// If the Addend is shifted right
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
end else begin
ZmShifted = ZmPreshifted >> ACnt;
ZmSticky = |(ZmShifted[`NF-1:0]);
end
end
assign Am = ZmShifted[4*`NF+5:`NF];
endmodule

View File

@ -0,0 +1,45 @@
///////////////////////////////////////////
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: FMA exponent addition
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmaexpadd(
input logic [`FMTBITS-1:0] Fmt, // format of the output: single double half quad
input logic [`NE-1:0] Xe, Ye, // input's exponents
input logic XZero, YZero, // are the inputs zero
output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2
);
logic PZero;
// kill the exponent if the product is zero - either X or Y is 0
assign PZero = XZero | YZero;
assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)});
endmodule

View File

@ -0,0 +1,60 @@
///////////////////////////////////////////
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: Leading Zero Anticipator
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmalza #(WIDTH) ( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
input logic [WIDTH-1:0] A, // addend
input logic [2*`NF+3:0] Pm, // product
input logic Cin, // carry in
input logic sub,
output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result
);
logic [WIDTH:0] F;
logic [WIDTH-1:0] B, P, G, K;
logic [WIDTH-1:0] Pp1, Gm1, Km1;
assign B = {{(`NF+2){1'b0}}, Pm}; // Zero extend product
assign P = A^B;
assign G = A&B;
assign K= ~A&~B;
assign Pp1 = {sub, P[WIDTH-1:1]};
assign Gm1 = {G[WIDTH-2:0], Cin};
assign Km1 = {K[WIDTH-2:0], ~Cin};
// Apply function to determine Leading pattern
// - note: the paper linked above uses the numbering system where 0 is the most significant bit
assign F[WIDTH] = ~sub&P[WIDTH-1];
assign F[WIDTH-1:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1));
lzc #(WIDTH+1) lzc (.num(F), .ZeroCnt(SCnt));
endmodule

View File

@ -0,0 +1,38 @@
///////////////////////////////////////////
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: FMA Significand Multiplier
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmamult(
input logic [`NF:0] Xm, Ym,
output logic [2*`NF+1:0] Pm
);
assign Pm = Xm * Ym;
endmodule

View File

@ -32,55 +32,57 @@ module fmashiftcalc(
input logic [3*`NF+5:0] FmaSm, // the positive sum input logic [3*`NF+5:0] FmaSm, // the positive sum
input logic [`NE-1:0] Ze, // exponent of Z input logic [`NE-1:0] Ze, // exponent of Z
input logic [`NE+1:0] FmaPe, // X exponent + Y exponent - bias input logic [`NE+1:0] FmaPe, // X exponent + Y exponent - bias
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // normalization shift count
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic FmaKillProd, // is the product set to zero input logic FmaKillProd, // is the product set to zero
input logic ZDenorm, input logic [`NE+1:0] FmaSe,
output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
output logic FmaSmZero, // is the result denormalized - calculated before LZA corection output logic FmaSZero, // is the result denormalized - calculated before LZA corection
output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection
output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count
output logic [3*`NF+8:0] FmaShiftIn // is the sum zero output logic [3*`NF+8:0] FmaShiftIn // is the sum zero
); );
logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias
logic [`NE+1:0] NormSumExp; // the exponent of the normalized sum with the `FLEN bias logic [`NE+1:0] BiasCorr;
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Normalization // Normalization
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
//*** insert bias-bias simplification in fcvt.sv/phone pictures //*** insert bias-bias simplification in fcvt.sv/phone pictures
// Determine if the sum is zero // Determine if the sum is zero
assign FmaSmZero = ~(|FmaSm); assign FmaSZero = ~(|FmaSm);
// calculate the sum's exponent // calculate the sum's exponent
assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4); assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+4);
//convert the sum's exponent into the proper percision //convert the sum's exponent into the proper percision
if (`FPSIZES == 1) begin if (`FPSIZES == 1) begin
assign FmaConvNormSumExp = NormSumExp; assign NormSumExp = PreNormSumExp;
end else if (`FPSIZES == 2) begin end else if (`FPSIZES == 2) begin
assign FmaConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS);
assign NormSumExp = PreNormSumExp+BiasCorr;
end else if (`FPSIZES == 3) begin end else if (`FPSIZES == 3) begin
always_comb begin always_comb begin
case (Fmt) case (Fmt)
`FMT: FmaConvNormSumExp = NormSumExp; `FMT: BiasCorr = '0;
`FMT1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; `FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS);
`FMT2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}}; `FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS);
default: FmaConvNormSumExp = {`NE+2{1'bx}}; default: BiasCorr = 'x;
endcase endcase
end end
assign NormSumExp = PreNormSumExp+BiasCorr;
end else if (`FPSIZES == 4) begin end else if (`FPSIZES == 4) begin
always_comb begin always_comb begin
case (Fmt) case (Fmt)
2'h3: FmaConvNormSumExp = NormSumExp; 2'h3: BiasCorr = '0;
2'h1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}}; 2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS);
2'h0: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}}; 2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS);
2'h2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}}; 2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS);
endcase endcase
end end
assign NormSumExp = PreNormSumExp+BiasCorr;
end end
@ -88,52 +90,52 @@ module fmashiftcalc(
if (`FPSIZES == 1) begin if (`FPSIZES == 1) begin
logic Sum0LEZ, Sum0GEFL; logic Sum0LEZ, Sum0GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero; assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
end else if (`FPSIZES == 2) begin end else if (`FPSIZES == 2) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp; assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSmZero; assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
end else if (`FPSIZES == 3) begin end else if (`FPSIZES == 3) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp; assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)); assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2));
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp; assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp;
always_comb begin always_comb begin
case (Fmt) case (Fmt)
`FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero; `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
`FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero; `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
`FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero; `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
default: FmaPreResultDenorm = 1'bx; default: FmaPreResultDenorm = 1'bx;
endcase endcase
end end
end else if (`FPSIZES == 4) begin end else if (`FPSIZES == 4) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL; logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2)); assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)); assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|NormSumExp; assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp;
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)); assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS));
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|NormSumExp; assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp;
assign Sum3LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)); assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS));
assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp; assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp;
always_comb begin always_comb begin
case (Fmt) case (Fmt)
2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero; 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero; 2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero; 2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSmZero; 2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero;
endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking endcase
end end
end end
@ -144,13 +146,13 @@ module fmashiftcalc(
// - if kill prod dont add to exp // - if kill prod dont add to exp
// Determine if the result is denormal // Determine if the result is denormal
// assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSmZero; // assign FmaPreResultDenorm = $signed(NormSumExp)<=0 & ($signed(NormSumExp)>=$signed(-FracLen)) & ~FmaSZero;
// Determine the shift needed for denormal results
// - if not denorm add 1 to shift out the leading 1
assign DenormShift = FmaPreResultDenorm&~FmaKillProd ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
// set and calculate the shift input and amount // set and calculate the shift input and amount
// - shift once if killing a product and the result is denormalized // - shift once if killing a product and the result is denormalized
assign FmaShiftIn = {3'b0, FmaSm}; assign FmaShiftIn = {3'b0, FmaSm};
assign FmaShiftAmt = (FmaNCnt&{$clog2(3*`NF+7){~FmaKillProd}})+DenormShift; if (`FPSIZES == 1)
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaSCnt+1;
else
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaSCnt+1;
endmodule endmodule

View File

@ -0,0 +1,49 @@
///////////////////////////////////////////
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: FMA Sign Logic
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmasign(
input logic [2:0] OpCtrl, // opperation contol
input logic Xs, Ys, Zs, // sign of the inputs
output logic Ps, // the product's sign - takes opperation into account
output logic As, // aligned addend sign used in fma - takes opperation into account
output logic InvA // Effective subtraction: invert addend
);
// Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB
// flip is negation opperation
assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]);
// flip addend sign for subtraction
assign As = Zs^OpCtrl[0];
// Effective subtraction when product and addend have opposite signs
assign InvA = As ^ Ps;
endmodule

View File

@ -30,28 +30,28 @@
`include "wally-config.vh" `include "wally-config.vh"
module fpu ( module fpu (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR input logic [2:0] FRM_REGW, // Rounding mode (from CSR)
input logic [31:0] InstrD, // instruction from IFU input logic [31:0] InstrD, // instruction (from IFU)
input logic [`FLEN-1:0] ReadDataW,// Read data from memory input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input being processed (from IEU) input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input (from IEU)
input logic StallE, StallM, StallW, // stall signals from HZU input logic StallE, StallM, StallW, // stall signals (from HZU)
input logic FlushE, FlushM, FlushW, // flush signals from HZU input logic FlushE, FlushM, FlushW, // flush signals (from HZU)
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
input logic [1:0] STATUS_FS, // Is floating-point enabled? input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit)
output logic FRegWriteM, // FP register write enable output logic FRegWriteM, // FP register write enable (to privileged unit)
output logic FpLoadStoreM, // Fp load instruction? output logic FpLoadStoreM, // Fp load instruction? (to LSU)
output logic FLoad2, output logic FStore2, // store two words into memory (to LSU)
output logic FStallD, // Stall the decode stage output logic FStallD, // Stall the decode stage (To HZU)
output logic FWriteIntE, // integer register write enables output logic FWriteIntE, // integer register write enable (to IEU)
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
output logic [1:0] FResSelW, output logic [1:0] FResSelW, // final result selection (to IEU)
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to privileged unit)
output logic [4:0] SetFflagsM // FPU flags (to privileged unit) output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
); );
@ -62,98 +62,91 @@ module fpu (
// - sets the underflow after rounding // - sets the underflow after rounding
// control signals // control signals
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable logic FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode logic [2:0] FrmM; // FP rounding mode
logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot logic DivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register logic FWriteIntM; // Write to integer register
logic FWriteIntM; // Write to integer register logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component logic [1:0] FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit
logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input logic IllegalFPUInstrM;
logic XEnE, YEnE, ZEnE;
logic YEnForwardE, ZEnForwardE;
// regfile signals // regfile signals
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding) logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
// unpacking signals // unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage logic XsE, YsE, ZsE; // input's sign - execute stage
logic XSgnM, YSgnM; // input's sign - memory stage logic XsM, YsM; // input's sign - memory stage
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
logic [`NE-1:0] ZExpM; // input's exponent - memory stage logic [`NE-1:0] ZeM; // input's exponent - memory stage
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage logic [`NF:0] XmE, YmE, ZmE; // input's fraction - execute stage
logic [`NF:0] XManM, YManM, ZManM; // input's fraction - memory stage logic [`NF:0] XmM, YmM, ZmM; // input's fraction - memory stage
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XNaNQ, YNaNQ; // is the input a NaN - divide logic XNaNQ, YNaNQ; // is the input a NaN - divide
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XZeroQ, YZeroQ; // is the input zero - divide logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage logic XExpMaxE; // is the exponent all ones (max value)
logic XInfQ, YInfQ; // is the input infinity - divide
logic XExpMaxE; // is the exponent all ones (max value)
logic FmtQ;
logic FOpCtrlQ;
// Fma Signals // Fma Signals
logic [3*`NF+5:0] SumE, SumM; logic [3*`NF+5:0] SmE, SmM;
logic [`NE+1:0] ProdExpE, ProdExpM; logic [`NE+1:0] PeE, PeM;
logic AddendStickyE, AddendStickyM; logic ZmStickyE, ZmStickyM;
logic KillProdE, KillProdM; logic [`NE+1:0] SeE,SeM;
logic InvAE, InvAM; logic KillProdE, KillProdM;
logic NegSumE, NegSumM; logic InvAE, InvAM;
logic ZSgnEffE, ZSgnEffM; logic NegSumE, NegSumM;
logic PSgnE, PSgnM; logic AsE, AsM;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; logic PsE, PsM;
logic SsE, SsM;
logic [$clog2(3*`NF+7)-1:0] SCntE, SCntM;
// Cvt Signals // Cvt Signals
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent logic [`NE:0] CeE, CeM; // the calculated expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
logic CvtResSgnE, CvtResSgnM; // the result's sign logic CsE, CsM; // the result's sign
logic IntZeroE, IntZeroM; // is the integer zero? logic IntZeroE, IntZeroM; // is the integer zero?
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
//divide signals //divide signals
logic [`DIVLEN+2:0] QuotE, QuotM; logic [`DIVb-(`RADIX/4):0] QmM;
logic [`NE+1:0] DivCalcExpE, DivCalcExpM; logic [`NE+1:0] QeE, QeM;
logic DivNegStickyE, DivNegStickyM; logic DivSE, DivSM;
logic DivStickyE, DivStickyM; logic DivDoneM;
logic DivDoneM; logic [`DURLEN-1:0] EarlyTermShiftM;
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M;
// result and flag signals // result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result logic [`XLEN-1:0] ClassResE; // classify result
logic [4:0] FDivFlgM; // divide/squareroot flags logic [`XLEN-1:0] FIntResE; // classify result
logic [`FLEN-1:0] ReadResW; // read result (load instruction) logic [`FLEN-1:0] FpResM, FpResW; // classify result
logic [`XLEN-1:0] ClassResE; // classify result logic [`FLEN-1:0] PostProcResM; // classify result
logic [`XLEN-1:0] FIntResE; // classify result logic [4:0] PostProcFlgM; // classify result
logic [`FLEN-1:0] FpResM, FpResW; // classify result
logic [`FLEN-1:0] PostProcResM; // classify result
logic [4:0] PostProcFlgM; // classify result
logic [`XLEN-1:0] FCvtIntResM; logic [`XLEN-1:0] FCvtIntResM;
logic [`FLEN-1:0] CmpFpResE; // compare result logic [`FLEN-1:0] CmpFpResE; // compare result
logic [`XLEN-1:0] CmpIntResE; // compare result logic [`XLEN-1:0] CmpIntResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid) logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
// other signals // other signals
logic FDivSqrtDoneE; // is divide done logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
logic load_preload; // enable for FF on fpdivsqrt
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed
@ -170,9 +163,11 @@ module fpu (
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// calculate FP control signals // calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS, fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD,
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
.FmtD, .FrmD, .FWriteIntD); .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE,
.DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE,
.FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E);
// FP register file // FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW), fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
@ -184,12 +179,6 @@ module fpu (
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
// EXECUTION STAGE // EXECUTION STAGE
@ -206,12 +195,12 @@ module fpu (
// Hazard unit for FPU // Hazard unit for FPU
// - determines if any forwarding or stalls are needed // - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM, fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM,
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE); .XEnE, .YEnE(YEnForwardE), .ZEnE(ZEnForwardE), .FStallD, .ForwardXE, .ForwardYE, .ForwardZE);
// forwarding muxs // forwarding muxs
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE); mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE);
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE); mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, ForwardYE, PreYE);
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE); mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, ForwardZE, PreZE);
generate generate
@ -226,7 +215,7 @@ module fpu (
endgenerate endgenerate
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), YE); // Force Z to be 0 for multiply instructions
// Force Z to be 0 for multiply instructions // Force Z to be 0 for multiply instructions
generate generate
@ -240,80 +229,76 @@ module fpu (
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
endgenerate endgenerate
mux3 #(`FLEN) fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}, ZE);
// unpack unit // unpack unit
// - splits FP inputs into their various parts // - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE),
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE),
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE); .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE),
.ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE));
// fma - does multiply, add, and multiply-add instructions // fused multiply add
fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), // - fadd/fsub
.Xe(XExpE), .Ye(YExpE), .Ze(ZExpE), // - fmul
.Xm(XManE), .Ym(YManE), .Zm(ZManE), // - fmadd/fnmadd/fmsub/fnmsub
fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE),
.Xe(XeE), .Ye(YeE), .Ze(ZeE),
.Xm(XmE), .Ym(YmE), .Zm(ZmE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE),
.FOpCtrl(FOpCtrlE), .Fmt(FmtE), .OpCtrl(OpCtrlE), .Fmt(FmtE),
.As(ZSgnEffE), .Ps(PSgnE), .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE),
.Sm(SumE), .Pe(ProdExpE), .Sm(SmE), .Pe(PeE),
.NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), .NegSum(NegSumE), .InvA(InvAE), .SCnt(SCntE),
.ZmSticky(AddendStickyE), .KillProd(KillProdE)); .ZmSticky(ZmStickyE), .KillProd(KillProdE));
// // fpdivsqrt using Goldschmidt's iteration // divide and squareroot
// if(`FLEN == 64) begin // - fdiv
// flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E), // - fsqrt
// .clear(FDivSqrtDoneE), .en(load_preload), // *** add other opperations
// .reset(reset), .clk(clk)); divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
// flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E), .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), .XsE,
// .clear(FDivSqrtDoneE), .en(load_preload), .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal
// .reset(reset), .clk(clk)); .EarlyTermShiftM, .QmM, .DivDone(DivDoneM));
// end // compare
// else if (`FLEN == 32) begin // - fmin/fmax
// flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E), // - flt/fle/feq
// .clear(FDivSqrtDoneE), .en(load_preload), fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
// .reset(reset), .clk(clk)); .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE),
// flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E), .XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE),
// .clear(FDivSqrtDoneE), .en(load_preload), .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));
// .reset(reset), .clk(clk)); // sign injection
// end // - fsgnj/fsgnjx/fsgnjn
// flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}), fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE));
// .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ),
// .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
// .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
// .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE,
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE),
.StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
.EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM));
// other FP execution units
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
fcvt fcvt (.Xs(XSgnE), .Xe(XExpE), .Xm(XManE), .Int(ForwardedSrcAE), .FOpCtrl(FOpCtrlE), // classify
.ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CvtCalcExpE), // - fclass
.ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .IntZero(IntZeroE), fclassify fclassify (.Xs(XsE), .XDenorm(XDenormE), .XZero(XZeroE), .XNaN(XNaNE),
.XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));
// convert
// - fcvt.*.*
fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE),
.ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CeE),
.ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CsE), .IntZero(IntZeroE),
.LzcIn(CvtLzcInE)); .LzcIn(CvtLzcInE));
// data to be stored in memory - to IEU // data to be stored in memory - to IEU
// - FP uses NaN-blocking format // - FP uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s // - if there are any unsused bits the most significant bits are filled with 1s
if (`LLEN==`XLEN) begin if (`LLEN==`XLEN) begin
assign FWriteDataE = FSrcYE[`XLEN-1:0]; assign FWriteDataE = YE[`XLEN-1:0];
end else begin end else begin
logic [`FLEN-1:0] FWriteDataE; logic [`FLEN-1:0] FWriteDataE;
if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT; if(`FMTBITS == 2) assign FStore2 = (FmtM == `FMT)&~IllegalFPUInstrM;
else assign FLoad2 = FmtM; else assign FStore2 = FmtM&~IllegalFPUInstrM;
if (`FPSIZES==1) assign FWriteDataE = FSrcYE; if (`FPSIZES==1) assign FWriteDataE = YE;
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}};
else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; else assign FWriteDataE = FmtE == `FMT ? YE : {2{YE[`LEN1-1:0]}};
flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM); flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM);
end end
@ -330,14 +315,27 @@ module fpu (
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
endgenerate endgenerate
// select a result that may be written to the FP register // select a result that may be written to the FP register
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE); mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE); assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
// select the result that may be written to the integer register - to IEU // select the result that may be written to the integer register - to IEU
logic [`FLEN-1:0] SgnExtXE;
generate
if(`FPSIZES == 1)
assign SgnExtXE = XE;
else if(`FPSIZES == 2)
mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{XsE}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE);
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{XsE}}, XE[`H_LEN-1:0]},
{{`FLEN-`S_LEN{XsE}}, XE[`S_LEN-1:0]},
{{`FLEN-`D_LEN{XsE}}, XE[`D_LEN-1:0]},
XE, FmtE, SgnExtXE); // NaN boxing zeroes
endgenerate
if (`FLEN>`XLEN) if (`FLEN>`XLEN)
assign IntSrcXE = FSrcXE[`XLEN-1:0]; assign IntSrcXE = SgnExtXE[`XLEN-1:0];
else else
assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}; assign IntSrcXE = {{`XLEN-`FLEN{XsE}}, SgnExtXE};
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok // *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
@ -345,27 +343,24 @@ module fpu (
// E/M pipe registers // E/M pipe registers
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, XE, FSrcXM);
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM}); flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XsE,XmE}, {XsM,XmM});
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM}); flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YsE,YmE}, {YsM,YmM});
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE}, {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM);
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM,
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); {ZmStickyE, KillProdE, InvAE, SCntE, NegSumE, AsE, PsE, SsE, SeE},
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); {ZmStickyM, KillProdM, InvAM, SCntM, NegSumM, AsM, PsM, SsM, SeM});
flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
{AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, {CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE},
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); {CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM});
// BEGIN MEMORY STAGE // BEGIN MEMORY STAGE
@ -381,12 +376,12 @@ module fpu (
assign FpLoadStoreM = FResSelM[1]; assign FpLoadStoreM = FResSelM[1];
postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2M), postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM), .DivEarlyTermShift(EarlyTermShiftM),
.FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM), .FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .DivDone(DivDoneM),
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM), .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
.CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM),
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .W(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
// FPU flag selection - to privileged // FPU flag selection - to privileged
mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM); mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
@ -395,9 +390,6 @@ module fpu (
// M/W pipe registers // M/W pipe registers
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResSelM, FmtM},
{FRegWriteW, FResSelW, FmtW});
// BEGIN WRITEBACK STAGE // BEGIN WRITEBACK STAGE

View File

@ -26,60 +26,59 @@
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE. // OR OTHER DEALINGS IN THE SOFTWARE.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" `include "wally-config.vh"
module fsgninj ( module fsgninj (
input logic XSgnE, YSgnE, // X and Y sign bits input logic Xs, Ys, // X and Y sign bits
input logic [`FLEN-1:0] FSrcXE, // X input logic [`FLEN-1:0] X, // X
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single input logic [`FMTBITS-1:0] Fmt, // format
input logic [1:0] SgnOpCodeE, // operation control input logic [1:0] OpCtrl, // operation control
output logic [`FLEN-1:0] SgnResE // result output logic [`FLEN-1:0] SgnRes // result
); );
logic ResSgn; logic ResSgn;
//op code designation: // OpCtrl:
// // 00 - fsgnj - directly copy over sign value of Y
//00 - fsgnj - directly copy over sign value of FSrcYE // 01 - fsgnjn - negate sign value of Y
//01 - fsgnjn - negate sign value of FSrcYE // 10 - fsgnjx - XOR sign values of X and Y
//10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE
//
// calculate the result's sign // calculate the result's sign
assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE; assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys;
// format final result based on precision // format final result based on precision
// - uses NaN-blocking format // - uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s // - if there are any unsused bits the most significant bits are filled with 1s
if (`FPSIZES == 1) if (`FPSIZES == 1)
assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]}; assign SgnRes = {ResSgn, X[`FLEN-2:0]};
else if (`FPSIZES == 2) else if (`FPSIZES == 2)
assign SgnResE = {~FmtE|ResSgn, FSrcXE[`FLEN-2:`LEN1], FmtE ? FSrcXE[`LEN1-1] : ResSgn, FSrcXE[`LEN1-2:0]}; assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
else if (`FPSIZES == 3) begin else if (`FPSIZES == 3) begin
logic [2:0] SgnBits; logic [2:0] SgnBits;
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: SgnBits = {ResSgn, FSrcXE[`LEN1-1], FSrcXE[`LEN2-1]}; `FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
`FMT1: SgnBits = {1'b1, ResSgn, FSrcXE[`LEN2-1]}; `FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
`FMT2: SgnBits = {2'b11, ResSgn}; `FMT2: SgnBits = {2'b11, ResSgn};
default: SgnBits = {3{1'bx}}; default: SgnBits = {3{1'bx}};
endcase endcase
assign SgnResE = {SgnBits[2], FSrcXE[`FLEN-2:`LEN1], SgnBits[1], FSrcXE[`LEN1-2:`LEN2], SgnBits[0], FSrcXE[`LEN2-2:0]}; assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
end else if (`FPSIZES == 4) begin end else if (`FPSIZES == 4) begin
logic [3:0] SgnBits; logic [3:0] SgnBits;
always_comb always_comb
case (FmtE) case (Fmt)
`Q_FMT: SgnBits = {ResSgn, FSrcXE[`D_LEN-1], FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]}; `Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
`D_FMT: SgnBits = {1'b1, ResSgn, FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]}; `D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]};
`S_FMT: SgnBits = {2'b11, ResSgn, FSrcXE[`H_LEN-1]}; `S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
`H_FMT: SgnBits = {3'b111, ResSgn}; `H_FMT: SgnBits = {3'b111, ResSgn};
endcase endcase
assign SgnResE = {SgnBits[3], FSrcXE[`Q_LEN-2:`D_LEN], SgnBits[2], FSrcXE[`D_LEN-2:`S_LEN], SgnBits[1], FSrcXE[`S_LEN-2:`H_LEN], SgnBits[0], FSrcXE[`H_LEN-2:0]}; assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
end end
endmodule endmodule

View File

@ -42,7 +42,12 @@ module negateintres(
// round and negate the positive res if needed // round and negate the positive res if needed
assign CvtNegRes = Xs ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}; assign CvtNegRes = Xs ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
assign CvtNegResMsbs = Signed ? Int64 ? CvtNegRes[`XLEN:`XLEN-1] : CvtNegRes[32:31] : always_comb
Int64 ? CvtNegRes[`XLEN+1:`XLEN] : CvtNegRes[33:32]; if(Signed)
if(Int64) CvtNegResMsbs = CvtNegRes[`XLEN:`XLEN-1];
else CvtNegResMsbs = CvtNegRes[32:31];
else
if(Int64) CvtNegResMsbs = CvtNegRes[`XLEN+1:`XLEN];
else CvtNegResMsbs = CvtNegRes[33:32];
endmodule endmodule

177
pipelined/src/fpu/otfc.sv Normal file
View File

@ -0,0 +1,177 @@
///////////////////////////////////////////
// otfc.sv
//
// Written: me@KatherineParry.com, cturek@hmc.edu
// Modified:7/14/2022
//
// Purpose: On the fly conversion
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module otfc2 (
input logic qp, qz,
input logic [`DIVb:0] Q, QM,
output logic [`DIVb:0] QNext, QMNext
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
// Use this otfc for division only.
logic [`DIVb-1:0] QR, QMR;
assign QR = Q[`DIVb-1:0];
assign QMR = QM[`DIVb-1:0]; // Shifted Q and QM
always_comb begin
if (qp) begin
QNext = {QR, 1'b1};
QMNext = {QR, 1'b0};
end else if (qz) begin
QNext = {QR, 1'b0};
QMNext = {QMR, 1'b1};
end else begin // If qp and qz are not true, then qn is
QNext = {QMR, 1'b1};
QMNext = {QMR, 1'b0};
end
end
endmodule
///////////////////////////////
// Square Root OTFC, Radix 2 //
///////////////////////////////
module sotfc2(
input logic sp, sz,
input logic [`DIVb-1:0] C,
input logic [`DIVb:0] S, SM,
output logic [`DIVb:0] SNext, SMNext
);
// The on-the-fly converter transfers the square root
// bits to the quotient as they come.
// Use this otfc for division and square root.
logic [`DIVb:0] CExt;
assign CExt = {1'b1, C};
always_comb begin
if (sp) begin
SNext = S | (CExt & ~(CExt << 1));
SMNext = S;
end else if (sz) begin
SNext = S;
SMNext = SM | (CExt & ~(CExt << 1));
end else begin // If sp and sz are not true, then sn is
SNext = SM | (CExt & ~(CExt << 1));
SMNext = SM;
end
end
endmodule
module otfc4 (
input logic [3:0] q,
input logic [`DIVb:0] Q, QM,
output logic [`DIVb:0] QNext, QMNext
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-4 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [`DIVb-2:0] QR, QMR;
// shift Q (quotent) and QM (quotent-1)
// if q = 2 Q = {Q, 10} QM = {Q, 01}
// else if q = 1 Q = {Q, 01} QM = {Q, 00}
// else if q = 0 Q = {Q, 00} QM = {QM, 11}
// else if q = -1 Q = {QM, 11} QM = {QM, 10}
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
assign QR = Q[`DIVb-2:0];
assign QMR = QM[`DIVb-2:0]; // Shifted Q and QM
always_comb begin
if (q[3]) begin // +2
QNext = {QR, 2'b10};
QMNext = {QR, 2'b01};
end else if (q[2]) begin // +1
QNext = {QR, 2'b01};
QMNext = {QR, 2'b00};
end else if (q[1]) begin // -1
QNext = {QMR, 2'b11};
QMNext = {QMR, 2'b10};
end else if (q[0]) begin // -2
QNext = {QMR, 2'b10};
QMNext = {QMR, 2'b01};
end else begin // 0
QNext = {QR, 2'b00};
QMNext = {QMR, 2'b11};
end
end
// Final Qmeint is in the range [.5, 2)
endmodule
///////////////////////////////
// Square Root OTFC, Radix 4 //
///////////////////////////////
module sotfc4(
input logic [3:0] s,
input logic Sqrt,
input logic [`DIVLEN+3:0] S, SM,
input logic [`DIVLEN+3:0] C,
output logic [`DIVLEN+3:0] SNext, SMNext
);
// The on-the-fly converter transfers the square root
// bits to the quotient as they come.
// Use this otfc for division and square root.
always_comb begin
if (s[3]) begin
SNext = S | ((C << 1)&~(C << 2));
SMNext = S | (C&~(C << 1));
end else if (s[2]) begin
SNext = S | (C&~(C << 1));
SMNext = S;
end else if (s[1]) begin
SNext = SM | (C&~(C << 2));
SMNext = SM | ((C << 1)&~(C << 2));
end else if (s[0]) begin
SNext = SM | ((C << 1)&~(C << 2));
SMNext = SM | (C&~(C << 1));
end else begin // If sp and sn are not true, then sz is
SNext = S;
SMNext = SM | (C & ~(C << 2));
end
end
endmodule

View File

@ -29,14 +29,14 @@
`include "wally-config.vh" `include "wally-config.vh"
module postprocess( module postprocess (
// general signals // general signals
input logic Xs, Ys, // input signs input logic Xs, Ys, // input signs
input logic [`NE-1:0] Ze, // input exponents input logic [`NE-1:0] Ze, // input exponents
input logic [`NF:0] Xm, Ym, Zm, // input mantissas input logic [`NF:0] Xm, Ym, Zm, // input mantissas
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [2:0] FOpCtrl, // choose which opperation (look below for values) input logic [2:0] OpCtrl, // choose which opperation (look below for values)
input logic XZero, YZero, ZZero, // inputs are zero input logic XZero, YZero, ZZero, // inputs are zero
input logic XInf, YInf, ZInf, // inputs are infinity input logic XInf, YInf, ZInf, // inputs are infinity
input logic XNaN, YNaN, ZNaN, // inputs are NaN input logic XNaN, YNaN, ZNaN, // inputs are NaN
@ -46,20 +46,21 @@ module postprocess(
//fma signals //fma signals
input logic FmaAs, // the modified Z sign - depends on instruction input logic FmaAs, // the modified Z sign - depends on instruction
input logic FmaPs, // the product's sign input logic FmaPs, // the product's sign
input logic [`NE+1:0] FmaSe,
input logic [`NE+1:0] FmaPe, // Product exponent input logic [`NE+1:0] FmaPe, // Product exponent
input logic [3*`NF+5:0] FmaSm, // the positive sum input logic [3*`NF+5:0] FmaSm, // the positive sum
input logic FmaZmSticky, // sticky bit that is calculated during alignment input logic FmaZmS, // sticky bit that is calculated during alignment
input logic FmaKillProd, // set the product to zero before addition if the product is too small to matter input logic FmaKillProd, // set the product to zero before addition if the product is too small to matter
input logic FmaNegSum, // was the sum negitive input logic FmaNegSum, // was the sum negitive
input logic FmaInvA, // do you invert Z input logic FmaInvA, // do you invert Z
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count input logic FmaSs,
input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // the normalization shift count
//divide signals //divide signals
input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2, input logic [`DURLEN-1:0] DivEarlyTermShift,
input logic DivSticky, input logic DivS,
input logic DivNegSticky,
input logic DivDone, input logic DivDone,
input logic [`NE+1:0] DivCalcExp, input logic [`NE+1:0] DivQe,
input logic [`DIVLEN+2:0] Quot, input logic [`DIVb-(`RADIX/4):0] DivQm,
// conversion signals // conversion signals
input logic CvtCs, // the result's sign input logic CvtCs, // the result's sign
input logic [`NE:0] CvtCe, // the calculated expoent input logic [`NE:0] CvtCe, // the calculated expoent
@ -69,7 +70,7 @@ module postprocess(
input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (priority encoder) input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (priority encoder)
input logic IntZero, // is the input zero input logic IntZero, // is the input zero
// final results // final results
output logic [`FLEN-1:0] W, // FMA final result output logic [`FLEN-1:0] PostProcRes, // FMA final result
output logic [4:0] PostProcFlg, output logic [4:0] PostProcFlg,
output logic [`XLEN-1:0] FCvtIntRes // the int conversion result output logic [`XLEN-1:0] FCvtIntRes // the int conversion result
); );
@ -78,32 +79,31 @@ module postprocess(
logic Ws; logic Ws;
logic [`NF-1:0] Rf; // Result fraction logic [`NF-1:0] Rf; // Result fraction
logic [`NE-1:0] Re; // Result exponent logic [`NE-1:0] Re; // Result exponent
logic Nsgn; logic Ms;
logic [`NE+1:0] Nexp; logic [`NE+1:0] Me;
logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction logic [`CORRSHIFTSZ-1:0] Mf; // corectly shifted fraction
logic [`NE+1:0] FullResExp; // Re with bits to determine sign and overflow logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow
logic S; // S bit logic S; // S bit
logic UfPlus1; // do you add one (for determining underflow flag) logic UfPlus1; // do you add one (for determining underflow flag)
logic R; // bits needed to determine rounding logic R; // bits needed to determine rounding
logic [`FLEN:0] RoundAdd; // how much to add to the result
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
logic Plus1; // add one to the final result? logic Plus1; // add one to the final result?
logic IntInvalid, Overflow, Invalid; // flags logic IntInvalid, Overflow, Invalid; // flags
logic UfLSBRes; logic UfL;
logic [`FMTBITS-1:0] OutFmt; logic [`FMTBITS-1:0] OutFmt;
// fma signals // fma signals
logic [`NE+1:0] FmaSe; // exponent of the normalized sum logic [`NE+1:0] FmaMe; // exponent of the normalized sum
logic FmaSmZero; // is the sum zero logic FmaSZero; // is the sum zero
logic [3*`NF+8:0] FmaShiftIn; // shift input logic [3*`NF+8:0] FmaShiftIn; // shift input
logic [`NE+1:0] FmaConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
// division singals // division singals
logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt; logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
logic [`NORMSHIFTSZ-1:0] DivShiftIn; logic [`NORMSHIFTSZ-1:0] DivShiftIn;
logic [`NE+1:0] DivCorrExp; logic [`NE+1:0] Qe;
logic DivByZero; logic DivByZero;
logic DivResDenorm; logic DivResDenorm;
logic [`NE+1:0] DivDenormShift; logic [`NE+1:0] DivDenormShift;
@ -125,26 +125,26 @@ module postprocess(
logic Sqrt; logic Sqrt;
// signals to help readability // signals to help readability
assign Signed = FOpCtrl[0]; assign Signed = OpCtrl[0];
assign Int64 = FOpCtrl[1]; assign Int64 = OpCtrl[1];
assign IntToFp = FOpCtrl[2]; assign IntToFp = OpCtrl[2];
assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0]; assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
assign CvtOp = (PostProcSel == 2'b00); assign CvtOp = (PostProcSel == 2'b00);
assign FmaOp = (PostProcSel == 2'b10); assign FmaOp = (PostProcSel == 2'b10);
assign DivOp = (PostProcSel == 2'b01)&DivDone; assign DivOp = (PostProcSel == 2'b01)&DivDone;
assign Sqrt = FOpCtrl[0]; assign Sqrt = OpCtrl[0];
// is there an input of infinity or NaN being used // is there an input of infinity or NaN being used
assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp); assign InfIn = XInf|YInf|ZInf;
assign NaNIn = (XNaN&~(IntToFp&CvtOp))|(YNaN&~CvtOp)|(ZNaN&FmaOp); assign NaNIn = XNaN|YNaN|ZNaN;
// choose the ouptut format depending on the opperation // choose the ouptut format depending on the opperation
// - fp -> fp: OpCtrl contains the percision of the output // - fp -> fp: OpCtrl contains the percision of the output
// - otherwise: Fmt contains the percision of the output // - otherwise: Fmt contains the percision of the output
if (`FPSIZES == 2) if (`FPSIZES == 2)
assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT); assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4) else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0]; assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Normalization // Normalization
@ -152,9 +152,9 @@ module postprocess(
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp, fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaSCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe,
.ZDenorm, .FmaSmZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShiftDiv2, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); divshiftcalc divshiftcalc(.Fmt, .Sqrt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
always_comb always_comb
case(PostProcSel) case(PostProcSel)
@ -183,9 +183,9 @@ module postprocess(
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp, shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .NormSumExp,
.DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp, .DivResDenorm, .DivDenormShift, .DivOp, .DivQe,
.DivCorrExp, .FmaSmZero, .Shifted, .FmaSe, .Nfrac); .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf);
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Rounding // Rounding
@ -199,19 +199,19 @@ module postprocess(
roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum,
.Xs, .Ys, .CvtCs, .Nsgn); .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
.FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf,
.DivSticky, .DivNegSticky, .DivDone, .DivS, .DivDone,
.DivOp, .UfPlus1, .FullResExp, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp); .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me);
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Sign calculation // Sign calculation
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S, resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaMe, .R, .S,
.FmaOp, .ZInf, .InfIn, .FmaSmZero, .Mult, .Nsgn, .Ws); .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws);
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Flags // Flags
@ -219,19 +219,19 @@ module postprocess(
flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
.Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
.XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
.UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1, .UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
.Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Select the result // Select the result
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes); negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
.IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf,
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
.XInf, .YInf, .DivOp, .XInf, .YInf, .DivOp,
.DivByZero, .FullResExp, .CvtCe, .Ws, .Re, .Rf, .W, .FCvtIntRes); .DivByZero, .FullRe, .CvtCe, .Ws, .Re, .Rf, .PostProcRes, .FCvtIntRes);
endmodule endmodule

198
pipelined/src/fpu/qsel.sv Normal file
View File

@ -0,0 +1,198 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module qsel2 ( // *** eventually just change to 4 bits
input logic [`DIVLEN+3:`DIVLEN] ps, pc,
output logic qp, qz//, qn
);
logic [`DIVLEN+3:`DIVLEN] p, g;
logic magnitude, sign, cout;
// The quotient selection logic is presented for simplicity, not
// for efficiency. You can probably optimize your logic to
// select the proper divisor with less delay.
// Qmient equations from EE371 lecture notes 13-20
assign p = ps ^ pc;
assign g = ps & pc;
assign magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
assign cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
assign sign = p[`DIVLEN+3] ^ cout;
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
(ps[52]^pc[52]));
assign #1 sign = (ps[55]^pc[55])^
(ps[54] & pc[54] | ((ps[54]^pc[54]) &
(ps[53]&pc[53] | ((ps[53]^pc[53]) &
(ps[52]&pc[52]))))); */
// Produce quotient = +1, 0, or -1
assign qp = magnitude & ~sign;
assign qz = ~magnitude;
// assign #1 qn = magnitude & sign;
endmodule
////////////////////////////////////
// Adder Input Generation, Radix 2 //
////////////////////////////////////
module fgen2 (
input logic sp, sz,
input logic [`DIVb-1:0] C,
input logic [`DIVb:0] S, SM,
output logic [`DIVb+3:0] F
);
logic [`DIVb+3:0] FP, FN, FZ;
logic [`DIVb+3:0] SExt, SMExt, CExt;
assign SExt = {3'b0, S};
assign SMExt = {3'b0, SM};
assign CExt = {4'hf, C};
// Generate for both positive and negative bits
assign FP = ~(SExt << 1) & CExt;
assign FN = (SMExt << 1) | (CExt & (~CExt << 2));
assign FZ = '0;
// Choose which adder input will be used
always_comb
if (sp) F = FP;
else if (sz) F = FZ;
else F = FN;
endmodule
module qsel4 (
input logic [`DIVN-2:0] D,
input logic [`DIVb+3:0] WS, WC,
input logic Sqrt,
output logic [3:0] q
);
logic [6:0] Wmsbs;
logic [7:0] PreWmsbs;
logic [2:0] Dmsbs;
assign PreWmsbs = WC[`DIVb+3:`DIVb-4] + WS[`DIVb+3:`DIVb-4];
assign Wmsbs = PreWmsbs[7:1];
assign Dmsbs = D[`DIVN-2:`DIVN-4];//|{3{D[`DIVN-2]&Sqrt}};
// D = 0001.xxx...
// Dmsbs = | |
// W = xxxx.xxx...
// Wmsbs = | |
logic [3:0] QSel4[1023:0];
always_comb begin
integer d, w, i, w2;
for(d=0; d<8; d++)
for(w=0; w<128; w++)begin
i = d*128+w;
w2 = w-128*(w>=64); // convert to two's complement
case(d)
0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-4) QSel4[i] = 4'b0000;
else if(w2>=-13) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
1: if(w2>=14) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-5) QSel4[i] = 4'b0000; // was -6
else if(~Sqrt&(w2>=-15)) QSel4[i] = 4'b0010; // divide case
else if( Sqrt&(w2>=-14)) QSel4[i] = 4'b0010; // sqrt case
else QSel4[i] = 4'b0001;
2: if(w2>=15) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-16) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
3: if(w2>=16) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-17) QSel4[i] = 4'b0010; // was -18
else QSel4[i] = 4'b0001;
4: if(w2>=18) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000; // was -8
else if(~Sqrt&(w2>=-20)) QSel4[i] = 4'b0010; // divide case
else if( Sqrt&(w2>=-18)) QSel4[i] = 4'b0010; // sqrt case
else QSel4[i] = 4'b0001;
5: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
6: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-22) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
7: if(w2>=22) QSel4[i] = 4'b1000; // was 24
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-23) QSel4[i] = 4'b0010; // was -24
else QSel4[i] = 4'b0001;
endcase
end
end
assign q = QSel4[{Dmsbs,Wmsbs}];
endmodule
////////////////////////////////////
// Adder Input Generation, Radix 4 //
////////////////////////////////////
module fgen4 (
input logic [3:0] s,
input logic [`DIVLEN+3:0] C, S, SM,
output logic [`DIVLEN+3:0] F
);
logic [`DIVLEN+3:0] F2, F1, F0, FN1, FN2;
// Generate for both positive and negative bits
assign F2 = (~S << 2) & (C << 2);
assign F1 = ~(S << 1) & C;
assign F0 = '0;
assign FN1 = (SM << 1) | (C & ~(C << 3));
assign FN2 = (SM << 2) | ((C << 2)&~(C << 4));
// Choose which adder input will be used
always_comb
if (s[3]) F = F2;
else if (s[2]) F = F1;
else if (s[1]) F = FN1;
else if (s[0]) F = FN2;
else F = F0;
// assign F = sp ? FP : (sn ? FN : FZ);
endmodule

View File

@ -34,33 +34,43 @@ module resultsign(
input logic ZInf, input logic ZInf,
input logic InfIn, input logic InfIn,
input logic FmaOp, input logic FmaOp,
input logic [`NE+1:0] FmaSe, input logic [`NE+1:0] FmaMe,
input logic FmaSmZero, input logic FmaSZero,
input logic Mult, input logic Mult,
input logic R, input logic R,
input logic S, input logic S,
input logic Nsgn, input logic Ms,
output logic Ws output logic Ws
); );
logic ZeroSgn; logic Zeros;
logic InfSgn; logic Infs;
logic Underflow;
// logic ResultSgnTmp;
// Determine the sign if the sum is zero // The IEEE754-2019 standard specifies:
// if cancelation then 0 unless round to -infinity // - the sign of an exact zero sum (with operands of diffrent signs) should be positive unless rounding toward negitive infinity
// if multiply then Psgn // - when the exact result of an FMA opperation is non-zero, but is zero due to rounding, use the sign of the exact result
// otherwise psign // - if x = +0 or -0 then x+x=x and x-(-x)=x
assign Underflow = FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)); // - the sign of a product is the exclisive or or the opperand's signs
assign ZeroSgn = (FmaPs^FmaAs)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : FmaPs; // Zero sign will only be selected if:
// - P=Z and a cancelation occurs - exact zero
// - Z is zero and P is zero - exact zero
// - P is killed and Z is zero - Psgn
// - Z is killed and P is zero - impossible
// Zero sign calculation:
// - if a multiply opperation is done, then use the products sign(Ps)
// - if the zero sum is not exactly zero i.e. R|S use the sign of the exact result (which is the product's sign)
// - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign
assign Zeros = (FmaPs^FmaAs)&~(R|S)&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
// is the result negitive // is the result negitive
// if p - z is the Sum negitive // if p - z is the Sum negitive
// if -p + z is the Sum positive // if -p + z is the Sum positive
// if -p - z then the Sum is negitive // if -p - z then the Sum is negitive
assign InfSgn = ZInf ? FmaAs : FmaPs; assign Infs = ZInf ? FmaAs : FmaPs;
assign Ws = InfIn&FmaOp ? InfSgn : FmaSmZero&FmaOp ? ZeroSgn : Nsgn; always_comb
if(InfIn&FmaOp) Ws = Infs;
else if(FmaSZero&FmaOp) Ws = Zeros;
else Ws = Ms;
endmodule endmodule

View File

@ -46,36 +46,32 @@ module round(
input logic [1:0] PostProcSel, input logic [1:0] PostProcSel,
input logic CvtResDenormUf, input logic CvtResDenormUf,
input logic CvtResUf, input logic CvtResUf,
input logic [`CORRSHIFTSZ-1:0] Nfrac, input logic [`CORRSHIFTSZ-1:0] Mf,
input logic FmaZmSticky, // addend's sticky bit input logic FmaZmS, // addend's sticky bit
input logic ZZero, // is Z zero input logic [`NE+1:0] FmaMe, // exponent of the normalized sum
input logic FmaInvA, // invert Z input logic Ms, // the result's sign
input logic [`NE+1:0] FmaSe, // exponent of the normalized sum
input logic Nsgn, // the result's sign
input logic [`NE:0] CvtCe, // the calculated expoent input logic [`NE:0] CvtCe, // the calculated expoent
input logic [`NE+1:0] DivCorrExp, // the calculated expoent input logic [`NE+1:0] Qe, // the calculated expoent
input logic DivSticky, // sticky bit input logic DivS, // sticky bit
input logic DivNegSticky,
output logic UfPlus1, // do you add or subtract on from the result output logic UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
output logic [`NF-1:0] Rf, // Result fraction output logic [`NF-1:0] Rf, // Result fraction
output logic [`NE-1:0] Re, // Result exponent output logic [`NE-1:0] Re, // Result exponent
output logic S, // sticky bit output logic S, // sticky bit
output logic [`NE+1:0] Nexp, output logic [`NE+1:0] Me,
output logic Plus1, output logic Plus1,
output logic [`FLEN:0] RoundAdd, // how much to add to the result output logic R, UfL // bits needed to calculate rounding
output logic R, UfLSBRes // bits needed to calculate rounding
); );
logic LSBRes; // bit used for rounding - least significant bit of the normalized sum logic L; // bit used for rounding - least significant bit of the normalized sum
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number logic UfCalcPlus1;
logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result logic NormS; // normalized sum's sticky bit
logic NormSumSticky; // normalized sum's sticky bit logic UfS; // sticky bit for underlow calculation
logic UfSticky; // sticky bit for underlow calculation
logic [`NF-1:0] RoundFrac; logic [`NF-1:0] RoundFrac;
logic FpRes, IntRes; logic FpRes, IntRes;
logic UfRound; logic UfR;
logic FpRound, FpLSBRes, FpUfRound; logic FpRound, FpLSBRes, FpUfRound;
logic CalcPlus1, FpPlus1; logic CalcPlus1, FpPlus1;
logic [`FLEN:0] RoundAdd; // how much to add to the result
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Rounding // Rounding
@ -118,61 +114,61 @@ module round(
// | NF |1|1| // | NF |1|1|
// ^ ^ if floating point result // ^ ^ if floating point result
// ^ if not an FMA result // ^ if not an FMA result
if (`XLENPOS == 1)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | if (`XLENPOS == 1)assign NormS = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]); (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN // 2: NF > XLEN
if (`XLENPOS == 2)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) | if (`XLENPOS == 2)assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]); (|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 2) begin end else if (`FPSIZES == 2) begin
// XLEN is either 64 or 32 // XLEN is either 64 or 32
// so half and single are always smaller then XLEN // so half and single are always smaller then XLEN
// 1: XLEN > NF > NF1 // 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) | if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]); (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1 // 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) | (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]); (|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN // 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) | if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) | (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]); (|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 3) begin end else if (`FPSIZES == 3) begin
// 1: XLEN > NF > NF1 // 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) | (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]); (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1 // 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) | (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]); (|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN // 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) | if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) | (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) | (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]); (|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 4) begin end else if (`FPSIZES == 4) begin
// Quad precision will always be greater than XLEN // Quad precision will always be greater than XLEN
// 2: NF > XLEN > NF1 // 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]); (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
// 3: NF > NF1 > XLEN // 3: NF > NF1 > XLEN
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) | (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]); (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
end end
@ -180,37 +176,37 @@ module round(
// only add the Addend sticky if doing an FMA opperation // only add the Addend sticky if doing an FMA opperation
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
assign UfSticky = FmaZmSticky&FmaOp | NormSumSticky | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivSticky&DivOp; assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp;
// determine round and LSB of the rounded value // determine round and LSB of the rounded value
// - underflow round bit is used to determint the underflow flag // - underflow round bit is used to determint the underflow flag
if (`FPSIZES == 1) begin if (`FPSIZES == 1) begin
assign FpRound = Nfrac[`CORRSHIFTSZ-`NF-1]; assign FpRound = Mf[`CORRSHIFTSZ-`NF-1];
assign FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF]; assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
assign FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2]; assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
end else if (`FPSIZES == 2) begin end else if (`FPSIZES == 2) begin
assign FpRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-1] : Nfrac[`CORRSHIFTSZ-`NF1-1]; assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
assign FpLSBRes = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF] : Nfrac[`CORRSHIFTSZ-`NF1]; assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
assign FpUfRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-2] : Nfrac[`CORRSHIFTSZ-`NF1-2]; assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
end else if (`FPSIZES == 3) begin end else if (`FPSIZES == 3) begin
always_comb always_comb
case (OutFmt) case (OutFmt)
`FMT: begin `FMT: begin
FpRound = Nfrac[`CORRSHIFTSZ-`NF-1]; FpRound = Mf[`CORRSHIFTSZ-`NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF]; FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2]; FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
end end
`FMT1: begin `FMT1: begin
FpRound = Nfrac[`CORRSHIFTSZ-`NF1-1]; FpRound = Mf[`CORRSHIFTSZ-`NF1-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF1]; FpLSBRes = Mf[`CORRSHIFTSZ-`NF1];
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF1-2]; FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2];
end end
`FMT2: begin `FMT2: begin
FpRound = Nfrac[`CORRSHIFTSZ-`NF2-1]; FpRound = Mf[`CORRSHIFTSZ-`NF2-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF2]; FpLSBRes = Mf[`CORRSHIFTSZ-`NF2];
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF2-2]; FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2];
end end
default: begin default: begin
FpRound = 1'bx; FpRound = 1'bx;
@ -222,130 +218,97 @@ module round(
always_comb always_comb
case (OutFmt) case (OutFmt)
2'h3: begin 2'h3: begin
FpRound = Nfrac[`CORRSHIFTSZ-`Q_NF-1]; FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`Q_NF]; FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`Q_NF-2]; FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
end end
2'h1: begin 2'h1: begin
FpRound = Nfrac[`CORRSHIFTSZ-`D_NF-1]; FpRound = Mf[`CORRSHIFTSZ-`D_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`D_NF]; FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`D_NF-2]; FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2];
end end
2'h0: begin 2'h0: begin
FpRound = Nfrac[`CORRSHIFTSZ-`S_NF-1]; FpRound = Mf[`CORRSHIFTSZ-`S_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`S_NF]; FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`S_NF-2]; FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2];
end end
2'h2: begin 2'h2: begin
FpRound = Nfrac[`CORRSHIFTSZ-`H_NF-1]; FpRound = Mf[`CORRSHIFTSZ-`H_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`H_NF]; FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`H_NF-2]; FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2];
end end
endcase endcase
end end
assign R = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-1] : FpRound; assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound;
assign LSBRes = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN] : FpLSBRes; assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
assign UfRound = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-2] : FpUfRound; assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
// used to determine underflow flag // used to determine underflow flag
assign UfLSBRes = FpRound; assign UfL = FpRound;
// determine sticky // determine sticky
assign S = UfSticky | UfRound; assign S = UfS | UfR;
// Deterimine if a small number was supposed to be subtrated
// - for FMA or if division has a negitive sticky bit
assign SubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~(NormSumSticky|UfRound);
assign UfSubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~NormSumSticky;
always_comb begin always_comb begin
// Determine if you add 1 // Determine if you add 1
case (Frm) case (Frm)
3'b000: CalcPlus1 = R & ((S| LSBRes)&~SubBySmallNum);//round to nearest even 3'b000: CalcPlus1 = R & (S| L);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero 3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = Nsgn & ~(SubBySmallNum & ~R);//round down 3'b010: CalcPlus1 = Ms;//round down
3'b011: CalcPlus1 = ~Nsgn & ~(SubBySmallNum & ~R);//round up 3'b011: CalcPlus1 = ~Ms;//round up
3'b100: CalcPlus1 = R & ~SubBySmallNum;//round to nearest max magnitude 3'b100: CalcPlus1 = R;//round to nearest max magnitude
default: CalcPlus1 = 1'bx; default: CalcPlus1 = 1'bx;
endcase endcase
// Determine if you add 1 (for underflow flag) // Determine if you add 1 (for underflow flag)
case (Frm) case (Frm)
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even 3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero 3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = Nsgn & ~(UfSubBySmallNum & ~UfRound);//round down 3'b010: UfCalcPlus1 = Ms;//round down
3'b011: UfCalcPlus1 = ~Nsgn & ~(UfSubBySmallNum & ~UfRound);//round up 3'b011: UfCalcPlus1 = ~Ms;//round up
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude 3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx; default: UfCalcPlus1 = 1'bx;
endcase endcase
// Determine if you subtract 1
case (Frm)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~R;//round to zero
3'b010: CalcMinus1 = ~Nsgn & ~R & SubBySmallNum;//round down
3'b011: CalcMinus1 = Nsgn & ~R & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
end end
// If an answer is exact don't round // If an answer is exact don't round
assign Plus1 = CalcPlus1 & (S | R); assign Plus1 = CalcPlus1 & (S | R);
assign FpPlus1 = Plus1&~(ToInt&CvtOp); assign FpPlus1 = Plus1&~(ToInt&CvtOp);
assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky
assign Minus1 = CalcMinus1 & (S | R);
// Compute rounded result // Compute rounded result
if (`FPSIZES == 1) begin if (`FPSIZES == 1) begin
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1}; assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
end else if (`FPSIZES == 2) begin end else if (`FPSIZES == 2) begin
// \/FLEN+1 // \/FLEN+1
// | NE+2 | NF | // | NE+2 | NF |
// '-NE+2-^----NF1----^ // '-NE+2-^----NF1----^
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1 // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} : assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
end else if (`FPSIZES == 3) begin end else if (`FPSIZES == 3) begin
always_comb begin assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
case (OutFmt)
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
default: RoundAdd = (`FLEN+1)'(0);
endcase
end
end else if (`FPSIZES == 4) begin end else if (`FPSIZES == 4)
always_comb begin assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
case (OutFmt)
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
endcase
end
end
// determine the result to be roundned // determine the result to be roundned
assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
always_comb always_comb
case(PostProcSel) case(PostProcSel)
2'b10: Nexp = FmaSe; // fma 2'b10: Me = FmaMe; // fma
2'b00: Nexp = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
2'b01: Nexp = DivDone ? DivCorrExp : '0; // divide 2'b01: Me = DivDone ? Qe : '0; // divide
default: Nexp = '0; default: Me = '0;
endcase endcase
// round the result // round the result
// - if the fraction overflows one should be added to the exponent // - if the fraction overflows one should be added to the exponent
assign {FullResExp, Rf} = {Nexp, RoundFrac} + RoundAdd; assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
assign Re = FullResExp[`NE-1:0]; assign Re = FullRe[`NE-1:0];
endmodule endmodule

View File

@ -34,27 +34,20 @@ module roundsign(
input logic Xs, input logic Xs,
input logic Ys, input logic Ys,
input logic FmaNegSum, input logic FmaNegSum,
input logic Sqrt,
input logic FmaOp, input logic FmaOp,
input logic DivOp, input logic DivOp,
input logic CvtOp, input logic CvtOp,
input logic CvtCs, input logic CvtCs,
output logic Nsgn input logic FmaSs,
output logic Ms
); );
logic FmaResSgnTmp; logic Qs;
logic DivSgn;
// is the result negitive assign Qs = Xs^(Ys&~Sqrt);
// if p - z is the Sum negitive
// if -p + z is the Sum positive
// if -p - z then the Sum is negitive
assign FmaResSgnTmp = FmaNegSum^FmaPs; //*** move to execute stage
// assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs);
assign DivSgn = Xs^Ys;
// Sign for rounding calulation // Sign for rounding calulation
assign Nsgn = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (DivSgn&DivOp); assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
endmodule endmodule

View File

@ -28,23 +28,22 @@
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" `include "wally-config.vh"
module lzacorrection( module shiftcorrection(
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
input logic FmaOp, input logic FmaOp,
input logic DivOp, input logic DivOp,
input logic DivResDenorm, input logic DivResDenorm,
input logic [`NE+1:0] DivCalcExp, input logic [`NE+1:0] DivQe,
input logic [`NE+1:0] DivDenormShift, input logic [`NE+1:0] DivDenormShift,
input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection
input logic FmaKillProd, // is the product set to zero input logic FmaSZero,
input logic FmaSmZero, output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction output logic [`NE+1:0] Qe,
output logic [`NE+1:0] DivCorrExp, output logic [`NE+1:0] FmaMe // exponent of the normalized sum
output logic [`NE+1:0] FmaSe // exponent of the normalized sum
); );
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic [`CORRSHIFTSZ:0] CorrQuotShifted; logic [`CORRSHIFTSZ-1:0] CorrQmShifted;
logic ResDenorm; // is the result denormalized logic ResDenorm; // is the result denormalized
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
@ -54,16 +53,19 @@ module lzacorrection(
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
// if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
assign CorrQuotShifted = {LZAPlus2|(DivCalcExp==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0}; assign CorrQmShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
// if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; always_comb
if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}};
else if (DivOp&~DivResDenorm) Mf = CorrQmShifted;
else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// Determine sum's exponent // Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSmZero|ResDenorm)}}; assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &NormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
// recalculate if the result is denormalized // recalculate if the result is denormalized
assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
// the quotent is in the range [.5,2) if there is no early termination // the quotent is in the range [.5,2) if there is no early termination
// if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
assign DivCorrExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExp - {(`NE+1)'(0), ~LZAPlus2}; assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2};
endmodule endmodule

View File

@ -29,17 +29,17 @@
`include "wally-config.vh" `include "wally-config.vh"
module resultselect( module specialcase(
input logic Xs, // input signs input logic Xs, // input signs
input logic [`NF:0] Xm, Ym, Zm, // input mantissas input logic [`NF:0] Xm, Ym, Zm, // input mantissas
input logic XNaN, YNaN, ZNaN, // inputs are NaN input logic XNaN, YNaN, ZNaN, // inputs are NaN
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] OutFmt, // output format input logic [`FMTBITS-1:0] OutFmt, // output format
input logic InfIn, input logic InfIn,
input logic NaNIn,
input logic XInf, YInf, input logic XInf, YInf,
input logic XZero, input logic XZero,
input logic IntZero, input logic IntZero,
input logic NaNIn,
input logic IntToFp, input logic IntToFp,
input logic Int64, input logic Int64,
input logic Signed, input logic Signed,
@ -53,10 +53,10 @@ module resultselect(
input logic IntInvalid, Invalid, Overflow, // flags input logic IntInvalid, Invalid, Overflow, // flags
input logic CvtResUf, input logic CvtResUf,
input logic [`NE-1:0] Re, // Res exponent input logic [`NE-1:0] Re, // Res exponent
input logic [`NE+1:0] FullResExp, // Res exponent input logic [`NE+1:0] FullRe, // Res exponent
input logic [`NF-1:0] Rf, // Res fraction input logic [`NF-1:0] Rf, // Res fraction
input logic [`XLEN+1:0] CvtNegRes, // the negation of the result input logic [`XLEN+1:0] CvtNegRes, // the negation of the result
output logic [`FLEN-1:0] W, // final res output logic [`FLEN-1:0] PostProcRes, // final res
output logic [`XLEN-1:0] FCvtIntRes // final res output logic [`XLEN-1:0] FCvtIntRes // final res
); );
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results
@ -95,9 +95,14 @@ module resultselect(
end else begin end else begin
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end end
assign OfRes = OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} : always_comb
OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; if(OutFmt)
if(OfResMax) OfRes = {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}};
else OfRes = {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
else
if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}};
else OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
@ -231,23 +236,24 @@ module resultselect(
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input // - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input // - dont set to zero if int input is zero but not using the int input
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullResExp[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1); assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
// output infinity with result sign if divide by zero // output infinity with result sign if divide by zero
if(`IEEE754) begin if(`IEEE754)
assign W = XNaN&~(IntToFp&CvtOp) ? XNaNRes : always_comb
YNaN&~CvtOp ? YNaNRes : if(XNaN&~(IntToFp&CvtOp)) PostProcRes = XNaNRes;
ZNaN&FmaOp ? ZNaNRes : else if(YNaN&~CvtOp) PostProcRes = YNaNRes;
Invalid ? InvalidRes : else if(ZNaN&FmaOp) PostProcRes = ZNaNRes;
SelOfRes ? OfRes : else if(Invalid) PostProcRes = InvalidRes;
KillRes ? UfRes : else if(SelOfRes) PostProcRes = OfRes;
NormRes; else if(KillRes) PostProcRes = UfRes;
end else begin else PostProcRes = NormRes;
assign W = NaNIn|Invalid ? InvalidRes : else
SelOfRes ? OfRes : always_comb
KillRes ? UfRes : if(NaNIn|Invalid) PostProcRes = InvalidRes;
NormRes; else if(SelOfRes) PostProcRes = OfRes;
end else if(KillRes) PostProcRes = UfRes;
else PostProcRes = NormRes;
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
// //
@ -272,10 +278,17 @@ module resultselect(
// unsigned | 2^32-1 | 2^64-1 | // unsigned | 2^32-1 | 2^64-1 |
// //
// other: 32 bit unsinged res should be sign extended as if it were a signed number // other: 32 bit unsinged res should be sign extended as if it were a signed number
assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive always_comb
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive if(Signed)
Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive if(Xs&~NaNIn) // signed negitive
{`XLEN{1'b1}};// unsigned positive if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}};
else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
else // signed positive
if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}};
else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
else
if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive
else OfIntRes = {`XLEN{1'b1}}; // unsigned positive
// select the integer output // select the integer output
@ -284,7 +297,11 @@ module resultselect(
// - if rounding and signed opperation and negitive input, output -1 // - if rounding and signed opperation and negitive input, output -1
// - otherwise output a rounded 0 // - otherwise output a rounded 0
// - otherwise output the normal res (trmined and sign extended if nessisary) // - otherwise output the normal res (trmined and sign extended if nessisary)
assign FCvtIntRes = IntInvalid ? OfIntRes : always_comb
CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? if(IntInvalid) FCvtIntRes = OfIntRes;
Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; else if(CvtCe[`NE])
if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}};
else FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1};
else if(Int64) FCvtIntRes = CvtNegRes[`XLEN-1:0];
else FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
endmodule endmodule

View File

@ -1,312 +0,0 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module srtradix4 (
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE, YZeroE,
input logic [`DIVLEN-1:0] X,
input logic [`DIVLEN-1:0] Dpreproc,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`DIVLEN+2:0] Quot,
output logic [`DIVLEN+3:0] WSN, WCN,
output logic [`DIVLEN+3:0] WS, WC,
output logic [`NE+1:0] DivCalcExpM,
output logic [`XLEN-1:0] Rem
);
logic [3:0] q;
logic [`DIVLEN+3:0] WSA;
logic [`DIVLEN+3:0] WCA;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel;
logic [`NE+1:0] DivCalcExp;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign;
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
// - when the start signal is asserted X and 0 are loaded into WS and WC
// - otherwise load WSA into the flipflop
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
// *** change this for radix 4 - generate w/ stine code
// q encoding:
// 1000 = +2
// 0100 = +1
// 0000 = 0
// 0010 = -1
// 0001 = -2
qsel4 qsel4(.D, .WS, .WC, .q);
// Divisor Selection logic
// *** radix 4 change to choose -2 to 2
// - choose the negitive version of what's being selected
assign DBar = ~D;
assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
assign D2 = {D[`DIVLEN+2:0], 1'b0};
always_comb
case (q)
4'b1000: Dsel = DBar2;
4'b0100: Dsel = DBar;
4'b0000: Dsel = {(`DIVLEN+4){1'b0}};
4'b0010: Dsel = D;
4'b0001: Dsel = D2;
default: Dsel = {`DIVLEN+4{1'bx}};
endcase
// Partial Product Generation
// WSA, WCA = WS + WC - qD
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
//*** change for radix 4
otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot);
expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
endmodule
////////////////
// Submodules //
////////////////
module qsel4 (
input logic [`DIVLEN+3:0] D,
input logic [`DIVLEN+3:0] WS, WC,
output logic [3:0] q
);
logic [6:0] Wmsbs;
logic [7:0] PreWmsbs;
logic [2:0] Dmsbs;
assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
assign Wmsbs = PreWmsbs[7:1];
assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
// D = 0001.xxx...
// Dmsbs = | |
// W = xxxx.xxx...
// Wmsbs = | |
logic [3:0] QSel4[1023:0];
initial begin
integer d, w, i, w2;
for(d=0; d<8; d++)
for(w=0; w<128; w++)begin
i = d*128+w;
w2 = w-128*(w>=64); // convert to two's complement
case(d)
0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-4) QSel4[i] = 4'b0000;
else if(w2>=-13) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
1: if(w2>=14) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-15) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
2: if(w2>=15) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-16) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
3: if(w2>=16) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-18) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
4: if(w2>=18) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
5: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
6: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-22) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
7: if(w2>=24) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-24) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
endcase
end
end
assign q = QSel4[{Dmsbs,Wmsbs}];
endmodule
///////////////////////////////////
// On-The-Fly Converter, Radix 2 //
///////////////////////////////////
module otfc4 (
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [3:0] q,
output logic [`DIVLEN+2:0] Quot
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-4 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [`DIVLEN:0] QR, QMR;
// if starting a new divison set Q to 0 and QM to -1
mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
flopen #(`DIVLEN+3) Qreg(clk, DivBusy|DivStart, QMux, Quot); // *** have to connect Quot directly to M stage
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
// shift Q (quotent) and QM (quotent-1)
// if q = 2 Q = {Q, 10} QM = {Q, 01}
// else if q = 1 Q = {Q, 01} QM = {Q, 00}
// else if q = 0 Q = {Q, 00} QM = {QM, 11}
// else if q = -1 Q = {QM, 11} QM = {QM, 10}
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
// *** how does the 0 concatination numbers work?
always_comb begin
QR = Quot[`DIVLEN:0];
QMR = QM[`DIVLEN:0]; // Shift Q and QM
if (q[3]) begin // +2
QNext = {QR, 2'b10};
QMNext = {QR, 2'b01};
end else if (q[2]) begin // +1
QNext = {QR, 2'b01};
QMNext = {QR, 2'b00};
end else if (q[1]) begin // -1
QNext = {QMR, 2'b11};
QMNext = {QMR, 2'b10};
end else if (q[0]) begin // -2
QNext = {QMR, 2'b10};
QMNext = {QMR, 2'b01};
end else begin // 0
QNext = {QR, 2'b00};
QMNext = {QMR, 2'b11};
end
end
// Final Quoteint is in the range [.5, 2)
endmodule
/////////
// csa //
/////////
module csa #(parameter N=69) (
input logic [N-1:0] in1, in2, in3,
input logic cin,
output logic [N-1:0] out1, out2
);
// This block adds in1, in2, in3, and cin to produce
// a result out1 / out2 in carry-save redundant form.
// cin is just added to the least significant bit and
// is Startuired to handle adding a negative divisor.
// Fortunately, the carry (out2) is shifted left by one
// bit, leaving room in the least significant bit to
// insert cin.
assign out1 = in1 ^ in2 ^ in3;
assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
(in2[N-2:0] & in3[N-2:0]), cin};
endmodule
module expcalc(
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`NE+1:0] DivCalcExp
);
logic [`NE-2:0] Bias;
if (`FPSIZES == 1) begin
assign Bias = (`NE-1)'(`BIAS);
end else if (`FPSIZES == 2) begin
assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
end else if (`FPSIZES == 3) begin
always_comb
case (FmtE)
`FMT: Bias = (`NE-1)'(`BIAS);
`FMT1: Bias = (`NE-1)'(`BIAS1);
`FMT2: Bias = (`NE-1)'(`BIAS2);
default: Bias = 'x;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (FmtE)
2'h3: Bias = (`NE-1)'(`Q_BIAS);
2'h1: Bias = (`NE-1)'(`D_BIAS);
2'h0: Bias = (`NE-1)'(`S_BIAS);
2'h2: Bias = (`NE-1)'(`H_BIAS);
endcase
end
// correct exponent for denormalized input's normalization shifts
assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
endmodule

275
pipelined/src/fpu/srt.sv Normal file
View File

@ -0,0 +1,275 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module srt(
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [`NE-1:0] Xe, Ye,
input logic XZeroE, YZeroE,
input logic Sqrt,
input logic [`DIVb:0] X,
input logic [`DIVN-2:0] Dpreproc,
input logic NegSticky,
output logic [`DIVb-(`RADIX/4):0] Qm,
output logic [`DIVb+3:0] NextWSN, NextWCN,
output logic [`DIVb+3:0] StickyWSA,
output logic [`DIVb+3:0] FirstWS, FirstWC
);
//QLEN = 1.(number of bits created for division)
// N is NF+1 or XLEN
// WC/WS is dependent on D so 4.N-1 ie N+3 bits or N+2:0 + one more bit in fraction for possible sqrt right shift
// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-1:0
// Dsel should match WC/WS so 4.N-1 ie N+3 bits or N+2:0
// Q/QM/S/SM should be 1.b so b+1 bits or b:0
// C needs to be the lenght of the final fraction 0.b so b or b-1:0
/* verilator lint_off UNOPTFLAT */
logic [`DIVb+3:0] WSA[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WCA[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WS[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WC[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb:0] Q[`DIVCOPIES-1:0]; // U1.b
logic [`DIVb:0] QM[`DIVCOPIES-1:0];// 1.b
logic [`DIVb:0] QNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] QMNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] S[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] SM[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] SNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] SMNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb-1:0] C[`DIVCOPIES-1:0]; // 0.b
/* verilator lint_on UNOPTFLAT */
logic [`DIVb+3:0] WSN, WCN; // Q4.N-1
logic [`DIVN-2:0] D; // U0.N-1
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.N-1
logic [`DIVb:0] QMMux;
logic [`DIVb-1:0] NextC;
logic [`DIVb-1:0] CMux;
logic [`DIVb:0] SMux;
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
// - when the start signal is asserted X and 0 are loaded into WS and WC
// - otherwise load WSA into the flipflop
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
if (`RADIX == 2) begin : nextw
assign NextWSN = {WSA[`DIVCOPIES-1][`DIVb+2:0], 1'b0};
assign NextWCN = {WCA[`DIVCOPIES-1][`DIVb+2:0], 1'b0};
assign NextC = {1'b1, C[`DIVCOPIES-1][`DIVb-1:1]};
end else begin
assign NextWSN = {WSA[`DIVCOPIES-1][`DIVb+1:0], 2'b0};
assign NextWCN = {WCA[`DIVCOPIES-1][`DIVb+1:0], 2'b0};
assign NextC = {2'b11, C[`DIVCOPIES-1][`DIVb-1:2]};
end
// mux2 #(`DIVb+4) wsmux(NextWSN, {{3{Sqrt}}, X}, DivStart, WSN); //*** modified for sqrt which doesnt work
// flopen #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]);
// mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN);
// flopen #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
// flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D);
// mux2 #(`DIVb) Cmux(NextC, {Sqrt, {(`DIVb-1){1'b0}}}, DivStart, CMux);
// flop #(`DIVb) cflop(clk, CMux, C[0]);
mux2 #(`DIVb+4) wsmux(NextWSN, {3'b0, X}, DivStart, WSN);
flopen #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]);
mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN);
flopen #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D);
mux2 #(`DIVb) Cmux({2'b11, C[`DIVCOPIES-1][`DIVb-1:2]}, {Sqrt, {(`DIVb-1){1'b0}}}, DivStart, CMux);
flop #(`DIVb) cflop(clk, CMux, C[0]);
// Divisor Selections
// - choose the negitive version of what's being selected
// - D is only the fraction
assign DBar = {3'b111, 1'b0, ~D, {`DIVb-`DIVN+1{1'b1}}};
if(`RADIX == 4) begin : d2
assign DBar2 = {2'b11, 1'b0, ~D, {`DIVb+2-`DIVN{1'b1}}};
assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}};
end
genvar i;
generate
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations
divinteration divinteration(.D, .DBar, .D2, .DBar2, .Sqrt,
.WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]),
.C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i]));
if(i<(`DIVCOPIES-1)) begin
if (`RADIX==2)begin
assign WS[i+1] = {WSA[i][`DIVb+2:0], 1'b0};
assign WC[i+1] = {WCA[i][`DIVb+2:0], 1'b0};
assign C[i+1] = {1'b1, C[i][`DIVb-1:1]};
end else begin
assign WS[i+1] = {WSA[i][`DIVb+1:0], 2'b0};
assign WC[i+1] = {WCA[i][`DIVb+1:0], 2'b0};
assign C[i+1] = {2'b11, C[i][`DIVb-1:2]};
end
assign Q[i+1] = QNext[i];
assign QM[i+1] = QMNext[i];
assign S[i+1] = SNext[i];
assign SM[i+1] = SMNext[i];
end
end
endgenerate
// if starting a new divison set Q to 0 and QM to -1
mux2 #(`DIVb+1) QMmux(QMNext[`DIVCOPIES-1], '1, DivStart, QMMux);
flopenr #(`DIVb+1) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
flopen #(`DIVb+1) QMreg(clk, DivBusy, QMMux, QM[0]);
flopr #(`DIVb+1) SMreg(clk, DivStart, SMNext[`DIVCOPIES-1], SM[0]);
mux2 #(`DIVb+1) Smux(SNext[`DIVCOPIES-1], {Sqrt, {(`DIVb){1'b0}}}, DivStart, SMux);
flop #(`DIVb+1) Sreg(clk, SMux, S[0]);
// division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
always_comb
if(Sqrt) // sqrt ouputs in the range (1, .5]
if(NegSticky) Qm = {SM[0][`DIVb-1-(`RADIX/4):0], 1'b0};
else Qm = {S[0][`DIVb-1-(`RADIX/4):0], 1'b0};
else
if(NegSticky) Qm = QM[0][`DIVb-(`RADIX/4):0];
else Qm = Q[0][`DIVb-(`RADIX/4):0];
assign FirstWS = WS[0];
assign FirstWC = WC[0];
if(`RADIX==2)
if (`DIVCOPIES == 1)
assign StickyWSA = {WSA[0][`DIVb+2:0], 1'b0};
else
assign StickyWSA = {WSA[1][`DIVb+2:0], 1'b0};
endmodule
////////////////
// Submodules //
////////////////
/* verilator lint_off UNOPTFLAT */
module divinteration (
input logic [`DIVN-2:0] D,
input logic [`DIVb+3:0] DBar, D2, DBar2,
input logic [`DIVb:0] Q, QM,
input logic [`DIVb:0] S, SM,
input logic [`DIVb+3:0] WS, WC,
input logic [`DIVb-1:0] C,
input logic Sqrt,
output logic [`DIVb:0] QNext, QMNext,
output logic [`DIVb:0] SNext, SMNext,
output logic [`DIVb+3:0] WSA, WCA
);
/* verilator lint_on UNOPTFLAT */
logic [`DIVb+3:0] Dsel;
logic [3:0] q;
logic qp, qz;
logic [`DIVb+3:0] F;
logic [`DIVb+3:0] AddIn;
// Qmient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
// q encoding:
// 1000 = +2
// 0100 = +1
// 0000 = 0
// 0010 = -1
// 0001 = -2
if(`RADIX == 2) begin : qsel
qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz);
fgen2 fgen2(.sp(qp), .sz(qz), .C, .S, .SM, .F);
end else begin
qsel4 qsel4(.D, .WS, .WC, .Sqrt, .q);
// fgen4 fgen4(.s(q), .C, .S, .SM, .F);
end
if(`RADIX == 2) begin : dsel
assign Dsel = {`DIVb+4{~qz}}&(qp ? DBar : {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}});
end else begin
always_comb
case (q)
4'b1000: Dsel = DBar2;
4'b0100: Dsel = DBar;
4'b0000: Dsel = '0;
4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}};
4'b0001: Dsel = D2;
default: Dsel = 'x;
endcase
end
// Partial Product Generation
// WSA, WCA = WS + WC - qD
assign AddIn = Sqrt ? F : Dsel;
if (`RADIX == 2) begin : csa
csa #(`DIVb+4) csa(WS, WC, AddIn, qp&~Sqrt, WSA, WCA);
end else begin
csa #(`DIVb+4) csa(WS, WC, AddIn, |q[3:2]&~Sqrt, WSA, WCA);
end
if (`RADIX == 2) begin : otfc
otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext);
sotfc2 sotfc2(.sp(qp), .sz(qz), .C, .S, .SM, .SNext, .SMNext);
end else begin
otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext);
// sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext);
end
endmodule
/////////
// csa //
/////////
module csa #(parameter N=69) (
input logic [N-1:0] in1, in2, in3,
input logic cin,
output logic [N-1:0] out1, out2
);
// This block adds in1, in2, in3, and cin to produce
// a result out1 / out2 in carry-save redundant form.
// cin is just added to the least significant bit and
// is Startuired to handle adding a negative divisor.
// Fortunately, the carry (out2) is shifted left by one
// bit, leaving room in the least significant bit to
// insert cin.
assign out1 = in1 ^ in2 ^ in3;
assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
(in2[N-2:0] & in3[N-2:0]), cin};
endmodule

View File

@ -33,47 +33,57 @@
module srtfsm( module srtfsm(
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic [`DIVLEN+3:0] WSN, WCN, WS, WC, input logic [`DIVb+3:0] NextWSN, NextWCN, WS, WC,
input logic XInfE, YInfE, input logic XInfE, YInfE,
input logic XZeroE, YZeroE, input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE, input logic XNaNE, YNaNE,
input logic DivStart, input logic DivStart,
input logic StallE, input logic XsE,
input logic StallM, input logic SqrtE,
input logic [$clog2(`DIVLEN/2+3)-1:0] Dur, input logic StallE,
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, input logic StallM,
output logic DivStickyE, input logic [`DIVb+3:0] StickyWSA,
input logic [`DURLEN-1:0] Dur,
output logic [`DURLEN-1:0] EarlyTermShiftE,
output logic DivSE,
output logic DivDone, output logic DivDone,
output logic DivNegStickyE, output logic NegSticky,
output logic DivBusy output logic DivBusy
); );
typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
statetype state; statetype state;
logic [$clog2(`DIVLEN/2+3)-1:0] step; logic [`DURLEN-1:0] step;
logic WZero; logic WZero;
//logic [$clog2(`DIVLEN/2+3)-1:0] Dur; //logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
logic [`DIVLEN+3:0] W; logic [`DIVb+3:0] W;
//flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
assign DivBusy = (state == BUSY); assign DivBusy = (state == BUSY);
assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0}); assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0});
assign DivStickyE = ~WZero; // calculate sticky bit
// - there is a chance that a value is subtracted infinitly, resulting in an exact QM result
// this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant
// radix-4 division can't create a QM that continually adds 0's
if (`RADIX == 2)
assign DivSE = |W&~(StickyWSA == WS);
else
assign DivSE = |W;
assign DivDone = (state == DONE); assign DivDone = (state == DONE);
assign W = WC+WS; assign W = WC+WS;
assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? assign NegSticky = W[`DIVb+3];
assign EarlyTermShiftDiv2E = step; assign EarlyTermShiftE = step;
always_ff @(posedge clk) begin always_ff @(posedge clk) begin
if (reset) begin if (reset) begin
state <= #1 IDLE; state <= #1 IDLE;
end else if (DivStart&~StallE) begin end else if (DivStart&~StallE) begin
step <= Dur; step <= Dur;
if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE; if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE|(XsE&SqrtE)) state <= #1 DONE;
else state <= #1 BUSY; else state <= #1 BUSY;
end else if (state == BUSY) begin end else if (state == BUSY) begin
if ((~|step[$clog2(`DIVLEN/2+3)-1:1]&step[0])|WZero) begin if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin
state <= #1 DONE; state <= #1 DONE;
end end
step <= step - 1; step <= step - 1;

View File

@ -31,16 +31,25 @@
`include "wally-config.vh" `include "wally-config.vh"
module srtpreproc ( module srtpreproc (
input logic [`NF:0] XManE, YManE, input logic clk,
output logic [`DIVLEN-1:0] X, input logic DivStart,
output logic [`DIVLEN-1:0] Dpreproc, input logic [`NF:0] Xm, Ym,
output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, input logic [`NE-1:0] Xe, Ye,
output logic [$clog2(`DIVLEN/2+3)-1:0] Dur input logic [`FMTBITS-1:0] Fmt,
input logic Sqrt,
input logic XZero,
output logic [`NE+1:0] QeM,
output logic [`DIVb:0] X,
output logic [`DIVN-2:0] Dpreproc,
output logic [`DURLEN-1:0] Dur
); );
// logic [`XLEN-1:0] PosA, PosB; // logic [`XLEN-1:0] PosA, PosB;
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
logic [`DIVLEN-1:0] PreprocA, PreprocX; logic [`NF-1:0] PreprocA, PreprocX;
logic [`DIVLEN-1:0] PreprocB, PreprocY; logic [`NF-1:0] PreprocB, PreprocY;
logic [`NF+1:0] SqrtX;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [`NE+1:0] Qe;
// assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
// assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
@ -49,24 +58,79 @@ module srtpreproc (
// ***can probably merge X LZC with conversion // ***can probably merge X LZC with conversion
// cout the number of leading zeros // cout the number of leading zeros
lzc #(`NF+1) lzcA (XManE, XZeroCnt); lzc #(`NF+1) lzcX (Xm, XZeroCnt);
lzc #(`NF+1) lzcB (YManE, YZeroCnt); lzc #(`NF+1) lzcY (Ym, YZeroCnt);
// assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
// assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
// assign PreprocA = ExtraA << zeroCntA; // assign PreprocA = ExtraA << zeroCntA;
// assign PreprocB = ExtraB << (zeroCntB + 1); // assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}}; assign PreprocX = Xm[`NF-1:0]<<XZeroCnt;
assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}}; assign PreprocY = Ym[`NF-1:0]<<YZeroCnt;
assign X = PreprocX; assign SqrtX = Xe[0]^XZeroCnt[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0};
assign Dpreproc = PreprocY; assign X = Sqrt ? {SqrtX, {`DIVb-1-`NF{1'b0}}} : {~XZero, PreprocX, {`DIVb-`NF{1'b0}}};
assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}};
assign Dur = (`DURLEN)'(`FPDUR);
assign Dur = ($clog2(`DIVLEN/2+3))'(`DIVLEN/2+2); // radix 2 radix 4
// assign intExp = zeroCntB - zeroCntA + 1; // 1 copies DIVLEN+2 DIVLEN+2/2
// assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); // 2 copies DIVLEN+2/2 DIVLEN+2/2*2
// 4 copies DIVLEN+2/4 DIVLEN+2/2*4
// 8 copies DIVLEN+2/8 DIVLEN+2/2*8
// DIVRESLEN = DIVLEN or DIVLEN+2
// r = 1 or 2
// DIVRESLEN/(r*`DIVCOPIES)
flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM);
expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .XZeroCnt, .YZeroCnt, .Qe);
endmodule
module expcalc(
input logic [`FMTBITS-1:0] Fmt,
input logic [`NE-1:0] Xe, Ye,
input logic Sqrt,
input logic XZero,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`NE+1:0] Qe
);
logic [`NE-2:0] Bias;
logic [`NE+1:0] SXExp;
logic [`NE+1:0] SExp;
logic [`NE+1:0] DExp;
if (`FPSIZES == 1) begin
assign Bias = (`NE-1)'(`BIAS);
end else if (`FPSIZES == 2) begin
assign Bias = Fmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
end else if (`FPSIZES == 3) begin
always_comb
case (Fmt)
`FMT: Bias = (`NE-1)'(`BIAS);
`FMT1: Bias = (`NE-1)'(`BIAS1);
`FMT2: Bias = (`NE-1)'(`BIAS2);
default: Bias = 'x;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (Fmt)
2'h3: Bias = (`NE-1)'(`Q_BIAS);
2'h1: Bias = (`NE-1)'(`D_BIAS);
2'h0: Bias = (`NE-1)'(`S_BIAS);
2'h2: Bias = (`NE-1)'(`H_BIAS);
endcase
end
assign SXExp = {2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - (`NE+1)'(`BIAS);
assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
// correct exponent for denormalized input's normalization shifts
assign DExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZero}};
assign Qe = Sqrt ? SExp : DExp;
endmodule endmodule

View File

@ -30,35 +30,35 @@
module unpack ( module unpack (
input logic [`FLEN-1:0] X, Y, Z, // inputs from register file input logic [`FLEN-1:0] X, Y, Z, // inputs from register file
input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ input logic XEn, YEn, ZEn,
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) output logic Xs, Ys, Zs, // sign bits of XYZ
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN output logic XNaN, YNaN, ZNaN, // is XYZ a NaN
output logic XDenormE, ZDenormE, // is XYZ denormalized output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero output logic XDenorm, ZDenorm, // is XYZ denormalized
output logic XInfE, YInfE, ZInfE, // is XYZ infinity output logic XZero, YZero, ZZero, // is XYZ zero
output logic XExpMaxE // does X have the maximum exponent (NaN or Inf) output logic XInf, YInf, ZInf, // is XYZ infinity
output logic XExpMax // does X have the maximum exponent (NaN or Inf)
); );
logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
logic YExpMaxE, ZExpMaxE; // is the exponent all 1s logic YExpMax, ZExpMax; // is the exponent all 1s
unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE), unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn),
.NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero), .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
.Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero)); .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero));
unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE), unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn),
.NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero), .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
.Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero)); .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero));
unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE), unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn),
.NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero), .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
.Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero)); .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero));
// is the input denormalized // is the input denormalized
assign XDenormE = ~XExpNonZero & ~XFracZero; assign XDenorm = ~XExpNonZero & ~XFracZero;
assign ZDenormE = ~ZExpNonZero & ~ZFracZero; assign ZDenorm = ~ZExpNonZero & ~ZFracZero;
endmodule endmodule

View File

@ -30,7 +30,8 @@
module unpackinput ( module unpackinput (
input logic [`FLEN-1:0] In, // inputs from register file input logic [`FLEN-1:0] In, // inputs from register file
input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half input logic En, // enable the input
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic Sgn, // sign bits of XYZ output logic Sgn, // sign bits of XYZ
output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision) output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision) output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision)
@ -74,16 +75,16 @@ module unpackinput (
// quad and half // quad and half
// double and half // double and half
assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
// choose sign bit depending on format - 1=larger precsion 0=smaller precision // choose sign bit depending on format - 1=larger precsion 0=smaller precision
assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1]; assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1];
// extract the fraction, add trailing zeroes to the mantissa if nessisary // extract the fraction, add trailing zeroes to the mantissa if nessisary
assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
// is the exponent non-zero // is the exponent non-zero
assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];
// example double to single conversion: // example double to single conversion:
// 1023 = 0011 1111 1111 // 1023 = 0011 1111 1111
@ -95,10 +96,10 @@ module unpackinput (
// extract the exponent, converting the smaller exponent into the larger precision if nessisary // extract the exponent, converting the smaller exponent into the larger precision if nessisary
// - if the original precision had a denormal number convert the exponent value 1 // - if the original precision had a denormal number convert the exponent value 1
assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
// is the exponent all 1's // is the exponent all 1's
assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1]; assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
end else if (`FPSIZES == 3) begin // three floating point precsions supported end else if (`FPSIZES == 3) begin // three floating point precsions supported
@ -122,7 +123,7 @@ module unpackinput (
// Check NaN boxing // Check NaN boxing
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: BadNaNBox = 0; `FMT: BadNaNBox = 0;
`FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1]; `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
`FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2]; `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
@ -131,7 +132,7 @@ module unpackinput (
// extract the sign bit // extract the sign bit
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: Sgn = In[`FLEN-1]; `FMT: Sgn = In[`FLEN-1];
`FMT1: Sgn = In[`LEN1-1]; `FMT1: Sgn = In[`LEN1-1];
`FMT2: Sgn = In[`LEN2-1]; `FMT2: Sgn = In[`LEN2-1];
@ -140,7 +141,7 @@ module unpackinput (
// extract the fraction // extract the fraction
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: Frac = In[`NF-1:0]; `FMT: Frac = In[`NF-1:0];
`FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)}; `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
`FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)}; `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
@ -149,7 +150,7 @@ module unpackinput (
// is the exponent non-zero // is the exponent non-zero
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double) `FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double)
`FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single) `FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single)
`FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half) `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
@ -166,7 +167,7 @@ module unpackinput (
// convert the larger precision's exponent to use the largest precision's bias // convert the larger precision's exponent to use the largest precision's bias
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; `FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
`FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
`FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero};
@ -175,7 +176,7 @@ module unpackinput (
// is the exponent all 1's // is the exponent all 1's
always_comb always_comb
case (FmtE) case (Fmt)
`FMT: ExpMax = &In[`FLEN-2:`NF]; `FMT: ExpMax = &In[`FLEN-2:`NF];
`FMT1: ExpMax = &In[`LEN1-2:`NF1]; `FMT1: ExpMax = &In[`LEN1-2:`NF1];
`FMT2: ExpMax = &In[`LEN2-2:`NF2]; `FMT2: ExpMax = &In[`LEN2-2:`NF2];
@ -194,7 +195,7 @@ module unpackinput (
// Check NaN boxing // Check NaN boxing
always_comb always_comb
case (FmtE) case (Fmt)
2'b11: BadNaNBox = 0; 2'b11: BadNaNBox = 0;
2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN]; 2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN]; 2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
@ -203,7 +204,7 @@ module unpackinput (
// extract sign bit // extract sign bit
always_comb always_comb
case (FmtE) case (Fmt)
2'b11: Sgn = In[`Q_LEN-1]; 2'b11: Sgn = In[`Q_LEN-1];
2'b01: Sgn = In[`D_LEN-1]; 2'b01: Sgn = In[`D_LEN-1];
2'b00: Sgn = In[`S_LEN-1]; 2'b00: Sgn = In[`S_LEN-1];
@ -213,7 +214,7 @@ module unpackinput (
// extract the fraction // extract the fraction
always_comb always_comb
case (FmtE) case (Fmt)
2'b11: Frac = In[`Q_NF-1:0]; 2'b11: Frac = In[`Q_NF-1:0];
2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; 2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; 2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
@ -222,7 +223,7 @@ module unpackinput (
// is the exponent non-zero // is the exponent non-zero
always_comb always_comb
case (FmtE) case (Fmt)
2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF]; 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF]; 2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; 2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF];
@ -240,7 +241,7 @@ module unpackinput (
// convert the double precsion exponent into quad precsion // convert the double precsion exponent into quad precsion
always_comb always_comb
case (FmtE) case (Fmt)
2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero}; 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero}; 2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero}; 2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
@ -250,7 +251,7 @@ module unpackinput (
// is the exponent all 1's // is the exponent all 1's
always_comb always_comb
case (FmtE) case (Fmt)
2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF]; 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
2'b01: ExpMax = &In[`D_LEN-2:`D_NF]; 2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
2'b00: ExpMax = &In[`S_LEN-2:`S_NF]; 2'b00: ExpMax = &In[`S_LEN-2:`S_NF];
@ -262,8 +263,8 @@ module unpackinput (
// Output logic // Output logic
assign FracZero = ~|Frac; // is the fraction zero? assign FracZero = ~|Frac; // is the fraction zero?
assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if denormal or zero) to create the significand assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if denormal or zero) to create the significand
assign NaN = (ExpMax & ~FracZero)|BadNaNBox; // is the input a NaN? assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN?
assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN? assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN?
assign Inf = ExpMax & FracZero; // is the input infinity? assign Inf = ExpMax & FracZero &En; // is the input infinity?
assign Zero = ~ExpNonZero & FracZero; // is the input zero? assign Zero = ~ExpNonZero & FracZero; // is the input zero?
endmodule endmodule

View File

@ -34,7 +34,7 @@ module lzc #(parameter WIDTH = 1) (
/* verilator lint_off CMPCONST */ /* verilator lint_off CMPCONST */
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
int i; logic [31:0] i;
always_comb begin always_comb begin
i = 0; i = 0;
while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one

View File

@ -196,13 +196,13 @@ module ifu (
if (`IBUS) begin : bus if (`IBUS) begin : bus
localparam integer WORDSPERLINE = (CACHE_ENABLED) ? `ICACHE_LINELENINBITS/`XLEN : 1; localparam integer WORDSPERLINE = (CACHE_ENABLED) ? `ICACHE_LINELENINBITS/`XLEN : 1;
localparam integer LINELEN = (CACHE_ENABLED) ? `ICACHE_LINELENINBITS : `XLEN; localparam integer LINELEN = (CACHE_ENABLED) ? `ICACHE_LINELENINBITS : `XLEN;
localparam integer LOGWPL = (`DMEM == `MEM_CACHE) ? $clog2(WORDSPERLINE) : 1; localparam integer LOGBWPL = (`DMEM == `MEM_CACHE) ? $clog2(WORDSPERLINE) : 1;
logic [LINELEN-1:0] ICacheBusWriteData; logic [LINELEN-1:0] ICacheBusWriteData;
logic [`PA_BITS-1:0] ICacheBusAdr; logic [`PA_BITS-1:0] ICacheBusAdr;
logic ICacheBusAck; logic ICacheBusAck;
logic SelUncachedAdr; logic SelUncachedAdr;
busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) busdp #(WORDSPERLINE, LINELEN, LOGBWPL, CACHE_ENABLED)
busdp(.clk, .reset, busdp(.clk, .reset,
.LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusInit(IFUBusInit), .LSUBusWrite(), .LSUBusWriteCrit(), .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusInit(IFUBusInit), .LSUBusWrite(), .LSUBusWriteCrit(),
.LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUBurstType(IFUBurstType), .LSUTransType(IFUTransType), .LSUTransComplete(IFUTransComplete), .LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUBurstType(IFUBurstType), .LSUTransType(IFUTransType), .LSUTransComplete(IFUTransComplete),
@ -222,11 +222,11 @@ module ifu (
if(CACHE_ENABLED) begin : icache if(CACHE_ENABLED) begin : icache
cache #(.LINELEN(`ICACHE_LINELENINBITS), cache #(.LINELEN(`ICACHE_LINELENINBITS),
.NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS), .NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS),
.NUMWAYS(`ICACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0)) .NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0))
icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0), icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0),
.CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck), .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck),
.CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF),
.CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(), .CacheFetchLine(ICacheFetchLine),
.CacheWriteLine(), .ReadDataWord(FinalInstrRawF), .CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
.Cacheable(CacheableF), .Cacheable(CacheableF),
.CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),
@ -236,7 +236,7 @@ module ifu (
.Atomic('0), .FlushCache('0), .Atomic('0), .FlushCache('0),
.NextAdr(PCNextFSpill[11:0]), .NextAdr(PCNextFSpill[11:0]),
.PAdr(PCPF), .PAdr(PCPF),
.CacheCommitted(), .InvalidateCacheM(InvalidateICacheM)); .CacheCommitted(), .InvalidateCache(InvalidateICacheM));
end else begin : passthrough end else begin : passthrough
assign {ICacheFetchLine, ICacheBusAdr, ICacheStallF, FinalInstrRawF} = '0; assign {ICacheFetchLine, ICacheBusAdr, ICacheStallF, FinalInstrRawF} = '0;

View File

@ -126,10 +126,16 @@ module busfsm #(parameter integer WordCountThreshold,
else BusNextState = STATE_BUS_READY; else BusNextState = STATE_BUS_READY;
STATE_BUS_CPU_BUSY: if(CPUBusy) BusNextState = STATE_BUS_CPU_BUSY; STATE_BUS_CPU_BUSY: if(CPUBusy) BusNextState = STATE_BUS_CPU_BUSY;
else BusNextState = STATE_BUS_READY; else BusNextState = STATE_BUS_READY;
STATE_BUS_FETCH: if (WordCountFlag & LSUBusAck) BusNextState = STATE_BUS_READY; STATE_BUS_FETCH: if (WordCountFlag & LSUBusAck) begin
else BusNextState = STATE_BUS_FETCH; if (DCacheFetchLine) BusNextState = STATE_BUS_FETCH;
STATE_BUS_WRITE: if(WordCountFlag & LSUBusAck) BusNextState = STATE_BUS_READY; else if (DCacheWriteLine) BusNextState = STATE_BUS_WRITE;
else BusNextState = STATE_BUS_WRITE; else BusNextState = STATE_BUS_READY;
end else BusNextState = STATE_BUS_FETCH;
STATE_BUS_WRITE: if(WordCountFlag & LSUBusAck) begin
if (DCacheFetchLine) BusNextState = STATE_BUS_FETCH;
else if (DCacheWriteLine) BusNextState = STATE_BUS_WRITE;
else BusNextState = STATE_BUS_READY;
end else BusNextState = STATE_BUS_WRITE;
default: BusNextState = STATE_BUS_READY; default: BusNextState = STATE_BUS_READY;
endcase endcase
end end

View File

@ -58,7 +58,7 @@ module lsu (
input logic sfencevmaM, input logic sfencevmaM,
// fpu // fpu
input logic [`FLEN-1:0] FWriteDataM, input logic [`FLEN-1:0] FWriteDataM,
input logic FLoad2, input logic FStore2,
input logic FpLoadStoreM, input logic FpLoadStoreM,
// faults // faults
output logic LoadPageFaultM, StoreAmoPageFaultM, output logic LoadPageFaultM, StoreAmoPageFaultM,
@ -77,8 +77,6 @@ module lsu (
(* mark_debug = "true" *) output logic [2:0] LSUBurstType, (* mark_debug = "true" *) output logic [2:0] LSUBurstType,
(* mark_debug = "true" *) output logic [1:0] LSUTransType, (* mark_debug = "true" *) output logic [1:0] LSUTransType,
(* mark_debug = "true" *) output logic LSUTransComplete, (* mark_debug = "true" *) output logic LSUTransComplete,
output logic [(`XLEN-1)/8:0] ByteMaskM,
// page table walker // page table walker
input logic [`XLEN-1:0] SATP_REGW, // from csr input logic [`XLEN-1:0] SATP_REGW, // from csr
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
@ -116,6 +114,7 @@ module lsu (
logic [`XLEN-1:0] LSUWriteDataM; logic [`XLEN-1:0] LSUWriteDataM;
logic [`XLEN-1:0] WriteDataM; logic [`XLEN-1:0] WriteDataM;
logic [`LLEN-1:0] ReadDataM; logic [`LLEN-1:0] ReadDataM;
logic [(`LLEN-1)/8:0] ByteMaskM, FinalByteMaskM;
// *** TO DO: Burst mode // *** TO DO: Burst mode
@ -192,7 +191,8 @@ module lsu (
// Memory System // Memory System
// Either Data Cache or Data Tightly Integrated Memory or just bus interface // Either Data Cache or Data Tightly Integrated Memory or just bus interface
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
logic [`XLEN-1:0] AMOWriteDataM, FinalWriteDataM, LittleEndianWriteDataM; logic [`XLEN-1:0] AMOWriteDataM, IEUWriteDataM, LittleEndianWriteDataM;
logic [`LLEN-1:0] FinalWriteDataM;
logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM; logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM;
logic [`LLEN-1:0] ReadDataWordMuxM; logic [`LLEN-1:0] ReadDataWordMuxM;
logic IgnoreRequest; logic IgnoreRequest;
@ -202,24 +202,24 @@ module lsu (
if (`DMEM == `MEM_TIM) begin : dtim if (`DMEM == `MEM_TIM) begin : dtim
// *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW. // *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW.
// Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops // Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops
dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM, dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData
.ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM, .ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
.DCacheStallM, .DCacheCommittedM, .ByteMaskM, .Cacheable(CacheableM), .DCacheStallM, .DCacheCommittedM, .ByteMaskM(ByteMaskM[`XLEN/8-1:0]), .Cacheable(CacheableM),
.DCacheMiss, .DCacheAccess); .DCacheMiss, .DCacheAccess);
end end
if (`DBUS) begin : bus if (`DBUS) begin : bus
localparam CACHE_ENABLED = `DMEM == `MEM_CACHE; localparam CACHE_ENABLED = `DMEM == `MEM_CACHE;
localparam integer WORDSPERLINE = (CACHE_ENABLED) ? `DCACHE_LINELENINBITS/`XLEN : 1; localparam integer WORDSPERLINE = (CACHE_ENABLED) ? `DCACHE_LINELENINBITS/`XLEN : 1;
localparam integer LINELEN = (CACHE_ENABLED) ? `DCACHE_LINELENINBITS : `XLEN; localparam integer LINELEN = (CACHE_ENABLED) ? `DCACHE_LINELENINBITS : `XLEN;
localparam integer LOGWPL = (CACHE_ENABLED) ? $clog2(WORDSPERLINE) : 1; localparam integer LOGBWPL = (CACHE_ENABLED) ? $clog2(WORDSPERLINE) : 1;
logic [LINELEN-1:0] DCacheBusWriteData; logic [LINELEN-1:0] DCacheBusWriteData;
logic [`PA_BITS-1:0] DCacheBusAdr; logic [`PA_BITS-1:0] DCacheBusAdr;
logic DCacheWriteLine; logic DCacheWriteLine;
logic DCacheFetchLine; logic DCacheFetchLine;
logic DCacheBusAck; logic DCacheBusAck;
logic [LOGWPL-1:0] WordCount; logic [LOGBWPL-1:0] WordCount;
busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) busdp( busdp #(WORDSPERLINE, LINELEN, LOGBWPL, CACHE_ENABLED) busdp(
.clk, .reset, .clk, .reset,
.LSUBusHRDATA, .LSUBusAck, .LSUBusInit, .LSUBusWrite, .LSUBusRead, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete, .LSUBusHRDATA, .LSUBusAck, .LSUBusInit, .LSUBusWrite, .LSUBusRead, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
.WordCount, .LSUBusWriteCrit, .WordCount, .LSUBusWriteCrit,
@ -230,21 +230,25 @@ module lsu (
mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DCacheBusWriteData[`XLEN-1:0]}), mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DCacheBusWriteData[`XLEN-1:0]}),
.s(SelUncachedAdr), .y(ReadDataWordMuxM)); .s(SelUncachedAdr), .y(ReadDataWordMuxM));
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(FinalWriteDataM), mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM),
.s(SelUncachedAdr), .y(LSUBusHWDATA)); .s(SelUncachedAdr), .y(LSUBusHWDATA));
if(CACHE_ENABLED) begin : dcache if(CACHE_ENABLED) begin : dcache
if (`LLEN>`XLEN)
mux2 #(`LLEN) datamux({IEUWriteDataM, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM);
else
assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM};
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
.clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM), .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
.FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
.ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2, .ByteMask(FinalByteMaskM), .WordCount,
.FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM), .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
.IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM),
.CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM),
.CacheBusWriteData(DCacheBusWriteData), .CacheFetchLine(DCacheFetchLine), .CacheBusWriteData(DCacheBusWriteData), .CacheFetchLine(DCacheFetchLine),
.CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCacheM(1'b0)); .CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0));
end else begin : passthrough end else begin : passthrough
assign {ReadDataWordM, DCacheStallM, DCacheCommittedM, DCacheFetchLine, DCacheWriteLine} = '0; assign {ReadDataWordM, DCacheStallM, DCacheCommittedM, DCacheFetchLine, DCacheWriteLine} = '0;
@ -272,7 +276,13 @@ module lsu (
subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]), subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]),
.FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]), subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]),
.LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM); .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM);
// Compute byte masks
swbytemaskword #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM));
// *** fix when when fstore2 is valid. I'm not sure this is even needed if LSUFunct3M can be 3'b100 for a 16 byte write.
//assign FinalByteMaskM = FStore2 ? '1 : ByteMaskM;
assign FinalByteMaskM = ByteMaskM;
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// MW Pipeline Register // MW Pipeline Register
@ -286,10 +296,10 @@ module lsu (
// swap the bytes when read from big-endian memory // swap the bytes when read from big-endian memory
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
if (`BIGENDIAN_SUPPORTED) begin:endian if (`BIGENDIAN_SUPPORTED) begin:endian
bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM)); bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM));
bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM)); bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM));
end else begin end else begin
assign FinalWriteDataM = LittleEndianWriteDataM; assign IEUWriteDataM = LittleEndianWriteDataM;
assign LittleEndianReadDataWordM = ReadDataWordM; assign LittleEndianReadDataWordM = ReadDataWordM;
end end

View File

@ -34,13 +34,8 @@ module subwordwrite (
input logic [2:0] LSUPAdrM, input logic [2:0] LSUPAdrM,
input logic [2:0] LSUFunct3M, input logic [2:0] LSUFunct3M,
input logic [`XLEN-1:0] AMOWriteDataM, input logic [`XLEN-1:0] AMOWriteDataM,
output logic [`XLEN-1:0] LittleEndianWriteDataM, output logic [`XLEN-1:0] LittleEndianWriteDataM);
output logic [`XLEN/8-1:0] ByteMaskM
);
// Compute byte masks
swbytemask swbytemask(.Size(LSUFunct3M[1:0]), .Adr(LSUPAdrM), .ByteMask(ByteMaskM));
// Replicate data for subword writes // Replicate data for subword writes
if (`XLEN == 64) begin:sww if (`XLEN == 64) begin:sww
always_comb always_comb

View File

@ -0,0 +1,59 @@
///////////////////////////////////////////
// swbytemask.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: On-chip RAM, external to core
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR CO///////////////////////////////////////////
// swbytemask.sv
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module swbytemaskword #(parameter WORDLEN = 64)(
input logic [2:0] Size,
input logic [$clog2(WORDLEN/8)-1:0] Adr,
output logic [WORDLEN/8-1:0] ByteMask);
assign ByteMask = ((2**(2**Size))-1) << Adr;
/* Equivalent to the following for WORDLEN = 64
if(WORDLEN == 64) begin
always_comb begin
case(Size[1:0])
2'b00: begin ByteMask = 8'b00000000; ByteMask[Adr[2:0]] = 1; end // sb
2'b01: case (Adr[2:1])
2'b00: ByteMask = 8'b0000_0011;
2'b01: ByteMask = 8'b0000_1100;
2'b10: ByteMask = 8'b0011_0000;
2'b11: ByteMask = 8'b1100_0000;
endcase
2'b10: if (Adr[2]) ByteMask = 8'b11110000;
else ByteMask = 8'b00001111;
2'b11: ByteMask = 8'b1111_1111;
default ByteMask = 8'b0000_0000;
endcase
end
end
*/
endmodule

View File

@ -172,8 +172,8 @@ module plic_apb (
end end
// pending interrupt requests // pending interrupt requests
//assign nextIntPending = (intPending | requests) & ~intInProgress; // assign nextIntPending = (intPending | requests) & ~intInProgress; // dh changed back 7/9/22 see if Buildroot still boots. Confirmed to boot successfully.
assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion //assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending); flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending);
// context-dependent signals // context-dependent signals

View File

@ -165,6 +165,7 @@ module uartPC16550D(
SCR <= #1 8'b0; // not strictly necessary to reset SCR <= #1 8'b0; // not strictly necessary to reset
end else begin end else begin
if (~MEMWb) begin if (~MEMWb) begin
/* verilator lint_off CASEINCOMPLETE */
case (A) case (A)
/* -----\/----- EXCLUDED -----\/----- /* -----\/----- EXCLUDED -----\/-----
3'b000: if (DLAB) DLL <= #1 Din; // else TXHR <= #1 Din; // TX handled in TX register/FIFO section 3'b000: if (DLAB) DLL <= #1 Din; // else TXHR <= #1 Din; // TX handled in TX register/FIFO section
@ -177,34 +178,42 @@ module uartPC16550D(
// freq /baud / 16 = div // freq /baud / 16 = div
//3'b000: if (DLAB) DLL <= #1 8'd38; //else TXHR <= #1 Din; // TX handled in TX register/FIFO section //3'b000: if (DLAB) DLL <= #1 8'd38; //else TXHR <= #1 Din; // TX handled in TX register/FIFO section
//3'b000: if (DLAB) DLL <= #1 8'd11; //else TXHR <= #1 Din; // TX handled in //3'b000: if (DLAB) DLL <= #1 8'd11; //else TXHR <= #1 Din; // TX handled in
3'b000: if (DLAB) DLL <= #1 8'd8; //else TXHR <= #1 Din; // TX handled in 3'b000: if (DLAB) DLL <= #1 8'd8; //else TXHR <= #1 Din; // TX handled in
3'b001: if (DLAB) DLM <= #1 8'b0; else IER <= #1 Din[3:0]; 3'b001: if (DLAB) DLM <= #1 8'b0; else IER <= #1 Din[3:0];
3'b010: FCR <= #1 {Din[7:6], 2'b0, Din[3], 2'b0, Din[0]}; // Write only FIFO Control Register; 4:5 reserved and 2:1 self-clearing 3'b010: FCR <= #1 {Din[7:6], 2'b0, Din[3], 2'b0, Din[0]}; // Write only FIFO Control Register; 4:5 reserved and 2:1 self-clearing
3'b011: LCR <= #1 Din; 3'b011: LCR <= #1 Din;
3'b100: MCR <= #1 Din[4:0]; 3'b100: MCR <= #1 Din[4:0];
3'b101: LSR[6:1] <= #1 Din[6:1]; // recommended only for test, see 8.6.3
3'b110: MSR <= #1 Din[3:0];
3'b111: SCR <= #1 Din; 3'b111: SCR <= #1 Din;
endcase endcase
/* verilator lint_on CASEINCOMPLETE */
end end
// Line Status Register (8.6.3) // Line Status Register (8.6.3)
// Ben 6/9/21 I don't like how this is a register. A lot of the individual bits have clocked components, so this just adds unecessary delay. // Ben 6/9/21 I don't like how this is a register. A lot of the individual bits have clocked components, so this just adds unecessary delay.
LSR[0] <= #1 rxdataready; // Data ready if (~MEMWb & (A == 3'b101))
LSR[1] <= #1 (LSR[1] | RXBR[10]) & ~squashRXerrIP;; // overrun error LSR[6:1] <= #1 Din[6:1]; // recommended only for test, see 8.6.3
LSR[2] <= #1 (LSR[2] | RXBR[9]) & ~squashRXerrIP; // parity error else begin
LSR[3] <= #1 (LSR[3] | RXBR[8]) & ~squashRXerrIP; // framing error LSR[0] <= #1 rxdataready; // Data ready
LSR[4] <= #1 (LSR[4] | rxbreak) & ~squashRXerrIP; // break indicator LSR[1] <= #1 (LSR[1] | RXBR[10]) & ~squashRXerrIP;; // overrun error
LSR[5] <= #1 THRE; // THRE LSR[2] <= #1 (LSR[2] | RXBR[9]) & ~squashRXerrIP; // parity error
LSR[6] <= #1 ~txsrfull & THRE; // TEMT LSR[3] <= #1 (LSR[3] | RXBR[8]) & ~squashRXerrIP; // framing error
if (rxfifohaserr) LSR[7] <= #1 1; // any bits in FIFO have error LSR[4] <= #1 (LSR[4] | rxbreak) & ~squashRXerrIP; // break indicator
LSR[5] <= #1 THRE; // THRE
LSR[6] <= #1 ~txsrfull & THRE; // TEMT
if (rxfifohaserr) LSR[7] <= #1 1; // any bits in FIFO have error
end
// Modem Status Register (8.6.8) // Modem Status Register (8.6.8)
MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send if (~MEMWb & (A == 3'b110))
MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready MSR <= #1 Din[3:0];
MSR[2] <= #1 MSR[2] | (~RIb2 & RIbsync); // Trailing Edge of Ring Indicator else if (~MEMRb & (A == 3'b110))
MSR[3] <= #1 MSR[3] | DCDb2 ^ DCDbsync; // Delta Data Carrier Detect MSR <= #1 4'b0; // Reading MSR clears the flags in MSR bits 3:0
else begin
MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send
MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready
MSR[2] <= #1 MSR[2] | (~RIb2 & RIbsync); // Trailing Edge of Ring Indicator
MSR[3] <= #1 MSR[3] | DCDb2 ^ DCDbsync; // Delta Data Carrier Detect
end
end end
always_comb always_comb
if (~MEMRb) if (~MEMRb)
@ -215,7 +224,8 @@ module uartPC16550D(
3'b011: Dout = LCR; 3'b011: Dout = LCR;
3'b100: Dout = {3'b000, MCR}; 3'b100: Dout = {3'b000, MCR};
3'b101: Dout = LSR; 3'b101: Dout = LSR;
3'b110: Dout = {~CTSbsync, ~DSRbsync, ~RIbsync, ~DCDbsync, MSR[3:0]}; // 3'b110: Dout = {~CTSbsync, ~DSRbsync, ~RIbsync, ~DCDbsync, MSR[3:0]};
3'b110: Dout = {~DCDbsync, ~RIbsync, ~DSRbsync, ~CTSbsync, MSR[3:0]};
3'b111: Dout = SCR; 3'b111: Dout = SCR;
endcase endcase
else Dout = 8'b0; else Dout = 8'b0;
@ -304,7 +314,7 @@ module uartPC16550D(
// ERROR CONDITIONS // ERROR CONDITIONS
assign rxparity = ^rxdata; assign rxparity = ^rxdata;
assign rxparityerr = rxparity ^ rxparitybit ^ ~evenparitysel; // Check even/odd parity (*** check if LCR needs to be inverted) assign rxparityerr = (rxparity ^ rxparitybit ^ ~evenparitysel) & LCR[3]; // Check even/odd parity (*** check if LCR needs to be inverted)
assign rxoverrunerr = fifoenabled ? (rxfifoentries == 15) : rxdataready; // overrun if FIFO or receive buffer register full assign rxoverrunerr = fifoenabled ? (rxfifoentries == 15) : rxdataready; // overrun if FIFO or receive buffer register full
assign rxframingerr = ~rxstopbit; // framing error if no stop bit assign rxframingerr = ~rxstopbit; // framing error if no stop bit
assign rxbreak = rxframingerr & (rxdata9 == 9'b0); // break when 0 for start + data + parity + stop time assign rxbreak = rxframingerr & (rxdata9 == 9'b0); // break when 0 for start + data + parity + stop time
@ -314,11 +324,13 @@ module uartPC16550D(
if (~PRESETn) begin if (~PRESETn) begin
rxfifohead <= #1 0; rxfifotail <= #1 0; rxdataready <= #1 0; RXBR <= #1 0; rxfifohead <= #1 0; rxfifotail <= #1 0; rxdataready <= #1 0; RXBR <= #1 0;
end else begin end else begin
if (rxstate == UART_DONE) begin if (~MEMWb & (A == 3'b010) & Din[1]) begin
rxfifohead <= #1 0; rxfifotail <= #1 0; rxdataready <= #1 0;
end else if (rxstate == UART_DONE) begin
RXBR <= #1 {rxoverrunerr, rxparityerr, rxframingerr, rxdata}; // load recevive buffer register RXBR <= #1 {rxoverrunerr, rxparityerr, rxframingerr, rxdata}; // load recevive buffer register
if (rxoverrunerr) $warning("UART RX Overrun Error\n"); if (rxoverrunerr) $warning("UART RX Overrun Err\n");
if (rxparityerr) $warning("UART RX Parity Error\n"); if (rxparityerr) $warning("UART RX Parity Err\n");
if (rxframingerr) $warning("UART RX Framing Error\n"); if (rxframingerr) $warning("UART RX Framing Err\n");
if (fifoenabled) begin if (fifoenabled) begin
rxfifo[rxfifohead] <= #1 {rxoverrunerr, rxparityerr, rxframingerr, rxdata}; rxfifo[rxfifohead] <= #1 {rxoverrunerr, rxparityerr, rxframingerr, rxdata};
rxfifohead <= #1 rxfifohead + 1; rxfifohead <= #1 rxfifohead + 1;
@ -327,7 +339,8 @@ module uartPC16550D(
end else if (~MEMRb & A == 3'b000 & ~DLAB) begin // reading RBR updates ready / pops fifo end else if (~MEMRb & A == 3'b000 & ~DLAB) begin // reading RBR updates ready / pops fifo
if (fifoenabled) begin if (fifoenabled) begin
if (~rxfifoempty) rxfifotail <= #1 rxfifotail + 1; if (~rxfifoempty) rxfifotail <= #1 rxfifotail + 1;
if (rxfifoempty) rxdataready <= #1 0; // if (rxfifoempty) rxdataready <= #1 0;
if (rxfifoentries == 1) rxdataready <= #1 0; // When reading the last entry, data ready becomes zero
end else begin end else begin
rxdataready <= #1 0; rxdataready <= #1 0;
RXBR <= #1 {1'b0, RXBR[9:0]}; // Ben 31 March 2022: I added this so that rxoverrunerr permanently goes away upon reading RBR (when not in FIFO mode) RXBR <= #1 {1'b0, RXBR[9:0]}; // Ben 31 March 2022: I added this so that rxoverrunerr permanently goes away upon reading RBR (when not in FIFO mode)
@ -405,7 +418,7 @@ module uartPC16550D(
txstate <= #1 UART_IDLE; txstate <= #1 UART_IDLE;
end end
assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s) assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s) - 1
// *** explain; is this necessary? // *** explain; is this necessary?
if (`QEMU) assign txnextbit = txbaudpulse & (txoversampledcnt[1:0] == 2'b00); // implies txstate = UART_ACTIVE if (`QEMU) assign txnextbit = txbaudpulse & (txoversampledcnt[1:0] == 2'b00); // implies txstate = UART_ACTIVE
else assign txnextbit = txbaudpulse & (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE else assign txnextbit = txbaudpulse & (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE
@ -438,6 +451,8 @@ module uartPC16550D(
always_ff @(posedge PCLK, negedge PRESETn) always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) begin if (~PRESETn) begin
txfifohead <= #1 0; txfifotail <= #1 0; txhrfull <= #1 0; txsrfull <= #1 0; TXHR <= #1 0; txsr <= #1 12'hfff; txfifohead <= #1 0; txfifotail <= #1 0; txhrfull <= #1 0; txsrfull <= #1 0; TXHR <= #1 0; txsr <= #1 12'hfff;
end else if (~MEMWb & (A == 3'b010) & Din[2]) begin
txfifohead <= #1 0; txfifotail <= #1 0;
end else begin end else begin
if (~MEMWb & A == 3'b000 & ~DLAB) begin // writing transmit holding register or fifo if (~MEMWb & A == 3'b000 & ~DLAB) begin // writing transmit holding register or fifo
if (fifoenabled) begin if (fifoenabled) begin
@ -451,7 +466,7 @@ module uartPC16550D(
end end
if (txstate == UART_IDLE) begin // move data into tx shift register if available if (txstate == UART_IDLE) begin // move data into tx shift register if available
if (fifoenabled) begin if (fifoenabled) begin
if (~txfifoempty) begin if (~txfifoempty & ~txsrfull) begin
txsr <= #1 txdata; txsr <= #1 txdata;
txfifotail <= #1 txfifotail+1; txfifotail <= #1 txfifotail+1;
txsrfull <= #1 1; txsrfull <= #1 1;

View File

@ -93,7 +93,7 @@ module wallypipelinedcore (
logic FStallD; logic FStallD;
logic FWriteIntE; logic FWriteIntE;
logic [`XLEN-1:0] FWriteDataE; logic [`XLEN-1:0] FWriteDataE;
logic FLoad2; logic FStore2;
logic [`FLEN-1:0] FWriteDataM; logic [`FLEN-1:0] FWriteDataM;
logic [`XLEN-1:0] FIntResM; logic [`XLEN-1:0] FIntResM;
logic [`XLEN-1:0] FCvtIntResW; logic [`XLEN-1:0] FCvtIntResW;
@ -116,7 +116,6 @@ module wallypipelinedcore (
logic [1:0] PageType; logic [1:0] PageType;
logic sfencevmaM, wfiM, IntPendingM; logic sfencevmaM, wfiM, IntPendingM;
logic SelHPTW; logic SelHPTW;
logic [`XLEN/8-1:0] ByteMaskM;
// PMA checker signals // PMA checker signals
@ -259,14 +258,13 @@ module wallypipelinedcore (
.CommittedM, .DCacheMiss, .DCacheAccess, .CommittedM, .DCacheMiss, .DCacheAccess,
.SquashSCW, .SquashSCW,
.FpLoadStoreM, .FpLoadStoreM,
.FWriteDataM, .FLoad2, .FWriteDataM, .FStore2,
//.DataMisalignedM(DataMisalignedM), //.DataMisalignedM(DataMisalignedM),
.IEUAdrE, .IEUAdrM, .WriteDataE, .IEUAdrE, .IEUAdrM, .WriteDataE,
.ReadDataW, .FlushDCacheM, .ReadDataW, .FlushDCacheM,
// connected to ahb (all stay the same) // connected to ahb (all stay the same)
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit, .LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit,
.LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete, .LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
.ByteMaskM,
// connect to csr or privilege and stay the same. // connect to csr or privilege and stay the same.
.PrivilegeModeW, .BigEndianM, // connects to csr .PrivilegeModeW, .BigEndianM, // connects to csr
@ -313,7 +311,6 @@ module wallypipelinedcore (
.LSUTransComplete, .LSUTransComplete,
.LSUBusAck, .LSUBusAck,
.LSUBusInit, .LSUBusInit,
.ByteMaskM,
.HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn, .HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn,
.HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST,
@ -400,7 +397,7 @@ module wallypipelinedcore (
.STATUS_FS, // is floating-point enabled? .STATUS_FS, // is floating-point enabled?
.FRegWriteM, // FP register write enable .FRegWriteM, // FP register write enable
.FpLoadStoreM, .FpLoadStoreM,
.FLoad2, .FStore2,
.FStallD, // Stall the decode stage .FStallD, // Stall the decode stage
.FWriteIntE, // integer register write enable .FWriteIntE, // integer register write enable
.FWriteDataE, // Data to be written to memory .FWriteDataE, // Data to be written to memory

View File

@ -1,4 +1,4 @@
all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen modtestgen
sqrttestgen: sqrttestgen.c sqrttestgen: sqrttestgen.c
gcc sqrttestgen.c -o sqrttestgen -lm gcc sqrttestgen.c -o sqrttestgen -lm
@ -28,6 +28,10 @@ inttestgen: inttestgen.c
gcc -lm -o inttestgen inttestgen.c gcc -lm -o inttestgen inttestgen.c
./inttestgen ./inttestgen
modtestgen: modtestgen.c
gcc -lm -o modtestgen modtestgen.c
./modtestgen
clean: clean:
rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen modtestgen

Binary file not shown.

View File

@ -1,15 +1,14 @@
/* testgen.c */ /* testgen.c */
/* Written 10/31/96 by David Harris /* Written 7/21/2022 by Cedar Turek
This program creates test vectors for mantissa component This program creates test vectors for integer divide.
of an IEEE floating point divider.
*/ */
/* #includes */ /* #includes */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <math.h> #include <math.h>
/* Constants */ /* Constants */
@ -19,7 +18,7 @@
/* Prototypes */ /* Prototypes */
void output(FILE *fptr, long a, long b, long r, long rem); void output(FILE *fptr, long a, long b, long r);
void printhex(FILE *fptr, long x); void printhex(FILE *fptr, long x);
double random_input(void); double random_input(void);
@ -28,7 +27,7 @@ double random_input(void);
void main(void) void main(void)
{ {
FILE *fptr; FILE *fptr;
long a, b, r, rem; long a, b, r;
long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255}; long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255};
int i, j; int i, j;
@ -42,32 +41,22 @@ void main(void)
for (j=0; j<ENTRIES; j++) { for (j=0; j<ENTRIES; j++) {
a = list[j]; a = list[j];
r = a/b; r = a/b;
rem = a%b; output(fptr, a, b, r);
output(fptr, a, b, r, rem);
} }
} }
// for (i = 0; i< RANDOM_VECS; i++) {
// a = random_input();
// b = random_input();
// r = a/b;
// output(fptr, a, b, r);
// }
fclose(fptr); fclose(fptr);
} }
/* Functions */ /* Functions */
void output(FILE *fptr, long a, long b, long r, long rem) void output(FILE *fptr, long a, long b, long r)
{ {
printhex(fptr, a); printhex(fptr, a);
fprintf(fptr, "_"); fprintf(fptr, "_");
printhex(fptr, b); printhex(fptr, b);
fprintf(fptr, "_"); fprintf(fptr, "_");
printhex(fptr, r); printhex(fptr, r);
fprintf(fptr, "_");
printhex(fptr, rem);
fprintf(fptr, "\n"); fprintf(fptr, "\n");
} }

View File

@ -1,2 +1 @@
verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv

BIN
pipelined/srt/modtestgen Executable file

Binary file not shown.

View File

@ -0,0 +1,73 @@
/* testgen.c */
/* Written 7/21/2022 by Cedar Turek
This program creates test vectors for modulo
calculation from integer divide.
*/
/* #includes */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* Constants */
#define ENTRIES 10
#define RANDOM_VECS 500
/* Prototypes */
void output(FILE *fptr, long a, long b, long rem);
void printhex(FILE *fptr, long x);
double random_input(void);
/* Main */
void main(void)
{
FILE *fptr;
long a, b, rem;
long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255};
int i, j;
if ((fptr = fopen("modtestvectors","w")) == NULL) {
fprintf(stderr, "Couldn't write testvectors file\n");
exit(1);
}
for (i=0; i<ENTRIES; i++) {
b = list[i];
for (j=0; j<ENTRIES; j++) {
a = list[j];
rem = a%b;
output(fptr, a, b, rem);
}
}
fclose(fptr);
}
/* Functions */
void output(FILE *fptr, long a, long b, long rem)
{
printhex(fptr, a);
fprintf(fptr, "_");
printhex(fptr, b);
fprintf(fptr, "_");
printhex(fptr, rem);
fprintf(fptr, "\n");
}
void printhex(FILE *fptr, long m)
{
fprintf(fptr, "%016llx", m);
}
double random_input(void)
{
return 1.0 + rand()/32767.0;
}

View File

@ -1,198 +0,0 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
printf("\tcase({D[5:3],Wmsbs})\n");
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
printf("\t\t10'b");
disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
printf("_");
disp_binary((double) pla.tot, TOT_SIZE, 0);
printf(": q = 4'b");
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 12)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -4)
printf("0000");
else if ((pla.tot) >= -13)
printf("0010");
else
printf("0001");
break;
case 1:
if ((pla.tot) >= 14)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -6)
printf("0000");
else if ((pla.tot) >= -15)
printf("0010");
else
printf("0001");
break;
case 2:
if ((pla.tot) >= 15)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -6)
printf("0000");
else if ((pla.tot) >= -16)
printf("0010");
else
printf("0001");
break;
case 3:
if ((pla.tot) >= 16)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -6)
printf("0000");
else if ((pla.tot) >= -18)
printf("0010");
else
printf("0001");
break;
case 4:
if ((pla.tot) >= 18)
printf("1000");
else if ((pla.tot) >= 6)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -20)
printf("0010");
else
printf("0001");
break;
case 5:
if ((pla.tot) >= 20)
printf("1000");
else if ((pla.tot) >= 6)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -20)
printf("0010");
else
printf("0001");
break;
case 6:
if ((pla.tot) >= 20)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -22)
printf("0010");
else
printf("0001");
break;
case 7:
if ((pla.tot) >= 24)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -24)
printf("0010");
else
printf("0001");
break;
default: printf ("XXX");
}
printf(";\n");
(pla.tot)++;
}
(pla.divisor)++;
}
printf("\tendcase\n");
}

Binary file not shown.

View File

@ -1,190 +0,0 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 12)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -4)
printf("0");
else if ((pla.tot) >= -13)
printf("2");
else
printf("1");
break;
case 1:
if ((pla.tot) >= 14)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -6)
printf("0");
else if ((pla.tot) >= -15)
printf("2");
else
printf("1");
break;
case 2:
if ((pla.tot) >= 15)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -6)
printf("0");
else if ((pla.tot) >= -16)
printf("2");
else
printf("1");
break;
case 3:
if ((pla.tot) >= 16)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -6)
printf("0");
else if ((pla.tot) >= -18)
printf("2");
else
printf("1");
break;
case 4:
if ((pla.tot) >= 18)
printf("8");
else if ((pla.tot) >= 6)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -20)
printf("2");
else
printf("1");
break;
case 5:
if ((pla.tot) >= 20)
printf("8");
else if ((pla.tot) >= 6)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -20)
printf("2");
else
printf("1");
break;
case 6:
if ((pla.tot) >= 20)
printf("8");
else if ((pla.tot) >= 8)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -22)
printf("2");
else
printf("1");
break;
case 7:
if ((pla.tot) >= 24)
printf("8");
else if ((pla.tot) >= 8)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -24)
printf("2");
else
printf("1");
break;
default: printf ("X");
}
printf("\n");
(pla.tot)++;
}
(pla.divisor)++;
}
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -1,198 +0,0 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
printf("\tcase({D[5:3],Wmsbs})\n");
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
printf("\t\t11'b");
disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
printf("_");
disp_binary((double) pla.tot, TOT_SIZE, 0);
printf(": q = 4'b");
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 24)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -26)
printf("0010");
else
printf("0001");
break;
case 1:
if ((pla.tot) >= 28)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -10)
printf("0000");
else if ((pla.tot) >= -28)
printf("0010");
else
printf("0001");
break;
case 2:
if ((pla.tot) >= 32)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -32)
printf("0010");
else
printf("0001");
break;
case 3:
if ((pla.tot) >= 32)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -34)
printf("0010");
else
printf("0001");
break;
case 4:
if ((pla.tot) >= 36)
printf("1000");
else if ((pla.tot) >= 12)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -36)
printf("0010");
else
printf("0001");
break;
case 5:
if ((pla.tot) >= 40)
printf("1000");
else if ((pla.tot) >= 12)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -40)
printf("0010");
else
printf("0001");
break;
case 6:
if ((pla.tot) >= 40)
printf("1000");
else if ((pla.tot) >= 16)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -44)
printf("0010");
else
printf("0001");
break;
case 7:
if ((pla.tot) >= 44)
printf("1000");
else if ((pla.tot) >= 16)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -46)
printf("0010");
else
printf("0001");
break;
default: printf ("XXX");
}
printf(";\n");
(pla.tot)++;
}
(pla.divisor)++;
}
printf("\tendcase\n");
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -1,6 +1,6 @@
/* sqrttestgen.c */ /* sqrttestgen.c */
/* Written 19 October 2021 David_Harris@hmc.edu /* Written 7/22/2022 by Cedar Turek
This program creates test vectors for mantissa component This program creates test vectors for mantissa component
of an IEEE floating point square root. of an IEEE floating point square root.
@ -15,6 +15,7 @@
/* Constants */ /* Constants */
#define ENTRIES 17 #define ENTRIES 17
#define BIGENT 1000
#define RANDOM_VECS 500 #define RANDOM_VECS 500
/* Prototypes */ /* Prototypes */
@ -30,15 +31,14 @@ void main(void)
FILE *fptr; FILE *fptr;
double aFrac, rFrac; double aFrac, rFrac;
int aExp, rExp; int aExp, rExp;
double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625, double mans[ENTRIES] = {1, 1849.0/1024, 1.25, 1.125, 1.0625,
1.75, 1.875, 1.99999, 1.75, 1.875, 1.99999,
1.1, 1.2, 1.01, 1.001, 1.0001, 1.1, 1.5, 1.01, 1.001, 1.0001,
<<<<<<< Updated upstream
1/1.1, 1/1.5, 1/1.25, 1/1.125};
=======
2/1.1, 2/1.5, 2/1.25, 2/1.125}; 2/1.1, 2/1.5, 2/1.25, 2/1.125};
>>>>>>> Stashed changes
double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, double bigtest[BIGENT];
double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 1, 10,
11, 12, 13, 14, 15, 16}; 11, 12, 13, 14, 15, 16};
int i; int i;
int bias = 1023; int bias = 1023;
@ -48,13 +48,23 @@ void main(void)
exit(1); exit(1);
} }
for (i=0; i<ENTRIES; i++) { // Small Test
aFrac = mans[i]; // for (i=0; i<ENTRIES; i++) {
aExp = exps[i] + bias; // aFrac = mans[i];
rFrac = sqrt(aFrac * pow(2, aExp - bias)); // aExp = exps[i] + bias;
rExp = (int) (log(rFrac)/log(2) + bias); // rFrac = sqrt(aFrac * pow(2, exps[i]));
output(fptr, aExp, aFrac, rExp, rFrac); // rExp = (int) (log(rFrac)/log(2) + bias);
} // output(fptr, aExp, aFrac, rExp, rFrac);
// }
// WS
// Test 1: sqrt(1) = 1 0000 0000 0000 00
// Test 2: sqrt(1849/1024) = 43/32 0000 1100 1110 01
// Test 3: sqrt(5) 0000 0100 0000 00
// Test 4: sqrt(9) = 3 1111 1001 0000 00
// Test 5: sqrt(17) 0000 0001 0000 00
// Test 6: sqrt(56) 1111 1110 0000 00
// Test 7: sqrt(120) 0000 1110 0000 00
// for (i = 0; i< RANDOM_VECS; i++) { // for (i = 0; i< RANDOM_VECS; i++) {
// a = random_input(); // a = random_input();
@ -62,6 +72,16 @@ void main(void)
// output(fptr, a, r); // output(fptr, a, r);
// } // }
// Big Test
for (i=0; i<BIGENT; i++) {
bigtest[i] = random_input();
aFrac = bigtest[i];
aExp = (i - BIGENT/2) + bias;
rFrac = sqrt(aFrac * pow(2, (i - BIGENT/2)));
rExp = (int) (log(rFrac)/log(2) + bias);
output(fptr, aExp, aFrac, rExp, rFrac);
}
fclose(fptr); fclose(fptr);
} }
@ -69,14 +89,19 @@ void main(void)
void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac) void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac)
{ {
// Print a in standard double format
fprintf(fptr, "%03x", aExp); fprintf(fptr, "%03x", aExp);
printhex(fptr, aFrac); printhex(fptr, aFrac);
fprintf(fptr, "_"); fprintf(fptr, "_");
// Spacing for testbench, value doesn't matter
fprintf(fptr, "%016x", 0);
fprintf(fptr, "_");
// Print r in standard double format
fprintf(fptr, "%03x", rExp); fprintf(fptr, "%03x", rExp);
printhex(fptr, rFrac); printhex(fptr, rFrac);
fprintf(fptr, "\n"); fprintf(fptr, "\n");
} }
void printhex(FILE *fptr, double m) void printhex(FILE *fptr, double m)
@ -95,6 +120,6 @@ void printhex(FILE *fptr, double m)
double random_input(void) double random_input(void)
{ {
return 1.0 + rand()/32767.0; return 1.0 + ((rand() % 32768)/32767.0);
} }

View File

@ -1,5 +1,7 @@
add wave -noupdate /testbench/* add wave -noupdate /testbench/*
add wave -noupdate /testbench/srt/* add wave -noupdate /testbench/srt/*
add wave -noupdate /testbench/srt/otfc2/* add wave -noupdate /testbench/srt/sotfc2/*
add wave -noupdate /testbench/srt/preproc/* add wave -noupdate /testbench/srt/preproc/*
add wave -noupdate /testbench/srt/postproc/*
add wave -noupdate /testbench/srt/expcalc/*
add wave -noupdate /testbench/srt/divcounter/* add wave -noupdate /testbench/srt/divcounter/*

View File

@ -2,7 +2,7 @@
// srt.sv // srt.sv
// //
// Written: David_Harris@hmc.edu 13 January 2022 // Written: David_Harris@hmc.edu 13 January 2022
// Modified: cturek@hmc.edu June 2022 // Modified: cturek@hmc.edu July 2022
// //
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit // Purpose: Combined Divide and Square Root Floating Point and Integer Unit
// //
@ -29,40 +29,42 @@
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" `include "wally-config.vh"
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
module srt ( module srt (
input logic clk, input logic clk,
input logic Start, input logic Start,
input logic Stall, // *** multiple pipe stages input logic Stall, // *** multiple pipe stages
input logic Flush, // *** multiple pipe stages input logic Flush, // *** multiple pipe stages
// Floating Point Inputs // Floating Point
// later add exponents, signs, special cases
input logic XSign, YSign, input logic XSign, YSign,
input logic [`NE-1:0] XExp, YExp, input logic [`NE-1:0] XExp, YExp,
input logic [`NF-1:0] SrcXFrac, SrcYFrac, input logic [`NF-1:0] SrcXFrac, SrcYFrac,
// Integer
input logic [`XLEN-1:0] SrcA, SrcB, input logic [`XLEN-1:0] SrcA, SrcB,
// Customization
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
input logic W64, // 32-bit ints on XLEN=64 input logic W64, // 32-bit ints on XLEN=64
// Selection
input logic Signed, // Interpret integers as signed 2's complement input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs input logic Int, // Choose integer inputs
input logic Mod, // perform remainder calculation (modulo) instead of divide
input logic Sqrt, // perform square root, not divide input logic Sqrt, // perform square root, not divide
output logic rsign, done, output logic rsign, done,
output logic [`DIVLEN-1:0] Rem, Quot, // *** later handle integers output logic [`DIVLEN-1:0] Result,
output logic [`NE-1:0] rExp, output logic [`NE-1:0] rExp,
output logic [3:0] Flags output logic [3:0] Flags
); );
logic qp, qz, qm; // quotient is +1, 0, or -1 logic qp, qz, qn; // result bits are +1, 0, or -1
logic [`NE-1:0] calcExp; logic [`NE-1:0] calcExp;
logic calcSign; logic calcSign;
logic [`DIVLEN+3:0] X, Dpreproc; logic [`DIVLEN+3:0] X, Dpreproc, C, F, S, SM, AddIn;
logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; logic [$clog2(`XLEN+1)-1:0] zeroCntD, intExp, dur, calcDur;
logic intSign; logic intSign;
logic cin;
srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign); srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Mod, Sqrt, X, Dpreproc, zeroCntD, intExp, calcDur, intSign);
// Top Muxes and Registers // Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider. // When start is asserted, the inputs are loaded into the divider.
@ -76,26 +78,35 @@ module srt (
// Quotient Selection logic // Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm); qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], Sqrt, qp, qz, qn);
flopen #(`NE) expflop(clk, Start, calcExp, rExp); flopen #(`NE) expflop(clk, Start, calcExp, rExp);
flopen #(1) signflop(clk, Start, calcSign, rsign); flopen #(1) signflop(clk, Start, calcSign, rsign);
flopen #(7) durflop(clk, Start, calcDur, dur); flopen #(7) durflop(clk, Start, calcDur, dur);
counter divcounter(clk, Start, dur, done); srtcounter divcounter(clk, Start, dur, done);
// Divisor Selection logic // Divisor Selection logic
assign Db = ~D; assign Db = ~D;
mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel); mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qn, Dsel);
// If only implementing division, use divide otfc
// otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot);
// otherwise use sotfc
creg sotfcC(clk, Start, Sqrt, C);
sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, S, SM);
fsel2 fsel(qp, qn, C, S, SM, F);
// Adder input selection
assign AddIn = Sqrt ? F : Dsel;
// Partial Product Generation // Partial Product Generation
csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); assign cin = ~Sqrt & qp;
csa #(`DIVLEN+4) csa(WS, WC, AddIn, cin, WSA, WCA);
otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
signcalc signcalc(.XSign, .YSign, .calcSign); srtpostproc postproc(.WS, .WC, .X, .D, .S, .SM, .dur, .zeroCntD, .XSign, .YSign, .Signed, .Int, .Mod, .Result, .calcSign);
endmodule endmodule
//////////////// ////////////////
@ -113,47 +124,59 @@ module srtpreproc (
input logic W64, // 32-bit ints on XLEN=64 input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs input logic Int, // Choose integer inputs
input logic Mod, // perform remainder calculation (modulo) instead of divide
input logic Sqrt, // perform square root, not divide input logic Sqrt, // perform square root, not divide
output logic [`DIVLEN+3:0] X, D, output logic [`DIVLEN+3:0] X, D,
output logic [$clog2(`XLEN+1)-1:0] intExp, dur, // Quotient integer exponent output logic [$clog2(`XLEN+1)-1:0] zeroCntB, intExp, dur, // Quotient integer exponent
output logic intSign // Quotient integer sign output logic intSign // Quotient integer sign
); );
logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; logic [$clog2(`XLEN+1)-1:0] zeroCntA;
logic [`XLEN-1:0] PosA, PosB; logic [`XLEN-1:0] PosA, PosB;
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX, SqrtX; logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX;
logic [`NF+4:0] SqrtX;
// Generate positive integer inputs if they are signed
assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
// Calculate leading zeros of integer inputs
lzc #(`XLEN) lzcA (PosA, zeroCntA); lzc #(`XLEN) lzcA (PosA, zeroCntA);
lzc #(`XLEN) lzcB (PosB, zeroCntB); lzc #(`XLEN) lzcB (PosB, zeroCntB);
// Make integers have DIVLEN bits
assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}}; assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}}; assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
// Shift integers to have leading ones
assign PreprocA = ExtraA << (zeroCntA + 1); assign PreprocA = ExtraA << (zeroCntA + 1);
assign PreprocB = ExtraB << (zeroCntB + 1); assign PreprocB = ExtraB << (zeroCntB + 1);
// Make mantissas have DIVLEN bits
assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}}; assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}}; assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
// Selecting correct divider inputs
assign DivX = Int ? PreprocA : PreprocX; assign DivX = Int ? PreprocA : PreprocX;
assign SqrtX = {XExp[0] ? 4'b0000 : 4'b1111, SrcXFrac}; assign SqrtX = XExp[0] ? {5'b11101, SrcXFrac} : {4'b1111, SrcXFrac, 1'b0};
assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX};
assign X = Sqrt ? SqrtX : {4'b0001, DivX};
assign D = {4'b0001, Int ? PreprocB : PreprocY}; assign D = {4'b0001, Int ? PreprocB : PreprocY};
assign intExp = zeroCntB - zeroCntA + 1;
// Integer exponent and sign calculations
assign intExp = zeroCntB - zeroCntA - Mod + (PreprocA >= PreprocB);
assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2); // Number of cycles of divider
assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN);
endmodule endmodule
///////////////////////////////// /////////////////////////////////
// Quotient Selection, Radix 2 // // Quotient Selection, Radix 2 //
///////////////////////////////// /////////////////////////////////
module qsel2 ( // *** eventually just change to 4 bits module qsel2 (
input logic [`DIVLEN+3:`DIVLEN] ps, pc, input logic [`DIVLEN+3:`DIVLEN] ps, pc,
output logic qp, qz, qm input logic Sqrt,
output logic qp, qz, qn
); );
logic [`DIVLEN+3:`DIVLEN] p, g; logic [`DIVLEN+3:`DIVLEN] p, g;
@ -168,7 +191,7 @@ module qsel2 ( // *** eventually just change to 4 bits
assign g = ps & pc; assign g = ps & pc;
assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN])));
assign #1 sign = p[`DIVLEN+3] ^ cout; assign #1 sign = p[`DIVLEN+3] ^ cout;
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & /* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
(ps[52]^pc[52])); (ps[52]^pc[52]));
@ -180,7 +203,7 @@ module qsel2 ( // *** eventually just change to 4 bits
// Produce quotient = +1, 0, or -1 // Produce quotient = +1, 0, or -1
assign #1 qp = magnitude & ~sign; assign #1 qp = magnitude & ~sign;
assign #1 qz = ~magnitude; assign #1 qz = ~magnitude;
assign #1 qm = magnitude & sign; assign #1 qn = magnitude & sign;
endmodule endmodule
//////////////////////////////////// ////////////////////////////////////
@ -191,45 +214,41 @@ module fsel2 (
input logic [`DIVLEN+3:0] C, S, SM, input logic [`DIVLEN+3:0] C, S, SM,
output logic [`DIVLEN+3:0] F output logic [`DIVLEN+3:0] F
); );
logic [`DIVLEN+3:0] FP, FN; logic [`DIVLEN+3:0] FP, FN, FZ;
// Generate for both positive and negative bits // Generate for both positive and negative bits
assign FP = ~S & C; assign FP = ~(S << 1) & C;
assign FN = SM | (C & (~C << 2)); assign FN = (SM << 1) | (C & (~C << 2));
assign FZ = '0;
// Choose which adder input will be used // Choose which adder input will be used
assign F = sp ? FP : (sn ? FN : (`DIVLEN+4){1'b0}); always_comb
if (sp) F = FP;
else if (sn) F = FN;
else F = FZ;
// assign F = sp ? FP : (sn ? FN : FZ);
endmodule endmodule
/////////////////////////////////// ///////////////////////////////////
// On-The-Fly Converter, Radix 2 // // On-The-Fly Converter, Radix 2 //
/////////////////////////////////// ///////////////////////////////////
module otfc2 #(parameter N=64) ( module otfc2 #(parameter N=66) (
input logic clk, input logic clk,
input logic Start, input logic Start,
input logic qp, qz, qm, input logic qp, qz, qn,
output logic [N-1:0] r output logic [N-3:0] Result
); );
// The on-the-fly converter transfers the quotient // The on-the-fly converter transfers the quotient
// bits to the quotient as they come. // bits to the quotient as they come.
// // Use this otfc for division only.
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-2 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
logic [N+2:0] Q, QM, QNext, QMNext, QMMux; logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [N+1:0] QR, QMR; logic [N+1:0] QR, QMR;
flopr #(N+3) Qreg(clk, Start, QNext, Q); flopr #(N+3) Qreg(clk, Start, QNext, Q);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux); mux2 #(`DIVLEN+3) Qmux(QMNext, {(`DIVLEN+3){1'b1}}, Start, QMMux);
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
always_comb begin always_comb begin
@ -241,35 +260,73 @@ module otfc2 #(parameter N=64) (
end else if (qz) begin end else if (qz) begin
QNext = {QR, 1'b0}; QNext = {QR, 1'b0};
QMNext = {QMR, 1'b1}; QMNext = {QMR, 1'b1};
end else begin // If qp and qz are not true, then qm is end else begin // If qp and qz are not true, then qn is
QNext = {QMR, 1'b1}; QNext = {QMR, 1'b1};
QMNext = {QMR, 1'b0}; QMNext = {QMR, 1'b0};
end end
end end
assign r = Q[N+2] ? Q[N+1:2] : Q[N:1]; assign Result = Q[N] ? Q[N-1:2] : Q[N-2:1];
endmodule endmodule
/////////////////////////////// ///////////////////////////////
// Square Root OTFC, Radix 2 // // Square Root OTFC, Radix 2 //
/////////////////////////////// ///////////////////////////////
module softc2( module sotfc2(
input logic clk, input logic clk,
input logic Start, input logic Start,
input logic sp, sn, input logic sp, sn,
output logic S, input logic Sqrt,
input logic [`DIVLEN+3:0] C,
output logic [`DIVLEN+3:0] S, SM
); );
// The on-the-fly converter transfers the square root
// bits to the quotient as they come.
// Use this otfc for division and square root.
logic [`DIVLEN+3:0] SNext, SMNext, SMux;
flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM);
mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux);
flop #(`DIVLEN+4) Sreg(clk, SMux, S);
always_comb begin
if (sp) begin
SNext = S | (C & ~(C << 1));
SMNext = S;
end else if (sn) begin
SNext = SM | (C & ~(C << 1));
SMNext = SM;
end else begin // If sp and sn are not true, then sz is
SNext = S;
SMNext = SM | (C & ~(C << 1));
end
end
endmodule endmodule
//////////////////////////
// C Register for SOTFC //
//////////////////////////
module creg(input logic clk,
input logic Start,
input logic Sqrt,
output logic [`DIVLEN+3:0] C
);
logic [`DIVLEN+3:0] CMux;
mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b1111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux);
flop #(`DIVLEN+4) cflop(clk, CMux, C);
endmodule
///////////// /////////////
// counter // // counter //
///////////// /////////////
module counter(input logic clk, module srtcounter(input logic clk,
input logic req, input logic req,
input logic [$clog2(`XLEN+1)-1:0] dur, input logic [$clog2(`XLEN+1)-1:0] dur,
output logic done); output logic done
);
logic [$clog2(`XLEN+1)-1:0] count; logic [$clog2(`XLEN+1)-1:0] count;
// This block of control logic sequences the divider // This block of control logic sequences the divider
// through its iterations. You may modify it if you // through its iterations. You may modify it if you
@ -332,22 +389,86 @@ module expcalc(
input logic Sqrt, input logic Sqrt,
output logic [`NE-1:0] calcExp output logic [`NE-1:0] calcExp
); );
logic [`NE-1:0] SExp, DExp, SXExp; logic [`NE+1:0] SExp, DExp, SXExp;
assign SXExp = XExp - (`NE)'(`BIAS); assign SXExp = {2'b00, XExp} - (`NE+2)'(`BIAS);
assign SExp = {1'b0, SXExp[`NE-1:1]} + (`NE)'(`BIAS); assign SExp = (SXExp >> 1) + (`NE+2)'(`BIAS);
assign DExp = XExp - YExp + (`NE)'(`BIAS); assign DExp = {2'b00, XExp} - {2'b00, YExp} + (`NE+2)'(`BIAS);
assign calcExp = Sqrt ? SExp : DExp; assign calcExp = Sqrt ? SExp[`NE-1:0] : DExp[`NE-1:0];
endmodule endmodule
////////////// module srtpostproc(
// signcalc // input logic [`DIVLEN+3:0] WS, WC, X, D, S, SM,
////////////// input logic [$clog2(`XLEN+1)-1:0] dur, zeroCntD,
module signcalc( input logic XSign, YSign, Signed, Int, Mod,
input logic XSign, YSign, output logic [`DIVLEN-1:0] Result,
output logic calcSign output logic calcSign
); );
logic [`DIVLEN+3:0] W, shiftRem, intRem, intS;
logic [`DIVLEN-1:0] floatRes, intRes;
logic WSign;
assign W = WS + WC;
assign WSign = W[`DIVLEN+3];
// Remainder handling
always_comb begin
if (zeroCntD == ($clog2(`XLEN+1))'(`XLEN)) begin
intRem = X;
intS = -1;
end
else if (~Signed) begin
if (WSign) begin
intRem = W + D;
intS = SM;
end else begin
intRem = W;
intS = S;
end
end
else case ({YSign, XSign, WSign})
3'b000: begin
intRem = W;
intS = S;
end
3'b001: begin
intRem = W + D;
intS = SM;
end
3'b010: begin
intRem = W - D;
intS = ~S;
end
3'b011: begin
intRem = W;
intS = ~SM;
end
3'b100: begin
intRem = W;
intS = ~SM;
end
3'b101: begin
intRem = W + D;
intS = ~SM + 1;
end
3'b110: begin
intRem = W - D;
intS = S + 1;
end
3'b111: begin
intRem = W;
intS = S;
end
endcase
end
assign floatRes = S[`DIVLEN] ? S[`DIVLEN:1] : S[`DIVLEN-1:0];
assign intRes = intS[`DIVLEN] ? intS[`DIVLEN:1] : intS[`DIVLEN-1:0];
assign shiftRem = (intRem >> (zeroCntD));
always_comb begin
if (Int) begin
if (Mod) Result = shiftRem[`DIVLEN-1:0];
else Result = intRes >> (`DIVLEN - dur);
end else Result = floatRes;
end
assign calcSign = XSign ^ YSign; assign calcSign = XSign ^ YSign;
endmodule
endmodule

View File

@ -1,4 +1,4 @@
`define DIVLEN 64 `include "wally-config.vh"
///////////// /////////////
// counter // // counter //
@ -39,37 +39,26 @@ endmodule
// testbench // // testbench //
////////// //////////
module testbench; module testbench;
logic clk; logic clk;
logic req; logic req;
logic done; logic done;
logic Int; logic Int, Sqrt, Mod;
logic [63:0] a, b; logic [`XLEN-1:0] a, b;
logic [51:0] afrac, bfrac; logic [`NF-1:0] afrac, bfrac;
logic [10:0] aExp, bExp; logic [`NE-1:0] aExp, bExp;
logic asign, bsign; logic asign, bsign;
logic [51:0] r; logic [`NF-1:0] r;
logic [63:0] rInt; logic [`XLEN-1:0] rInt;
logic [`DIVLEN-1:0] Quot; logic [`DIVLEN-1:0] Quot;
// Test parameters // Test parameters
parameter MEM_SIZE = 40000; parameter MEM_SIZE = 40000;
parameter MEM_WIDTH = 64+64+64+64; parameter MEM_WIDTH = 64+64+64;
// INT TEST SIZES // Test sizes
// `define memrem 63:0
// `define memr 127:64
// `define memb 191:128
// `define mema 255:192
// FLOAT TEST SIZES
// `define memr 63:0
// `define memb 127:64
// `define mema 191:128
// SQRT TEST SIZES
`define memr 63:0 `define memr 63:0
`define mema 127:64 `define memb 127:64
`define memb 191:128 `define mema 191:128
// Test logicisters // Test logicisters
logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file
@ -80,8 +69,9 @@ module testbench;
logic rsign; logic rsign;
integer testnum, errors; integer testnum, errors;
// Equip Int test or Sqrt test // Equip Int, Sqrt, or IntMod test
assign Int = 1'b0; assign Int = 1'b0;
assign Mod = 1'b0;
assign Sqrt = 1'b1; assign Sqrt = 1'b1;
// Divider // Divider
@ -91,8 +81,8 @@ module testbench;
.XSign(asign), .YSign(bsign), .rsign, .XSign(asign), .YSign(bsign), .rsign,
.SrcXFrac(afrac), .SrcYFrac(bfrac), .SrcXFrac(afrac), .SrcYFrac(bfrac),
.SrcA(a), .SrcB(b), .Fmt(2'b00), .SrcA(a), .SrcB(b), .Fmt(2'b00),
.W64(1'b1), .Signed(1'b0), .Int, .Sqrt, .W64(1'b1), .Signed(1'b0), .Int, .Mod, .Sqrt,
.Quot, .Rem(), .Flags(), .done); .Result(Quot), .Flags(), .done);
// Counter // Counter
// counter counter(clk, req, done); // counter counter(clk, req, done);
@ -118,7 +108,7 @@ module testbench;
b = Vec[`memb]; b = Vec[`memb];
{bsign, bExp, bfrac} = b; {bsign, bExp, bfrac} = b;
nextr = Vec[`memr]; nextr = Vec[`memr];
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)]; r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
rInt = Quot; rInt = Quot;
req <= #5 1; req <= #5 1;
end end
@ -126,10 +116,10 @@ module testbench;
// Apply directed test vectors read from file. // Apply directed test vectors read from file.
always @(posedge clk) begin always @(posedge clk) begin
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)]; r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
rInt = Quot; rInt = Quot;
if (done) begin if (done) begin
if (~Int & ~Sqrt) begin if (~Int & ~Sqrt) begin // This test case checks floating point division
req <= #5 1; req <= #5 1;
diffp = correctr[51:0] - r; diffp = correctr[51:0] - r;
diffn = r - correctr[51:0]; diffn = r - correctr[51:0];
@ -145,35 +135,32 @@ module testbench;
$display("%d Tests completed successfully", testnum); $display("%d Tests completed successfully", testnum);
$stop; $stop;
end end
end else if (~Sqrt) begin end else if (~Sqrt) begin // This test case works for both integer divide and integer modulo
req <= #5 1; req <= #5 1;
diffp = correctr[63:0] - rInt; diffp = correctr[63:0] - rInt;
diffn = rInt - correctr[63:0]; if (($signed(diffp) != 0) | (diffp === 64'bx)) // check if accurate to 1 ulp
if (($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
begin begin
errors = errors+1; errors = errors+1;
$display("result was %h, should be %h %h %h\n", rInt, correctr, diffn, diffp); $display("result was %h, should be %h %h\n", rInt, correctr, diffp);
$display("failed\n"); $display("failed\n");
$stop;
end end
if (afrac === 52'hxxxxxxxxxxxxx) if (afrac === 52'hxxxxxxxxxxxxx)
begin begin
$display("%d Tests completed successfully", testnum); $display("%d Tests completed successfully", testnum - errors);
$stop; $stop;
end end
end else begin end else begin // This test case verifies square root
req <= #5 1; req <= #5 1;
diffp = correctr[51:0] - r; diffp = correctr[51:0] - r;
diffn = r - correctr[51:0]; diffn = r - correctr[51:0];
if (rExp !== correctr[62:52]) // check if accurate to 1 ulp if ((rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
begin begin
errors = errors + 1; errors = errors + 1;
$display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp); $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp);
$display("failed\n"); $display("failed\n");
$stop;
end end
if (afrac === 52'hxxxxxxxxxxxxx) begin if (afrac === 52'hxxxxxxxxxxxxx) begin
$display("%d Tests completed successfully", testnum); $display("%d Tests completed successfully", testnum-errors);
$stop; end $stop; end
end end
end end

View File

@ -66,9 +66,9 @@ module testbenchfp;
logic [`XLEN-1:0] IntRes, CmpRes; // Results from each unit logic [`XLEN-1:0] IntRes, CmpRes; // Results from each unit
logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags
logic AnsNaN, ResNaN, NaNGood; logic AnsNaN, ResNaN, NaNGood;
logic XSgn, YSgn, ZSgn; // sign of the inputs logic Xs, Ys, Zs; // sign of the inputs
logic [`NE-1:0] XExp, YExp, ZExp; // exponent of the inputs logic [`NE-1:0] Xe, Ye, Ze; // exponent of the inputs
logic [`NF:0] XMan, YMan, ZMan; // mantissas of the inputs logic [`NF:0] Xm, Ym, Zm; // mantissas of the inputs
logic XNaN, YNaN, ZNaN; // is the input NaN logic XNaN, YNaN, ZNaN; // is the input NaN
logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN
logic XDenorm, ZDenorm; // is the input denormalized logic XDenorm, ZDenorm; // is the input denormalized
@ -80,24 +80,26 @@ module testbenchfp;
logic CvtResSgnE; logic CvtResSgnE;
logic [`NE:0] CvtCalcExpE; // the calculated expoent logic [`NE:0] CvtCalcExpE; // the calculated expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by
logic [`DIVLEN+2:0] Quot; logic [`DIVb-(`RADIX/4):0] Quot;
logic CvtResDenormUfE; logic CvtResDenormUfE;
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2; logic [`DURLEN-1:0] EarlyTermShift;
logic DivStart, DivBusy; logic DivStart, DivBusy;
logic reset = 1'b0; logic reset = 1'b0;
logic [`DIVLEN-1:0] DivX; logic [`DIVLEN-1:0] DivX;
logic [`DIVLEN-1:0] Dpreproc; logic [`DIVLEN-1:0] Dpreproc;
logic [`DIVLEN+3:0] WSN, WS; logic [`DIVLEN+3:0] NextWSN, WS;
logic [`DIVLEN+3:0] WCN, WC; logic [`DIVLEN+3:0] NextWCN, WC;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [$clog2(`DIVLEN/2+3)-1:0] Dur; logic [`DURLEN-1:0] Dur;
// in-between FMA signals // in-between FMA signals
logic Mult; logic Mult;
logic Ss;
logic [`NE+1:0] Pe; logic [`NE+1:0] Pe;
logic [`NE+1:0] Se;
logic ZmSticky; logic ZmSticky;
logic KillProd; logic KillProd;
logic [$clog2(3*`NF+7)-1:0] NCnt; logic [$clog2(3*`NF+7)-1:0] SCnt;
logic [3*`NF+5:0] Sm; logic [3*`NF+5:0] Sm;
logic InvA; logic InvA;
logic NegSum; logic NegSum;
@ -254,16 +256,16 @@ module testbenchfp;
Fmt = {Fmt, 2'b11}; Fmt = {Fmt, 2'b11};
end end
end end
// if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
// // add the square-root tests/op-ctrls/unit/fmt // add the square-root tests/op-ctrls/unit/fmt
// Tests = {Tests, f128sqrt}; Tests = {Tests, f128sqrt};
// OpCtrl = {OpCtrl, `SQRT_OPCTRL}; OpCtrl = {OpCtrl, `SQRT_OPCTRL};
// WriteInt = {WriteInt, 1'b0}; WriteInt = {WriteInt, 1'b0};
// for(int i = 0; i<5; i++) begin for(int i = 0; i<5; i++) begin
// Unit = {Unit, `DIVUNIT}; Unit = {Unit, `DIVUNIT};
// Fmt = {Fmt, 2'b11}; Fmt = {Fmt, 2'b11};
// end end
// end end
if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested
Tests = {Tests, f128fma}; Tests = {Tests, f128fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL}; OpCtrl = {OpCtrl, `FMA_OPCTRL};
@ -381,16 +383,16 @@ module testbenchfp;
Fmt = {Fmt, 2'b01}; Fmt = {Fmt, 2'b01};
end end
end end
// if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted
// // add the correct tests/op-ctrls/unit/fmt to their lists // add the correct tests/op-ctrls/unit/fmt to their lists
// Tests = {Tests, f64sqrt}; Tests = {Tests, f64sqrt};
// OpCtrl = {OpCtrl, `SQRT_OPCTRL}; OpCtrl = {OpCtrl, `SQRT_OPCTRL};
// WriteInt = {WriteInt, 1'b0}; WriteInt = {WriteInt, 1'b0};
// for(int i = 0; i<5; i++) begin for(int i = 0; i<5; i++) begin
// Unit = {Unit, `DIVUNIT}; Unit = {Unit, `DIVUNIT};
// Fmt = {Fmt, 2'b01}; Fmt = {Fmt, 2'b01};
// end end
// end end
if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
Tests = {Tests, f64fma}; Tests = {Tests, f64fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL}; OpCtrl = {OpCtrl, `FMA_OPCTRL};
@ -492,16 +494,16 @@ module testbenchfp;
Fmt = {Fmt, 2'b00}; Fmt = {Fmt, 2'b00};
end end
end end
// if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
// // add the correct tests/op-ctrls/unit/fmt to their lists // add the correct tests/op-ctrls/unit/fmt to their lists
// Tests = {Tests, f32sqrt}; Tests = {Tests, f32sqrt};
// OpCtrl = {OpCtrl, `SQRT_OPCTRL}; OpCtrl = {OpCtrl, `SQRT_OPCTRL};
// WriteInt = {WriteInt, 1'b0}; WriteInt = {WriteInt, 1'b0};
// for(int i = 0; i<5; i++) begin for(int i = 0; i<5; i++) begin
// Unit = {Unit, `DIVUNIT}; Unit = {Unit, `DIVUNIT};
// Fmt = {Fmt, 2'b00}; Fmt = {Fmt, 2'b00};
// end end
// end end
if (TEST === "fma" | TEST === "all") begin // if fma is being tested if (TEST === "fma" | TEST === "all") begin // if fma is being tested
Tests = {Tests, f32fma}; Tests = {Tests, f32fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL}; OpCtrl = {OpCtrl, `FMA_OPCTRL};
@ -585,16 +587,16 @@ module testbenchfp;
Fmt = {Fmt, 2'b10}; Fmt = {Fmt, 2'b10};
end end
end end
// if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
// // add the correct tests/op-ctrls/unit/fmt to their lists // add the correct tests/op-ctrls/unit/fmt to their lists
// Tests = {Tests, f16sqrt}; Tests = {Tests, f16sqrt};
// OpCtrl = {OpCtrl, `SQRT_OPCTRL}; OpCtrl = {OpCtrl, `SQRT_OPCTRL};
// WriteInt = {WriteInt, 1'b0}; WriteInt = {WriteInt, 1'b0};
// for(int i = 0; i<5; i++) begin for(int i = 0; i<5; i++) begin
// Unit = {Unit, `DIVUNIT}; Unit = {Unit, `DIVUNIT};
// Fmt = {Fmt, 2'b10}; Fmt = {Fmt, 2'b10};
// end end
// end end
if (TEST === "fma" | TEST === "all") begin // if fma is being tested if (TEST === "fma" | TEST === "all") begin // if fma is being tested
Tests = {Tests, f16fma}; Tests = {Tests, f16fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL}; OpCtrl = {OpCtrl, `FMA_OPCTRL};
@ -648,14 +650,14 @@ module testbenchfp;
// extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA,
.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal), .Xs, .Ys, .Zs, .Unit(UnitVal),
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal), .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal),
.XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart, .Xm, .Ym, .Zm, .DivStart,
.XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN), .XNaN, .YNaN, .ZNaN,
.XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), .XSNaN, .YSNaN, .ZSNaN,
.XDenormE(XDenorm), .ZDenormE(ZDenorm), .XDenorm, .ZDenorm,
.XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero), .XZero, .YZero, .ZZero,
.XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf), .XExpMaxE(XExpMax), .XInf, .YInf, .ZInf, .XExpMax,
.X, .Y, .Z); .X, .Y, .Z);
@ -671,35 +673,34 @@ module testbenchfp;
/////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////
// instantiate devices under test // instantiate devices under test
fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), fma fma(.Xs(Xs), .Ys(Ys), .Zs(Zs),
.Xe(XExp), .Ye(YExp), .Ze(ZExp), .Xe(Xe), .Ye(Ye), .Ze(Ze),
.Xm(XMan), .Ym(YMan), .Zm(ZMan), .Xm(Xm), .Ym(Ym), .Zm(Zm),
.XZero, .YZero, .ZZero, .XZero, .YZero, .ZZero, .Ss, .Se,
.FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps, .OpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .SCnt, .As, .Ps,
.Pe, .ZmSticky, .KillProd); .Pe, .ZmSticky, .KillProd);
postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]), postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
.Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp), .Ze(Ze), .ZDenorm(ZDenorm), .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
.Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss),
.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .DivNegSticky, .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE),
.XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
.XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
.XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
.FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone, .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, .FmaSe(Se),
.FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaSCnt(SCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal),
.PostProcFlg(Flg), .W(FpRes), .FCvtIntRes(IntRes)); .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal), fcvt fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal),
.XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero, .XZero(XZero), .XDenorm(XDenorm), .OpCtrl(OpCtrlVal), .IntZero,
.Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE)); .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE));
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), fcmp fcmp (.Fmt(ModFmt), .OpCtrl(OpCtrlVal), .Xs, .Ys, .Xe, .Ye,
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .Xm, .Ym, .XZero, .YZero, .CmpIntRes(CmpRes),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes));
srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); divsqrt divsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart),
.XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2)); .StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .DivBusy, .QeM(DivCalcExp),
srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), .EarlyTermShiftM(EarlyTermShift), .QmM(Quot), .DivDone);
.Quot, .Rem(), .DivCalcExpM(DivCalcExp));
assign CmpFlg[3:0] = 0; assign CmpFlg[3:0] = 0;
@ -854,7 +855,7 @@ end
// check if result is correct // check if result is correct
// - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage) // - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~(DivBusy|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
errors += 1; errors += 1;
$display("There is an error in %s", Tests[TestNum]); $display("There is an error in %s", Tests[TestNum]);
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@ -867,10 +868,10 @@ end
// Testfloat outputs 800... for both the largest integer values for both positive and negitive numbers but // Testfloat outputs 800... for both the largest integer values for both positive and negitive numbers but
// the riscv spec specifies 2^31-1 for positive values out of range and NaNs ie 7fff... // the riscv spec specifies 2^31-1 for positive values out of range and NaNs ie 7fff...
else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&(Res[`XLEN-1:0] === (`XLEN)'(0))) | else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&(Res[`XLEN-1:0] === (`XLEN)'(0))) |
(WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) | (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) |
(WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) | (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) |
(~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin (~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin
errors += 1; errors += 1;
$display("There is an error in %s", Tests[TestNum]); $display("There is an error in %s", Tests[TestNum]);
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@ -923,18 +924,19 @@ module readvectors (
output logic [`FLEN-1:0] Ans, output logic [`FLEN-1:0] Ans,
output logic [`XLEN-1:0] SrcA, output logic [`XLEN-1:0] SrcA,
output logic [4:0] AnsFlg, output logic [4:0] AnsFlg,
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ output logic Xs, Ys, Zs, // sign bits of XYZ
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN output logic XNaN, YNaN, ZNaN, // is XYZ a NaN
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
output logic XDenormE, ZDenormE, // is XYZ denormalized output logic XDenorm, ZDenorm, // is XYZ denormalized
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero output logic XZero, YZero, ZZero, // is XYZ zero
output logic XInfE, YInfE, ZInfE, // is XYZ infinity output logic XInf, YInf, ZInf, // is XYZ infinity
output logic XExpMaxE, output logic XExpMax,
output logic DivStart, output logic DivStart,
output logic [`FLEN-1:0] X, Y, Z output logic [`FLEN-1:0] X, Y, Z
); );
logic XEn, YEn, ZEn;
// apply test vectors on rising edge of clk // apply test vectors on rising edge of clk
// Format of vectors Inputs(1/2/3)_AnsFlg // Format of vectors Inputs(1/2/3)_AnsFlg
@ -1005,40 +1007,72 @@ module readvectors (
end end
endcase endcase
`DIVUNIT: `DIVUNIT:
case (Fmt) if(OpCtrl[0])
2'b11: begin // quad case (Fmt)
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; 2'b11: begin // quad
Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; X = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
Ans = TestVector[8+(`Q_LEN-1):8]; Ans = TestVector[8+(`Q_LEN-1):8];
if (~clk) #5; if (~clk) #5;
DivStart = 1'b1; #10 // one clk cycle DivStart = 1'b1; #10 // one clk cycle
DivStart = 1'b0; DivStart = 1'b0;
end end
2'b01: if (`D_SUPPORTED)begin // double 2'b01: if (`D_SUPPORTED)begin // double
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; if (~clk) #5;
if (~clk) #5; DivStart = 1'b1; #10
DivStart = 1'b1; #10 DivStart = 1'b0;
DivStart = 1'b0; end
end 2'b00: if (`S_SUPPORTED)begin // single
2'b00: if (`S_SUPPORTED)begin // single X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; if (~clk) #5;
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; DivStart = 1'b1; #10
if (~clk) #5; DivStart = 1'b0;
DivStart = 1'b1; #10 end
DivStart = 1'b0; 2'b10: begin // half
end X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
2'b10: begin // half Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; if (~clk) #5;
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; DivStart = 1'b1; #10
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; DivStart = 1'b0;
if (~clk) #5; end
DivStart = 1'b1; #10 endcase
DivStart = 1'b0; else
end case (Fmt)
endcase 2'b11: begin // quad
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
Ans = TestVector[8+(`Q_LEN-1):8];
if (~clk) #5;
DivStart = 1'b1; #10 // one clk cycle
DivStart = 1'b0;
end
2'b01: if (`D_SUPPORTED)begin // double
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
2'b00: if (`S_SUPPORTED)begin // single
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
2'b10: begin // half
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
endcase
`CMPUNIT: `CMPUNIT:
case (Fmt) case (Fmt)
2'b11: begin // quad 2'b11: begin // quad
@ -1256,8 +1290,12 @@ module readvectors (
endcase endcase
end end
unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]);
.XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)|((Unit == `DIVUNIT)&OpCtrl[0]));
.XDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, assign ZEn = (Unit == `FMAUNIT);
.XExpMaxE);
unpack unpack(.X, .Y, .Z, .Fmt(ModFmt), .Xs, .Ys, .Zs, .Xe, .Ye, .Ze,
.Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN,
.XDenorm, .ZDenorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf,
.XEn, .YEn, .ZEn, .XExpMax);
endmodule endmodule

View File

@ -89,7 +89,8 @@ logic [3:0] dummy;
if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv}; if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv};
else tests = {arch64c}; else tests = {arch64c};
"arch64m": if (`M_SUPPORTED) tests = arch64m; "arch64m": if (`M_SUPPORTED) tests = arch64m;
"arch64d": if (`D_SUPPORTED) tests = arch64d; "arch64f": if (`F_SUPPORTED) tests = arch64f;
"arch64d": if (`D_SUPPORTED) tests = arch64d;
"imperas64i": tests = imperas64i; "imperas64i": tests = imperas64i;
"imperas64f": if (`F_SUPPORTED) tests = imperas64f; "imperas64f": if (`F_SUPPORTED) tests = imperas64f;
"imperas64d": if (`D_SUPPORTED) tests = imperas64d; "imperas64d": if (`D_SUPPORTED) tests = imperas64d;
@ -112,6 +113,7 @@ logic [3:0] dummy;
else tests = {arch32c}; else tests = {arch32c};
"arch32m": if (`M_SUPPORTED) tests = arch32m; "arch32m": if (`M_SUPPORTED) tests = arch32m;
"arch32f": if (`F_SUPPORTED) tests = arch32f; "arch32f": if (`F_SUPPORTED) tests = arch32f;
"arch32d": if (`D_SUPPORTED) tests = arch32d;
"imperas32i": tests = imperas32i; "imperas32i": tests = imperas32i;
"imperas32f": if (`F_SUPPORTED) tests = imperas32f; "imperas32f": if (`F_SUPPORTED) tests = imperas32f;
"imperas32m": if (`M_SUPPORTED) tests = imperas32m; "imperas32m": if (`M_SUPPORTED) tests = imperas32m;
@ -123,6 +125,7 @@ logic [3:0] dummy;
"wally32priv": tests = wally32priv; "wally32priv": tests = wally32priv;
"wally32periph": tests = wally32periph; "wally32periph": tests = wally32periph;
"embench": tests = embench; "embench": tests = embench;
"coremark": tests = coremark;
endcase endcase
end end
if (tests.size() == 0) begin if (tests.size() == 0) begin
@ -177,7 +180,7 @@ logic [3:0] dummy;
testadr = 0; testadr = 0;
testadrNoBase = 0; testadrNoBase = 0;
// riscof tests have a different signature, tests[0] == "1" refers to RiscvArchTests and tests[0] == "2" refers to WallyRiscvArchTests // riscof tests have a different signature, tests[0] == "1" refers to RiscvArchTests and tests[0] == "2" refers to WallyRiscvArchTests
riscofTest = tests[0] == "1"; // | tests[0] == "2"; riscofTest = tests[0] == "1" | tests[0] == "2";
// fill memory with defined values to reduce Xs in simulation // fill memory with defined values to reduce Xs in simulation
// Quick note the memory will need to be initialized. The C library does not // Quick note the memory will need to be initialized. The C library does not
// guarantee the initialized reads. For example a strcmp can read 6 byte // guarantee the initialized reads. For example a strcmp can read 6 byte
@ -193,13 +196,19 @@ logic [3:0] dummy;
/* if (tests[0] == `IMPERASTEST) /* if (tests[0] == `IMPERASTEST)
pathname = tvpaths[0]; pathname = tvpaths[0];
else pathname = tvpaths[1]; */ else pathname = tvpaths[1]; */
memfilename = {pathname, tests[test], ".elf.memfile"}; if (riscofTest) memfilename = {pathname, tests[test], "/ref/ref.elf.memfile"};
else memfilename = {pathname, tests[test], ".elf.memfile"};
if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.irom.ram.memory.RAM); if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.irom.ram.memory.RAM);
else $readmemh(memfilename, dut.uncore.ram.ram.memory.RAM); else $readmemh(memfilename, dut.uncore.ram.ram.memory.RAM);
if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.memory.RAM); if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.memory.RAM);
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"}; if (riscofTest) begin
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"}; ProgramAddrMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.lab"};
end else begin
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
end
// declare memory labels that interest us, the updateProgramAddrLabelArray task will find the addr of each label and fill the array // declare memory labels that interest us, the updateProgramAddrLabelArray task will find the addr of each label and fill the array
// to expand, add more elements to this array and initialize them to zero (also initilaize them to zero at the start of the next test) // to expand, add more elements to this array and initialize them to zero (also initilaize them to zero at the start of the next test)
updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray); updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray);
@ -239,7 +248,8 @@ logic [3:0] dummy;
// this contains instret and cycles for start and end of test run, used by embench python speed script to calculate embench speed score // this contains instret and cycles for start and end of test run, used by embench python speed script to calculate embench speed score
// also begin_signature contains the results of the self checking mechanism, which will be read by the python script for error checking // also begin_signature contains the results of the self checking mechanism, which will be read by the python script for error checking
$display("Embench Benchmark: %s is done.", tests[test]); $display("Embench Benchmark: %s is done.", tests[test]);
outputfile = {pathname, tests[test], ".sim.output"}; if (riscofTest) outputfile = {pathname, tests[test], "/ref/ref.sim.output"};
else outputfile = {pathname, tests[test], ".sim.output"};
outputFilePointer = $fopen(outputfile); outputFilePointer = $fopen(outputfile);
i = 0; i = 0;
while ($unsigned(i) < $unsigned(5'd5)) begin while ($unsigned(i) < $unsigned(5'd5)) begin
@ -254,7 +264,7 @@ logic [3:0] dummy;
for(i=0; i<SIGNATURESIZE; i=i+1) begin for(i=0; i<SIGNATURESIZE; i=i+1) begin
sig32[i] = 'bx; sig32[i] = 'bx;
end end
if (riscofTest) signame = {pathname, tests[test], "erence-sail_c_simulator.signature"}; if (riscofTest) signame = {pathname, tests[test], "/ref/Reference-sail_c_simulator.signature"};
else signame = {pathname, tests[test], ".signature.output"}; else signame = {pathname, tests[test], ".signature.output"};
// read signature, reformat in 64 bits if necessary // read signature, reformat in 64 bits if necessary
$readmemh(signame, sig32); $readmemh(signame, sig32);
@ -311,14 +321,20 @@ logic [3:0] dummy;
else begin else begin
// If there are still additional tests to run, read in information for the next test // If there are still additional tests to run, read in information for the next test
//pathname = tvpaths[tests[0]]; //pathname = tvpaths[tests[0]];
memfilename = {pathname, tests[test], ".elf.memfile"}; if (riscofTest) memfilename = {pathname, tests[test], "/ref/ref.elf.memfile"};
else memfilename = {pathname, tests[test], ".elf.memfile"};
//$readmemh(memfilename, dut.uncore.ram.ram.memory.RAM); //$readmemh(memfilename, dut.uncore.ram.ram.memory.RAM);
if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.irom.ram.memory.RAM); if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.irom.ram.memory.RAM);
else $readmemh(memfilename, dut.uncore.ram.ram.memory.RAM); else $readmemh(memfilename, dut.uncore.ram.ram.memory.RAM);
if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.memory.RAM); if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.memory.RAM);
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"}; if (riscofTest) begin
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"}; ProgramAddrMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.lab"};
end else begin
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
end
ProgramAddrLabelArray = '{ "begin_signature" : 0, "tohost" : 0 }; ProgramAddrLabelArray = '{ "begin_signature" : 0, "tohost" : 0 };
updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray); updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray);
$display("Read memfile %s", memfilename); $display("Read memfile %s", memfilename);
@ -427,8 +443,13 @@ module DCacheFlushFSM
localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES; localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES;
localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS; localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS;
localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN; localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN;
localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN; localparam integer linelen = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN;
localparam integer lognumlines = $clog2(numlines); localparam integer sramlen = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].SRAMLEN;
localparam integer cachesramwords = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].NUMSRAM;
//testbench.dut.core.lsu.bus.dcache.dcache.CacheWays.NUMSRAM;
localparam integer numwords = sramlen/`XLEN;
localparam integer lognumlines = $clog2(numlines);
localparam integer loglinebytelen = $clog2(linebytelen); localparam integer loglinebytelen = $clog2(linebytelen);
localparam integer lognumways = $clog2(numways); localparam integer lognumways = $clog2(numways);
localparam integer tagstart = lognumlines + loglinebytelen; localparam integer tagstart = lognumlines + loglinebytelen;
@ -436,16 +457,17 @@ module DCacheFlushFSM
genvar index, way, cacheWord; genvar index, way, cacheWord;
logic [`XLEN-1:0] CacheData [numways-1:0] [numlines-1:0] [numwords-1:0]; logic [sramlen-1:0] CacheData [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0]; logic [sramlen-1:0] cacheline;
logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0]; logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0]; logic CacheValid [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0]; logic CacheDirty [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
for(index = 0; index < numlines; index++) begin for(index = 0; index < numlines; index++) begin
for(way = 0; way < numways; way++) begin for(way = 0; way < numways; way++) begin
for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin for(cacheWord = 0; cacheWord < cachesramwords; cacheWord++) begin
copyShadow #(.tagstart(tagstart), copyShadow #(.tagstart(tagstart),
.loglinebytelen(loglinebytelen)) .loglinebytelen(loglinebytelen), .sramlen(sramlen))
copyShadow(.clk, copyShadow(.clk,
.start, .start,
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]), .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
@ -463,18 +485,25 @@ module DCacheFlushFSM
end end
end end
integer i, j, k; integer i, j, k, l;
always @(posedge clk) begin always @(posedge clk) begin
if (start) begin #1 if (start) begin #1
#1 #1
for(i = 0; i < numlines; i++) begin for(i = 0; i < numlines; i++) begin
for(j = 0; j < numways; j++) begin for(j = 0; j < numways; j++) begin
for(k = 0; k < numwords; k++) begin for(l = 0; l < cachesramwords; l++) begin
if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin if (CacheValid[j][i][l] & CacheDirty[j][i][l]) begin
ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k]; for(k = 0; k < numwords; k++) begin
//cacheline = CacheData[j][i][0];
// does not work with modelsim
// # ** Error: ../testbench/testbench.sv(483): Range must be bounded by constant expressions.
// see https://verificationacademy.com/forums/systemverilog/range-must-be-bounded-constant-expressions
//ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = cacheline[`XLEN*(k+1)-1:`XLEN*k];
ShadowRAM[(CacheAdr[j][i][l] >> $clog2(`XLEN/8)) + k] = CacheData[j][i][l][`XLEN*k +: `XLEN];
end
end
end end
end
end end
end end
end end
@ -484,15 +513,15 @@ module DCacheFlushFSM
endmodule endmodule
module copyShadow module copyShadow
#(parameter tagstart, loglinebytelen) #(parameter tagstart, loglinebytelen, sramlen)
(input logic clk, (input logic clk,
input logic start, input logic start,
input logic [`PA_BITS-1:tagstart] tag, input logic [`PA_BITS-1:tagstart] tag,
input logic valid, dirty, input logic valid, dirty,
input logic [`XLEN-1:0] data, input logic [sramlen-1:0] data,
input logic [32-1:0] index, input logic [32-1:0] index,
input logic [32-1:0] cacheWord, input logic [32-1:0] cacheWord,
output logic [`XLEN-1:0] CacheData, output logic [sramlen-1:0] CacheData,
output logic [`PA_BITS-1:0] CacheAdr, output logic [`PA_BITS-1:0] CacheAdr,
output logic [`XLEN-1:0] CacheTag, output logic [`XLEN-1:0] CacheTag,
output logic CacheValid, output logic CacheValid,
@ -505,7 +534,7 @@ module copyShadow
CacheValid = valid; CacheValid = valid;
CacheDirty = dirty; CacheDirty = dirty;
CacheData = data; CacheData = data;
CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8)); CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(sramlen/8));
end end
end end
@ -531,4 +560,4 @@ task automatic updateProgramAddrLabelArray;
end end
$fclose(ProgramLabelMapFP); $fclose(ProgramLabelMapFP);
$fclose(ProgramAddrMapFP); $fclose(ProgramAddrMapFP);
endtask endtask

Some files were not shown because too many files have changed in this diff Show More