This commit is contained in:
Madeleine Masser-Frye 2022-08-02 16:25:25 +00:00
commit c866344fb6
185 changed files with 6856 additions and 5978 deletions

5
.gitignore vendored
View File

@ -7,12 +7,8 @@ __pycache__/
.vscode/
#External repos
addins
addins/riscv-arch-test/Makefile.include
addins/riscv-tests/target
addins/coremark/work/*
addins/embench/bd_speed/*
addins/embench/bd_size/*
benchmarks/embench/wally*.json
#vsim work files to ignore
@ -34,6 +30,7 @@ testsBP/*/*.a
tests/wally-riscv-arch-test/riscv-test-suite/*/I/*/*
tests/riscof/riscof_work/
tests/riscof/config32.ini
tests/riscof/config32e.ini
tests/riscof/config64.ini
tests/linux-testgen/linux-testvectors/*
!tests/linux-testgen/linux-testvectors/tvCopier.py

1
.gitmodules vendored
View File

@ -17,6 +17,7 @@
[submodule "addins/embench-iot"]
path = addins/embench-iot
url = https://github.com/embench/embench-iot
branch = embench-1.0-branch
[submodule "addins/coremark"]
path = addins/coremark
url = https://github.com/eembc/coremark

@ -1 +1 @@
Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
Subproject commit e5020bf7b345f8efb96c6c939de3162525b7f545

View File

@ -1,6 +1,8 @@
# Wally Coremark Makefile
# Daniel Torres & David Harris 28 July 2022
PORT_DIR = $(CURDIR)/riscv64-baremetal
cmbase=../../addins/coremark
# cmbase= ../riscv-coremark/coremark
work_dir= ../../benchmarks/coremark/work
XLEN ?=64
sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \
@ -8,27 +10,18 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \
$(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \
$(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c
ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32)
PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=rv$(XLEN)im -static -falign-functions=16 \
-mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -funroll-all-loops -finline-functions -falign-jumps=4 \
ARCH := rv$(XLEN)im
PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \
-mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -finline-functions -falign-jumps=4 \
-fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \
-funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \
-nostdlib -nostartfiles -ffreestanding -mstrict-align \
-DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1
# flags that cause build errors mcmodel=medlow
# -static -mcmodel=medlow -mtune=sifive-7-series \
# -O3 -falign-functions=16 -funroll-all-loops -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET
# -finline-functions -falign-jumps=4 \
# -nostdlib -nostartfiles -ffreestanding -mstrict-align \
# -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \
# -DPERFORMANCE_RUN=1
# "-march=rv$(XLEN)im -mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta "
-DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=10 -DXLEN=$(XLEN)
all: $(work_dir)/coremark.bare.riscv.elf.memfile
run:
(cd ../../pipelined/regression && (vsim -c -do "do wally-pipelined-batch.do rv$(XLEN)gc coremark" 2>&1 | tee $(work_dir)/coremark.sim.log))
(cd ../../pipelined/regression && (time vsim -c -do "do wally-pipelined-batch.do rv$(XLEN)gc coremark" 2>&1 | tee $(work_dir)/coremark.sim.log))
cd ../../benchmarks/coremark/
$(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv
@ -37,9 +30,7 @@ $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv
extractFunctionRadix.sh $<.elf.objdump
$(work_dir)/coremark.bare.riscv: $(sources) Makefile
# These flags were used by WD on CoreMark
make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="$(PORT_CFLAGS)"
# -fno-toplevel-reorder --param=max-inline-insns-size=128 " # adding this bit caused a compiler error
mkdir -p $(work_dir)
mv $(cmbase)/coremark.bare.riscv $(work_dir)
@ -49,14 +40,3 @@ clean:
rm -f $(work_dir)/*
# # PORT_CFLAGS = -g -march=$(XLEN)im -mabi=$(ABI) -static -mcmodel=medlow -mtune=sifive-3-series \
# # -O3 -falign-functions=16 -funroll-all-loops \
# # -finline-functions -falign-jumps=4 \
# # -nostdlib -nostartfiles -ffreestanding -mstrict-align \
# # -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \
# # -DPERFORMANCE_RUN=1
# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv$(XLEN)im -mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta "
# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta"
# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta "

View File

@ -114,7 +114,12 @@ void portable_free(void *p) {
#define read_csr(reg) ({ unsigned long __tmp; \
asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \
__tmp; })
#define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8)
// #if (XLEN==64)
// typedef unsigned long long ee_ptr_int;
// #else
// typedef unsigned long ee_ptr_int;
// #endif
#define GETMYTIME(_t) (_t = *(volatile ee_ptr_int*)0x0200BFF8)
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
// Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms)
#define TIMER_RES_DIVIDER 10000
@ -196,10 +201,13 @@ void stop_time(void) {
CORE_TICKS get_time(void) {
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
unsigned long instructions = minstretDiff();
ee_printf(" Called get_time\n");
ee_ptr_int cm100 = 1000000000 / elapsed; // coremark score * 100
ee_ptr_int cpi100 = elapsed*100/instructions; // CPI * 100
ee_printf(" WALLY CoreMark Results (from get_time)\n");
ee_printf(" Elapsed MTIME: %u\n", elapsed);
ee_printf(" Elapsed MINSTRET: %lu\n", instructions);
ee_printf(" CPI: %lu / %lu\n", elapsed, instructions);
ee_printf(" COREMARK/MHz Score: 10,000,000 / %lu = %d.%02d \n", elapsed, cm100/100, cm100%100);
ee_printf(" CPI: %lu / %lu = %d.%02d\n", elapsed, instructions, cpi100/100, cpi100%100);
return elapsed;
}
/* Function: time_in_secs

View File

@ -66,14 +66,19 @@ typedef size_t CORE_TICKS;
#elif HAS_TIME_H
#include <time.h>
typedef clock_t CORE_TICKS;
#else
// #elif (XLEN==32)
// #include <sys/types.h>
// typedef ee_u32 CORE_TICKS;
/* Configuration: size_t and clock_t
Note these need to match the size of the clock output and the xLen the processor supports
*/
#elif (XLEN==64)
typedef unsigned long int size_t;
typedef unsigned long int clock_t;
typedef clock_t CORE_TICKS;
#else
#include <sys/types.h>
#endif
typedef clock_t CORE_TICKS;
/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
Initialize these strings per platform
@ -89,7 +94,7 @@ typedef clock_t CORE_TICKS;
#define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
#endif
#ifndef MEM_LOCATION
#define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)"
#define MEM_LOCATION "Code and Data in external RAM"
#define MEM_LOCATION_UNSPEC 1
#endif
@ -105,11 +110,16 @@ typedef signed int ee_s32;
typedef double ee_f32;
typedef unsigned char ee_u8;
typedef unsigned int ee_u32;
typedef unsigned long long ee_ptr_int;
#if (XLEN==64)
typedef unsigned long long ee_ptr_int;
#else
typedef ee_u32 ee_ptr_int;
#endif
typedef size_t ee_size_t;
/* align an offset to point to a 32b value */
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3))
/* Configuration: SEED_METHOD
Defines method to get seed values that cannot be computed at compile time.

View File

@ -33,13 +33,13 @@ CC = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc
# Flag: CFLAGS
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
#PORT_CFLAGS = -O2 -static -std=gnu99
PORT_CFLAGS = -O2 -mcmodel=medany -static -fno-tree-loop-distribute-patterns -std=gnu99 -fno-common -nostartfiles -lm -lgcc -T $(PORT_DIR)/link.ld
PORT_CFLAGS = -mcmodel=medany -fno-tree-loop-distribute-patterns -fno-common -lm -lgcc -T $(PORT_DIR)/link.ld
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
#Flag: LFLAGS_END
# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
LFLAGS_END +=
LFLAGS_END += -static-libgcc -lgcc
# Flag: PORT_SRCS
# Port specific source files can be added here
PORT_SRCS = $(PORT_DIR)/core_portme.c $(PORT_DIR)/syscalls.c $(PORT_DIR)/crt.S

View File

@ -4,7 +4,8 @@
embench_dir = ../../addins/embench-iot
all: build sim size
all: build
run: size sim
allClean: clean all

View File

@ -130,5 +130,4 @@
`define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 1

View File

@ -141,5 +141,4 @@
`define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 1

View File

@ -135,5 +135,4 @@
`define TESTSBP 0
`define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0

View File

@ -133,5 +133,4 @@
`define TESTSBP 0
`define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0

View File

@ -135,6 +135,4 @@
`define TESTSBP 0
`define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0

View File

@ -133,6 +133,4 @@
`define TESTSBP 0
`define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0

View File

@ -136,6 +136,4 @@
`define TESTSBP 1
`define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0

View File

@ -32,14 +32,14 @@
`define DESIGN_COMPILER 0
// RV32 or RV64: XLEN = 32 or 64
`define XLEN 64
`define XLEN 32
// IEEE 754 compliance
`define IEEE754 0
// MISA RISC-V configuration per specification
// ZYXWVUTSRQPONMLKJIHGFEDCBA
`define MISA 32'b0000000000101000001000100100101
`define MISA 32'b0000000000101000001000100101101
`define ZICSR_SUPPORTED 1
`define ZIFENCEI_SUPPORTED 1
`define COUNTERS 32
@ -137,5 +137,4 @@
`define TESTSBP 0
`define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0

View File

@ -136,5 +136,4 @@
`define TESTSBP 0
`define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0

View File

@ -136,5 +136,4 @@
`define TESTSBP 0
`define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0

View File

@ -136,5 +136,4 @@
`define TESTSBP 0
`define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0

View File

@ -136,5 +136,4 @@
`define TESTSBP 0
`define BPRED_SIZE 10
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0

View File

@ -95,11 +95,27 @@
// largest length in IEU/FPU
`define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
`define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+9))
`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6))
// division constants
`define RADIX 32'h4
`define DIVCOPIES 32'h4
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
`define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3)
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3))
`define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN)
`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
// one interation is required for the integer bit for minimally redundent radix-4
`define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4))
`define DURLEN ($clog2(`FPDUR+1))
`define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
`define DIVb (`FPDUR*`LOGR*`DIVCOPIES)-1
`define USE_SRAM 0

View File

@ -1,4 +1,4 @@
all: archtests wallytests memfiles
all: riscoftests memfiles
# *** Build old tests/imperas-riscv-tests for now;
# Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test
# DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired
@ -19,18 +19,15 @@ allclean: clean all
clean:
make clean -C ../../tests/riscof
make clean -C ../../tests/wally-riscv-arch-test
# make clean -C ../../tests/wally-riscv-arch-test
# make allclean -C ../../tests/imperas-riscv-tests
archtests:
# Build riscv-arch-test 64 and 32-bit versions
make -C ../../tests/riscof/ --jobs
make -C ../../tests/riscof/ XLEN=32 --jobs
wallytests:
# Build wally-riscv-arch-test
make -C ../../tests/wally-riscv-arch-test/ --jobs
make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs
riscoftests:
# Builds riscv-arch-test 64 and 32-bit versions and builds wally-riscv-arch-test 64 and 32-bit versions
make -C ../../tests/riscof/
# make -C ../../tests/riscof/ XLEN=32
# make -C ../../tests/riscof/ XLEN=32 build_rv32e
# make -C ../../tests/riscof/ XLEN=64
memfiles:
make -f makefile-memfile wally-sim-files --jobs

View File

@ -11,11 +11,9 @@ add wave -noupdate -expand -group Testbench /testbench/interruptEpcVal
add wave -noupdate -expand -group Testbench /testbench/interruptTVal
add wave -noupdate -expand -group Testbench /testbench/interruptDesc
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/CSRWritePendingDEM
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM
add wave -noupdate -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/StoreStallD
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LSUStallM
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/DivBusyE
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/ExceptionM
@ -56,26 +54,25 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE
add wave -noupdate -group {Execution Stage} /testbench/ExpectedPCE
add wave -noupdate -group {Execution Stage} /testbench/MepcExpected
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE
add wave -noupdate -group {Execution Stage} /testbench/InstrEName
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE
add wave -noupdate -group {Execution Stage} /testbench/textE
add wave -noupdate -group {Execution Stage} -color {Cornflower Blue} /testbench/FunctionName/FunctionName
add wave -noupdate -group {Memory Stage} /testbench/checkInstrM
add wave -noupdate -group {Memory Stage} /testbench/dut/core/PCM
add wave -noupdate -group {Memory Stage} /testbench/ExpectedPCM
add wave -noupdate -group {Memory Stage} /testbench/dut/core/InstrM
add wave -noupdate -group {Memory Stage} /testbench/InstrMName
add wave -noupdate -group {Memory Stage} /testbench/textM
add wave -noupdate -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -group {WriteBack stage} /testbench/checkInstrW
add wave -noupdate -group {WriteBack stage} /testbench/InstrValidW
add wave -noupdate -group {WriteBack stage} /testbench/PCW
add wave -noupdate -group {WriteBack stage} /testbench/ExpectedPCW
add wave -noupdate -group {WriteBack stage} /testbench/InstrW
add wave -noupdate -group {WriteBack stage} /testbench/InstrWName
add wave -noupdate -group {WriteBack stage} /testbench/textW
add wave -noupdate -expand -group {Memory Stage} /testbench/checkInstrM
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM
add wave -noupdate -expand -group {Memory Stage} /testbench/ExpectedPCM
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM
add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName
add wave -noupdate -expand -group {Memory Stage} /testbench/textM
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -expand -group {WriteBack stage} /testbench/checkInstrW
add wave -noupdate -expand -group {WriteBack stage} /testbench/InstrValidW
add wave -noupdate -expand -group {WriteBack stage} /testbench/PCW
add wave -noupdate -expand -group {WriteBack stage} /testbench/ExpectedPCW
add wave -noupdate -expand -group {WriteBack stage} /testbench/InstrW
add wave -noupdate -expand -group {WriteBack stage} /testbench/InstrWName
add wave -noupdate -expand -group {WriteBack stage} /testbench/textW
add wave -noupdate -group Bpred -color Orange /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHR
add wave -noupdate -group Bpred -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF
add wave -noupdate -group Bpred -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]}
@ -196,203 +193,202 @@ add wave -noupdate -group ifu -expand -group icache -expand -group memory /testb
add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite
add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/ITLBMissF
add wave -noupdate -group ifu -expand -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress
add wave -noupdate -group lsu /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -group lsu /testbench/dut/core/lsu/LSUPAdrM
add wave -noupdate -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState
add wave -noupdate -group lsu /testbench/dut/core/lsu/SelHPTW
add wave -noupdate -group lsu /testbench/dut/core/lsu/InterlockStall
add wave -noupdate -group lsu /testbench/dut/core/lsu/LSUStallM
add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM
add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataM
add wave -noupdate -group lsu /testbench/dut/core/lsu/WriteDataM
add wave -noupdate -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr
add wave -noupdate -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/BusStall
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA
add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr
add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD}
add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay}
add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData[69]}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/SetValid
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM
add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_D
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_A
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_U
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_X
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_W
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_R
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_V
add wave -noupdate -group lsu -group dtlb -expand -group Status -color Maroon /testbench/dut/core/lsu/dmmu/dmmu/DAPageFault
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/ImproperPrivilege
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/UpperBitsUnequalPageFault
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Misaligned
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/dtlb/InvalidRead
add wave -noupdate -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/dtlb/InvalidWrite
add wave -noupdate -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault
add wave -noupdate -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM
add wave -noupdate -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr
add wave -noupdate -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE
add wave -noupdate -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal
add wave -noupdate -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM
add wave -noupdate -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/ITLBMissF
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/ITLBMissOrDAFaultF
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/LSUPAdrM
add wave -noupdate -expand -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/SelHPTW
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/InterlockStall
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/LSUStallM
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataM
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/WriteDataM
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr
add wave -noupdate -expand -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState
add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/BusStall
add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead
add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite
add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr
add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck
add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA
add wave -noupdate -expand -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA
add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr
add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD}
add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay}
add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/SetValid
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag
add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay
add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay
add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM
add wave -noupdate -expand -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_D
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_A
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_U
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_X
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_W
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_R
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/PTE_V
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status -color Maroon /testbench/dut/core/lsu/dmmu/dmmu/DAPageFault
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/ImproperPrivilege
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/UpperBitsUnequalPageFault
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Misaligned
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/dtlb/InvalidRead
add wave -noupdate -expand -group lsu -group dtlb -expand -group Status /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/dtlb/InvalidWrite
add wave -noupdate -expand -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault
add wave -noupdate -expand -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM
add wave -noupdate -expand -group lsu -group dtlb -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr
add wave -noupdate -expand -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE
add wave -noupdate -expand -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal
add wave -noupdate -expand -group lsu -group dtlb -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM
add wave -noupdate -expand -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState
add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF
add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE
add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr
add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE
add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/ITLBMissF
add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/ITLBMissOrDAFaultF
add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF
add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM
add wave -noupdate -group AHB -color Gold /testbench/dut/core/ebu/BusState
add wave -noupdate -group AHB /testbench/dut/core/ebu/NextBusState
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/AtomicMaskedM
@ -424,55 +420,14 @@ add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite
add wave -noupdate -group itlb /testbench/dut/core/ifu/ITLBMissF
add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress
add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/immu/PMAInstrAccessFaultF
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HCLK
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HSELPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HADDR
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWRITE
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADY
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HTRANS
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWDATA
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HRESPPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADYPLIC
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HCLK
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HSELGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HADDR
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWDATA
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWRITE
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADY
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HTRANS
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HRESPGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADYGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsIn
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsOut
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsEn
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOIntr
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HCLK
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HSELCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HADDR
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWRITE
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWDATA
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADY
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HTRANS
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HRESPCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADYCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/TimerIntM
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/SwIntM
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HCLK
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HRESETn
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HSELUART
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HADDR
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HWRITE
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HWDATA
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HREADUART
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HRESPUART
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HREADYUART
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/SIN
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DSRb
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DCDb
@ -486,11 +441,6 @@ add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/INTR
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/TXRDYb
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/RXRDYb
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HCLK
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HSELUART
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HADDR
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWRITE
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWDATA
add wave -noupdate -group {debug trace} -expand -group mem -color Yellow /testbench/dut/core/FlushW
add wave -noupdate -group {debug trace} -expand -group mem /testbench/checkInstrM
add wave -noupdate -group {debug trace} -expand -group mem /testbench/dut/core/PCM
@ -517,7 +467,7 @@ add wave -noupdate -group {Performance Counters} -expand -group ICACHE -label {I
add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]}
add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]}
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 4} {2240751 ns} 0}
WaveRestoreCursors {{Cursor 4} {87475 ns} 0}
quietly wave cursor active 1
configure wave -namecolwidth 250
configure wave -valuecolwidth 314
@ -533,4 +483,4 @@ configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {2240730 ns} {2240764 ns}
WaveRestoreZoom {87386 ns} {87672 ns}

View File

@ -64,7 +64,7 @@ tc = TestCase(
grepstr="400100000 instructions")
configs.append(tc)
tests64gc = ["arch64i", "arch64priv", "arch64c", "arch64m", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64m", "wally64a", "imperas64c", "wally64periph", "wally64priv"] # , "imperas64mmu" "wally64i", #, "testsBP64"]
tests64gc = ["arch64i", "arch64priv", "arch64c", "arch64m", "arch64f", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64m", "wally64a", "imperas64c", "wally64periph", "wally64priv"] # , "imperas64mmu" "wally64i", #, "testsBP64"]
for test in tests64gc:
tc = TestCase(
name=test,
@ -73,7 +73,7 @@ for test in tests64gc:
grepstr="All tests ran without failures")
configs.append(tc)
tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv", "wally32periph"] #, "imperas32mmu""wally32i",
tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "arch32d", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv", "wally32periph"] #, "imperas32mmu""wally32i",
for test in tests32gc:
tc = TestCase(
name=test,

View File

@ -6,7 +6,7 @@
# fma - test fma
# sub - test subtraction
# div - test division
# sqrt - test square ro
# sqrt - test square root
# all - test everything
vsim -do "do testfloat.do rv64fp mul"
vsim -do "do testfloat.do rv64fp $1"

View File

@ -1,7 +1,9 @@
# cvtint - test integer conversion unit (fcvtint)
# cvtfp - test floating-point conversion unit (fcvtfp)
# cmp - test comparison unit's LT, LE, EQ opperations (fcmp)
# add - test addition
# fma - test fma
# sub - test subtraction
# div - test division
# sqrt - test square root

View File

@ -1,2 +1,2 @@
vsim -do "do wally-pipelined.do rv32gc arch32i"
vsim -do "do wally-pipelined.do rv32gc wally32periph"

View File

@ -1 +1 @@
vsim -c -do "do wally-pipelined-batch.do rv64gc imperas64f"
vsim -c -do "do wally-pipelined-batch.do rv64gc arch64d"

View File

@ -38,7 +38,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
vsim -lib work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3829 -fatal 7
#-- Run the Simulation
run -all
#run -all
add log -recursive /*
do linux-wave.do
run -all

View File

@ -9,22 +9,32 @@ add wave -noupdate /testbenchfp/Res
add wave -noupdate /testbenchfp/Ans
add wave -noupdate /testbenchfp/DivStart
add wave -noupdate /testbenchfp/DivBusy
add wave -noupdate /testbenchfp/srtfsm/state
add wave -noupdate /testbenchfp/divsqrt/srtfsm/state
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WC
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WS
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WCA
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WSA
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/Q
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QM
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QNext
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QMNext
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/*
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/*
# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/*
# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/*
add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/genblk1/qsel4/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/expcalc/*
add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/*
add wave -group {Testbench} -noupdate /testbenchfp/*
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*

View File

@ -5,38 +5,38 @@ add wave -noupdate /testbench/reset
add wave -noupdate /testbench/reset_ext
add wave -noupdate /testbench/memfilename
add wave -noupdate /testbench/dut/core/SATP_REGW
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/BPPredWrongE
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/CSRWriteFencePendingDEM
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/RetM
add wave -noupdate -group HDU -group hazards -color Pink /testbench/dut/core/hzu/TrapM
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/LoadStallD
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/StoreStallD
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/LSUStallM
add wave -noupdate -group HDU -group hazards /testbench/dut/core/MDUStallD
add wave -noupdate -group HDU -group hazards /testbench/dut/core/hzu/DivBusyE
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InstrMisalignedFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InstrAccessFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/IllegalInstrFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/BreakpointFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/LoadMisalignedFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/StoreAmoMisalignedFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/LoadAccessFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/StoreAmoAccessFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/EcallFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InstrPageFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/LoadPageFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/StoreAmoPageFaultM
add wave -noupdate -group HDU -expand -group traps /testbench/dut/core/priv/priv/trap/InterruptM
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/CSRWriteFencePendingDEM
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM
add wave -noupdate -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/ifu/IFUStallF
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LSUStallM
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/MDUStallD
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/DivBusyE
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrMisalignedFaultM
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrAccessFaultM
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/IllegalInstrFaultM
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/BreakpointFaultM
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadMisalignedFaultM
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoMisalignedFaultM
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadAccessFaultM
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoAccessFaultM
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/EcallFaultM
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InstrPageFaultM
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/LoadPageFaultM
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoPageFaultM
add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InterruptM
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/hzu/FlushF
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM
add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallF
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallD
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallE
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallM
add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallW
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallF
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallD
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallE
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallM
add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/core/StallW
add wave -noupdate -group {instruction pipeline} /testbench/InstrFName
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/FinalInstrRawF
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD
@ -55,10 +55,10 @@ add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE
add wave -noupdate -group {Execution Stage} /testbench/InstrEName
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE
add wave -noupdate -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName
add wave -noupdate -group {Memory Stage} /testbench/dut/core/PCM
add wave -noupdate -group {Memory Stage} /testbench/dut/core/InstrM
add wave -noupdate -group {Memory Stage} /testbench/InstrMName
add wave -noupdate -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM
add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -group {WriteBack stage} /testbench/PCW
add wave -noupdate -group {WriteBack stage} /testbench/InstrW
add wave -noupdate -group {WriteBack stage} /testbench/InstrWName
@ -190,195 +190,179 @@ add wave -noupdate -group AHB /testbench/dut/core/ebu/HMASTLOCK
add wave -noupdate -group AHB /testbench/dut/core/ebu/HADDRD
add wave -noupdate -group AHB /testbench/dut/core/ebu/HSIZED
add wave -noupdate -group AHB /testbench/dut/core/ebu/HWRITED
add wave -noupdate -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState
add wave -noupdate -group lsu /testbench/dut/core/lsu/SelHPTW
add wave -noupdate -group lsu /testbench/dut/core/lsu/InterlockStall
add wave -noupdate -group lsu /testbench/dut/core/lsu/LSUStallM
add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM
add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataM
add wave -noupdate -group lsu /testbench/dut/core/lsu/WriteDataM
add wave -noupdate -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr
add wave -noupdate -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/BusStall
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusRead
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusWrite
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAdr
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusAck
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHRDATA
add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUBusHWDATA
add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr
add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD}
add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay}
add wave -noupdate -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag
add wave -noupdate -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM
add wave -noupdate -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM
add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress
add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault
add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM
add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr
add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE
add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal
add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM
add wave -noupdate -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE
add wave -noupdate -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF
add wave -noupdate -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HCLK
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HSELPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HADDR
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWRITE
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADY
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HTRANS
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWDATA
add wave -noupdate -expand -group lsu -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/interlockfsm/InterlockCurrState
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/SelHPTW
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/InterlockStall
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/LSUStallM
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataM
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/WriteDataM
add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/bus/busdp/SelUncachedAdr
add wave -noupdate -expand -group lsu -expand -group bus -color Gold /testbench/dut/core/lsu/bus/busdp/busfsm/BusCurrState
add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/BusStall
add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusRead
add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusWrite
add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusAdr
add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusAck
add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusHRDATA
add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUBusHWDATA
add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrM
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/RAdr
add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/RAdrD}
add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay}
add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group flush /testbench/dut/core/lsu/CacheableM
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/RAdr
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Valid}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Valid}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Valid}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Valid}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag}
add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimTag
add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay
add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirtyWay
add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimDirty
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/RW
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextAdr
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/PAdr
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/Atomic
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/FlushCache
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/ReadDataWordM
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/core/lsu/FinalWriteDataM
add wave -noupdate -expand -group lsu -expand -group dcache -group status /testbench/dut/core/lsu/bus/dcache/dcache/HitWay
add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheFetchLine
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteLine
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusWriteData
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck
add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord
add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress
add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault
add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM
add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM
add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr
add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE
add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal
add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/AtomicAllowed
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM
add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PhysicalAddress
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/ReadAccessM
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/WriteAccessM
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPADDR_ARRAY_REGW
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/pmpchecker/PMPCFG_ARRAY_REGW
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM
add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM
add wave -noupdate -expand -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/WalkerState
add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PCF
add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWReadPTE
add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/HPTWAdr
add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/PTE
add wave -noupdate -expand -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/ITLBWriteF
add wave -noupdate -expand -group lsu -group ptwalker -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/DTLBWriteM
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HRESPPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADYPLIC
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intClaim
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intEn
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/intInProgress
@ -392,41 +376,12 @@ add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/pl
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/max_priority_with_irqs
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/irqs_at_max_priority
add wave -noupdate -group plic -expand -group internals /testbench/dut/uncore/plic/plic/threshMask
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HCLK
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HSELGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HADDR
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWDATA
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWRITE
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADY
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HTRANS
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HRESPGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADYGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsIn
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsOut
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsEn
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOIntr
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HCLK
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HSELCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HADDR
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWRITE
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWDATA
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADY
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HTRANS
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HRESPCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADYCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HCLK
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HRESETn
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HSELUART
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HADDR
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HWRITE
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HWDATA
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HREADUART
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HRESPUART
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HREADYUART
add wave -noupdate -group uart -expand -group Registers -expand /testbench/dut/uncore/uart/uart/u/LSR
add wave -noupdate -group uart -expand -group Registers /testbench/dut/uncore/uart/uart/u/MCR
add wave -noupdate -group uart -expand -group Registers /testbench/dut/uncore/uart/uart/u/MSR
@ -489,6 +444,57 @@ add wave -noupdate -group ifu -expand -group icache -expand -group {fsm out and
add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusAdr
add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CacheBusAck
add wave -noupdate -group ifu -expand -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/icache/CacheBusWriteData
add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/VictimWay
add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SetDirtyWay
add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/SetValidWay
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way3 -group way3word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way2 -group way2word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[2]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way1 -group way1word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[1]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word0 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way0 -expand -group way0word1 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word2 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/ByteMask}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/ReadData}
add wave -noupdate -group ifu -expand -group icache -group way0 -group way0word3 {/testbench/dut/core/ifu/bus/icache/icache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/TLBWrite
add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/ITLBMissF
add wave -noupdate -group ifu -group itlb /testbench/dut/core/ifu/immu/immu/PhysicalAddress
@ -506,11 +512,9 @@ add wave -noupdate -group {Performance Counters} -expand -group ICACHE -label {I
add wave -noupdate -group {Performance Counters} -expand -group ICACHE -label {ICACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]}
add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]}
add wave -noupdate -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]}
add wave -noupdate /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/NextPTE
add wave -noupdate /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/lsuvirtmem/hptw/UpdatePTE
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 5} {0 ns} 0}
quietly wave cursor active 1
WaveRestoreCursors {{Cursor 2} {989221 ns} 1} {{Cursor 3} {999815 ns} 1} {{Cursor 4} {311315 ns} 0}
quietly wave cursor active 3
configure wave -namecolwidth 250
configure wave -valuecolwidth 314
configure wave -justifyvalue left
@ -525,4 +529,4 @@ configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {0 ns} {208 ns}
WaveRestoreZoom {311178 ns} {311464 ns}

View File

@ -30,7 +30,7 @@
`include "wally-config.vh"
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTERVAL, DCACHE) (
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) (
input logic clk,
input logic reset,
// cpu side
@ -38,14 +38,11 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
input logic [1:0] RW,
input logic [1:0] Atomic,
input logic FlushCache,
input logic InvalidateCacheM,
input logic InvalidateCache,
input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits.
input logic [`PA_BITS-1:0] PAdr, // physical address
input logic [(`XLEN-1)/8:0] ByteMask,
input logic [`XLEN-1:0] FinalWriteData,
input logic [`FLEN-1:0] FWriteDataM,
input logic FLoad2,
input logic FpLoadStoreM,
input logic [(WORDLEN-1)/8:0] ByteMask,
input logic [WORDLEN-1:0] FinalWriteData,
output logic CacheCommitted,
output logic CacheStall,
// to performance counters to cpu
@ -60,7 +57,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
output logic CacheFetchLine,
output logic CacheWriteLine,
input logic CacheBusAck,
input logic [LOGWPL-1:0] WordCount,
input logic [LOGBWPL-1:0] WordCount,
input logic LSUBusWriteCrit,
output logic [`PA_BITS-1:0] CacheBusAdr,
input logic [LINELEN-1:0] CacheBusWriteData,
@ -72,7 +69,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
localparam SETLEN = $clog2(NUMLINES);
localparam SETTOP = SETLEN+OFFSETLEN;
localparam TAGLEN = `PA_BITS - SETTOP;
localparam WORDSPERLINE = LINELEN/`XLEN;
localparam WORDSPERLINE = LINELEN/WORDLEN;
localparam FlushAdrThreshold = NUMLINES - 1;
logic SelAdr;
@ -81,7 +78,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
logic ClearValid;
logic ClearDirty;
logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0];
logic [NUMWAYS-1:0] HitWay, HitWaySaved, HitWayFinal;
logic [NUMWAYS-1:0] HitWay;
logic CacheHit;
logic SetDirty;
logic SetValid;
@ -107,9 +104,17 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
logic [NUMWAYS-1:0] SelectedWay;
logic [NUMWAYS-1:0] SetValidWay, ClearValidWay, SetDirtyWay, ClearDirtyWay;
logic [1:0] CacheRW, CacheAtomic;
logic [LINELEN-1:0] ReadDataLine;
logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache;
logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;
logic save, restore;
logic SelBusBuffer;
logic SRAMEnable;
localparam LOGLLENBYTES = $clog2(WORDLEN/8);
localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN;
localparam LOGCWPL = $clog2(CACHEWORDSPERLINE);
logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded;
logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, LineByteMux;
genvar index;
/////////////////////////////////////////////////////////////////////////////////////////////
// Read Path
@ -123,51 +128,56 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
// Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN)
CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2,
CacheWays[NUMWAYS-1:0](.clk, .reset, .ce(SRAMEnable), .RAdr, .PAdr, .CacheWriteData, .LineByteMask,
.SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay,
.Invalidate(InvalidateCacheM));
.Invalidate(InvalidateCache));
if(NUMWAYS > 1) begin:vict
cachereplacementpolicy #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cachereplacementpolicy(
.clk, .reset, .HitWay(HitWayFinal), .VictimWay, .RAdr, .LRUWriteEn);
.clk, .reset, .HitWay, .VictimWay, .RAdr, .LRUWriteEn);
end else assign VictimWay = 1'b1; // one hot.
assign CacheHit = | HitWay;
assign VictimDirty = | VictimDirtyWay;
// ReadDataLineWay is a 2d array of cache line len by number of ways.
// Need to OR together each way in a bitwise manner.
// Final part of the AO Mux. First is the AND in the cacheway.
or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLine));
or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineCache));
or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag));
// Because of the sram clocked read when the ieu is stalled the read data maybe lost.
// There are two ways to resolve. 1. We can replay the read of the sram or we can save
// the data. Replay is eaiser but creates a longer critical path.
// save/restore only wayhit and readdata.
if(!`REPLAY) begin
flopenr #(NUMWAYS) wayhitsavereg(clk, save, reset, HitWay, HitWaySaved);
mux2 #(NUMWAYS) saverestoremux(HitWay, HitWaySaved, restore, HitWayFinal);
end else assign HitWayFinal = HitWay;
// like to fix this.
if(DCACHE)
mux2 #(LOGWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]),
mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]),
.d1(WordCount), .s(LSUBusWriteCrit),
.y(WordOffsetAddr));
else assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)];
subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL, LOGWPL) subcachelineread(
.clk, .reset, .PAdr(WordOffsetAddr), .save, .restore,
mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, CacheBusWriteData, SelBusBuffer, ReadDataLine);
subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL) subcachelineread(
.PAdr(WordOffsetAddr),
.ReadDataLine, .ReadDataWord);
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
/////////////////////////////////////////////////////////////////////////////////////////////
if (`LLEN>`XLEN)
mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
.d1({WORDSPERLINE/2{FWriteDataM}}), .d2(CacheBusWriteData), .s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData));
else
mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
.d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData));
logic [LINELEN-1:0] FinalWriteDataDup;
assign FinalWriteDataDup = {WORDSPERLINE{FinalWriteData}};
onehotdecoder #(LOGCWPL) adrdec(
.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded));
for(index = 0; index < 2**LOGCWPL; index++) begin
assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0;
end
assign LineByteMux = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1.
assign LineByteMask = ~SetValid & ~SetDirty ? '0 : ~SetValid & SetDirty ? DemuxedByteMask : '1; // if store hit only enable the word and subword bytes, else write all bytes.
for(index = 0; index < LINELEN/8; index++) begin
mux2 #(8) WriteDataMux(.d0(FinalWriteDataDup[8*index+7:8*index]),
.d1(CacheBusWriteData[8*index+7:8*index]), .s(LineByteMux[index]), .y(CacheWriteData[8*index+7:8*index]));
end
//mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
// .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData));
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d2({VictimTag, FlushAdr, {OFFSETLEN{1'b0}}}),
@ -191,7 +201,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Path: Write Enables
/////////////////////////////////////////////////////////////////////////////////////////////
mux3 #(NUMWAYS) selectwaymux(HitWayFinal, VictimWay, FlushWay,
mux3 #(NUMWAYS) selectwaymux(HitWay, VictimWay, FlushWay,
{SelFlush, SetValid}, SelectedWay);
assign SetValidWay = SetValid ? SelectedWay : '0;
assign ClearValidWay = ClearValid ? SelectedWay : '0;
@ -210,7 +220,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER
.ClearValid, .ClearDirty, .SetDirty,
.SetValid, .SelEvict, .SelFlush,
.FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst,
.FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache,
.save, .restore,
.FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelBusBuffer,
.InvalidateCache,
.SRAMEnable,
.LRUWriteEn);
endmodule

View File

@ -32,50 +32,51 @@
module cachefsm
(input logic clk,
input logic reset,
input logic reset,
// inputs from IEU
input logic [1:0] CacheRW,
input logic [1:0] CacheAtomic,
input logic FlushCache,
input logic FlushCache,
input logic InvalidateCache,
// hazard inputs
input logic CPUBusy,
input logic CPUBusy,
// interlock fsm
input logic IgnoreRequestTLB,
input logic IgnoreRequestTrapM,
input logic IgnoreRequestTLB,
input logic IgnoreRequestTrapM,
input logic TrapM,
// Bus inputs
input logic CacheBusAck,
input logic CacheBusAck,
// dcache internals
input logic CacheHit,
input logic VictimDirty,
input logic FlushAdrFlag,
input logic FlushWayFlag,
input logic CacheHit,
input logic VictimDirty,
input logic FlushAdrFlag,
input logic FlushWayFlag,
// hazard outputs
output logic CacheStall,
output logic CacheStall,
// counter outputs
output logic CacheMiss,
output logic CacheAccess,
output logic CacheMiss,
output logic CacheAccess,
// Bus outputs
output logic CacheCommitted,
output logic CacheWriteLine,
output logic CacheFetchLine,
output logic CacheCommitted,
output logic CacheWriteLine,
output logic CacheFetchLine,
// dcache internals
output logic SelAdr,
output logic ClearValid,
output logic ClearDirty,
output logic SetDirty,
output logic SetValid,
output logic SelEvict,
output logic LRUWriteEn,
output logic SelFlush,
output logic FlushAdrCntEn,
output logic FlushWayCntEn,
output logic FlushAdrCntRst,
output logic FlushWayCntRst,
output logic save,
output logic restore);
output logic SelAdr,
output logic ClearValid,
output logic ClearDirty,
output logic SetDirty,
output logic SetValid,
output logic SelEvict,
output logic LRUWriteEn,
output logic SelFlush,
output logic FlushAdrCntEn,
output logic FlushWayCntEn,
output logic FlushAdrCntRst,
output logic FlushWayCntRst,
output logic SelBusBuffer,
output logic SRAMEnable);
logic resetDelay;
logic AMO;
@ -87,20 +88,13 @@ module cachefsm
typedef enum logic [3:0] {STATE_READY, // hit states
// miss states
STATE_MISS_FETCH_WDV,
STATE_MISS_FETCH_DONE,
STATE_MISS_EVICT_DIRTY,
STATE_MISS_WRITE_CACHE_LINE,
STATE_MISS_READ_WORD,
STATE_MISS_READ_WORD_DELAY,
STATE_MISS_WRITE_WORD,
// cpu stalled replay/restore state
STATE_CPU_BUSY,
// flush cache
STATE_FLUSH,
STATE_FLUSH_CHECK,
STATE_FLUSH_INCR,
STATE_FLUSH_WRITE_BACK,
STATE_FLUSH_CLEAR_DIRTY} statetype;
STATE_FLUSH_WRITE_BACK} statetype;
(* mark_debug = "true" *) statetype CurrState, NextState;
logic IgnoreRequest;
@ -115,7 +109,7 @@ module cachefsm
assign DoRead = CacheRW[1] & ~IgnoreRequest;
assign DoWrite = CacheRW[0] & ~IgnoreRequest;
assign DoAnyMiss = (DoAMO | DoRead | DoWrite) & ~CacheHit;
assign DoAnyMiss = (DoAMO | DoRead | DoWrite) & ~CacheHit & ~InvalidateCache;
assign DoAnyUpdateHit = (DoAMO | DoWrite) & CacheHit;
assign DoAnyHit = DoAnyUpdateHit | (DoRead & CacheHit);
assign FlushFlag = FlushAdrFlag & FlushWayFlag;
@ -136,38 +130,31 @@ module cachefsm
always_comb begin
NextState = STATE_READY;
case (CurrState)
STATE_READY: if(IgnoreRequest) NextState = STATE_READY;
else if(DoFlush) NextState = STATE_FLUSH;
else if(DoAnyHit & CPUBusy) NextState = STATE_CPU_BUSY;
else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; // change
else NextState = STATE_READY;
STATE_MISS_FETCH_WDV: if(CacheBusAck) NextState = STATE_MISS_FETCH_DONE;
else NextState = STATE_MISS_FETCH_WDV;
STATE_MISS_FETCH_DONE: if(VictimDirty) NextState = STATE_MISS_EVICT_DIRTY;
else NextState = STATE_MISS_WRITE_CACHE_LINE;
STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_MISS_READ_WORD;
STATE_MISS_READ_WORD: if(CacheRW[0] & ~AMO) NextState = STATE_MISS_WRITE_WORD;
else NextState = STATE_MISS_READ_WORD_DELAY;
STATE_MISS_READ_WORD_DELAY: if(CPUBusy) NextState = STATE_CPU_BUSY;
else NextState = STATE_READY;
STATE_MISS_WRITE_WORD: if(CPUBusy) NextState = STATE_CPU_BUSY;
else NextState = STATE_READY;
STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE;
else NextState = STATE_MISS_EVICT_DIRTY;
STATE_CPU_BUSY: if(CPUBusy) NextState = STATE_CPU_BUSY;
else NextState = STATE_READY;
STATE_FLUSH: NextState = STATE_FLUSH_CHECK;
STATE_FLUSH_CHECK: if(VictimDirty) NextState = STATE_FLUSH_WRITE_BACK;
else if(FlushFlag) NextState = STATE_READY;
else if(FlushWayFlag) NextState = STATE_FLUSH_INCR;
else NextState = STATE_FLUSH_CHECK;
STATE_FLUSH_INCR: NextState = STATE_FLUSH_CHECK;
STATE_FLUSH_WRITE_BACK: if(CacheBusAck) NextState = STATE_FLUSH_CLEAR_DIRTY;
else NextState = STATE_FLUSH_WRITE_BACK;
STATE_FLUSH_CLEAR_DIRTY: if(FlushFlag) NextState = STATE_READY;
else if(FlushWayFlag) NextState = STATE_FLUSH_INCR;
else NextState = STATE_FLUSH_CHECK;
default: NextState = STATE_READY;
STATE_READY: if(IgnoreRequest | InvalidateCache) NextState = STATE_READY;
else if(DoFlush) NextState = STATE_FLUSH;
// Delayed LRU update. Cannot check if victim line is dirty on this cycle.
// To optimize do the fetch first, then eviction if necessary.
else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV;
else NextState = STATE_READY;
STATE_MISS_FETCH_WDV: if(CacheBusAck & ~VictimDirty) NextState = STATE_MISS_WRITE_CACHE_LINE;
else if(CacheBusAck & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY;
else NextState = STATE_MISS_FETCH_WDV;
STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY;
STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE;
else NextState = STATE_MISS_EVICT_DIRTY;
// eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack.
STATE_FLUSH: NextState = STATE_FLUSH_CHECK;
STATE_FLUSH_CHECK: if(VictimDirty) NextState = STATE_FLUSH_WRITE_BACK;
else if(FlushFlag) NextState = STATE_READY;
else if(FlushWayFlag) NextState = STATE_FLUSH_INCR;
else NextState = STATE_FLUSH_CHECK;
STATE_FLUSH_INCR: NextState = STATE_FLUSH_CHECK;
STATE_FLUSH_WRITE_BACK: if(CacheBusAck) begin
if(FlushFlag) NextState = STATE_READY;
else if(FlushWayFlag) NextState = STATE_FLUSH_INCR;
else NextState = STATE_FLUSH_CHECK;
end else NextState = STATE_FLUSH_WRITE_BACK;
default: NextState = STATE_READY;
endcase
end
@ -175,63 +162,48 @@ module cachefsm
assign CacheCommitted = CurrState != STATE_READY;
assign CacheStall = (CurrState == STATE_READY & (DoFlush | DoAnyMiss)) |
(CurrState == STATE_MISS_FETCH_WDV) |
(CurrState == STATE_MISS_FETCH_DONE) |
(CurrState == STATE_MISS_EVICT_DIRTY) |
(CurrState == STATE_MISS_WRITE_CACHE_LINE) |
(CurrState == STATE_MISS_READ_WORD) |
(CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write.
(CurrState == STATE_FLUSH) |
(CurrState == STATE_FLUSH_CHECK & ~(FlushFlag)) |
(CurrState == STATE_FLUSH_INCR) |
(CurrState == STATE_FLUSH_WRITE_BACK) |
(CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushFlag));
(CurrState == STATE_FLUSH_WRITE_BACK & ~(FlushFlag) & CacheBusAck);
// write enables internal to cache
assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE;
assign SetDirty = (CurrState == STATE_READY & DoAnyUpdateHit) |
(CurrState == STATE_MISS_READ_WORD_DELAY & AMO) |
(CurrState == STATE_MISS_WRITE_WORD);
(CurrState == STATE_MISS_WRITE_CACHE_LINE & (AMO | CacheRW[0]));
assign ClearValid = '0;
assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE) |
(CurrState == STATE_FLUSH_CLEAR_DIRTY);
assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) |
(CurrState == STATE_FLUSH_WRITE_BACK & CacheBusAck);
assign LRUWriteEn = (CurrState == STATE_READY & DoAnyHit) |
(CurrState == STATE_MISS_READ_WORD_DELAY) |
(CurrState == STATE_MISS_WRITE_WORD);
(CurrState == STATE_MISS_WRITE_CACHE_LINE);
// Flush and eviction controls
assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY);
assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY) |
(CurrState == STATE_MISS_FETCH_WDV & CacheBusAck & VictimDirty);
assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) |
(CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) |
(CurrState == STATE_FLUSH_CLEAR_DIRTY);
(CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK);
assign FlushWayAndNotAdrFlag = FlushWayFlag & ~FlushAdrFlag;
assign FlushAdrCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & FlushWayAndNotAdrFlag) |
(CurrState == STATE_FLUSH_CLEAR_DIRTY & FlushWayAndNotAdrFlag);
(CurrState == STATE_FLUSH_WRITE_BACK & FlushWayAndNotAdrFlag & CacheBusAck);
assign FlushWayCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & ~(FlushFlag)) |
(CurrState == STATE_FLUSH_CLEAR_DIRTY & ~FlushFlag);
(CurrState == STATE_FLUSH_WRITE_BACK & ~FlushFlag & CacheBusAck);
assign FlushAdrCntRst = (CurrState == STATE_READY);
assign FlushWayCntRst = (CurrState == STATE_READY) | (CurrState == STATE_FLUSH_INCR);
// Bus interface controls
assign CacheFetchLine = (CurrState == STATE_READY & DoAnyMiss);
assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_DONE & VictimDirty) |
assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_WDV & CacheBusAck & VictimDirty) |
(CurrState == STATE_FLUSH_CHECK & VictimDirty);
// handle cpu stall.
assign restore = ((CurrState == STATE_CPU_BUSY)) & ~`REPLAY;
assign save = ((CurrState == STATE_READY & DoAnyHit & CPUBusy) |
(CurrState == STATE_MISS_READ_WORD_DELAY & (AMO | CacheRW[1]) & CPUBusy) |
(CurrState == STATE_MISS_WRITE_WORD & DoWrite & CPUBusy)) & ~`REPLAY;
// **** can this be simplified?
assign SelAdr = (CurrState == STATE_READY & (IgnoreRequestTLB & ~TrapM)) | // Ignore Request is needed on TLB miss.
// use the raw requests as we don't want IgnoreRequestTrapM in the critical path
(CurrState == STATE_READY & ((AMO | CacheRW[0]) & CacheHit)) | // changes if store delay hazard removed
(CurrState == STATE_READY & (CacheRW[1] & CacheHit) & (CPUBusy & `REPLAY)) |
(CurrState == STATE_READY & (DoAnyMiss)) |
(CurrState == STATE_MISS_FETCH_WDV) |
(CurrState == STATE_MISS_FETCH_DONE) |
(CurrState == STATE_MISS_EVICT_DIRTY) |
(CurrState == STATE_MISS_WRITE_CACHE_LINE) |
(CurrState == STATE_MISS_READ_WORD) |
(CurrState == STATE_MISS_READ_WORD_DELAY & (AMO | (CPUBusy & `REPLAY))) |
(CurrState == STATE_MISS_WRITE_WORD) |
(CurrState == STATE_CPU_BUSY & (CPUBusy & `REPLAY)) |
resetDelay;
assign SelBusBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE;
assign SRAMEnable = (CurrState == STATE_READY & ~CPUBusy | CacheStall) | (CurrState != STATE_READY) | reset;
endmodule // cachefsm

View File

@ -33,12 +33,12 @@
module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
parameter OFFSETLEN = 5, parameter INDEXLEN = 9, parameter DIRTY_BITS = 1) (
input logic clk,
input logic ce,
input logic reset,
input logic [$clog2(NUMLINES)-1:0] RAdr,
input logic [`PA_BITS-1:0] PAdr,
input logic [LINELEN-1:0] CacheWriteData,
input logic FLoad2,
input logic SetValidWay,
input logic ClearValidWay,
input logic SetDirtyWay,
@ -48,16 +48,19 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
input logic VictimWay,
input logic FlushWay,
input logic Invalidate,
input logic [(`XLEN-1)/8:0] ByteMask,
// input logic [(`XLEN-1)/8:0] ByteMask,
input logic [LINELEN/8-1:0] LineByteMask,
output logic [LINELEN-1:0] ReadDataLineWay,
output logic HitWay,
output logic VictimDirtyWay,
output logic [TAGLEN-1:0] VictimTagWay);
localparam WORDSPERLINE = LINELEN/`XLEN;
localparam integer WORDSPERLINE = LINELEN/`XLEN;
localparam integer BYTESPERLINE = LINELEN/8;
localparam LOGWPL = $clog2(WORDSPERLINE);
localparam LOGXLENBYTES = $clog2(`XLEN/8);
localparam integer BYTESPERWORD = `XLEN/8;
logic [NUMLINES-1:0] ValidBits;
logic [NUMLINES-1:0] DirtyBits;
@ -69,29 +72,23 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
logic SelTag;
logic [$clog2(NUMLINES)-1:0] RAdrD;
logic [2**LOGWPL-1:0] MemPAdrDecoded;
logic [LINELEN/`XLEN-1:0] SelectedWriteWordEn;
logic [(`XLEN-1)/8:0] FinalByteMask;
logic SelectedWriteWordEn;
// logic [WORDSPERLINE-1:0] SelectedWriteWordEn;
// logic [(`XLEN-1)/8:0] FinalByteMask;
logic [LINELEN/8-1:0] FinalByteMask;
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Enable demux
/////////////////////////////////////////////////////////////////////////////////////////////
if(`LLEN>`XLEN)begin
logic [2**LOGWPL-1:0] MemPAdrDecodedtmp;
onehotdecoder #(LOGWPL) adrdec(
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp));
assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0};
end else
onehotdecoder #(LOGWPL) adrdec(
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
// If writing the whole line set all write enables to 1, else only set the correct word.
assign SelectedWriteWordEn = SetValidWay ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND
assign FinalByteMask = SetValidWay ? '1 : ByteMask; // OR
assign SelectedWriteWordEn = SetValidWay | SetDirtyWay;// ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND
assign FinalByteMask = SetValidWay ? '1 : LineByteMask; // OR
/////////////////////////////////////////////////////////////////////////////////////////////
// Tag Array
/////////////////////////////////////////////////////////////////////////////////////////////
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk,
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce,
.Adr(RAdr), .ReadData(ReadTag), .ByteMask('1),
.CacheWriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(SetValidWay));
@ -106,11 +103,18 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
/////////////////////////////////////////////////////////////////////////////////////////////
genvar words;
for(words = 0; words < LINELEN/`XLEN; words++) begin: word
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk, .Adr(RAdr),
.ReadData(ReadDataLine[(words+1)*`XLEN-1:words*`XLEN] ),
.CacheWriteData(CacheWriteData[(words+1)*`XLEN-1:words*`XLEN]),
.WriteEnable(SelectedWriteWordEn[words]), .ByteMask(FinalByteMask));
localparam integer SRAMLEN = 128;
localparam integer NUMSRAM = LINELEN/SRAMLEN;
localparam integer SRAMLENINBYTES = SRAMLEN/8;
localparam integer LOGNUMSRAM = $clog2(NUMSRAM);
for(words = 0; words < NUMSRAM; words++) begin: word
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce, .Adr(RAdr),
.ReadData(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]),
.CacheWriteData(CacheWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]),
//.WriteEnable(1'b1), .ByteMask(SRAMLineByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words]));
.WriteEnable(SelectedWriteWordEn), .ByteMask(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words]));
end
// AND portion of distributed read multiplexers
@ -126,7 +130,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
else if (SetValidWay) ValidBits[RAdr] <= #1 1'b1;
else if (ClearValidWay) ValidBits[RAdr] <= #1 1'b0;
end
flop #($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD);
flopen #($clog2(NUMLINES)) RAdrDelayReg(clk, ce, RAdr, RAdrD);
assign Valid = ValidBits[RAdrD];
/////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -37,6 +37,7 @@
module sram1p1rw #(parameter DEPTH=128, WIDTH=256) (
input logic clk,
input logic ce,
input logic [$clog2(DEPTH)-1:0] Adr,
input logic [WIDTH-1:0] CacheWriteData,
input logic WriteEnable,
@ -46,13 +47,14 @@ module sram1p1rw #(parameter DEPTH=128, WIDTH=256) (
logic [WIDTH-1:0] StoredData[DEPTH-1:0];
logic [$clog2(DEPTH)-1:0] AdrD;
always_ff @(posedge clk) AdrD <= Adr;
always_ff @(posedge clk) if(ce) AdrD <= Adr;
genvar index;
if (`USE_SRAM == 1) begin
// 64 x 128-bit SRAM
// check if the size is ok, complain if not***
logic [WIDTH-1:0] BitWriteMask;
for (index=0; index < WIDTH; index++)
assign BitWriteMask[index] = ByteMask[index/8];
@ -65,13 +67,13 @@ module sram1p1rw #(parameter DEPTH=128, WIDTH=256) (
end else begin
if (WIDTH%8 != 0) // handle msbs if not a multiple of 8
always_ff @(posedge clk)
if (WriteEnable & ByteMask[WIDTH/8])
if (ce & WriteEnable & ByteMask[WIDTH/8])
StoredData[Adr][WIDTH-1:WIDTH-WIDTH%8] <= #1
CacheWriteData[WIDTH-1:WIDTH-WIDTH%8];
for(index = 0; index < WIDTH/8; index++)
always_ff @(posedge clk)
if(WriteEnable & ByteMask[index])
if(ce & WriteEnable & ByteMask[index])
StoredData[Adr][index*8 +: 8] <= #1 CacheWriteData[index*8 +: 8];
assign ReadData = StoredData[AdrD];

View File

@ -30,11 +30,8 @@
`include "wally-config.vh"
module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL, LOGWPL)(
input logic clk,
input logic reset,
module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL)(
input logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1 : 0] PAdr,
input logic save, restore,
input logic [LINELEN-1:0] ReadDataLine,
output logic [WORDLEN-1:0] ReadDataWord);
@ -43,7 +40,6 @@ module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL, LOGWPL)(
localparam PADLEN = WORDLEN-MUXINTERVAL;
logic [LINELEN+(WORDLEN-MUXINTERVAL)-1:0] ReadDataLinePad;
logic [WORDLEN-1:0] ReadDataLineSets [(LINELEN/MUXINTERVAL)-1:0];
logic [WORDLEN-1:0] ReadDataWordRaw, ReadDataWordSaved;
if (PADLEN > 0) begin
logic [PADLEN-1:0] Pad;
@ -56,11 +52,5 @@ module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL, LOGWPL)(
assign ReadDataLineSets[index] = ReadDataLinePad[(index*MUXINTERVAL)+WORDLEN-1: (index*MUXINTERVAL)];
end
// variable input mux
// *** maybe remove REPLAY config later after deciding which way is best
assign ReadDataWordRaw = ReadDataLineSets[PAdr];
if(!`REPLAY) begin
flopen #(WORDLEN) cachereaddatasavereg(clk, save, ReadDataWordRaw, ReadDataWordSaved);
mux2 #(WORDLEN) readdatasaverestoremux(ReadDataWordRaw, ReadDataWordSaved,
restore, ReadDataWord);
end else assign ReadDataWord = ReadDataWordRaw;
assign ReadDataWord = ReadDataLineSets[PAdr];
endmodule

View File

@ -49,7 +49,6 @@ module ahblite (
input logic [2:0] IFUBurstType,
input logic [1:0] IFUTransType,
input logic IFUTransComplete,
input logic [(`XLEN-1)/8:0] ByteMaskM,
// Signals from Data Cache
input logic [`PA_BITS-1:0] LSUBusAdr,
@ -157,8 +156,7 @@ module ahblite (
assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise
assign HMASTLOCK = 0; // no locking supported
assign HWRITE = (NextBusState == MEMWRITE);
//assign HWSTRB = ByteMaskM;
// Byte mask for HWSTRB
// Byte mask for HWSTRB
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(HWSTRB));
// delay write data by one cycle for

View File

@ -60,10 +60,11 @@ module cvtshiftcalc(
// - otherwise:
// | LzcInM | 0's if nessisary |
// change to int shift to the left one
assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} :
CvtResDenormUf ? {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}} :
{CvtLzcIn, {`NF+1{1'b0}}};
always_comb
if (ToInt) CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}};
else if (CvtResDenormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}};
else CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}};
// choose the negative of the fraction size
if (`FPSIZES == 1) begin

View File

@ -1,10 +1,11 @@
`include "wally-config.vh"
module divshiftcalc(
input logic [`DIVLEN+2:0] Quot,
input logic [`DIVb-(`RADIX/4):0] DivQm,
input logic [`FMTBITS-1:0] Fmt,
input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2,
input logic [`NE+1:0] DivCalcExp,
input logic Sqrt,
input logic [`DURLEN-1:0] DivEarlyTermShift,
input logic [`NE+1:0] DivQe,
output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
output logic DivResDenorm,
@ -14,27 +15,28 @@ module divshiftcalc(
// is the result denromalized
// if the exponent is 1 then the result needs to be normalized then the result is denormalizes
assign DivResDenorm = DivCalcExp[`NE+1]|(~|DivCalcExp[`NE+1:0]);
assign DivResDenorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);
// if the result is denormalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivCalcExp+NF+1-1
assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExp;
// Left shift amount = DivQe+NF+1-1
assign DivDenormShift = (`NE+2)'(`NF)+DivQe;
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// 00000000.xxxxxxx... << NF Exp = DivCalcExp+1
// 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after)
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// 00000000.xxxxxxx... << NF Exp = DivQe+1
// 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
// inital Left shift amount = NF
// shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit
assign NormShift = (`NE+2)'(`NF);
// if the shift amount is negitive then dont shift (keep sticky bit)
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, DivEarlyTermShiftDiv2&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0};
// need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES)
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~(DivDenormShift[`NE+1]|Sqrt)}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}};
// *** may be able to reduce shifter size
assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb+1+(`RADIX/4)-`NF{1'b0}}};
endmodule

View File

@ -34,35 +34,37 @@ module divsqrt(
input logic clk,
input logic reset,
input logic [`FMTBITS-1:0] FmtE,
input logic [`NF:0] XManE, YManE,
input logic [`NE-1:0] XExpE, YExpE,
input logic XsE,
input logic [`NF:0] XmE, YmE,
input logic [`NE-1:0] XeE, YeE,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic DivStartE,
input logic StallM,
input logic StallE,
output logic DivStickyM,
output logic DivNegStickyM,
input logic StallE,
input logic SqrtE, SqrtM,
output logic DivSM,
output logic DivBusy,
output logic DivDone,
output logic [`NE+1:0] DivCalcExpM,
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
output logic [`DIVLEN+2:0] QuotM
output logic [`NE+1:0] QeM,
output logic [`DURLEN-1:0] EarlyTermShiftM,
output logic [`DIVb-(`RADIX/4):0] QmM
// output logic [`XLEN-1:0] RemM,
);
logic [`DIVLEN+3:0] WSN, WCN;
logic [`DIVLEN+3:0] WS, WC;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [`DIVLEN-1:0] X;
logic [`DIVLEN-1:0] Dpreproc;
logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
logic [`DIVb+3:0] NextWSN, NextWCN;
logic [`DIVb+3:0] WS, WC;
logic [`DIVb+3:0] StickyWSA;
logic [`DIVb:0] X;
logic [`DIVN-2:0] Dpreproc;
logic [`DURLEN-1:0] Dur;
logic NegSticky;
srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc);
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
.XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M));
srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
.DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
srtfsm srtfsm(.reset, .XsE, .SqrtE, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE,
.StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
srt srt(.clk, .Sqrt(SqrtM), .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
.StickyWSA, .DivBusy, .Qm(QmM));
endmodule

View File

@ -29,29 +29,29 @@
`include "wally-config.vh"
module fclassify (
input logic XSgnE, // sign bit
input logic XNaNE, // is NaN
input logic XSNaNE, // is signaling NaN
input logic XDenormE, // is denormal
input logic XZeroE, // is zero
input logic XInfE, // is infinity
output logic [`XLEN-1:0] ClassResE // classify result
);
input logic Xs, // sign bit
input logic XNaN, // is NaN
input logic XSNaN, // is signaling NaN
input logic XDenorm,// is denormal
input logic XZero, // is zero
input logic XInf, // is infinity
output logic [`XLEN-1:0] ClassRes// classify result
);
logic PInf, PZero, PNorm, PDenorm;
logic NInf, NZero, NNorm, NDenorm;
logic XNormE;
logic XNorm;
// determine the sub categories
assign XNormE = ~(XNaNE | XInfE | XDenormE | XZeroE);
assign PInf = ~XSgnE&XInfE;
assign NInf = XSgnE&XInfE;
assign PNorm = ~XSgnE&XNormE;
assign NNorm = XSgnE&XNormE;
assign PDenorm = ~XSgnE&XDenormE;
assign NDenorm = XSgnE&XDenormE;
assign PZero = ~XSgnE&XZeroE;
assign NZero = XSgnE&XZeroE;
assign XNorm= ~(XNaN | XInf| XDenorm| XZero);
assign PInf = ~Xs&XInf;
assign NInf = Xs&XInf;
assign PNorm = ~Xs&XNorm;
assign NNorm = Xs&XNorm;
assign PDenorm = ~Xs&XDenorm;
assign NDenorm = Xs&XDenorm;
assign PZero = ~Xs&XZero;
assign NZero = Xs&XZero;
// determine sub category and combine into the result
// bit 0 - -Inf
@ -64,6 +64,6 @@ module fclassify (
// bit 7 - +Inf
// bit 8 - signaling NaN
// bit 9 - quiet NaN
assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
endmodule

View File

@ -27,9 +27,10 @@
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// FOpCtrlE values
// OpCtrl values
// 110 min
// 101 max
// 010 equal
@ -37,36 +38,32 @@
// 011 less than or equal
module fcmp (
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlE, // see above table
input logic XSgnE, YSgnE, // input signs
input logic [`NE-1:0] XExpE, YExpE, // input exponents
input logic [`NF:0] XManE, YManE, // input mantissa
input logic XZeroE, YZeroE, // is zero
input logic XNaNE, YNaNE, // is NaN
input logic XSNaNE, YSNaNE, // is signaling NaN
input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs
output logic CmpNVE, // invalid flag
output logic [`FLEN-1:0] CmpFpResE, // compare resilt
output logic [`XLEN-1:0] CmpIntResE // compare resilt
input logic [`FMTBITS-1:0] Fmt, // format of fp number
input logic [2:0] OpCtrl, // see above table
input logic Xs, Ys, // input signs
input logic [`NE-1:0] Xe, Ye, // input exponents
input logic [`NF:0] Xm, Ym, // input mantissa
input logic XZero, YZero, // is zero
input logic XNaN, YNaN, // is NaN
input logic XSNaN, YSNaN, // is signaling NaN
input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker)
output logic CmpNV, // invalid flag
output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result
output logic [`XLEN-1:0] CmpIntRes // compare integer result
);
logic LTabs, LT, EQ; // is X < or > or = Y
logic [`FLEN-1:0] NaNRes;
logic BothZero, EitherNaN, EitherSNaN;
logic LTabs, LT, EQ; // is X < or > or = Y
logic [`FLEN-1:0] NaNRes; // NaN result
logic BothZero; // are both inputs zero
logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN
assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
// assign LT = {~XSgnE, XExpE, XManE[`NF-1:0]} < {~YSgnE, YExpE, YManE[`NF-1:0]}; // *** James look at whether we can simplify to this, but it fails regression
assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers
assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison
assign EQ = (X == Y);
//assign LT = $signed({XSgnE, XExpE, XManE[`NF-1:0]}) < $signed({YSgnE, YExpE, YManE[`NF-1:0]});
//assign LT = XInt < YInt;
// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
assign EQ = (FSrcXE == FSrcYE);
assign BothZero = XZeroE&YZeroE;
assign EitherNaN = XNaNE|YNaNE;
assign EitherSNaN = XSNaNE|YSNaNE;
assign BothZero = XZero&YZero;
assign EitherNaN = XNaN|YNaN;
assign EitherSNaN = XSNaN|YSNaN;
// flags
@ -74,78 +71,91 @@ module fcmp (
// LT/LE - signaling - sets invalid if NaN input
// EQ - quiet - sets invalid if signaling NaN input
always_comb begin
case (FOpCtrlE[2:0])
3'b110: CmpNVE = EitherSNaN;//min
3'b101: CmpNVE = EitherSNaN;//max
3'b010: CmpNVE = EitherSNaN;//equal
3'b001: CmpNVE = EitherNaN;//less than
3'b011: CmpNVE = EitherNaN;//less than or equal
default: CmpNVE = 1'bx;
case (OpCtrl[2:0])
3'b110: CmpNV = EitherSNaN;//min
3'b101: CmpNV = EitherSNaN;//max
3'b010: CmpNV = EitherSNaN;//equal
3'b001: CmpNV = EitherNaN;//less than
3'b011: CmpNV = EitherNaN;//less than or equal
default: CmpNV = 1'bx;
endcase
end
// Min/Max
// - outputs the min/max of X and Y
// - -0 < 0
// - if both are NaN return quiet X
// - if one is a NaN output the non-NaN
// LT/LE/EQ
// - -0 = 0
// - inf = inf and -inf = -inf
// - return 0 if comparison with NaN (unordered)
// fmin/fmax of two NaNs returns a quiet NaN of the appropriate size
// for IEEE, return the payload of X
// for RISC-V, return the canonical NaN
// select the NaN result
if (`FPSIZES == 1)
if(`IEEE754) assign NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
else if (`FPSIZES == 2)
if(`IEEE754) assign NaNRes = FmtE ? {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]};
else assign NaNRes = FmtE ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
else if (`FPSIZES == 3)
always_comb
case (FmtE)
case (Fmt)
`FMT:
if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
`FMT1:
if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]};
if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
`FMT2:
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, XSgnE, {`NE2{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF2]};
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
default: NaNRes = {`FLEN{1'bx}};
endcase
else if (`FPSIZES == 4)
always_comb
case (FmtE)
case (Fmt)
2'h3:
if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
2'h1:
if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, XSgnE, {`D_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`D_NF]};
if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
2'h0:
if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, XSgnE, {`S_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`S_NF]};
if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
2'h2:
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, XSgnE, {`H_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`H_NF]};
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
endcase
// when one input is a NaN -output the non-NaN
assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE :
XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN};
// Min/Max
// - outputs the min/max of X and Y
// - -0 < 0
// - if both are NaN return quiet X
// - if one is a NaN output the non-NaN
always_comb
if(OpCtrl[0]) // MAX
if(XNaN)
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
else CmpFpRes = Y; // X = NaN Y != NaN
else
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
else // X,Y != NaN
if(LT) CmpFpRes = Y; // X < Y
else CmpFpRes = X; // X > Y
else // MIN
if(XNaN)
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
else CmpFpRes = Y; // X = NaN Y != NaN
else
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
else // X,Y != NaN
if(LT) CmpFpRes = X; // X < Y
else CmpFpRes = Y; // X > Y
// LT/LE/EQ
// - -0 = 0
// - inf = inf and -inf = -inf
// - return 0 if comparison with NaN (unordered)
assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
endmodule

View File

@ -29,25 +29,44 @@
`include "wally-config.vh"
module fctrl (
input logic clk,
input logic reset,
input logic StallE, StallM, StallW, // stall signals
input logic FlushE, FlushM, FlushW, // flush signals
input logic [31:0] InstrD,
input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision
input logic [6:0] OpD, // bits 6:0 of instruction
input logic [4:0] Rs2D, // bits 24:20 of instruction
input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode
input logic [2:0] FRM_REGW, // rounding mode from CSR
input logic [1:0] STATUS_FS, // is FPU enabled?
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic FRegWriteD, // FP register write enable
output logic FDivStartD, // Start division or squareroot
output logic [1:0] FResSelD, // select result to be written to fp register
output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit
output logic [1:0] PostProcSelD,
output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
output logic FWriteIntD // is the result written to the integer register
input logic FDivBusyE, // is the divider busy
output logic IllegalFPUInstrD, IllegalFPUInstrM, // Is the instruction an illegal fpu instruction
output logic FRegWriteM, FRegWriteW, // FP register write enable
output logic [2:0] FrmM, // FP rounding mode
output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format
output logic DivStartE, // Start division or squareroot
output logic XEnE, YEnE, ZEnE,
output logic YEnForwardE, ZEnForwardE,
output logic FWriteIntE, FWriteIntM, // Write to integer register
output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component
output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage
output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit
output logic [4:0] Adr1E, Adr2E, Adr3E // adresses of each input
);
`define FCTRLW 11
logic [`FCTRLW-1:0] ControlsD;
logic IllegalFPUInstrE;
logic FRegWriteD; // FP register write enable
logic DivStartD; // integer register write enable
logic FWriteIntD; // integer register write enable
logic FRegWriteE; // FP register write enable
logic [2:0] OpCtrlD; // Select which opperation to do in each component
logic [1:0] PostProcSelD; // select result in the post processing unit
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
logic [2:0] FrmD, FrmE; // FP rounding mode
logic [`FMTBITS-1:0] FmtD; // FP format
//*** will putting x for don't cares reduce area in synthisis???
// FPU Instruction Decoder
always_comb
@ -130,7 +149,7 @@ module fctrl (
endcase
// unswizzle control bits
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD;
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, DivStartD, IllegalFPUInstrD} = ControlsD;
// rounding modes:
// 000 - round to nearest, ties to even
@ -155,6 +174,20 @@ module fctrl (
else if (`FPSIZES == 3|`FPSIZES == 4)
assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
// enables:
// X - all except int->fp, store, load, mv int->fp
// Y - all except cvt, mv, load, class, sqrt
// Z - fma ops only
// load/store mv int->fp cvt int->fp
assign XEnE = ~(((FResSelE==2'b10)&~FWriteIntE)|((FResSelE==2'b11)&FRegWriteE)|((FResSelE==2'b01)&(PostProcSelE==2'b00)&OpCtrlE[2]));
// load/class mv cvt
assign YEnE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&((PostProcSelE==2'b00)|((PostProcSelE==2'b01)&OpCtrlE[0]))));
assign ZEnE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&(~OpCtrlE[2]|OpCtrlE[1]);
assign YEnForwardE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&((PostProcSelE==2'b00)|((PostProcSelE==2'b01)&OpCtrlE[0]))));
assign ZEnForwardE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&~OpCtrlE[2];
// Final Res Sel:
// fp int
// 00 other cmp
@ -168,7 +201,7 @@ module fctrl (
// 10 fma
// Other Sel:
// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]}
// Ctrl signal = {OpCtrl[2], &FOpctrl[1:0]}
// 000 - sign 00
// 001 - negate sign 00
// 010 - xor sign 00
@ -186,8 +219,8 @@ module fctrl (
// 110 - add
// 111 - sub
// Div:
// 0 - ???
// 1 - ???
// 0 - div
// 1 - sqrt
// Cvt Int: {Int to Fp?, 64 bit int?, signed int?}
// Cvt Fp: output format
// 10 - to half
@ -205,5 +238,24 @@ module fctrl (
// 01 - negate sign
// 10 - xor sign
// D/E pipleine register
flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE});
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE);
if(`FLEN>`XLEN)
flopenrc #(1) DEIllegalReg(clk, reset, FlushE, ~StallE, IllegalFPUInstrD, IllegalFPUInstrE);
// E/M pipleine register
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE},
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM});
if(`FLEN>`XLEN)
flopenrc #(1) EMIllegalReg(clk, reset, FlushM, ~StallM, IllegalFPUInstrE, IllegalFPUInstrM);
// M/W pipleine register
flopenrc #(3) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResSelM},
{FRegWriteW, FResSelW});
endmodule

View File

@ -35,7 +35,7 @@ module fcvt (
input logic [`NE-1:0] Xe, // input's exponent
input logic [`NF:0] Xm, // input's fraction
input logic [`XLEN-1:0] Int, // integer input - from IEU
input logic [2:0] FOpCtrl, // choose which opperation (look below for values)
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
input logic ToInt, // is fp->int (since it's writting to the integer register)
input logic XZero, // is the input zero
input logic XDenorm, // is the input denormalized
@ -68,21 +68,22 @@ module fcvt (
logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder)
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
// seperate OpCtrl for code readability
assign Signed = FOpCtrl[0];
assign Int64 = FOpCtrl[1];
assign IntToFp = FOpCtrl[2];
assign Signed = OpCtrl[0];
assign Int64 = OpCtrl[1];
assign IntToFp = OpCtrl[2];
// choose the ouptut format depending on the opperation
// - fp -> fp: OpCtrl contains the percision of the output
// - int -> fp: Fmt contains the percision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp ? Fmt : (FOpCtrl[1:0] == `FMT);
assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp ? Fmt : FOpCtrl[1:0];
assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0];
///////////////////////////////////////////////////////////////////////////
@ -102,10 +103,11 @@ module fcvt (
// choose the input to the leading zero counter i.e. priority encoder
// int -> fp : | positive integer | 00000... (if needed) |
// fp -> fp : | fraction | 00000... (if needed) |
assign LzcIn = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
{Xm[`NF-1:0], {`CVTLEN-`NF{1'b0}}};
assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} :
{Xm, {`CVTLEN-`NF{1'b0}}};
assign LzcIn = LzcInFull[`CVTLEN-1:0];
lzc #(`CVTLEN) lzc (.num(LzcIn), .ZeroCnt(LeadingZeros));
lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
///////////////////////////////////////////////////////////////////////////
// shifter
@ -119,13 +121,14 @@ module fcvt (
// denormalized/undeflowed result fp -> fp:
// - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0
// ??? -> fp:
// - shift left by LeadingZeros+1 - to shift till the result is normalized
// - shift left by LeadingZeros - to shift till the result is normalized
// - only shift fp -> fp if the intital value is denormalized
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} :
ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] :
(LeadingZeros+1)&{`LOGCVTLEN{XDenorm|IntToFp}};
always_comb
if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}};
else if (ResDenormUf&~IntToFp) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0];
else ShiftAmt = LeadingZeros;
///////////////////////////////////////////////////////////////////////////
// exp calculations
@ -148,7 +151,9 @@ module fcvt (
assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS);
end else if (`FPSIZES == 2) begin
assign NewBias = ToInt ? (`NE-1)'(1) : OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
logic [`NE-2:0] NewBiasToFp;
assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
end else if (`FPSIZES == 3) begin
logic [`NE-2:0] NewBiasToFp;
@ -175,7 +180,7 @@ module fcvt (
// select the old exponent
// int -> fp : largest bias + XLEN
// fp -> ??? : XExp
assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN) : Xe;
assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe;
// calculate CalcExp
// fp -> fp :
@ -197,14 +202,14 @@ module fcvt (
// | 0's | Mantissa | 0's if nessisary |
// | keep |
//
// - if the input is denormalized then we dont shift... so the "- (LeadingZeros+1)" is just leftovers from other options
// int -> fp : largest bias + XLEN - Largest bias + new bias - 1 - LeadingZeros = XLEN + NewBias - 1 - LeadingZeros
// - if the input is denormalized then we dont shift... so the "- LeadingZeros" is just leftovers from other options
// int -> fp : largest bias + XLEN - Largest bias + new bias - LeadingZeros = XLEN + NewBias - LeadingZeros
// Process:
// - shifted right by XLEN (XLEN)
// - shift left to normilize (-1-LeadingZeros)
// - shift left to normilize (-LeadingZeros)
// - newBias to make the biased exponent
// oldexp - biasold +newbias - (LeadingZeros+1)&(XDenorm|IntToFp)
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenorm|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
// oldexp - biasold +newbias - LeadingZeros&(XDenorm|IntToFp)
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
// find if the result is dnormal or underflows
// - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
// - can't underflow an integer to Fp conversion
@ -220,7 +225,11 @@ module fcvt (
// - if 64-bit : check the msb of the 64-bit integer input and if it's signed
// - if 32-bit : check the msb of the 32-bit integer input and if it's signed
// - otherwise: the floating point input's sign
assign Cs = IntToFp ? Int64 ? Int[`XLEN-1]&Signed : Int[31]&Signed : Xs;
always_comb
if(IntToFp)
if(Int64) Cs = Int[`XLEN-1]&Signed;
else Cs = Int[31]&Signed;
else Cs = Xs;
endmodule

View File

@ -31,49 +31,51 @@
`include "wally-config.vh"
module fhazard(
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
input logic [4:0] RdM, RdW, // the adress being written to
input logic [1:0] FResSelM, // the result being selected
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
input logic [4:0] RdM, RdW, // the adress being written to
input logic [1:0] FResSelM, // the result being selected
input logic XEnE, YEnE, ZEnE,
output logic FStallD, // stall the decode stage
output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value
output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value
);
always_comb begin
// set defaults
FForwardXE = 2'b00; // choose FRD1E
FForwardYE = 2'b00; // choose FRD2E
FForwardZE = 2'b00; // choose FRD3E
ForwardXE = 2'b00; // choose FRD1E
ForwardYE = 2'b00; // choose FRD2E
ForwardZE = 2'b00; // choose FRD3E
FStallD = 0;
//*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait
// if the needed value is in the memory stage - input 1
if ((Adr1E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
if(XEnE)
if ((Adr1E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W
// if the needed value is in the memory stage - input 2
if ((Adr2E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
if(YEnE)
if ((Adr2E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W
// if the needed value is in the memory stage - input 3
if ((Adr3E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
if(ZEnE)
if ((Adr3E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W
end

View File

@ -34,24 +34,23 @@ module flags(
input logic XInf, YInf, ZInf, // inputs are infinity
input logic Plus1,
input logic InfIn, // is a Inf input being used
input logic XZero, YZero, // inputs are zero
input logic XNaN, YNaN, // inputs are NaN
input logic NaNIn, // is a NaN input being used
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic XZero, YZero, // inputs are zero
input logic Sqrt, // Sqrt?
input logic ToInt, // convert to integer
input logic IntToFp, // convert integer to floating point
input logic Int64, // convert to 64 bit integer
input logic Signed, // convert to a signed integer
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [`NE:0] CvtCe, // the calculated expoent - Cvt
input logic CvtOp, // conversion opperation?
input logic DivOp, // conversion opperation?
input logic FmaOp, // Fma opperation?
input logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow
input logic [`NE+1:0] Nexp, // exponent of the normalized sum
input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
input logic [`NE+1:0] Me, // exponent of the normalized sum
input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits
input logic FmaAs, FmaPs, // the product and modified Z signs
input logic R, UfLSBRes, S, UfPlus1, // bits used to determine rounding
input logic R, UfL, S, UfPlus1, // bits used to determine rounding
output logic DivByZero,
output logic IntInvalid, Invalid, Overflow, // flags used to select the res
output logic [4:0] PostProcFlg // flags
@ -73,30 +72,30 @@ module flags(
if (`FPSIZES == 1) begin
assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 2) begin
assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
`FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
`FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]);
`FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
`FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
`FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
default: ResExpGteMax = 1'bx;
endcase
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
`Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE];
`D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]);
`S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]);
`H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]);
`Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
`D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
`S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
`H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
endcase
// a left shift of intlen+1 is still in range but any more than that is an overflow
// inital: | 64 0's | XLEN |
@ -110,14 +109,14 @@ module flags(
// - any of the bits after the most significan 1 is one
// - the most signifcant in 65 or 33 is still a one in the number and
// one of the later bits is one
assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end
// if the result is greater than or equal to the max exponent(not taking into account sign)
// | and the exponent isn't negitive
// | | if the input isnt infinity or NaN
// | | |
assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero);
assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
// detecting tininess after rounding
// the exponent is negitive
@ -127,11 +126,11 @@ module flags(
// | | | | and if the result is not exact
// | | | | | and if the input isnt infinity or NaN
// | | | | | |
assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero);
assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero|Invalid);
// Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
// - Don't set the underflow flag if an underflowed res isn't outputed
assign FpInexact = (S|Overflow|R|Underflow)&~(InfIn|NaNIn|DivByZero);
assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid);
// if the res is too small to be represented and not 0
// | and if the res is not invalid (outside the integer bounds)
@ -153,18 +152,18 @@ module flags(
// | | | | or the res rounds up out of bounds
// | | | | and the res didn't underflow
// | | | | |
assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
// |
// or when the positive res rounds up out of range
assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp);
assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~XNaN & ~YNaN) | (XZero & YInf) | (YZero & XInf);
assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~NaNIn) | (XZero & YInf) | (YZero & XInf);
assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt);
assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
// if dividing by zero and not 0/0
// - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
assign DivByZero = YZero&DivOp&~(XZero|NaNIn|InfIn);
assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);
// Combine flags
// - to integer results do not set the underflow or overflow flags

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com, David Harris
// Modified: 6/23/2021
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: Floating point multiply-accumulate of configurable size
//
@ -34,7 +34,7 @@ module fma(
input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format
input logic XZero, YZero, ZZero, // is the input zero
input logic [2:0] FOpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [2:0] OpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad
output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
output logic ZmSticky, // sticky bit that is calculated during alignment
@ -44,14 +44,15 @@ module fma(
output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A)
output logic As, // the aligned addend's sign (modified Z sign for other opperations)
output logic Ps, // the product's sign
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count
output logic Ss, // the sum's sign
output logic [`NE+1:0] Se,
output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count
);
logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format
logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1)
logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted
logic [3*`NF+5:0] AmInv; // aligned addend's mantissa possibly inverted
logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed
logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum
///////////////////////////////////////////////////////////////////////////////
// Calculate the product
// - When multipliying two fp numbers, add the exponents
@ -62,234 +63,29 @@ module fma(
// calculate the product's exponent
expadd expadd(.Fmt, .Xe, .Ye, .XZero, .YZero, .Pe);
fmaexpadd expadd(.Fmt, .Xe, .Ye, .XZero, .YZero, .Pe);
// multiplication of the mantissa's
mult mult(.Xm, .Ym, .Pm);
fmamult mult(.Xm, .Ym, .Pm);
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
.Am, .ZmSticky, .KillProd);
// calculate the signs and take the opperation into account
sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA);
fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
.Am, .ZmSticky, .KillProd);
// ///////////////////////////////////////////////////////////////////////////////
// // Addition/LZA
// ///////////////////////////////////////////////////////////////////////////////
add add(.Am, .Pm, .Ps, .As, .KillProd, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm);
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .NCnt);
fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss);
fmalza #(3*`NF+6) lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .sub(InvA), .SCnt);
endmodule
module expadd(
input logic [`FMTBITS-1:0] Fmt, // format of the output: single double half quad
input logic [`NE-1:0] Xe, Ye, // input's exponents
input logic XZero, YZero, // are the inputs zero
output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2
);
// kill the exponent if the product is zero - either X or Y is 0
assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZero|YZero)}};
endmodule
module mult(
input logic [`NF:0] Xm, Ym,
output logic [2*`NF+1:0] Pm
);
assign Pm = Xm * Ym;
endmodule
module sign(
input logic [2:0] FOpCtrl, // opperation contol
input logic Xs, Ys, Zs, // sign of the inputs
output logic Ps, // the product's sign - takes opperation into account
output logic As // aligned addend sign used in fma - takes opperation into account
);
// Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB
// flip is negation opperation
assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]);
// flip if subtraction
assign As = Zs^FOpCtrl[0];
endmodule
module align(
input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
input logic [`NF:0] Zm, // significand in U(0.NF) format]
input logic XZero, YZero, ZZero, // is the input zero
output logic [3*`NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
output logic ZmSticky, // Sticky bit calculated from the aliged addend
output logic KillProd // should the product be set to zero
);
logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*`NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*`NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
logic KillZ;
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
// determine the shift count for alignment
// - negitive means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze};
// Defualt Addition without shifting
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
// the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
assign ZmPreshifted = {Zm,(3*`NF+5)'(0)};
assign KillProd = ACnt[`NE+1]|XZero|YZero;
assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5));
always_comb
begin
// If the product is too small to effect the sum, kill the product
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
if (KillProd) begin
ZmShifted = ZmPreshifted;
ZmSticky = ~(XZero|YZero);
// If the addend is too small to effect the addition
// - The addend has to shift two past the end of the addend to be considered too small
// - The 2 extra bits are needed for rounding
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
end else if (KillZ) begin
ZmShifted = 0;
ZmSticky = ~ZZero;
// If the Addend is shifted right
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
end else begin
ZmShifted = ZmPreshifted >> ACnt;
ZmSticky = |(ZmShifted[`NF-1:0]);
end
end
assign Am = ZmShifted[4*`NF+5:`NF];
endmodule
module add(
input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
input logic [2*`NF+1:0] Pm, // the product's mantissa
input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
input logic KillProd, // should the product be set to 0
input logic XZero, YZero, // is the input zero
output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted
output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed
output logic NegSum, // was the sum negitive
output logic InvA, // do you invert the aligned addend
output logic [3*`NF+5:0] Sm, // the positive sum
output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum
);
///////////////////////////////////////////////////////////////////////////////
// Addition
///////////////////////////////////////////////////////////////////////////////
// Negate Z when doing one of the following opperations:
// -prod + Z
// prod - Z
assign InvA = As ^ Ps;
// Choose an inverted or non-inverted addend - the one has to be added now for the LZA
assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am};
// Kill the product if the product is too small to effect the addition (determined in fma1.sv)
assign PmKilled = Pm&{2*`NF+2{~KillProd}};
// Do the addition
// - calculate a positive and negitive sum in parallel
assign PreSum = {{`NF+3{1'b0}}, PmKilled, 2'b0} + AmInv + {{3*`NF+6{1'b0}}, InvA};
assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+7)'(4)};
// Is the sum negitive
assign NegSum = PreSum[3*`NF+6];
// Choose the positive sum and accompanying LZA result.
assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
endmodule
module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
input logic [3*`NF+6:0] A, // addend
input logic [2*`NF+1:0] P, // product
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result
);
logic [3*`NF+6:0] T;
logic [3*`NF+6:0] G;
logic [3*`NF+6:0] Z;
logic [3*`NF+6:0] f;
assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4];
assign G[3*`NF+6:2*`NF+4] = 0;
assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4];
assign T[2*`NF+3:2] = A[2*`NF+3:2]^P;
assign G[2*`NF+3:2] = A[2*`NF+3:2]&P;
assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P;
assign T[1:0] = A[1:0];
assign G[1:0] = 0;
assign Z[1:0] = ~A[1:0];
// Apply function to determine Leading pattern
// - note: the paper linked above uses the numbering system where 0 is the most significant bit
//f[n] = ~T[n]&T[n-1] note: n is the MSB
//f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1]))
assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5];
assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1}));
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt));
endmodule

View File

@ -0,0 +1,75 @@
///////////////////////////////////////////
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: FMA significand adder
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmaadd(
input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
input logic [2*`NF+1:0] Pm, // the product's mantissa
input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
input logic InvA, // invert the aligned addend
input logic KillProd, // should the product be set to 0
input logic ZmSticky,
input logic [`NE-1:0] Ze,
input logic [`NE+1:0] Pe,
output logic [3*`NF+5:0] AmInv, // aligned addend possibly inverted
output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed
output logic NegSum, // was the sum negitive
output logic Ss,
output logic [`NE+1:0] Se,
output logic [3*`NF+5:0] Sm // the positive sum
);
logic [3*`NF+5:0] PreSum, NegPreSum; // possibly negitive sum
///////////////////////////////////////////////////////////////////////////////
// Addition
///////////////////////////////////////////////////////////////////////////////
// Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition
assign AmInv = InvA ? ~Am : Am;
// Kill the product if the product is too small to effect the addition (determined in fma1.sv)
assign PmKilled = KillProd ? '0 : Pm;
// Do the addition
// - calculate a positive and negitive sum in parallel
// Zsticky Psticky
// PreSum -1 = don't add 1 +1 = add 2
// NegPreSum +1 = add 2 -1 = don't add 1
// for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0
assign {NegSum, PreSum} = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + {InvA, AmInv} + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))};
assign NegPreSum = Am + {{`NF+2{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+4)'(0), ZmSticky&~KillProd, ~(ZmSticky)};
// Choose the positive sum and accompanying LZA result.
assign Sm = NegSum ? NegPreSum : PreSum;
// is the result negitive
// if p - z is the Sum negitive
// if -p + z is the Sum positive
// if -p - z then the Sum is negitive
assign Ss = NegSum^Ps;
assign Se = KillProd ? {2'b0, Ze} : Pe;
endmodule

View File

@ -0,0 +1,101 @@
///////////////////////////////////////////
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: FMA alginment shift
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmaalign(
input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
input logic [`NF:0] Zm, // significand in U(0.NF) format]
input logic XZero, YZero, ZZero, // is the input zero
output logic [3*`NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
output logic ZmSticky, // Sticky bit calculated from the aliged addend
output logic KillProd // should the product be set to zero
);
logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*`NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*`NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
logic KillZ;
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
// determine the shift count for alignment
// - negitive means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze};
// Defualt Addition without shifting
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
// the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
assign ZmPreshifted = {Zm,(3*`NF+5)'(0)};
assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5));
always_comb
begin
// If the product is too small to effect the sum, kill the product
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
if (KillProd) begin
ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)};
ZmSticky = ~(XZero|YZero);
// If the addend is too small to effect the addition
// - The addend has to shift two past the end of the addend to be considered too small
// - The 2 extra bits are needed for rounding
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
end else if (KillZ) begin
ZmShifted = 0;
ZmSticky = ~ZZero;
// If the Addend is shifted right
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
end else begin
ZmShifted = ZmPreshifted >> ACnt;
ZmSticky = |(ZmShifted[`NF-1:0]);
end
end
assign Am = ZmShifted[4*`NF+5:`NF];
endmodule

View File

@ -0,0 +1,45 @@
///////////////////////////////////////////
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: FMA exponent addition
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmaexpadd(
input logic [`FMTBITS-1:0] Fmt, // format of the output: single double half quad
input logic [`NE-1:0] Xe, Ye, // input's exponents
input logic XZero, YZero, // are the inputs zero
output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2
);
logic PZero;
// kill the exponent if the product is zero - either X or Y is 0
assign PZero = XZero | YZero;
assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)});
endmodule

View File

@ -0,0 +1,60 @@
///////////////////////////////////////////
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: Leading Zero Anticipator
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmalza #(WIDTH) ( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
input logic [WIDTH-1:0] A, // addend
input logic [2*`NF+3:0] Pm, // product
input logic Cin, // carry in
input logic sub,
output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result
);
logic [WIDTH:0] F;
logic [WIDTH-1:0] B, P, G, K;
logic [WIDTH-1:0] Pp1, Gm1, Km1;
assign B = {{(`NF+2){1'b0}}, Pm}; // Zero extend product
assign P = A^B;
assign G = A&B;
assign K= ~A&~B;
assign Pp1 = {sub, P[WIDTH-1:1]};
assign Gm1 = {G[WIDTH-2:0], Cin};
assign Km1 = {K[WIDTH-2:0], ~Cin};
// Apply function to determine Leading pattern
// - note: the paper linked above uses the numbering system where 0 is the most significant bit
assign F[WIDTH] = ~sub&P[WIDTH-1];
assign F[WIDTH-1:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1));
lzc #(WIDTH+1) lzc (.num(F), .ZeroCnt(SCnt));
endmodule

View File

@ -0,0 +1,38 @@
///////////////////////////////////////////
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: FMA Significand Multiplier
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmamult(
input logic [`NF:0] Xm, Ym,
output logic [2*`NF+1:0] Pm
);
assign Pm = Xm * Ym;
endmodule

View File

@ -32,55 +32,57 @@ module fmashiftcalc(
input logic [3*`NF+5:0] FmaSm, // the positive sum
input logic [`NE-1:0] Ze, // exponent of Z
input logic [`NE+1:0] FmaPe, // X exponent + Y exponent - bias
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count
input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // normalization shift count
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic FmaKillProd, // is the product set to zero
input logic ZDenorm,
output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
output logic FmaSmZero, // is the result denormalized - calculated before LZA corection
input logic [`NE+1:0] FmaSe,
output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
output logic FmaSZero, // is the result denormalized - calculated before LZA corection
output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection
output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count
output logic [3*`NF+8:0] FmaShiftIn // is the sum zero
);
logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later
logic [`NE+1:0] NormSumExp; // the exponent of the normalized sum with the `FLEN bias
logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias
logic [`NE+1:0] BiasCorr;
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
//*** insert bias-bias simplification in fcvt.sv/phone pictures
// Determine if the sum is zero
assign FmaSmZero = ~(|FmaSm);
assign FmaSZero = ~(|FmaSm);
// calculate the sum's exponent
assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4);
assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+4);
//convert the sum's exponent into the proper percision
if (`FPSIZES == 1) begin
assign FmaConvNormSumExp = NormSumExp;
assign NormSumExp = PreNormSumExp;
end else if (`FPSIZES == 2) begin
assign FmaConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS);
assign NormSumExp = PreNormSumExp+BiasCorr;
end else if (`FPSIZES == 3) begin
always_comb begin
case (Fmt)
`FMT: FmaConvNormSumExp = NormSumExp;
`FMT1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
`FMT2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
default: FmaConvNormSumExp = {`NE+2{1'bx}};
`FMT: BiasCorr = '0;
`FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS);
`FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS);
default: BiasCorr = 'x;
endcase
end
assign NormSumExp = PreNormSumExp+BiasCorr;
end else if (`FPSIZES == 4) begin
always_comb begin
case (Fmt)
2'h3: FmaConvNormSumExp = NormSumExp;
2'h1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
2'h0: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
2'h2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
2'h3: BiasCorr = '0;
2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS);
2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS);
2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS);
endcase
end
assign NormSumExp = PreNormSumExp+BiasCorr;
end
@ -88,52 +90,52 @@ module fmashiftcalc(
if (`FPSIZES == 1) begin
logic Sum0LEZ, Sum0GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
end else if (`FPSIZES == 2) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSmZero;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
end else if (`FPSIZES == 3) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2));
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp;
always_comb begin
case (Fmt)
`FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
`FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero;
`FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero;
`FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
`FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
`FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
default: FmaPreResultDenorm = 1'bx;
endcase
end
end else if (`FPSIZES == 4) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|NormSumExp;
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|NormSumExp;
assign Sum3LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp;
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS));
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp;
assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS));
assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp;
always_comb begin
case (Fmt)
2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero;
2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero;
2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSmZero;
endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking
2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero;
endcase
end
end
@ -144,13 +146,13 @@ module fmashiftcalc(
// - if kill prod dont add to exp
// Determine if the result is denormal
// assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSmZero;
// assign FmaPreResultDenorm = $signed(NormSumExp)<=0 & ($signed(NormSumExp)>=$signed(-FracLen)) & ~FmaSZero;
// Determine the shift needed for denormal results
// - if not denorm add 1 to shift out the leading 1
assign DenormShift = FmaPreResultDenorm&~FmaKillProd ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
// set and calculate the shift input and amount
// - shift once if killing a product and the result is denormalized
assign FmaShiftIn = {3'b0, FmaSm};
assign FmaShiftAmt = (FmaNCnt&{$clog2(3*`NF+7){~FmaKillProd}})+DenormShift;
if (`FPSIZES == 1)
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaSCnt+1;
else
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaSCnt+1;
endmodule

View File

@ -0,0 +1,49 @@
///////////////////////////////////////////
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: FMA Sign Logic
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmasign(
input logic [2:0] OpCtrl, // opperation contol
input logic Xs, Ys, Zs, // sign of the inputs
output logic Ps, // the product's sign - takes opperation into account
output logic As, // aligned addend sign used in fma - takes opperation into account
output logic InvA // Effective subtraction: invert addend
);
// Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB
// flip is negation opperation
assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]);
// flip addend sign for subtraction
assign As = Zs^OpCtrl[0];
// Effective subtraction when product and addend have opposite signs
assign InvA = As ^ Ps;
endmodule

View File

@ -30,28 +30,28 @@
`include "wally-config.vh"
module fpu (
input logic clk,
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD, // instruction from IFU
input logic [`FLEN-1:0] ReadDataW,// Read data from memory
input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input being processed (from IEU)
input logic StallE, StallM, StallW, // stall signals from HZU
input logic FlushE, FlushM, FlushW, // flush signals from HZU
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
input logic [1:0] STATUS_FS, // Is floating-point enabled?
output logic FRegWriteM, // FP register write enable
output logic FpLoadStoreM, // Fp load instruction?
output logic FLoad2,
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, // integer register write enables
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
output logic [1:0] FResSelW,
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
input logic clk,
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode (from CSR)
input logic [31:0] InstrD, // instruction (from IFU)
input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input (from IEU)
input logic StallE, StallM, StallW, // stall signals (from HZU)
input logic FlushE, FlushM, FlushW, // flush signals (from HZU)
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit)
output logic FRegWriteM, // FP register write enable (to privileged unit)
output logic FpLoadStoreM, // Fp load instruction? (to LSU)
output logic FStore2, // store two words into memory (to LSU)
output logic FStallD, // Stall the decode stage (To HZU)
output logic FWriteIntE, // integer register write enable (to IEU)
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
output logic [1:0] FResSelW, // final result selection (to IEU)
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to privileged unit)
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
);
@ -62,98 +62,91 @@ module fpu (
// - sets the underflow after rounding
// control signals
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register
logic FWriteIntM; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
logic FRegWriteW; // FP register write enable
logic [2:0] FrmM; // FP rounding mode
logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
logic DivStartE; // Start division or squareroot
logic FWriteIntM; // Write to integer register
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
logic IllegalFPUInstrM;
logic XEnE, YEnE, ZEnE;
logic YEnForwardE, ZEnForwardE;
// regfile signals
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
logic XSgnM, YSgnM; // input's sign - memory stage
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
logic [`NE-1:0] ZExpM; // input's exponent - memory stage
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage
logic [`NF:0] XManM, YManM, ZManM; // input's fraction - memory stage
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XNaNQ, YNaNQ; // is the input a NaN - divide
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XZeroQ, YZeroQ; // is the input zero - divide
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XInfQ, YInfQ; // is the input infinity - divide
logic XExpMaxE; // is the exponent all ones (max value)
logic FmtQ;
logic FOpCtrlQ;
logic XsE, YsE, ZsE; // input's sign - execute stage
logic XsM, YsM; // input's sign - memory stage
logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
logic [`NE-1:0] ZeM; // input's exponent - memory stage
logic [`NF:0] XmE, YmE, ZmE; // input's fraction - execute stage
logic [`NF:0] XmM, YmM, ZmM; // input's fraction - memory stage
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XNaNQ, YNaNQ; // is the input a NaN - divide
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XExpMaxE; // is the exponent all ones (max value)
// Fma Signals
logic [3*`NF+5:0] SumE, SumM;
logic [`NE+1:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic InvAE, InvAM;
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
logic [3*`NF+5:0] SmE, SmM;
logic [`NE+1:0] PeE, PeM;
logic ZmStickyE, ZmStickyM;
logic [`NE+1:0] SeE,SeM;
logic KillProdE, KillProdM;
logic InvAE, InvAM;
logic NegSumE, NegSumM;
logic AsE, AsM;
logic PsE, PsM;
logic SsE, SsM;
logic [$clog2(3*`NF+7)-1:0] SCntE, SCntM;
// Cvt Signals
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic [`NE:0] CeE, CeM; // the calculated expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
logic CvtResSgnE, CvtResSgnM; // the result's sign
logic CsE, CsM; // the result's sign
logic IntZeroE, IntZeroM; // is the integer zero?
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
//divide signals
logic [`DIVLEN+2:0] QuotE, QuotM;
logic [`NE+1:0] DivCalcExpE, DivCalcExpM;
logic DivNegStickyE, DivNegStickyM;
logic DivStickyE, DivStickyM;
logic DivDoneM;
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M;
logic [`DIVb-(`RADIX/4):0] QmM;
logic [`NE+1:0] QeE, QeM;
logic DivSE, DivSM;
logic DivDoneM;
logic [`DURLEN-1:0] EarlyTermShiftM;
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [4:0] FDivFlgM; // divide/squareroot flags
logic [`FLEN-1:0] ReadResW; // read result (load instruction)
logic [`XLEN-1:0] ClassResE; // classify result
logic [`XLEN-1:0] FIntResE; // classify result
logic [`FLEN-1:0] FpResM, FpResW; // classify result
logic [`FLEN-1:0] PostProcResM; // classify result
logic [4:0] PostProcFlgM; // classify result
logic [`XLEN-1:0] ClassResE; // classify result
logic [`XLEN-1:0] FIntResE; // classify result
logic [`FLEN-1:0] FpResM, FpResW; // classify result
logic [`FLEN-1:0] PostProcResM; // classify result
logic [4:0] PostProcFlgM; // classify result
logic [`XLEN-1:0] FCvtIntResM;
logic [`FLEN-1:0] CmpFpResE; // compare result
logic [`XLEN-1:0] CmpIntResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
logic [`FLEN-1:0] CmpFpResE; // compare result
logic [`XLEN-1:0] CmpIntResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
// other signals
logic FDivSqrtDoneE; // is divide done
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
logic load_preload; // enable for FF on fpdivsqrt
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed
@ -170,9 +163,11 @@ module fpu (
//////////////////////////////////////////////////////////////////////////////////////////
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD,
.FmtD, .FrmD, .FWriteIntD);
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD,
.StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
.reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE,
.DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE,
.FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E);
// FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
@ -184,12 +179,6 @@ module fpu (
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
// EXECUTION STAGE
@ -206,12 +195,12 @@ module fpu (
// Hazard unit for FPU
// - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM,
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
.XEnE, .YEnE(YEnForwardE), .ZEnE(ZEnForwardE), .FStallD, .ForwardXE, .ForwardYE, .ForwardZE);
// forwarding muxs
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE);
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE);
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE);
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE);
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, ForwardYE, PreYE);
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, ForwardZE, PreZE);
generate
@ -226,7 +215,7 @@ module fpu (
endgenerate
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions
mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), YE); // Force Z to be 0 for multiply instructions
// Force Z to be 0 for multiply instructions
generate
@ -240,80 +229,76 @@ module fpu (
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
endgenerate
mux3 #(`FLEN) fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}, ZE);
// unpack unit
// - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
.Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE),
.XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE),
.YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE),
.ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE));
// fma - does multiply, add, and multiply-add instructions
fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE),
.Xe(XExpE), .Ye(YExpE), .Ze(ZExpE),
.Xm(XManE), .Ym(YManE), .Zm(ZManE),
// fused multiply add
// - fadd/fsub
// - fmul
// - fmadd/fnmadd/fmsub/fnmsub
fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE),
.Xe(XeE), .Ye(YeE), .Ze(ZeE),
.Xm(XmE), .Ym(YmE), .Zm(ZmE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE),
.FOpCtrl(FOpCtrlE), .Fmt(FmtE),
.As(ZSgnEffE), .Ps(PSgnE),
.Sm(SumE), .Pe(ProdExpE),
.NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE),
.ZmSticky(AddendStickyE), .KillProd(KillProdE));
.OpCtrl(OpCtrlE), .Fmt(FmtE),
.As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE),
.Sm(SmE), .Pe(PeE),
.NegSum(NegSumE), .InvA(InvAE), .SCnt(SCntE),
.ZmSticky(ZmStickyE), .KillProd(KillProdE));
// // fpdivsqrt using Goldschmidt's iteration
// if(`FLEN == 64) begin
// flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// end
// else if (`FLEN == 32) begin
// flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// end
// flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}),
// .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ),
// .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
// .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
// .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE,
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE),
.StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
.EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM));
// other FP execution units
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
// divide and squareroot
// - fdiv
// - fsqrt
// *** add other opperations
divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), .XsE,
.StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal
.EarlyTermShiftM, .QmM, .DivDone(DivDoneM));
// compare
// - fmin/fmax
// - flt/fle/feq
fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
.Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE),
.XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE),
.CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));
// sign injection
// - fsgnj/fsgnjx/fsgnjn
fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE));
fcvt fcvt (.Xs(XSgnE), .Xe(XExpE), .Xm(XManE), .Int(ForwardedSrcAE), .FOpCtrl(FOpCtrlE),
.ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CvtCalcExpE),
.ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .IntZero(IntZeroE),
// classify
// - fclass
fclassify fclassify (.Xs(XsE), .XDenorm(XDenormE), .XZero(XZeroE), .XNaN(XNaNE),
.XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));
// convert
// - fcvt.*.*
fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE),
.ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CeE),
.ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CsE), .IntZero(IntZeroE),
.LzcIn(CvtLzcInE));
// data to be stored in memory - to IEU
// - FP uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
if (`LLEN==`XLEN) begin
assign FWriteDataE = FSrcYE[`XLEN-1:0];
assign FWriteDataE = YE[`XLEN-1:0];
end else begin
logic [`FLEN-1:0] FWriteDataE;
if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT;
else assign FLoad2 = FmtM;
if(`FMTBITS == 2) assign FStore2 = (FmtM == `FMT)&~IllegalFPUInstrM;
else assign FStore2 = FmtM&~IllegalFPUInstrM;
if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
if (`FPSIZES==1) assign FWriteDataE = YE;
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}};
else assign FWriteDataE = FmtE == `FMT ? YE : {2{YE[`LEN1-1:0]}};
flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM);
end
@ -330,14 +315,27 @@ module fpu (
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
endgenerate
// select a result that may be written to the FP register
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
// select the result that may be written to the integer register - to IEU
logic [`FLEN-1:0] SgnExtXE;
generate
if(`FPSIZES == 1)
assign SgnExtXE = XE;
else if(`FPSIZES == 2)
mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{XsE}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE);
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{XsE}}, XE[`H_LEN-1:0]},
{{`FLEN-`S_LEN{XsE}}, XE[`S_LEN-1:0]},
{{`FLEN-`D_LEN{XsE}}, XE[`D_LEN-1:0]},
XE, FmtE, SgnExtXE); // NaN boxing zeroes
endgenerate
if (`FLEN>`XLEN)
assign IntSrcXE = FSrcXE[`XLEN-1:0];
assign IntSrcXE = SgnExtXE[`XLEN-1:0];
else
assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
assign IntSrcXE = {{`XLEN-`FLEN{XsE}}, SgnExtXE};
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
@ -345,27 +343,24 @@ module fpu (
// E/M pipe registers
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, XE, FSrcXM);
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XsE,XmE}, {XsM,XmM});
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YsE,YmE}, {YsM,YmM});
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
{AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM);
flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM,
{ZmStickyE, KillProdE, InvAE, SCntE, NegSumE, AsE, PsE, SsE, SeE},
{ZmStickyM, KillProdM, InvAM, SCntM, NegSumM, AsM, PsM, SsM, SeM});
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
{CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE},
{CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM});
// BEGIN MEMORY STAGE
@ -381,12 +376,12 @@ module fpu (
assign FpLoadStoreM = FResSelM[1];
postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2M),
.FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM),
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM),
.CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM),
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .W(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM), .DivEarlyTermShift(EarlyTermShiftM),
.FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .DivDone(DivDoneM),
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
.CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM),
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
// FPU flag selection - to privileged
mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
@ -395,9 +390,6 @@ module fpu (
// M/W pipe registers
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResSelM, FmtM},
{FRegWriteW, FResSelW, FmtW});
// BEGIN WRITEBACK STAGE

View File

@ -26,60 +26,59 @@
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fsgninj (
input logic XSgnE, YSgnE, // X and Y sign bits
input logic [`FLEN-1:0] FSrcXE, // X
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
input logic [1:0] SgnOpCodeE, // operation control
output logic [`FLEN-1:0] SgnResE // result
input logic Xs, Ys, // X and Y sign bits
input logic [`FLEN-1:0] X, // X
input logic [`FMTBITS-1:0] Fmt, // format
input logic [1:0] OpCtrl, // operation control
output logic [`FLEN-1:0] SgnRes // result
);
logic ResSgn;
//op code designation:
//
//00 - fsgnj - directly copy over sign value of FSrcYE
//01 - fsgnjn - negate sign value of FSrcYE
//10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE
//
// OpCtrl:
// 00 - fsgnj - directly copy over sign value of Y
// 01 - fsgnjn - negate sign value of Y
// 10 - fsgnjx - XOR sign values of X and Y
// calculate the result's sign
assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE;
assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys;
// format final result based on precision
// - uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
if (`FPSIZES == 1)
assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]};
assign SgnRes = {ResSgn, X[`FLEN-2:0]};
else if (`FPSIZES == 2)
assign SgnResE = {~FmtE|ResSgn, FSrcXE[`FLEN-2:`LEN1], FmtE ? FSrcXE[`LEN1-1] : ResSgn, FSrcXE[`LEN1-2:0]};
assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
else if (`FPSIZES == 3) begin
logic [2:0] SgnBits;
always_comb
case (FmtE)
`FMT: SgnBits = {ResSgn, FSrcXE[`LEN1-1], FSrcXE[`LEN2-1]};
`FMT1: SgnBits = {1'b1, ResSgn, FSrcXE[`LEN2-1]};
case (Fmt)
`FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
`FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
`FMT2: SgnBits = {2'b11, ResSgn};
default: SgnBits = {3{1'bx}};
endcase
assign SgnResE = {SgnBits[2], FSrcXE[`FLEN-2:`LEN1], SgnBits[1], FSrcXE[`LEN1-2:`LEN2], SgnBits[0], FSrcXE[`LEN2-2:0]};
assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
end else if (`FPSIZES == 4) begin
logic [3:0] SgnBits;
always_comb
case (FmtE)
`Q_FMT: SgnBits = {ResSgn, FSrcXE[`D_LEN-1], FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]};
`D_FMT: SgnBits = {1'b1, ResSgn, FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]};
`S_FMT: SgnBits = {2'b11, ResSgn, FSrcXE[`H_LEN-1]};
case (Fmt)
`Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
`D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]};
`S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
`H_FMT: SgnBits = {3'b111, ResSgn};
endcase
assign SgnResE = {SgnBits[3], FSrcXE[`Q_LEN-2:`D_LEN], SgnBits[2], FSrcXE[`D_LEN-2:`S_LEN], SgnBits[1], FSrcXE[`S_LEN-2:`H_LEN], SgnBits[0], FSrcXE[`H_LEN-2:0]};
assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
end
endmodule

View File

@ -42,7 +42,12 @@ module negateintres(
// round and negate the positive res if needed
assign CvtNegRes = Xs ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
assign CvtNegResMsbs = Signed ? Int64 ? CvtNegRes[`XLEN:`XLEN-1] : CvtNegRes[32:31] :
Int64 ? CvtNegRes[`XLEN+1:`XLEN] : CvtNegRes[33:32];
always_comb
if(Signed)
if(Int64) CvtNegResMsbs = CvtNegRes[`XLEN:`XLEN-1];
else CvtNegResMsbs = CvtNegRes[32:31];
else
if(Int64) CvtNegResMsbs = CvtNegRes[`XLEN+1:`XLEN];
else CvtNegResMsbs = CvtNegRes[33:32];
endmodule

177
pipelined/src/fpu/otfc.sv Normal file
View File

@ -0,0 +1,177 @@
///////////////////////////////////////////
// otfc.sv
//
// Written: me@KatherineParry.com, cturek@hmc.edu
// Modified:7/14/2022
//
// Purpose: On the fly conversion
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module otfc2 (
input logic qp, qz,
input logic [`DIVb:0] Q, QM,
output logic [`DIVb:0] QNext, QMNext
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
// Use this otfc for division only.
logic [`DIVb-1:0] QR, QMR;
assign QR = Q[`DIVb-1:0];
assign QMR = QM[`DIVb-1:0]; // Shifted Q and QM
always_comb begin
if (qp) begin
QNext = {QR, 1'b1};
QMNext = {QR, 1'b0};
end else if (qz) begin
QNext = {QR, 1'b0};
QMNext = {QMR, 1'b1};
end else begin // If qp and qz are not true, then qn is
QNext = {QMR, 1'b1};
QMNext = {QMR, 1'b0};
end
end
endmodule
///////////////////////////////
// Square Root OTFC, Radix 2 //
///////////////////////////////
module sotfc2(
input logic sp, sz,
input logic [`DIVb-1:0] C,
input logic [`DIVb:0] S, SM,
output logic [`DIVb:0] SNext, SMNext
);
// The on-the-fly converter transfers the square root
// bits to the quotient as they come.
// Use this otfc for division and square root.
logic [`DIVb:0] CExt;
assign CExt = {1'b1, C};
always_comb begin
if (sp) begin
SNext = S | (CExt & ~(CExt << 1));
SMNext = S;
end else if (sz) begin
SNext = S;
SMNext = SM | (CExt & ~(CExt << 1));
end else begin // If sp and sz are not true, then sn is
SNext = SM | (CExt & ~(CExt << 1));
SMNext = SM;
end
end
endmodule
module otfc4 (
input logic [3:0] q,
input logic [`DIVb:0] Q, QM,
output logic [`DIVb:0] QNext, QMNext
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-4 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [`DIVb-2:0] QR, QMR;
// shift Q (quotent) and QM (quotent-1)
// if q = 2 Q = {Q, 10} QM = {Q, 01}
// else if q = 1 Q = {Q, 01} QM = {Q, 00}
// else if q = 0 Q = {Q, 00} QM = {QM, 11}
// else if q = -1 Q = {QM, 11} QM = {QM, 10}
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
assign QR = Q[`DIVb-2:0];
assign QMR = QM[`DIVb-2:0]; // Shifted Q and QM
always_comb begin
if (q[3]) begin // +2
QNext = {QR, 2'b10};
QMNext = {QR, 2'b01};
end else if (q[2]) begin // +1
QNext = {QR, 2'b01};
QMNext = {QR, 2'b00};
end else if (q[1]) begin // -1
QNext = {QMR, 2'b11};
QMNext = {QMR, 2'b10};
end else if (q[0]) begin // -2
QNext = {QMR, 2'b10};
QMNext = {QMR, 2'b01};
end else begin // 0
QNext = {QR, 2'b00};
QMNext = {QMR, 2'b11};
end
end
// Final Qmeint is in the range [.5, 2)
endmodule
///////////////////////////////
// Square Root OTFC, Radix 4 //
///////////////////////////////
module sotfc4(
input logic [3:0] s,
input logic Sqrt,
input logic [`DIVLEN+3:0] S, SM,
input logic [`DIVLEN+3:0] C,
output logic [`DIVLEN+3:0] SNext, SMNext
);
// The on-the-fly converter transfers the square root
// bits to the quotient as they come.
// Use this otfc for division and square root.
always_comb begin
if (s[3]) begin
SNext = S | ((C << 1)&~(C << 2));
SMNext = S | (C&~(C << 1));
end else if (s[2]) begin
SNext = S | (C&~(C << 1));
SMNext = S;
end else if (s[1]) begin
SNext = SM | (C&~(C << 2));
SMNext = SM | ((C << 1)&~(C << 2));
end else if (s[0]) begin
SNext = SM | ((C << 1)&~(C << 2));
SMNext = SM | (C&~(C << 1));
end else begin // If sp and sn are not true, then sz is
SNext = S;
SMNext = SM | (C & ~(C << 2));
end
end
endmodule

View File

@ -29,14 +29,14 @@
`include "wally-config.vh"
module postprocess(
module postprocess (
// general signals
input logic Xs, Ys, // input signs
input logic [`NE-1:0] Ze, // input exponents
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [2:0] FOpCtrl, // choose which opperation (look below for values)
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
input logic XZero, YZero, ZZero, // inputs are zero
input logic XInf, YInf, ZInf, // inputs are infinity
input logic XNaN, YNaN, ZNaN, // inputs are NaN
@ -46,20 +46,21 @@ module postprocess(
//fma signals
input logic FmaAs, // the modified Z sign - depends on instruction
input logic FmaPs, // the product's sign
input logic [`NE+1:0] FmaSe,
input logic [`NE+1:0] FmaPe, // Product exponent
input logic [3*`NF+5:0] FmaSm, // the positive sum
input logic FmaZmSticky, // sticky bit that is calculated during alignment
input logic FmaZmS, // sticky bit that is calculated during alignment
input logic FmaKillProd, // set the product to zero before addition if the product is too small to matter
input logic FmaNegSum, // was the sum negitive
input logic FmaInvA, // do you invert Z
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count
input logic FmaSs,
input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // the normalization shift count
//divide signals
input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2,
input logic DivSticky,
input logic DivNegSticky,
input logic [`DURLEN-1:0] DivEarlyTermShift,
input logic DivS,
input logic DivDone,
input logic [`NE+1:0] DivCalcExp,
input logic [`DIVLEN+2:0] Quot,
input logic [`NE+1:0] DivQe,
input logic [`DIVb-(`RADIX/4):0] DivQm,
// conversion signals
input logic CvtCs, // the result's sign
input logic [`NE:0] CvtCe, // the calculated expoent
@ -69,7 +70,7 @@ module postprocess(
input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (priority encoder)
input logic IntZero, // is the input zero
// final results
output logic [`FLEN-1:0] W, // FMA final result
output logic [`FLEN-1:0] PostProcRes, // FMA final result
output logic [4:0] PostProcFlg,
output logic [`XLEN-1:0] FCvtIntRes // the int conversion result
);
@ -78,32 +79,31 @@ module postprocess(
logic Ws;
logic [`NF-1:0] Rf; // Result fraction
logic [`NE-1:0] Re; // Result exponent
logic Nsgn;
logic [`NE+1:0] Nexp;
logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction
logic [`NE+1:0] FullResExp; // Re with bits to determine sign and overflow
logic Ms;
logic [`NE+1:0] Me;
logic [`CORRSHIFTSZ-1:0] Mf; // corectly shifted fraction
logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow
logic S; // S bit
logic UfPlus1; // do you add one (for determining underflow flag)
logic R; // bits needed to determine rounding
logic [`FLEN:0] RoundAdd; // how much to add to the result
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
logic Plus1; // add one to the final result?
logic IntInvalid, Overflow, Invalid; // flags
logic UfLSBRes;
logic UfL;
logic [`FMTBITS-1:0] OutFmt;
// fma signals
logic [`NE+1:0] FmaSe; // exponent of the normalized sum
logic FmaSmZero; // is the sum zero
logic [`NE+1:0] FmaMe; // exponent of the normalized sum
logic FmaSZero; // is the sum zero
logic [3*`NF+8:0] FmaShiftIn; // shift input
logic [`NE+1:0] FmaConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
// division singals
logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
logic [`NORMSHIFTSZ-1:0] DivShiftIn;
logic [`NE+1:0] DivCorrExp;
logic [`NE+1:0] Qe;
logic DivByZero;
logic DivResDenorm;
logic [`NE+1:0] DivDenormShift;
@ -125,26 +125,26 @@ module postprocess(
logic Sqrt;
// signals to help readability
assign Signed = FOpCtrl[0];
assign Int64 = FOpCtrl[1];
assign IntToFp = FOpCtrl[2];
assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0];
assign Signed = OpCtrl[0];
assign Int64 = OpCtrl[1];
assign IntToFp = OpCtrl[2];
assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
assign CvtOp = (PostProcSel == 2'b00);
assign FmaOp = (PostProcSel == 2'b10);
assign DivOp = (PostProcSel == 2'b01)&DivDone;
assign Sqrt = FOpCtrl[0];
assign Sqrt = OpCtrl[0];
// is there an input of infinity or NaN being used
assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp);
assign NaNIn = (XNaN&~(IntToFp&CvtOp))|(YNaN&~CvtOp)|(ZNaN&FmaOp);
assign InfIn = XInf|YInf|ZInf;
assign NaNIn = XNaN|YNaN|ZNaN;
// choose the ouptut format depending on the opperation
// - fp -> fp: OpCtrl contains the percision of the output
// - otherwise: Fmt contains the percision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT);
assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0];
assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
///////////////////////////////////////////////////////////////////////////////
// Normalization
@ -152,9 +152,9 @@ module postprocess(
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp,
.ZDenorm, .FmaSmZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShiftDiv2, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaSCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe,
.FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.Fmt, .Sqrt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
always_comb
case(PostProcSel)
@ -183,9 +183,9 @@ module postprocess(
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp,
.DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp,
.DivCorrExp, .FmaSmZero, .Shifted, .FmaSe, .Nfrac);
shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .NormSumExp,
.DivResDenorm, .DivDenormShift, .DivOp, .DivQe,
.Qe, .FmaSZero, .Shifted, .FmaMe, .Mf);
///////////////////////////////////////////////////////////////////////////////
// Rounding
@ -199,19 +199,19 @@ module postprocess(
roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum,
.Xs, .Ys, .CvtCs, .Nsgn);
.Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp,
.FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf,
.DivSticky, .DivNegSticky, .DivDone,
.DivOp, .UfPlus1, .FullResExp, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp);
round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
.Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf,
.DivS, .DivDone,
.DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S,
.FmaOp, .ZInf, .InfIn, .FmaSmZero, .Mult, .Nsgn, .Ws);
resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaMe, .R, .S,
.FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws);
///////////////////////////////////////////////////////////////////////////////
// Flags
@ -219,19 +219,19 @@ module postprocess(
flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
.Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
.XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
.UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
.Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
.NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
.UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
.Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
.IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf,
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
.XInf, .YInf, .DivOp,
.DivByZero, .FullResExp, .CvtCe, .Ws, .Re, .Rf, .W, .FCvtIntRes);
.DivByZero, .FullRe, .CvtCe, .Ws, .Re, .Rf, .PostProcRes, .FCvtIntRes);
endmodule

198
pipelined/src/fpu/qsel.sv Normal file
View File

@ -0,0 +1,198 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module qsel2 ( // *** eventually just change to 4 bits
input logic [`DIVLEN+3:`DIVLEN] ps, pc,
output logic qp, qz//, qn
);
logic [`DIVLEN+3:`DIVLEN] p, g;
logic magnitude, sign, cout;
// The quotient selection logic is presented for simplicity, not
// for efficiency. You can probably optimize your logic to
// select the proper divisor with less delay.
// Qmient equations from EE371 lecture notes 13-20
assign p = ps ^ pc;
assign g = ps & pc;
assign magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
assign cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
assign sign = p[`DIVLEN+3] ^ cout;
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
(ps[52]^pc[52]));
assign #1 sign = (ps[55]^pc[55])^
(ps[54] & pc[54] | ((ps[54]^pc[54]) &
(ps[53]&pc[53] | ((ps[53]^pc[53]) &
(ps[52]&pc[52]))))); */
// Produce quotient = +1, 0, or -1
assign qp = magnitude & ~sign;
assign qz = ~magnitude;
// assign #1 qn = magnitude & sign;
endmodule
////////////////////////////////////
// Adder Input Generation, Radix 2 //
////////////////////////////////////
module fgen2 (
input logic sp, sz,
input logic [`DIVb-1:0] C,
input logic [`DIVb:0] S, SM,
output logic [`DIVb+3:0] F
);
logic [`DIVb+3:0] FP, FN, FZ;
logic [`DIVb+3:0] SExt, SMExt, CExt;
assign SExt = {3'b0, S};
assign SMExt = {3'b0, SM};
assign CExt = {4'hf, C};
// Generate for both positive and negative bits
assign FP = ~(SExt << 1) & CExt;
assign FN = (SMExt << 1) | (CExt & (~CExt << 2));
assign FZ = '0;
// Choose which adder input will be used
always_comb
if (sp) F = FP;
else if (sz) F = FZ;
else F = FN;
endmodule
module qsel4 (
input logic [`DIVN-2:0] D,
input logic [`DIVb+3:0] WS, WC,
input logic Sqrt,
output logic [3:0] q
);
logic [6:0] Wmsbs;
logic [7:0] PreWmsbs;
logic [2:0] Dmsbs;
assign PreWmsbs = WC[`DIVb+3:`DIVb-4] + WS[`DIVb+3:`DIVb-4];
assign Wmsbs = PreWmsbs[7:1];
assign Dmsbs = D[`DIVN-2:`DIVN-4];//|{3{D[`DIVN-2]&Sqrt}};
// D = 0001.xxx...
// Dmsbs = | |
// W = xxxx.xxx...
// Wmsbs = | |
logic [3:0] QSel4[1023:0];
always_comb begin
integer d, w, i, w2;
for(d=0; d<8; d++)
for(w=0; w<128; w++)begin
i = d*128+w;
w2 = w-128*(w>=64); // convert to two's complement
case(d)
0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-4) QSel4[i] = 4'b0000;
else if(w2>=-13) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
1: if(w2>=14) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-5) QSel4[i] = 4'b0000; // was -6
else if(~Sqrt&(w2>=-15)) QSel4[i] = 4'b0010; // divide case
else if( Sqrt&(w2>=-14)) QSel4[i] = 4'b0010; // sqrt case
else QSel4[i] = 4'b0001;
2: if(w2>=15) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-16) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
3: if(w2>=16) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-17) QSel4[i] = 4'b0010; // was -18
else QSel4[i] = 4'b0001;
4: if(w2>=18) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000; // was -8
else if(~Sqrt&(w2>=-20)) QSel4[i] = 4'b0010; // divide case
else if( Sqrt&(w2>=-18)) QSel4[i] = 4'b0010; // sqrt case
else QSel4[i] = 4'b0001;
5: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
6: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-22) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
7: if(w2>=22) QSel4[i] = 4'b1000; // was 24
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-23) QSel4[i] = 4'b0010; // was -24
else QSel4[i] = 4'b0001;
endcase
end
end
assign q = QSel4[{Dmsbs,Wmsbs}];
endmodule
////////////////////////////////////
// Adder Input Generation, Radix 4 //
////////////////////////////////////
module fgen4 (
input logic [3:0] s,
input logic [`DIVLEN+3:0] C, S, SM,
output logic [`DIVLEN+3:0] F
);
logic [`DIVLEN+3:0] F2, F1, F0, FN1, FN2;
// Generate for both positive and negative bits
assign F2 = (~S << 2) & (C << 2);
assign F1 = ~(S << 1) & C;
assign F0 = '0;
assign FN1 = (SM << 1) | (C & ~(C << 3));
assign FN2 = (SM << 2) | ((C << 2)&~(C << 4));
// Choose which adder input will be used
always_comb
if (s[3]) F = F2;
else if (s[2]) F = F1;
else if (s[1]) F = FN1;
else if (s[0]) F = FN2;
else F = F0;
// assign F = sp ? FP : (sn ? FN : FZ);
endmodule

View File

@ -34,33 +34,43 @@ module resultsign(
input logic ZInf,
input logic InfIn,
input logic FmaOp,
input logic [`NE+1:0] FmaSe,
input logic FmaSmZero,
input logic [`NE+1:0] FmaMe,
input logic FmaSZero,
input logic Mult,
input logic R,
input logic S,
input logic Nsgn,
input logic Ms,
output logic Ws
);
logic ZeroSgn;
logic InfSgn;
logic Underflow;
// logic ResultSgnTmp;
logic Zeros;
logic Infs;
// Determine the sign if the sum is zero
// if cancelation then 0 unless round to -infinity
// if multiply then Psgn
// otherwise psign
assign Underflow = FmaSe[`NE+1] | ((FmaSe == 0) & (R|S));
assign ZeroSgn = (FmaPs^FmaAs)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
// The IEEE754-2019 standard specifies:
// - the sign of an exact zero sum (with operands of diffrent signs) should be positive unless rounding toward negitive infinity
// - when the exact result of an FMA opperation is non-zero, but is zero due to rounding, use the sign of the exact result
// - if x = +0 or -0 then x+x=x and x-(-x)=x
// - the sign of a product is the exclisive or or the opperand's signs
// Zero sign will only be selected if:
// - P=Z and a cancelation occurs - exact zero
// - Z is zero and P is zero - exact zero
// - P is killed and Z is zero - Psgn
// - Z is killed and P is zero - impossible
// Zero sign calculation:
// - if a multiply opperation is done, then use the products sign(Ps)
// - if the zero sum is not exactly zero i.e. R|S use the sign of the exact result (which is the product's sign)
// - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign
assign Zeros = (FmaPs^FmaAs)&~(R|S)&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
// is the result negitive
// if p - z is the Sum negitive
// if -p + z is the Sum positive
// if -p - z then the Sum is negitive
assign InfSgn = ZInf ? FmaAs : FmaPs;
assign Ws = InfIn&FmaOp ? InfSgn : FmaSmZero&FmaOp ? ZeroSgn : Nsgn;
assign Infs = ZInf ? FmaAs : FmaPs;
always_comb
if(InfIn&FmaOp) Ws = Infs;
else if(FmaSZero&FmaOp) Ws = Zeros;
else Ws = Ms;
endmodule

View File

@ -46,36 +46,32 @@ module round(
input logic [1:0] PostProcSel,
input logic CvtResDenormUf,
input logic CvtResUf,
input logic [`CORRSHIFTSZ-1:0] Nfrac,
input logic FmaZmSticky, // addend's sticky bit
input logic ZZero, // is Z zero
input logic FmaInvA, // invert Z
input logic [`NE+1:0] FmaSe, // exponent of the normalized sum
input logic Nsgn, // the result's sign
input logic [`CORRSHIFTSZ-1:0] Mf,
input logic FmaZmS, // addend's sticky bit
input logic [`NE+1:0] FmaMe, // exponent of the normalized sum
input logic Ms, // the result's sign
input logic [`NE:0] CvtCe, // the calculated expoent
input logic [`NE+1:0] DivCorrExp, // the calculated expoent
input logic DivSticky, // sticky bit
input logic DivNegSticky,
input logic [`NE+1:0] Qe, // the calculated expoent
input logic DivS, // sticky bit
output logic UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow
output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
output logic [`NF-1:0] Rf, // Result fraction
output logic [`NE-1:0] Re, // Result exponent
output logic S, // sticky bit
output logic [`NE+1:0] Nexp,
output logic [`NE+1:0] Me,
output logic Plus1,
output logic [`FLEN:0] RoundAdd, // how much to add to the result
output logic R, UfLSBRes // bits needed to calculate rounding
output logic R, UfL // bits needed to calculate rounding
);
logic LSBRes; // bit used for rounding - least significant bit of the normalized sum
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
logic NormSumSticky; // normalized sum's sticky bit
logic UfSticky; // sticky bit for underlow calculation
logic L; // bit used for rounding - least significant bit of the normalized sum
logic UfCalcPlus1;
logic NormS; // normalized sum's sticky bit
logic UfS; // sticky bit for underlow calculation
logic [`NF-1:0] RoundFrac;
logic FpRes, IntRes;
logic UfRound;
logic UfR;
logic FpRound, FpLSBRes, FpUfRound;
logic CalcPlus1, FpPlus1;
logic [`FLEN:0] RoundAdd; // how much to add to the result
///////////////////////////////////////////////////////////////////////////////
// Rounding
@ -118,61 +114,61 @@ module round(
// | NF |1|1|
// ^ ^ if floating point result
// ^ if not an FMA result
if (`XLENPOS == 1)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
if (`XLENPOS == 1)assign NormS = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN
if (`XLENPOS == 2)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 2)assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 2) begin
// XLEN is either 64 or 32
// so half and single are always smaller then XLEN
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 3) begin
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 4) begin
// Quad precision will always be greater than XLEN
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
// 3: NF > NF1 > XLEN
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
end
@ -180,37 +176,37 @@ module round(
// only add the Addend sticky if doing an FMA opperation
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
assign UfSticky = FmaZmSticky&FmaOp | NormSumSticky | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivSticky&DivOp;
assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp;
// determine round and LSB of the rounded value
// - underflow round bit is used to determint the underflow flag
if (`FPSIZES == 1) begin
assign FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
assign FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
assign FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
assign FpRound = Mf[`CORRSHIFTSZ-`NF-1];
assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
end else if (`FPSIZES == 2) begin
assign FpRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-1] : Nfrac[`CORRSHIFTSZ-`NF1-1];
assign FpLSBRes = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF] : Nfrac[`CORRSHIFTSZ-`NF1];
assign FpUfRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-2] : Nfrac[`CORRSHIFTSZ-`NF1-2];
assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
FpRound = Mf[`CORRSHIFTSZ-`NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
end
`FMT1: begin
FpRound = Nfrac[`CORRSHIFTSZ-`NF1-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF1];
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF1-2];
FpRound = Mf[`CORRSHIFTSZ-`NF1-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`NF1];
FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2];
end
`FMT2: begin
FpRound = Nfrac[`CORRSHIFTSZ-`NF2-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF2];
FpUfRound = Nfrac[`CORRSHIFTSZ-`NF2-2];
FpRound = Mf[`CORRSHIFTSZ-`NF2-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`NF2];
FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2];
end
default: begin
FpRound = 1'bx;
@ -222,130 +218,97 @@ module round(
always_comb
case (OutFmt)
2'h3: begin
FpRound = Nfrac[`CORRSHIFTSZ-`Q_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`Q_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`Q_NF-2];
FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF];
FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
end
2'h1: begin
FpRound = Nfrac[`CORRSHIFTSZ-`D_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`D_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`D_NF-2];
FpRound = Mf[`CORRSHIFTSZ-`D_NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF];
FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2];
end
2'h0: begin
FpRound = Nfrac[`CORRSHIFTSZ-`S_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`S_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`S_NF-2];
FpRound = Mf[`CORRSHIFTSZ-`S_NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF];
FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2];
end
2'h2: begin
FpRound = Nfrac[`CORRSHIFTSZ-`H_NF-1];
FpLSBRes = Nfrac[`CORRSHIFTSZ-`H_NF];
FpUfRound = Nfrac[`CORRSHIFTSZ-`H_NF-2];
FpRound = Mf[`CORRSHIFTSZ-`H_NF-1];
FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF];
FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2];
end
endcase
end
assign R = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-1] : FpRound;
assign LSBRes = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
assign UfRound = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound;
assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
// used to determine underflow flag
assign UfLSBRes = FpRound;
assign UfL = FpRound;
// determine sticky
assign S = UfSticky | UfRound;
// Deterimine if a small number was supposed to be subtrated
// - for FMA or if division has a negitive sticky bit
assign SubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~(NormSumSticky|UfRound);
assign UfSubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~NormSumSticky;
assign S = UfS | UfR;
always_comb begin
// Determine if you add 1
case (Frm)
3'b000: CalcPlus1 = R & ((S| LSBRes)&~SubBySmallNum);//round to nearest even
3'b000: CalcPlus1 = R & (S| L);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = Nsgn & ~(SubBySmallNum & ~R);//round down
3'b011: CalcPlus1 = ~Nsgn & ~(SubBySmallNum & ~R);//round up
3'b100: CalcPlus1 = R & ~SubBySmallNum;//round to nearest max magnitude
3'b010: CalcPlus1 = Ms;//round down
3'b011: CalcPlus1 = ~Ms;//round up
3'b100: CalcPlus1 = R;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (Frm)
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = Nsgn & ~(UfSubBySmallNum & ~UfRound);//round down
3'b011: UfCalcPlus1 = ~Nsgn & ~(UfSubBySmallNum & ~UfRound);//round up
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
3'b010: UfCalcPlus1 = Ms;//round down
3'b011: UfCalcPlus1 = ~Ms;//round up
3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (Frm)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~R;//round to zero
3'b010: CalcMinus1 = ~Nsgn & ~R & SubBySmallNum;//round down
3'b011: CalcMinus1 = Nsgn & ~R & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
end
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (S | R);
assign FpPlus1 = Plus1&~(ToInt&CvtOp);
assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky
assign Minus1 = CalcMinus1 & (S | R);
assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky
// Compute rounded result
if (`FPSIZES == 1) begin
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1};
assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
end else if (`FPSIZES == 2) begin
// \/FLEN+1
// | NE+2 | NF |
// '-NE+2-^----NF1----^
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} :
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
end else if (`FPSIZES == 3) begin
always_comb begin
case (OutFmt)
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
default: RoundAdd = (`FLEN+1)'(0);
endcase
end
assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
end else if (`FPSIZES == 4) begin
always_comb begin
case (OutFmt)
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
endcase
end
end
end else if (`FPSIZES == 4)
assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
// determine the result to be roundned
assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
always_comb
case(PostProcSel)
2'b10: Nexp = FmaSe; // fma
2'b00: Nexp = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
2'b01: Nexp = DivDone ? DivCorrExp : '0; // divide
default: Nexp = '0;
2'b10: Me = FmaMe; // fma
2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
2'b01: Me = DivDone ? Qe : '0; // divide
default: Me = '0;
endcase
// round the result
// - if the fraction overflows one should be added to the exponent
assign {FullResExp, Rf} = {Nexp, RoundFrac} + RoundAdd;
assign Re = FullResExp[`NE-1:0];
assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
assign Re = FullRe[`NE-1:0];
endmodule

View File

@ -34,27 +34,20 @@ module roundsign(
input logic Xs,
input logic Ys,
input logic FmaNegSum,
input logic Sqrt,
input logic FmaOp,
input logic DivOp,
input logic CvtOp,
input logic CvtCs,
output logic Nsgn
input logic FmaSs,
output logic Ms
);
logic FmaResSgnTmp;
logic DivSgn;
logic Qs;
// is the result negitive
// if p - z is the Sum negitive
// if -p + z is the Sum positive
// if -p - z then the Sum is negitive
assign FmaResSgnTmp = FmaNegSum^FmaPs; //*** move to execute stage
// assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs);
assign DivSgn = Xs^Ys;
assign Qs = Xs^(Ys&~Sqrt);
// Sign for rounding calulation
assign Nsgn = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (DivSgn&DivOp);
assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
endmodule

View File

@ -28,23 +28,22 @@
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module lzacorrection(
module shiftcorrection(
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
input logic FmaOp,
input logic DivOp,
input logic DivResDenorm,
input logic [`NE+1:0] DivCalcExp,
input logic [`NE+1:0] DivQe,
input logic [`NE+1:0] DivDenormShift,
input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection
input logic FmaKillProd, // is the product set to zero
input logic FmaSmZero,
output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction
output logic [`NE+1:0] DivCorrExp,
output logic [`NE+1:0] FmaSe // exponent of the normalized sum
input logic FmaSZero,
output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
output logic [`NE+1:0] Qe,
output logic [`NE+1:0] FmaMe // exponent of the normalized sum
);
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic [`CORRSHIFTSZ:0] CorrQuotShifted;
logic [`CORRSHIFTSZ-1:0] CorrQmShifted;
logic ResDenorm; // is the result denormalized
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
@ -54,16 +53,19 @@ module lzacorrection(
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
// if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
assign CorrQuotShifted = {LZAPlus2|(DivCalcExp==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0};
assign CorrQmShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
// if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
always_comb
if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}};
else if (DivOp&~DivResDenorm) Mf = CorrQmShifted;
else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSmZero|ResDenorm)}};
assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &NormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
// recalculate if the result is denormalized
assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
// the quotent is in the range [.5,2) if there is no early termination
// if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
assign DivCorrExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExp - {(`NE+1)'(0), ~LZAPlus2};
assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2};
endmodule

View File

@ -29,17 +29,17 @@
`include "wally-config.vh"
module resultselect(
module specialcase(
input logic Xs, // input signs
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
input logic XNaN, YNaN, ZNaN, // inputs are NaN
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic InfIn,
input logic NaNIn,
input logic XInf, YInf,
input logic XZero,
input logic IntZero,
input logic NaNIn,
input logic IntToFp,
input logic Int64,
input logic Signed,
@ -53,10 +53,10 @@ module resultselect(
input logic IntInvalid, Invalid, Overflow, // flags
input logic CvtResUf,
input logic [`NE-1:0] Re, // Res exponent
input logic [`NE+1:0] FullResExp, // Res exponent
input logic [`NE+1:0] FullRe, // Res exponent
input logic [`NF-1:0] Rf, // Res fraction
input logic [`XLEN+1:0] CvtNegRes, // the negation of the result
output logic [`FLEN-1:0] W, // final res
output logic [`FLEN-1:0] PostProcRes, // final res
output logic [`XLEN-1:0] FCvtIntRes // final res
);
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results
@ -95,9 +95,14 @@ module resultselect(
end else begin
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
assign OfRes = OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} :
OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
always_comb
if(OutFmt)
if(OfResMax) OfRes = {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}};
else OfRes = {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
else
if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}};
else OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
@ -231,23 +236,24 @@ module resultselect(
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullResExp[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
// output infinity with result sign if divide by zero
if(`IEEE754) begin
assign W = XNaN&~(IntToFp&CvtOp) ? XNaNRes :
YNaN&~CvtOp ? YNaNRes :
ZNaN&FmaOp ? ZNaNRes :
Invalid ? InvalidRes :
SelOfRes ? OfRes :
KillRes ? UfRes :
NormRes;
end else begin
assign W = NaNIn|Invalid ? InvalidRes :
SelOfRes ? OfRes :
KillRes ? UfRes :
NormRes;
end
if(`IEEE754)
always_comb
if(XNaN&~(IntToFp&CvtOp)) PostProcRes = XNaNRes;
else if(YNaN&~CvtOp) PostProcRes = YNaNRes;
else if(ZNaN&FmaOp) PostProcRes = ZNaNRes;
else if(Invalid) PostProcRes = InvalidRes;
else if(SelOfRes) PostProcRes = OfRes;
else if(KillRes) PostProcRes = UfRes;
else PostProcRes = NormRes;
else
always_comb
if(NaNIn|Invalid) PostProcRes = InvalidRes;
else if(SelOfRes) PostProcRes = OfRes;
else if(KillRes) PostProcRes = UfRes;
else PostProcRes = NormRes;
///////////////////////////////////////////////////////////////////////////////////////
//
@ -272,10 +278,17 @@ module resultselect(
// unsigned | 2^32-1 | 2^64-1 |
//
// other: 32 bit unsinged res should be sign extended as if it were a signed number
assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive
{`XLEN{1'b1}};// unsigned positive
always_comb
if(Signed)
if(Xs&~NaNIn) // signed negitive
if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}};
else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
else // signed positive
if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}};
else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
else
if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive
else OfIntRes = {`XLEN{1'b1}}; // unsigned positive
// select the integer output
@ -284,7 +297,11 @@ module resultselect(
// - if rounding and signed opperation and negitive input, output -1
// - otherwise output a rounded 0
// - otherwise output the normal res (trmined and sign extended if nessisary)
assign FCvtIntRes = IntInvalid ? OfIntRes :
CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
always_comb
if(IntInvalid) FCvtIntRes = OfIntRes;
else if(CvtCe[`NE])
if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}};
else FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1};
else if(Int64) FCvtIntRes = CvtNegRes[`XLEN-1:0];
else FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
endmodule

View File

@ -1,312 +0,0 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module srtradix4 (
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE, YZeroE,
input logic [`DIVLEN-1:0] X,
input logic [`DIVLEN-1:0] Dpreproc,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`DIVLEN+2:0] Quot,
output logic [`DIVLEN+3:0] WSN, WCN,
output logic [`DIVLEN+3:0] WS, WC,
output logic [`NE+1:0] DivCalcExpM,
output logic [`XLEN-1:0] Rem
);
logic [3:0] q;
logic [`DIVLEN+3:0] WSA;
logic [`DIVLEN+3:0] WCA;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel;
logic [`NE+1:0] DivCalcExp;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign;
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
// - when the start signal is asserted X and 0 are loaded into WS and WC
// - otherwise load WSA into the flipflop
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
// *** change this for radix 4 - generate w/ stine code
// q encoding:
// 1000 = +2
// 0100 = +1
// 0000 = 0
// 0010 = -1
// 0001 = -2
qsel4 qsel4(.D, .WS, .WC, .q);
// Divisor Selection logic
// *** radix 4 change to choose -2 to 2
// - choose the negitive version of what's being selected
assign DBar = ~D;
assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
assign D2 = {D[`DIVLEN+2:0], 1'b0};
always_comb
case (q)
4'b1000: Dsel = DBar2;
4'b0100: Dsel = DBar;
4'b0000: Dsel = {(`DIVLEN+4){1'b0}};
4'b0010: Dsel = D;
4'b0001: Dsel = D2;
default: Dsel = {`DIVLEN+4{1'bx}};
endcase
// Partial Product Generation
// WSA, WCA = WS + WC - qD
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
//*** change for radix 4
otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot);
expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
endmodule
////////////////
// Submodules //
////////////////
module qsel4 (
input logic [`DIVLEN+3:0] D,
input logic [`DIVLEN+3:0] WS, WC,
output logic [3:0] q
);
logic [6:0] Wmsbs;
logic [7:0] PreWmsbs;
logic [2:0] Dmsbs;
assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
assign Wmsbs = PreWmsbs[7:1];
assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
// D = 0001.xxx...
// Dmsbs = | |
// W = xxxx.xxx...
// Wmsbs = | |
logic [3:0] QSel4[1023:0];
initial begin
integer d, w, i, w2;
for(d=0; d<8; d++)
for(w=0; w<128; w++)begin
i = d*128+w;
w2 = w-128*(w>=64); // convert to two's complement
case(d)
0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-4) QSel4[i] = 4'b0000;
else if(w2>=-13) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
1: if(w2>=14) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-15) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
2: if(w2>=15) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-16) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
3: if(w2>=16) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-18) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
4: if(w2>=18) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
5: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
6: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-22) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
7: if(w2>=24) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-24) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
endcase
end
end
assign q = QSel4[{Dmsbs,Wmsbs}];
endmodule
///////////////////////////////////
// On-The-Fly Converter, Radix 2 //
///////////////////////////////////
module otfc4 (
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [3:0] q,
output logic [`DIVLEN+2:0] Quot
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-4 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [`DIVLEN:0] QR, QMR;
// if starting a new divison set Q to 0 and QM to -1
mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
flopen #(`DIVLEN+3) Qreg(clk, DivBusy|DivStart, QMux, Quot); // *** have to connect Quot directly to M stage
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
// shift Q (quotent) and QM (quotent-1)
// if q = 2 Q = {Q, 10} QM = {Q, 01}
// else if q = 1 Q = {Q, 01} QM = {Q, 00}
// else if q = 0 Q = {Q, 00} QM = {QM, 11}
// else if q = -1 Q = {QM, 11} QM = {QM, 10}
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
// *** how does the 0 concatination numbers work?
always_comb begin
QR = Quot[`DIVLEN:0];
QMR = QM[`DIVLEN:0]; // Shift Q and QM
if (q[3]) begin // +2
QNext = {QR, 2'b10};
QMNext = {QR, 2'b01};
end else if (q[2]) begin // +1
QNext = {QR, 2'b01};
QMNext = {QR, 2'b00};
end else if (q[1]) begin // -1
QNext = {QMR, 2'b11};
QMNext = {QMR, 2'b10};
end else if (q[0]) begin // -2
QNext = {QMR, 2'b10};
QMNext = {QMR, 2'b01};
end else begin // 0
QNext = {QR, 2'b00};
QMNext = {QMR, 2'b11};
end
end
// Final Quoteint is in the range [.5, 2)
endmodule
/////////
// csa //
/////////
module csa #(parameter N=69) (
input logic [N-1:0] in1, in2, in3,
input logic cin,
output logic [N-1:0] out1, out2
);
// This block adds in1, in2, in3, and cin to produce
// a result out1 / out2 in carry-save redundant form.
// cin is just added to the least significant bit and
// is Startuired to handle adding a negative divisor.
// Fortunately, the carry (out2) is shifted left by one
// bit, leaving room in the least significant bit to
// insert cin.
assign out1 = in1 ^ in2 ^ in3;
assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
(in2[N-2:0] & in3[N-2:0]), cin};
endmodule
module expcalc(
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`NE+1:0] DivCalcExp
);
logic [`NE-2:0] Bias;
if (`FPSIZES == 1) begin
assign Bias = (`NE-1)'(`BIAS);
end else if (`FPSIZES == 2) begin
assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
end else if (`FPSIZES == 3) begin
always_comb
case (FmtE)
`FMT: Bias = (`NE-1)'(`BIAS);
`FMT1: Bias = (`NE-1)'(`BIAS1);
`FMT2: Bias = (`NE-1)'(`BIAS2);
default: Bias = 'x;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (FmtE)
2'h3: Bias = (`NE-1)'(`Q_BIAS);
2'h1: Bias = (`NE-1)'(`D_BIAS);
2'h0: Bias = (`NE-1)'(`S_BIAS);
2'h2: Bias = (`NE-1)'(`H_BIAS);
endcase
end
// correct exponent for denormalized input's normalization shifts
assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
endmodule

275
pipelined/src/fpu/srt.sv Normal file
View File

@ -0,0 +1,275 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module srt(
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [`NE-1:0] Xe, Ye,
input logic XZeroE, YZeroE,
input logic Sqrt,
input logic [`DIVb:0] X,
input logic [`DIVN-2:0] Dpreproc,
input logic NegSticky,
output logic [`DIVb-(`RADIX/4):0] Qm,
output logic [`DIVb+3:0] NextWSN, NextWCN,
output logic [`DIVb+3:0] StickyWSA,
output logic [`DIVb+3:0] FirstWS, FirstWC
);
//QLEN = 1.(number of bits created for division)
// N is NF+1 or XLEN
// WC/WS is dependent on D so 4.N-1 ie N+3 bits or N+2:0 + one more bit in fraction for possible sqrt right shift
// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-1:0
// Dsel should match WC/WS so 4.N-1 ie N+3 bits or N+2:0
// Q/QM/S/SM should be 1.b so b+1 bits or b:0
// C needs to be the lenght of the final fraction 0.b so b or b-1:0
/* verilator lint_off UNOPTFLAT */
logic [`DIVb+3:0] WSA[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WCA[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WS[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WC[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb:0] Q[`DIVCOPIES-1:0]; // U1.b
logic [`DIVb:0] QM[`DIVCOPIES-1:0];// 1.b
logic [`DIVb:0] QNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] QMNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] S[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] SM[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] SNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] SMNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb-1:0] C[`DIVCOPIES-1:0]; // 0.b
/* verilator lint_on UNOPTFLAT */
logic [`DIVb+3:0] WSN, WCN; // Q4.N-1
logic [`DIVN-2:0] D; // U0.N-1
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.N-1
logic [`DIVb:0] QMMux;
logic [`DIVb-1:0] NextC;
logic [`DIVb-1:0] CMux;
logic [`DIVb:0] SMux;
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
// - when the start signal is asserted X and 0 are loaded into WS and WC
// - otherwise load WSA into the flipflop
// - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
// - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
if (`RADIX == 2) begin : nextw
assign NextWSN = {WSA[`DIVCOPIES-1][`DIVb+2:0], 1'b0};
assign NextWCN = {WCA[`DIVCOPIES-1][`DIVb+2:0], 1'b0};
assign NextC = {1'b1, C[`DIVCOPIES-1][`DIVb-1:1]};
end else begin
assign NextWSN = {WSA[`DIVCOPIES-1][`DIVb+1:0], 2'b0};
assign NextWCN = {WCA[`DIVCOPIES-1][`DIVb+1:0], 2'b0};
assign NextC = {2'b11, C[`DIVCOPIES-1][`DIVb-1:2]};
end
// mux2 #(`DIVb+4) wsmux(NextWSN, {{3{Sqrt}}, X}, DivStart, WSN); //*** modified for sqrt which doesnt work
// flopen #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]);
// mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN);
// flopen #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
// flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D);
// mux2 #(`DIVb) Cmux(NextC, {Sqrt, {(`DIVb-1){1'b0}}}, DivStart, CMux);
// flop #(`DIVb) cflop(clk, CMux, C[0]);
mux2 #(`DIVb+4) wsmux(NextWSN, {3'b0, X}, DivStart, WSN);
flopen #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]);
mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN);
flopen #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D);
mux2 #(`DIVb) Cmux({2'b11, C[`DIVCOPIES-1][`DIVb-1:2]}, {Sqrt, {(`DIVb-1){1'b0}}}, DivStart, CMux);
flop #(`DIVb) cflop(clk, CMux, C[0]);
// Divisor Selections
// - choose the negitive version of what's being selected
// - D is only the fraction
assign DBar = {3'b111, 1'b0, ~D, {`DIVb-`DIVN+1{1'b1}}};
if(`RADIX == 4) begin : d2
assign DBar2 = {2'b11, 1'b0, ~D, {`DIVb+2-`DIVN{1'b1}}};
assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}};
end
genvar i;
generate
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations
divinteration divinteration(.D, .DBar, .D2, .DBar2, .Sqrt,
.WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]),
.C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i]));
if(i<(`DIVCOPIES-1)) begin
if (`RADIX==2)begin
assign WS[i+1] = {WSA[i][`DIVb+2:0], 1'b0};
assign WC[i+1] = {WCA[i][`DIVb+2:0], 1'b0};
assign C[i+1] = {1'b1, C[i][`DIVb-1:1]};
end else begin
assign WS[i+1] = {WSA[i][`DIVb+1:0], 2'b0};
assign WC[i+1] = {WCA[i][`DIVb+1:0], 2'b0};
assign C[i+1] = {2'b11, C[i][`DIVb-1:2]};
end
assign Q[i+1] = QNext[i];
assign QM[i+1] = QMNext[i];
assign S[i+1] = SNext[i];
assign SM[i+1] = SMNext[i];
end
end
endgenerate
// if starting a new divison set Q to 0 and QM to -1
mux2 #(`DIVb+1) QMmux(QMNext[`DIVCOPIES-1], '1, DivStart, QMMux);
flopenr #(`DIVb+1) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
flopen #(`DIVb+1) QMreg(clk, DivBusy, QMMux, QM[0]);
flopr #(`DIVb+1) SMreg(clk, DivStart, SMNext[`DIVCOPIES-1], SM[0]);
mux2 #(`DIVb+1) Smux(SNext[`DIVCOPIES-1], {Sqrt, {(`DIVb){1'b0}}}, DivStart, SMux);
flop #(`DIVb+1) Sreg(clk, SMux, S[0]);
// division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
always_comb
if(Sqrt) // sqrt ouputs in the range (1, .5]
if(NegSticky) Qm = {SM[0][`DIVb-1-(`RADIX/4):0], 1'b0};
else Qm = {S[0][`DIVb-1-(`RADIX/4):0], 1'b0};
else
if(NegSticky) Qm = QM[0][`DIVb-(`RADIX/4):0];
else Qm = Q[0][`DIVb-(`RADIX/4):0];
assign FirstWS = WS[0];
assign FirstWC = WC[0];
if(`RADIX==2)
if (`DIVCOPIES == 1)
assign StickyWSA = {WSA[0][`DIVb+2:0], 1'b0};
else
assign StickyWSA = {WSA[1][`DIVb+2:0], 1'b0};
endmodule
////////////////
// Submodules //
////////////////
/* verilator lint_off UNOPTFLAT */
module divinteration (
input logic [`DIVN-2:0] D,
input logic [`DIVb+3:0] DBar, D2, DBar2,
input logic [`DIVb:0] Q, QM,
input logic [`DIVb:0] S, SM,
input logic [`DIVb+3:0] WS, WC,
input logic [`DIVb-1:0] C,
input logic Sqrt,
output logic [`DIVb:0] QNext, QMNext,
output logic [`DIVb:0] SNext, SMNext,
output logic [`DIVb+3:0] WSA, WCA
);
/* verilator lint_on UNOPTFLAT */
logic [`DIVb+3:0] Dsel;
logic [3:0] q;
logic qp, qz;
logic [`DIVb+3:0] F;
logic [`DIVb+3:0] AddIn;
// Qmient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
// q encoding:
// 1000 = +2
// 0100 = +1
// 0000 = 0
// 0010 = -1
// 0001 = -2
if(`RADIX == 2) begin : qsel
qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz);
fgen2 fgen2(.sp(qp), .sz(qz), .C, .S, .SM, .F);
end else begin
qsel4 qsel4(.D, .WS, .WC, .Sqrt, .q);
// fgen4 fgen4(.s(q), .C, .S, .SM, .F);
end
if(`RADIX == 2) begin : dsel
assign Dsel = {`DIVb+4{~qz}}&(qp ? DBar : {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}});
end else begin
always_comb
case (q)
4'b1000: Dsel = DBar2;
4'b0100: Dsel = DBar;
4'b0000: Dsel = '0;
4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}};
4'b0001: Dsel = D2;
default: Dsel = 'x;
endcase
end
// Partial Product Generation
// WSA, WCA = WS + WC - qD
assign AddIn = Sqrt ? F : Dsel;
if (`RADIX == 2) begin : csa
csa #(`DIVb+4) csa(WS, WC, AddIn, qp&~Sqrt, WSA, WCA);
end else begin
csa #(`DIVb+4) csa(WS, WC, AddIn, |q[3:2]&~Sqrt, WSA, WCA);
end
if (`RADIX == 2) begin : otfc
otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext);
sotfc2 sotfc2(.sp(qp), .sz(qz), .C, .S, .SM, .SNext, .SMNext);
end else begin
otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext);
// sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext);
end
endmodule
/////////
// csa //
/////////
module csa #(parameter N=69) (
input logic [N-1:0] in1, in2, in3,
input logic cin,
output logic [N-1:0] out1, out2
);
// This block adds in1, in2, in3, and cin to produce
// a result out1 / out2 in carry-save redundant form.
// cin is just added to the least significant bit and
// is Startuired to handle adding a negative divisor.
// Fortunately, the carry (out2) is shifted left by one
// bit, leaving room in the least significant bit to
// insert cin.
assign out1 = in1 ^ in2 ^ in3;
assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
(in2[N-2:0] & in3[N-2:0]), cin};
endmodule

View File

@ -33,47 +33,57 @@
module srtfsm(
input logic clk,
input logic reset,
input logic [`DIVLEN+3:0] WSN, WCN, WS, WC,
input logic [`DIVb+3:0] NextWSN, NextWCN, WS, WC,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic DivStart,
input logic StallE,
input logic StallM,
input logic [$clog2(`DIVLEN/2+3)-1:0] Dur,
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
output logic DivStickyE,
input logic XsE,
input logic SqrtE,
input logic StallE,
input logic StallM,
input logic [`DIVb+3:0] StickyWSA,
input logic [`DURLEN-1:0] Dur,
output logic [`DURLEN-1:0] EarlyTermShiftE,
output logic DivSE,
output logic DivDone,
output logic DivNegStickyE,
output logic NegSticky,
output logic DivBusy
);
typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
statetype state;
logic [$clog2(`DIVLEN/2+3)-1:0] step;
logic [`DURLEN-1:0] step;
logic WZero;
//logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
logic [`DIVLEN+3:0] W;
logic [`DIVb+3:0] W;
//flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
assign DivBusy = (state == BUSY);
assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0});
assign DivStickyE = ~WZero;
assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0});
// calculate sticky bit
// - there is a chance that a value is subtracted infinitly, resulting in an exact QM result
// this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant
// radix-4 division can't create a QM that continually adds 0's
if (`RADIX == 2)
assign DivSE = |W&~(StickyWSA == WS);
else
assign DivSE = |W;
assign DivDone = (state == DONE);
assign W = WC+WS;
assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
assign EarlyTermShiftDiv2E = step;
assign NegSticky = W[`DIVb+3];
assign EarlyTermShiftE = step;
always_ff @(posedge clk) begin
if (reset) begin
state <= #1 IDLE;
end else if (DivStart&~StallE) begin
step <= Dur;
if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE;
if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE|(XsE&SqrtE)) state <= #1 DONE;
else state <= #1 BUSY;
end else if (state == BUSY) begin
if ((~|step[$clog2(`DIVLEN/2+3)-1:1]&step[0])|WZero) begin
if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin
state <= #1 DONE;
end
step <= step - 1;

View File

@ -31,16 +31,25 @@
`include "wally-config.vh"
module srtpreproc (
input logic [`NF:0] XManE, YManE,
output logic [`DIVLEN-1:0] X,
output logic [`DIVLEN-1:0] Dpreproc,
output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [$clog2(`DIVLEN/2+3)-1:0] Dur
input logic clk,
input logic DivStart,
input logic [`NF:0] Xm, Ym,
input logic [`NE-1:0] Xe, Ye,
input logic [`FMTBITS-1:0] Fmt,
input logic Sqrt,
input logic XZero,
output logic [`NE+1:0] QeM,
output logic [`DIVb:0] X,
output logic [`DIVN-2:0] Dpreproc,
output logic [`DURLEN-1:0] Dur
);
// logic [`XLEN-1:0] PosA, PosB;
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
logic [`DIVLEN-1:0] PreprocA, PreprocX;
logic [`DIVLEN-1:0] PreprocB, PreprocY;
logic [`NF-1:0] PreprocA, PreprocX;
logic [`NF-1:0] PreprocB, PreprocY;
logic [`NF+1:0] SqrtX;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [`NE+1:0] Qe;
// assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
// assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
@ -49,24 +58,79 @@ module srtpreproc (
// ***can probably merge X LZC with conversion
// cout the number of leading zeros
lzc #(`NF+1) lzcA (XManE, XZeroCnt);
lzc #(`NF+1) lzcB (YManE, YZeroCnt);
lzc #(`NF+1) lzcX (Xm, XZeroCnt);
lzc #(`NF+1) lzcY (Ym, YZeroCnt);
// assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
// assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
// assign PreprocA = ExtraA << zeroCntA;
// assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign PreprocX = Xm[`NF-1:0]<<XZeroCnt;
assign PreprocY = Ym[`NF-1:0]<<YZeroCnt;
assign X = PreprocX;
assign Dpreproc = PreprocY;
assign SqrtX = Xe[0]^XZeroCnt[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0};
assign X = Sqrt ? {SqrtX, {`DIVb-1-`NF{1'b0}}} : {~XZero, PreprocX, {`DIVb-`NF{1'b0}}};
assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}};
assign Dur = (`DURLEN)'(`FPDUR);
assign Dur = ($clog2(`DIVLEN/2+3))'(`DIVLEN/2+2);
// assign intExp = zeroCntB - zeroCntA + 1;
// assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
// radix 2 radix 4
// 1 copies DIVLEN+2 DIVLEN+2/2
// 2 copies DIVLEN+2/2 DIVLEN+2/2*2
// 4 copies DIVLEN+2/4 DIVLEN+2/2*4
// 8 copies DIVLEN+2/8 DIVLEN+2/2*8
// DIVRESLEN = DIVLEN or DIVLEN+2
// r = 1 or 2
// DIVRESLEN/(r*`DIVCOPIES)
flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM);
expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .XZeroCnt, .YZeroCnt, .Qe);
endmodule
module expcalc(
input logic [`FMTBITS-1:0] Fmt,
input logic [`NE-1:0] Xe, Ye,
input logic Sqrt,
input logic XZero,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`NE+1:0] Qe
);
logic [`NE-2:0] Bias;
logic [`NE+1:0] SXExp;
logic [`NE+1:0] SExp;
logic [`NE+1:0] DExp;
if (`FPSIZES == 1) begin
assign Bias = (`NE-1)'(`BIAS);
end else if (`FPSIZES == 2) begin
assign Bias = Fmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
end else if (`FPSIZES == 3) begin
always_comb
case (Fmt)
`FMT: Bias = (`NE-1)'(`BIAS);
`FMT1: Bias = (`NE-1)'(`BIAS1);
`FMT2: Bias = (`NE-1)'(`BIAS2);
default: Bias = 'x;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (Fmt)
2'h3: Bias = (`NE-1)'(`Q_BIAS);
2'h1: Bias = (`NE-1)'(`D_BIAS);
2'h0: Bias = (`NE-1)'(`S_BIAS);
2'h2: Bias = (`NE-1)'(`H_BIAS);
endcase
end
assign SXExp = {2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - (`NE+1)'(`BIAS);
assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
// correct exponent for denormalized input's normalization shifts
assign DExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZero}};
assign Qe = Sqrt ? SExp : DExp;
endmodule

View File

@ -30,35 +30,35 @@
module unpack (
input logic [`FLEN-1:0] X, Y, Z, // inputs from register file
input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision)
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
output logic XDenormE, ZDenormE, // is XYZ denormalized
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
output logic XExpMaxE // does X have the maximum exponent (NaN or Inf)
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
input logic XEn, YEn, ZEn,
output logic Xs, Ys, Zs, // sign bits of XYZ
output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
output logic XNaN, YNaN, ZNaN, // is XYZ a NaN
output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
output logic XDenorm, ZDenorm, // is XYZ denormalized
output logic XZero, YZero, ZZero, // is XYZ zero
output logic XInf, YInf, ZInf, // is XYZ infinity
output logic XExpMax // does X have the maximum exponent (NaN or Inf)
);
logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
logic YExpMaxE, ZExpMaxE; // is the exponent all 1s
logic YExpMax, ZExpMax; // is the exponent all 1s
unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE),
.NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero),
.Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero));
unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn),
.NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
.Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero));
unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE),
.NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero),
.Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero));
unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn),
.NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
.Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero));
unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE),
.NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero),
.Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero));
unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn),
.NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
.Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero));
// is the input denormalized
assign XDenormE = ~XExpNonZero & ~XFracZero;
assign ZDenormE = ~ZExpNonZero & ~ZFracZero;
assign XDenorm = ~XExpNonZero & ~XFracZero;
assign ZDenorm = ~ZExpNonZero & ~ZFracZero;
endmodule

View File

@ -30,7 +30,8 @@
module unpackinput (
input logic [`FLEN-1:0] In, // inputs from register file
input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half
input logic En, // enable the input
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic Sgn, // sign bits of XYZ
output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision)
@ -74,16 +75,16 @@ module unpackinput (
// quad and half
// double and half
assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
// choose sign bit depending on format - 1=larger precsion 0=smaller precision
assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1];
assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1];
// extract the fraction, add trailing zeroes to the mantissa if nessisary
assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
// is the exponent non-zero
assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];
assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];
// example double to single conversion:
// 1023 = 0011 1111 1111
@ -95,10 +96,10 @@ module unpackinput (
// extract the exponent, converting the smaller exponent into the larger precision if nessisary
// - if the original precision had a denormal number convert the exponent value 1
assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
// is the exponent all 1's
assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
end else if (`FPSIZES == 3) begin // three floating point precsions supported
@ -122,7 +123,7 @@ module unpackinput (
// Check NaN boxing
always_comb
case (FmtE)
case (Fmt)
`FMT: BadNaNBox = 0;
`FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
`FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
@ -131,7 +132,7 @@ module unpackinput (
// extract the sign bit
always_comb
case (FmtE)
case (Fmt)
`FMT: Sgn = In[`FLEN-1];
`FMT1: Sgn = In[`LEN1-1];
`FMT2: Sgn = In[`LEN2-1];
@ -140,7 +141,7 @@ module unpackinput (
// extract the fraction
always_comb
case (FmtE)
case (Fmt)
`FMT: Frac = In[`NF-1:0];
`FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
`FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
@ -149,7 +150,7 @@ module unpackinput (
// is the exponent non-zero
always_comb
case (FmtE)
case (Fmt)
`FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double)
`FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single)
`FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
@ -166,7 +167,7 @@ module unpackinput (
// convert the larger precision's exponent to use the largest precision's bias
always_comb
case (FmtE)
case (Fmt)
`FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
`FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
`FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero};
@ -175,7 +176,7 @@ module unpackinput (
// is the exponent all 1's
always_comb
case (FmtE)
case (Fmt)
`FMT: ExpMax = &In[`FLEN-2:`NF];
`FMT1: ExpMax = &In[`LEN1-2:`NF1];
`FMT2: ExpMax = &In[`LEN2-2:`NF2];
@ -194,7 +195,7 @@ module unpackinput (
// Check NaN boxing
always_comb
case (FmtE)
case (Fmt)
2'b11: BadNaNBox = 0;
2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
@ -203,7 +204,7 @@ module unpackinput (
// extract sign bit
always_comb
case (FmtE)
case (Fmt)
2'b11: Sgn = In[`Q_LEN-1];
2'b01: Sgn = In[`D_LEN-1];
2'b00: Sgn = In[`S_LEN-1];
@ -213,7 +214,7 @@ module unpackinput (
// extract the fraction
always_comb
case (FmtE)
case (Fmt)
2'b11: Frac = In[`Q_NF-1:0];
2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
@ -222,7 +223,7 @@ module unpackinput (
// is the exponent non-zero
always_comb
case (FmtE)
case (Fmt)
2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF];
@ -240,7 +241,7 @@ module unpackinput (
// convert the double precsion exponent into quad precsion
always_comb
case (FmtE)
case (Fmt)
2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
@ -250,7 +251,7 @@ module unpackinput (
// is the exponent all 1's
always_comb
case (FmtE)
case (Fmt)
2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
2'b00: ExpMax = &In[`S_LEN-2:`S_NF];
@ -262,8 +263,8 @@ module unpackinput (
// Output logic
assign FracZero = ~|Frac; // is the fraction zero?
assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if denormal or zero) to create the significand
assign NaN = (ExpMax & ~FracZero)|BadNaNBox; // is the input a NaN?
assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN?
assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN?
assign Inf = ExpMax & FracZero; // is the input infinity?
assign Inf = ExpMax & FracZero &En; // is the input infinity?
assign Zero = ~ExpNonZero & FracZero; // is the input zero?
endmodule

View File

@ -34,7 +34,7 @@ module lzc #(parameter WIDTH = 1) (
/* verilator lint_off CMPCONST */
/* verilator lint_off WIDTH */
int i;
logic [31:0] i;
always_comb begin
i = 0;
while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one

View File

@ -196,13 +196,13 @@ module ifu (
if (`IBUS) begin : bus
localparam integer WORDSPERLINE = (CACHE_ENABLED) ? `ICACHE_LINELENINBITS/`XLEN : 1;
localparam integer LINELEN = (CACHE_ENABLED) ? `ICACHE_LINELENINBITS : `XLEN;
localparam integer LOGWPL = (`DMEM == `MEM_CACHE) ? $clog2(WORDSPERLINE) : 1;
localparam integer LOGBWPL = (`DMEM == `MEM_CACHE) ? $clog2(WORDSPERLINE) : 1;
logic [LINELEN-1:0] ICacheBusWriteData;
logic [`PA_BITS-1:0] ICacheBusAdr;
logic ICacheBusAck;
logic SelUncachedAdr;
busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
busdp #(WORDSPERLINE, LINELEN, LOGBWPL, CACHE_ENABLED)
busdp(.clk, .reset,
.LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusInit(IFUBusInit), .LSUBusWrite(), .LSUBusWriteCrit(),
.LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUBurstType(IFUBurstType), .LSUTransType(IFUTransType), .LSUTransComplete(IFUTransComplete),
@ -222,11 +222,11 @@ module ifu (
if(CACHE_ENABLED) begin : icache
cache #(.LINELEN(`ICACHE_LINELENINBITS),
.NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS),
.NUMWAYS(`ICACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0))
.NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0))
icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0),
.CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck),
.CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF),
.CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(),
.CacheFetchLine(ICacheFetchLine),
.CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
.Cacheable(CacheableF),
.CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),
@ -236,7 +236,7 @@ module ifu (
.Atomic('0), .FlushCache('0),
.NextAdr(PCNextFSpill[11:0]),
.PAdr(PCPF),
.CacheCommitted(), .InvalidateCacheM(InvalidateICacheM));
.CacheCommitted(), .InvalidateCache(InvalidateICacheM));
end else begin : passthrough
assign {ICacheFetchLine, ICacheBusAdr, ICacheStallF, FinalInstrRawF} = '0;

View File

@ -126,10 +126,16 @@ module busfsm #(parameter integer WordCountThreshold,
else BusNextState = STATE_BUS_READY;
STATE_BUS_CPU_BUSY: if(CPUBusy) BusNextState = STATE_BUS_CPU_BUSY;
else BusNextState = STATE_BUS_READY;
STATE_BUS_FETCH: if (WordCountFlag & LSUBusAck) BusNextState = STATE_BUS_READY;
else BusNextState = STATE_BUS_FETCH;
STATE_BUS_WRITE: if(WordCountFlag & LSUBusAck) BusNextState = STATE_BUS_READY;
else BusNextState = STATE_BUS_WRITE;
STATE_BUS_FETCH: if (WordCountFlag & LSUBusAck) begin
if (DCacheFetchLine) BusNextState = STATE_BUS_FETCH;
else if (DCacheWriteLine) BusNextState = STATE_BUS_WRITE;
else BusNextState = STATE_BUS_READY;
end else BusNextState = STATE_BUS_FETCH;
STATE_BUS_WRITE: if(WordCountFlag & LSUBusAck) begin
if (DCacheFetchLine) BusNextState = STATE_BUS_FETCH;
else if (DCacheWriteLine) BusNextState = STATE_BUS_WRITE;
else BusNextState = STATE_BUS_READY;
end else BusNextState = STATE_BUS_WRITE;
default: BusNextState = STATE_BUS_READY;
endcase
end

View File

@ -58,7 +58,7 @@ module lsu (
input logic sfencevmaM,
// fpu
input logic [`FLEN-1:0] FWriteDataM,
input logic FLoad2,
input logic FStore2,
input logic FpLoadStoreM,
// faults
output logic LoadPageFaultM, StoreAmoPageFaultM,
@ -77,8 +77,6 @@ module lsu (
(* mark_debug = "true" *) output logic [2:0] LSUBurstType,
(* mark_debug = "true" *) output logic [1:0] LSUTransType,
(* mark_debug = "true" *) output logic LSUTransComplete,
output logic [(`XLEN-1)/8:0] ByteMaskM,
// page table walker
input logic [`XLEN-1:0] SATP_REGW, // from csr
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
@ -116,6 +114,7 @@ module lsu (
logic [`XLEN-1:0] LSUWriteDataM;
logic [`XLEN-1:0] WriteDataM;
logic [`LLEN-1:0] ReadDataM;
logic [(`LLEN-1)/8:0] ByteMaskM, FinalByteMaskM;
// *** TO DO: Burst mode
@ -192,7 +191,8 @@ module lsu (
// Memory System
// Either Data Cache or Data Tightly Integrated Memory or just bus interface
/////////////////////////////////////////////////////////////////////////////////////////////
logic [`XLEN-1:0] AMOWriteDataM, FinalWriteDataM, LittleEndianWriteDataM;
logic [`XLEN-1:0] AMOWriteDataM, IEUWriteDataM, LittleEndianWriteDataM;
logic [`LLEN-1:0] FinalWriteDataM;
logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM;
logic [`LLEN-1:0] ReadDataWordMuxM;
logic IgnoreRequest;
@ -202,24 +202,24 @@ module lsu (
if (`DMEM == `MEM_TIM) begin : dtim
// *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW.
// Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops
dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM,
dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData
.ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
.DCacheStallM, .DCacheCommittedM, .ByteMaskM, .Cacheable(CacheableM),
.DCacheStallM, .DCacheCommittedM, .ByteMaskM(ByteMaskM[`XLEN/8-1:0]), .Cacheable(CacheableM),
.DCacheMiss, .DCacheAccess);
end
if (`DBUS) begin : bus
localparam CACHE_ENABLED = `DMEM == `MEM_CACHE;
localparam integer WORDSPERLINE = (CACHE_ENABLED) ? `DCACHE_LINELENINBITS/`XLEN : 1;
localparam integer LINELEN = (CACHE_ENABLED) ? `DCACHE_LINELENINBITS : `XLEN;
localparam integer LOGWPL = (CACHE_ENABLED) ? $clog2(WORDSPERLINE) : 1;
localparam integer LOGBWPL = (CACHE_ENABLED) ? $clog2(WORDSPERLINE) : 1;
logic [LINELEN-1:0] DCacheBusWriteData;
logic [`PA_BITS-1:0] DCacheBusAdr;
logic DCacheWriteLine;
logic DCacheFetchLine;
logic DCacheBusAck;
logic [LOGWPL-1:0] WordCount;
logic [LOGBWPL-1:0] WordCount;
busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) busdp(
busdp #(WORDSPERLINE, LINELEN, LOGBWPL, CACHE_ENABLED) busdp(
.clk, .reset,
.LSUBusHRDATA, .LSUBusAck, .LSUBusInit, .LSUBusWrite, .LSUBusRead, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
.WordCount, .LSUBusWriteCrit,
@ -230,21 +230,25 @@ module lsu (
mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DCacheBusWriteData[`XLEN-1:0]}),
.s(SelUncachedAdr), .y(ReadDataWordMuxM));
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(FinalWriteDataM),
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM),
.s(SelUncachedAdr), .y(LSUBusHWDATA));
if(CACHE_ENABLED) begin : dcache
if (`LLEN>`XLEN)
mux2 #(`LLEN) datamux({IEUWriteDataM, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM);
else
assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM};
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
.NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
.clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
.FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
.ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2,
.ByteMask(FinalByteMaskM), .WordCount,
.FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
.IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM),
.CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM),
.CacheBusWriteData(DCacheBusWriteData), .CacheFetchLine(DCacheFetchLine),
.CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCacheM(1'b0));
.CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0));
end else begin : passthrough
assign {ReadDataWordM, DCacheStallM, DCacheCommittedM, DCacheFetchLine, DCacheWriteLine} = '0;
@ -272,7 +276,13 @@ module lsu (
subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]),
.FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]),
.LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM);
.LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM);
// Compute byte masks
swbytemaskword #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM));
// *** fix when when fstore2 is valid. I'm not sure this is even needed if LSUFunct3M can be 3'b100 for a 16 byte write.
//assign FinalByteMaskM = FStore2 ? '1 : ByteMaskM;
assign FinalByteMaskM = ByteMaskM;
/////////////////////////////////////////////////////////////////////////////////////////////
// MW Pipeline Register
@ -286,10 +296,10 @@ module lsu (
// swap the bytes when read from big-endian memory
/////////////////////////////////////////////////////////////////////////////////////////////
if (`BIGENDIAN_SUPPORTED) begin:endian
bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM));
bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM));
bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM));
end else begin
assign FinalWriteDataM = LittleEndianWriteDataM;
assign IEUWriteDataM = LittleEndianWriteDataM;
assign LittleEndianReadDataWordM = ReadDataWordM;
end

View File

@ -34,13 +34,8 @@ module subwordwrite (
input logic [2:0] LSUPAdrM,
input logic [2:0] LSUFunct3M,
input logic [`XLEN-1:0] AMOWriteDataM,
output logic [`XLEN-1:0] LittleEndianWriteDataM,
output logic [`XLEN/8-1:0] ByteMaskM
);
output logic [`XLEN-1:0] LittleEndianWriteDataM);
// Compute byte masks
swbytemask swbytemask(.Size(LSUFunct3M[1:0]), .Adr(LSUPAdrM), .ByteMask(ByteMaskM));
// Replicate data for subword writes
if (`XLEN == 64) begin:sww
always_comb

View File

@ -0,0 +1,59 @@
///////////////////////////////////////////
// swbytemask.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: On-chip RAM, external to core
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR CO///////////////////////////////////////////
// swbytemask.sv
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module swbytemaskword #(parameter WORDLEN = 64)(
input logic [2:0] Size,
input logic [$clog2(WORDLEN/8)-1:0] Adr,
output logic [WORDLEN/8-1:0] ByteMask);
assign ByteMask = ((2**(2**Size))-1) << Adr;
/* Equivalent to the following for WORDLEN = 64
if(WORDLEN == 64) begin
always_comb begin
case(Size[1:0])
2'b00: begin ByteMask = 8'b00000000; ByteMask[Adr[2:0]] = 1; end // sb
2'b01: case (Adr[2:1])
2'b00: ByteMask = 8'b0000_0011;
2'b01: ByteMask = 8'b0000_1100;
2'b10: ByteMask = 8'b0011_0000;
2'b11: ByteMask = 8'b1100_0000;
endcase
2'b10: if (Adr[2]) ByteMask = 8'b11110000;
else ByteMask = 8'b00001111;
2'b11: ByteMask = 8'b1111_1111;
default ByteMask = 8'b0000_0000;
endcase
end
end
*/
endmodule

View File

@ -172,8 +172,8 @@ module plic_apb (
end
// pending interrupt requests
//assign nextIntPending = (intPending | requests) & ~intInProgress; //
assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
assign nextIntPending = (intPending | requests) & ~intInProgress; // dh changed back 7/9/22 see if Buildroot still boots. Confirmed to boot successfully.
//assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending);
// context-dependent signals

View File

@ -165,6 +165,7 @@ module uartPC16550D(
SCR <= #1 8'b0; // not strictly necessary to reset
end else begin
if (~MEMWb) begin
/* verilator lint_off CASEINCOMPLETE */
case (A)
/* -----\/----- EXCLUDED -----\/-----
3'b000: if (DLAB) DLL <= #1 Din; // else TXHR <= #1 Din; // TX handled in TX register/FIFO section
@ -177,34 +178,42 @@ module uartPC16550D(
// freq /baud / 16 = div
//3'b000: if (DLAB) DLL <= #1 8'd38; //else TXHR <= #1 Din; // TX handled in TX register/FIFO section
//3'b000: if (DLAB) DLL <= #1 8'd11; //else TXHR <= #1 Din; // TX handled in
3'b000: if (DLAB) DLL <= #1 8'd8; //else TXHR <= #1 Din; // TX handled in
3'b000: if (DLAB) DLL <= #1 8'd8; //else TXHR <= #1 Din; // TX handled in
3'b001: if (DLAB) DLM <= #1 8'b0; else IER <= #1 Din[3:0];
3'b010: FCR <= #1 {Din[7:6], 2'b0, Din[3], 2'b0, Din[0]}; // Write only FIFO Control Register; 4:5 reserved and 2:1 self-clearing
3'b011: LCR <= #1 Din;
3'b100: MCR <= #1 Din[4:0];
3'b101: LSR[6:1] <= #1 Din[6:1]; // recommended only for test, see 8.6.3
3'b110: MSR <= #1 Din[3:0];
3'b111: SCR <= #1 Din;
endcase
/* verilator lint_on CASEINCOMPLETE */
end
// Line Status Register (8.6.3)
// Ben 6/9/21 I don't like how this is a register. A lot of the individual bits have clocked components, so this just adds unecessary delay.
LSR[0] <= #1 rxdataready; // Data ready
LSR[1] <= #1 (LSR[1] | RXBR[10]) & ~squashRXerrIP;; // overrun error
LSR[2] <= #1 (LSR[2] | RXBR[9]) & ~squashRXerrIP; // parity error
LSR[3] <= #1 (LSR[3] | RXBR[8]) & ~squashRXerrIP; // framing error
LSR[4] <= #1 (LSR[4] | rxbreak) & ~squashRXerrIP; // break indicator
LSR[5] <= #1 THRE; // THRE
LSR[6] <= #1 ~txsrfull & THRE; // TEMT
if (rxfifohaserr) LSR[7] <= #1 1; // any bits in FIFO have error
// Ben 6/9/21 I don't like how this is a register. A lot of the individual bits have clocked components, so this just adds unecessary delay.
if (~MEMWb & (A == 3'b101))
LSR[6:1] <= #1 Din[6:1]; // recommended only for test, see 8.6.3
else begin
LSR[0] <= #1 rxdataready; // Data ready
LSR[1] <= #1 (LSR[1] | RXBR[10]) & ~squashRXerrIP;; // overrun error
LSR[2] <= #1 (LSR[2] | RXBR[9]) & ~squashRXerrIP; // parity error
LSR[3] <= #1 (LSR[3] | RXBR[8]) & ~squashRXerrIP; // framing error
LSR[4] <= #1 (LSR[4] | rxbreak) & ~squashRXerrIP; // break indicator
LSR[5] <= #1 THRE; // THRE
LSR[6] <= #1 ~txsrfull & THRE; // TEMT
if (rxfifohaserr) LSR[7] <= #1 1; // any bits in FIFO have error
end
// Modem Status Register (8.6.8)
MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send
MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready
MSR[2] <= #1 MSR[2] | (~RIb2 & RIbsync); // Trailing Edge of Ring Indicator
MSR[3] <= #1 MSR[3] | DCDb2 ^ DCDbsync; // Delta Data Carrier Detect
if (~MEMWb & (A == 3'b110))
MSR <= #1 Din[3:0];
else if (~MEMRb & (A == 3'b110))
MSR <= #1 4'b0; // Reading MSR clears the flags in MSR bits 3:0
else begin
MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send
MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready
MSR[2] <= #1 MSR[2] | (~RIb2 & RIbsync); // Trailing Edge of Ring Indicator
MSR[3] <= #1 MSR[3] | DCDb2 ^ DCDbsync; // Delta Data Carrier Detect
end
end
always_comb
if (~MEMRb)
@ -215,7 +224,8 @@ module uartPC16550D(
3'b011: Dout = LCR;
3'b100: Dout = {3'b000, MCR};
3'b101: Dout = LSR;
3'b110: Dout = {~CTSbsync, ~DSRbsync, ~RIbsync, ~DCDbsync, MSR[3:0]};
// 3'b110: Dout = {~CTSbsync, ~DSRbsync, ~RIbsync, ~DCDbsync, MSR[3:0]};
3'b110: Dout = {~DCDbsync, ~RIbsync, ~DSRbsync, ~CTSbsync, MSR[3:0]};
3'b111: Dout = SCR;
endcase
else Dout = 8'b0;
@ -304,7 +314,7 @@ module uartPC16550D(
// ERROR CONDITIONS
assign rxparity = ^rxdata;
assign rxparityerr = rxparity ^ rxparitybit ^ ~evenparitysel; // Check even/odd parity (*** check if LCR needs to be inverted)
assign rxparityerr = (rxparity ^ rxparitybit ^ ~evenparitysel) & LCR[3]; // Check even/odd parity (*** check if LCR needs to be inverted)
assign rxoverrunerr = fifoenabled ? (rxfifoentries == 15) : rxdataready; // overrun if FIFO or receive buffer register full
assign rxframingerr = ~rxstopbit; // framing error if no stop bit
assign rxbreak = rxframingerr & (rxdata9 == 9'b0); // break when 0 for start + data + parity + stop time
@ -314,11 +324,13 @@ module uartPC16550D(
if (~PRESETn) begin
rxfifohead <= #1 0; rxfifotail <= #1 0; rxdataready <= #1 0; RXBR <= #1 0;
end else begin
if (rxstate == UART_DONE) begin
if (~MEMWb & (A == 3'b010) & Din[1]) begin
rxfifohead <= #1 0; rxfifotail <= #1 0; rxdataready <= #1 0;
end else if (rxstate == UART_DONE) begin
RXBR <= #1 {rxoverrunerr, rxparityerr, rxframingerr, rxdata}; // load recevive buffer register
if (rxoverrunerr) $warning("UART RX Overrun Error\n");
if (rxparityerr) $warning("UART RX Parity Error\n");
if (rxframingerr) $warning("UART RX Framing Error\n");
if (rxoverrunerr) $warning("UART RX Overrun Err\n");
if (rxparityerr) $warning("UART RX Parity Err\n");
if (rxframingerr) $warning("UART RX Framing Err\n");
if (fifoenabled) begin
rxfifo[rxfifohead] <= #1 {rxoverrunerr, rxparityerr, rxframingerr, rxdata};
rxfifohead <= #1 rxfifohead + 1;
@ -327,7 +339,8 @@ module uartPC16550D(
end else if (~MEMRb & A == 3'b000 & ~DLAB) begin // reading RBR updates ready / pops fifo
if (fifoenabled) begin
if (~rxfifoempty) rxfifotail <= #1 rxfifotail + 1;
if (rxfifoempty) rxdataready <= #1 0;
// if (rxfifoempty) rxdataready <= #1 0;
if (rxfifoentries == 1) rxdataready <= #1 0; // When reading the last entry, data ready becomes zero
end else begin
rxdataready <= #1 0;
RXBR <= #1 {1'b0, RXBR[9:0]}; // Ben 31 March 2022: I added this so that rxoverrunerr permanently goes away upon reading RBR (when not in FIFO mode)
@ -405,7 +418,7 @@ module uartPC16550D(
txstate <= #1 UART_IDLE;
end
assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s)
assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s) - 1
// *** explain; is this necessary?
if (`QEMU) assign txnextbit = txbaudpulse & (txoversampledcnt[1:0] == 2'b00); // implies txstate = UART_ACTIVE
else assign txnextbit = txbaudpulse & (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE
@ -438,6 +451,8 @@ module uartPC16550D(
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) begin
txfifohead <= #1 0; txfifotail <= #1 0; txhrfull <= #1 0; txsrfull <= #1 0; TXHR <= #1 0; txsr <= #1 12'hfff;
end else if (~MEMWb & (A == 3'b010) & Din[2]) begin
txfifohead <= #1 0; txfifotail <= #1 0;
end else begin
if (~MEMWb & A == 3'b000 & ~DLAB) begin // writing transmit holding register or fifo
if (fifoenabled) begin
@ -451,7 +466,7 @@ module uartPC16550D(
end
if (txstate == UART_IDLE) begin // move data into tx shift register if available
if (fifoenabled) begin
if (~txfifoempty) begin
if (~txfifoempty & ~txsrfull) begin
txsr <= #1 txdata;
txfifotail <= #1 txfifotail+1;
txsrfull <= #1 1;

View File

@ -93,7 +93,7 @@ module wallypipelinedcore (
logic FStallD;
logic FWriteIntE;
logic [`XLEN-1:0] FWriteDataE;
logic FLoad2;
logic FStore2;
logic [`FLEN-1:0] FWriteDataM;
logic [`XLEN-1:0] FIntResM;
logic [`XLEN-1:0] FCvtIntResW;
@ -116,7 +116,6 @@ module wallypipelinedcore (
logic [1:0] PageType;
logic sfencevmaM, wfiM, IntPendingM;
logic SelHPTW;
logic [`XLEN/8-1:0] ByteMaskM;
// PMA checker signals
@ -259,14 +258,13 @@ module wallypipelinedcore (
.CommittedM, .DCacheMiss, .DCacheAccess,
.SquashSCW,
.FpLoadStoreM,
.FWriteDataM, .FLoad2,
.FWriteDataM, .FStore2,
//.DataMisalignedM(DataMisalignedM),
.IEUAdrE, .IEUAdrM, .WriteDataE,
.ReadDataW, .FlushDCacheM,
// connected to ahb (all stay the same)
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit,
.LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
.ByteMaskM,
// connect to csr or privilege and stay the same.
.PrivilegeModeW, .BigEndianM, // connects to csr
@ -313,7 +311,6 @@ module wallypipelinedcore (
.LSUTransComplete,
.LSUBusAck,
.LSUBusInit,
.ByteMaskM,
.HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn,
.HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST,
@ -400,7 +397,7 @@ module wallypipelinedcore (
.STATUS_FS, // is floating-point enabled?
.FRegWriteM, // FP register write enable
.FpLoadStoreM,
.FLoad2,
.FStore2,
.FStallD, // Stall the decode stage
.FWriteIntE, // integer register write enable
.FWriteDataE, // Data to be written to memory

View File

@ -1,4 +1,4 @@
all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen
all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen modtestgen
sqrttestgen: sqrttestgen.c
gcc sqrttestgen.c -o sqrttestgen -lm
@ -28,6 +28,10 @@ inttestgen: inttestgen.c
gcc -lm -o inttestgen inttestgen.c
./inttestgen
modtestgen: modtestgen.c
gcc -lm -o modtestgen modtestgen.c
./modtestgen
clean:
rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen
rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen modtestgen

Binary file not shown.

View File

@ -1,15 +1,14 @@
/* testgen.c */
/* Written 10/31/96 by David Harris
/* Written 7/21/2022 by Cedar Turek
This program creates test vectors for mantissa component
of an IEEE floating point divider.
This program creates test vectors for integer divide.
*/
/* #includes */
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#include <math.h>
/* Constants */
@ -19,7 +18,7 @@
/* Prototypes */
void output(FILE *fptr, long a, long b, long r, long rem);
void output(FILE *fptr, long a, long b, long r);
void printhex(FILE *fptr, long x);
double random_input(void);
@ -28,7 +27,7 @@ double random_input(void);
void main(void)
{
FILE *fptr;
long a, b, r, rem;
long a, b, r;
long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255};
int i, j;
@ -42,32 +41,22 @@ void main(void)
for (j=0; j<ENTRIES; j++) {
a = list[j];
r = a/b;
rem = a%b;
output(fptr, a, b, r, rem);
output(fptr, a, b, r);
}
}
// for (i = 0; i< RANDOM_VECS; i++) {
// a = random_input();
// b = random_input();
// r = a/b;
// output(fptr, a, b, r);
// }
fclose(fptr);
}
/* Functions */
void output(FILE *fptr, long a, long b, long r, long rem)
void output(FILE *fptr, long a, long b, long r)
{
printhex(fptr, a);
fprintf(fptr, "_");
printhex(fptr, b);
fprintf(fptr, "_");
printhex(fptr, r);
fprintf(fptr, "_");
printhex(fptr, rem);
fprintf(fptr, "\n");
}

View File

@ -1,2 +1 @@
verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv

BIN
pipelined/srt/modtestgen Executable file

Binary file not shown.

View File

@ -0,0 +1,73 @@
/* testgen.c */
/* Written 7/21/2022 by Cedar Turek
This program creates test vectors for modulo
calculation from integer divide.
*/
/* #includes */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* Constants */
#define ENTRIES 10
#define RANDOM_VECS 500
/* Prototypes */
void output(FILE *fptr, long a, long b, long rem);
void printhex(FILE *fptr, long x);
double random_input(void);
/* Main */
void main(void)
{
FILE *fptr;
long a, b, rem;
long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255};
int i, j;
if ((fptr = fopen("modtestvectors","w")) == NULL) {
fprintf(stderr, "Couldn't write testvectors file\n");
exit(1);
}
for (i=0; i<ENTRIES; i++) {
b = list[i];
for (j=0; j<ENTRIES; j++) {
a = list[j];
rem = a%b;
output(fptr, a, b, rem);
}
}
fclose(fptr);
}
/* Functions */
void output(FILE *fptr, long a, long b, long rem)
{
printhex(fptr, a);
fprintf(fptr, "_");
printhex(fptr, b);
fprintf(fptr, "_");
printhex(fptr, rem);
fprintf(fptr, "\n");
}
void printhex(FILE *fptr, long m)
{
fprintf(fptr, "%016llx", m);
}
double random_input(void)
{
return 1.0 + rand()/32767.0;
}

View File

@ -1,198 +0,0 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
printf("\tcase({D[5:3],Wmsbs})\n");
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
printf("\t\t10'b");
disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
printf("_");
disp_binary((double) pla.tot, TOT_SIZE, 0);
printf(": q = 4'b");
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 12)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -4)
printf("0000");
else if ((pla.tot) >= -13)
printf("0010");
else
printf("0001");
break;
case 1:
if ((pla.tot) >= 14)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -6)
printf("0000");
else if ((pla.tot) >= -15)
printf("0010");
else
printf("0001");
break;
case 2:
if ((pla.tot) >= 15)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -6)
printf("0000");
else if ((pla.tot) >= -16)
printf("0010");
else
printf("0001");
break;
case 3:
if ((pla.tot) >= 16)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -6)
printf("0000");
else if ((pla.tot) >= -18)
printf("0010");
else
printf("0001");
break;
case 4:
if ((pla.tot) >= 18)
printf("1000");
else if ((pla.tot) >= 6)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -20)
printf("0010");
else
printf("0001");
break;
case 5:
if ((pla.tot) >= 20)
printf("1000");
else if ((pla.tot) >= 6)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -20)
printf("0010");
else
printf("0001");
break;
case 6:
if ((pla.tot) >= 20)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -22)
printf("0010");
else
printf("0001");
break;
case 7:
if ((pla.tot) >= 24)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -24)
printf("0010");
else
printf("0001");
break;
default: printf ("XXX");
}
printf(";\n");
(pla.tot)++;
}
(pla.divisor)++;
}
printf("\tendcase\n");
}

Binary file not shown.

View File

@ -1,190 +0,0 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 12)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -4)
printf("0");
else if ((pla.tot) >= -13)
printf("2");
else
printf("1");
break;
case 1:
if ((pla.tot) >= 14)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -6)
printf("0");
else if ((pla.tot) >= -15)
printf("2");
else
printf("1");
break;
case 2:
if ((pla.tot) >= 15)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -6)
printf("0");
else if ((pla.tot) >= -16)
printf("2");
else
printf("1");
break;
case 3:
if ((pla.tot) >= 16)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -6)
printf("0");
else if ((pla.tot) >= -18)
printf("2");
else
printf("1");
break;
case 4:
if ((pla.tot) >= 18)
printf("8");
else if ((pla.tot) >= 6)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -20)
printf("2");
else
printf("1");
break;
case 5:
if ((pla.tot) >= 20)
printf("8");
else if ((pla.tot) >= 6)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -20)
printf("2");
else
printf("1");
break;
case 6:
if ((pla.tot) >= 20)
printf("8");
else if ((pla.tot) >= 8)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -22)
printf("2");
else
printf("1");
break;
case 7:
if ((pla.tot) >= 24)
printf("8");
else if ((pla.tot) >= 8)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -24)
printf("2");
else
printf("1");
break;
default: printf ("X");
}
printf("\n");
(pla.tot)++;
}
(pla.divisor)++;
}
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -1,198 +0,0 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
printf("\tcase({D[5:3],Wmsbs})\n");
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
printf("\t\t11'b");
disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
printf("_");
disp_binary((double) pla.tot, TOT_SIZE, 0);
printf(": q = 4'b");
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 24)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -26)
printf("0010");
else
printf("0001");
break;
case 1:
if ((pla.tot) >= 28)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -10)
printf("0000");
else if ((pla.tot) >= -28)
printf("0010");
else
printf("0001");
break;
case 2:
if ((pla.tot) >= 32)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -32)
printf("0010");
else
printf("0001");
break;
case 3:
if ((pla.tot) >= 32)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -34)
printf("0010");
else
printf("0001");
break;
case 4:
if ((pla.tot) >= 36)
printf("1000");
else if ((pla.tot) >= 12)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -36)
printf("0010");
else
printf("0001");
break;
case 5:
if ((pla.tot) >= 40)
printf("1000");
else if ((pla.tot) >= 12)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -40)
printf("0010");
else
printf("0001");
break;
case 6:
if ((pla.tot) >= 40)
printf("1000");
else if ((pla.tot) >= 16)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -44)
printf("0010");
else
printf("0001");
break;
case 7:
if ((pla.tot) >= 44)
printf("1000");
else if ((pla.tot) >= 16)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -46)
printf("0010");
else
printf("0001");
break;
default: printf ("XXX");
}
printf(";\n");
(pla.tot)++;
}
(pla.divisor)++;
}
printf("\tendcase\n");
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -1,6 +1,6 @@
/* sqrttestgen.c */
/* Written 19 October 2021 David_Harris@hmc.edu
/* Written 7/22/2022 by Cedar Turek
This program creates test vectors for mantissa component
of an IEEE floating point square root.
@ -15,6 +15,7 @@
/* Constants */
#define ENTRIES 17
#define BIGENT 1000
#define RANDOM_VECS 500
/* Prototypes */
@ -30,15 +31,14 @@ void main(void)
FILE *fptr;
double aFrac, rFrac;
int aExp, rExp;
double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
double mans[ENTRIES] = {1, 1849.0/1024, 1.25, 1.125, 1.0625,
1.75, 1.875, 1.99999,
1.1, 1.2, 1.01, 1.001, 1.0001,
<<<<<<< Updated upstream
1/1.1, 1/1.5, 1/1.25, 1/1.125};
=======
1.1, 1.5, 1.01, 1.001, 1.0001,
2/1.1, 2/1.5, 2/1.25, 2/1.125};
>>>>>>> Stashed changes
double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
double bigtest[BIGENT];
double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 1, 10,
11, 12, 13, 14, 15, 16};
int i;
int bias = 1023;
@ -48,13 +48,23 @@ void main(void)
exit(1);
}
for (i=0; i<ENTRIES; i++) {
aFrac = mans[i];
aExp = exps[i] + bias;
rFrac = sqrt(aFrac * pow(2, aExp - bias));
rExp = (int) (log(rFrac)/log(2) + bias);
output(fptr, aExp, aFrac, rExp, rFrac);
}
// Small Test
// for (i=0; i<ENTRIES; i++) {
// aFrac = mans[i];
// aExp = exps[i] + bias;
// rFrac = sqrt(aFrac * pow(2, exps[i]));
// rExp = (int) (log(rFrac)/log(2) + bias);
// output(fptr, aExp, aFrac, rExp, rFrac);
// }
// WS
// Test 1: sqrt(1) = 1 0000 0000 0000 00
// Test 2: sqrt(1849/1024) = 43/32 0000 1100 1110 01
// Test 3: sqrt(5) 0000 0100 0000 00
// Test 4: sqrt(9) = 3 1111 1001 0000 00
// Test 5: sqrt(17) 0000 0001 0000 00
// Test 6: sqrt(56) 1111 1110 0000 00
// Test 7: sqrt(120) 0000 1110 0000 00
// for (i = 0; i< RANDOM_VECS; i++) {
// a = random_input();
@ -62,6 +72,16 @@ void main(void)
// output(fptr, a, r);
// }
// Big Test
for (i=0; i<BIGENT; i++) {
bigtest[i] = random_input();
aFrac = bigtest[i];
aExp = (i - BIGENT/2) + bias;
rFrac = sqrt(aFrac * pow(2, (i - BIGENT/2)));
rExp = (int) (log(rFrac)/log(2) + bias);
output(fptr, aExp, aFrac, rExp, rFrac);
}
fclose(fptr);
}
@ -69,14 +89,19 @@ void main(void)
void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac)
{
// Print a in standard double format
fprintf(fptr, "%03x", aExp);
printhex(fptr, aFrac);
fprintf(fptr, "_");
// Spacing for testbench, value doesn't matter
fprintf(fptr, "%016x", 0);
fprintf(fptr, "_");
// Print r in standard double format
fprintf(fptr, "%03x", rExp);
printhex(fptr, rFrac);
fprintf(fptr, "\n");
}
void printhex(FILE *fptr, double m)
@ -95,6 +120,6 @@ void printhex(FILE *fptr, double m)
double random_input(void)
{
return 1.0 + rand()/32767.0;
return 1.0 + ((rand() % 32768)/32767.0);
}

View File

@ -1,5 +1,7 @@
add wave -noupdate /testbench/*
add wave -noupdate /testbench/srt/*
add wave -noupdate /testbench/srt/otfc2/*
add wave -noupdate /testbench/srt/sotfc2/*
add wave -noupdate /testbench/srt/preproc/*
add wave -noupdate /testbench/srt/postproc/*
add wave -noupdate /testbench/srt/expcalc/*
add wave -noupdate /testbench/srt/divcounter/*

View File

@ -2,7 +2,7 @@
// srt.sv
//
// Written: David_Harris@hmc.edu 13 January 2022
// Modified: cturek@hmc.edu June 2022
// Modified: cturek@hmc.edu July 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
@ -29,40 +29,42 @@
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
module srt (
input logic clk,
input logic Start,
input logic Stall, // *** multiple pipe stages
input logic Flush, // *** multiple pipe stages
// Floating Point Inputs
// later add exponents, signs, special cases
// Floating Point
input logic XSign, YSign,
input logic [`NE-1:0] XExp, YExp,
input logic [`NF-1:0] SrcXFrac, SrcYFrac,
// Integer
input logic [`XLEN-1:0] SrcA, SrcB,
// Customization
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
input logic W64, // 32-bit ints on XLEN=64
// Selection
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Mod, // perform remainder calculation (modulo) instead of divide
input logic Sqrt, // perform square root, not divide
output logic rsign, done,
output logic [`DIVLEN-1:0] Rem, Quot, // *** later handle integers
output logic [`DIVLEN-1:0] Result,
output logic [`NE-1:0] rExp,
output logic [3:0] Flags
);
logic qp, qz, qm; // quotient is +1, 0, or -1
logic [`NE-1:0] calcExp;
logic calcSign;
logic [`DIVLEN+3:0] X, Dpreproc;
logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
logic intSign;
logic qp, qz, qn; // result bits are +1, 0, or -1
logic [`NE-1:0] calcExp;
logic calcSign;
logic [`DIVLEN+3:0] X, Dpreproc, C, F, S, SM, AddIn;
logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
logic [$clog2(`XLEN+1)-1:0] zeroCntD, intExp, dur, calcDur;
logic intSign;
logic cin;
srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign);
srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Mod, Sqrt, X, Dpreproc, zeroCntD, intExp, calcDur, intSign);
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
@ -76,26 +78,35 @@ module srt (
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm);
qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], Sqrt, qp, qz, qn);
flopen #(`NE) expflop(clk, Start, calcExp, rExp);
flopen #(1) signflop(clk, Start, calcSign, rsign);
flopen #(7) durflop(clk, Start, calcDur, dur);
counter divcounter(clk, Start, dur, done);
srtcounter divcounter(clk, Start, dur, done);
// Divisor Selection logic
assign Db = ~D;
mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel);
mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qn, Dsel);
// If only implementing division, use divide otfc
// otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot);
// otherwise use sotfc
creg sotfcC(clk, Start, Sqrt, C);
sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, S, SM);
fsel2 fsel(qp, qn, C, S, SM, F);
// Adder input selection
assign AddIn = Sqrt ? F : Dsel;
// Partial Product Generation
csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
assign cin = ~Sqrt & qp;
csa #(`DIVLEN+4) csa(WS, WC, AddIn, cin, WSA, WCA);
otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
signcalc signcalc(.XSign, .YSign, .calcSign);
srtpostproc postproc(.WS, .WC, .X, .D, .S, .SM, .dur, .zeroCntD, .XSign, .YSign, .Signed, .Int, .Mod, .Result, .calcSign);
endmodule
////////////////
@ -113,47 +124,59 @@ module srtpreproc (
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Mod, // perform remainder calculation (modulo) instead of divide
input logic Sqrt, // perform square root, not divide
output logic [`DIVLEN+3:0] X, D,
output logic [$clog2(`XLEN+1)-1:0] intExp, dur, // Quotient integer exponent
output logic [$clog2(`XLEN+1)-1:0] zeroCntB, intExp, dur, // Quotient integer exponent
output logic intSign // Quotient integer sign
);
logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
logic [$clog2(`XLEN+1)-1:0] zeroCntA;
logic [`XLEN-1:0] PosA, PosB;
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX, SqrtX;
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX;
logic [`NF+4:0] SqrtX;
// Generate positive integer inputs if they are signed
assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
// Calculate leading zeros of integer inputs
lzc #(`XLEN) lzcA (PosA, zeroCntA);
lzc #(`XLEN) lzcB (PosB, zeroCntB);
// Make integers have DIVLEN bits
assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
// Shift integers to have leading ones
assign PreprocA = ExtraA << (zeroCntA + 1);
assign PreprocB = ExtraB << (zeroCntB + 1);
// Make mantissas have DIVLEN bits
assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
// Selecting correct divider inputs
assign DivX = Int ? PreprocA : PreprocX;
assign SqrtX = {XExp[0] ? 4'b0000 : 4'b1111, SrcXFrac};
assign X = Sqrt ? SqrtX : {4'b0001, DivX};
assign SqrtX = XExp[0] ? {5'b11101, SrcXFrac} : {4'b1111, SrcXFrac, 1'b0};
assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX};
assign D = {4'b0001, Int ? PreprocB : PreprocY};
assign intExp = zeroCntB - zeroCntA + 1;
// Integer exponent and sign calculations
assign intExp = zeroCntB - zeroCntA - Mod + (PreprocA >= PreprocB);
assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2);
// Number of cycles of divider
assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN);
endmodule
/////////////////////////////////
// Quotient Selection, Radix 2 //
/////////////////////////////////
module qsel2 ( // *** eventually just change to 4 bits
module qsel2 (
input logic [`DIVLEN+3:`DIVLEN] ps, pc,
output logic qp, qz, qm
input logic Sqrt,
output logic qp, qz, qn
);
logic [`DIVLEN+3:`DIVLEN] p, g;
@ -168,7 +191,7 @@ module qsel2 ( // *** eventually just change to 4 bits
assign g = ps & pc;
assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN])));
assign #1 sign = p[`DIVLEN+3] ^ cout;
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
(ps[52]^pc[52]));
@ -180,7 +203,7 @@ module qsel2 ( // *** eventually just change to 4 bits
// Produce quotient = +1, 0, or -1
assign #1 qp = magnitude & ~sign;
assign #1 qz = ~magnitude;
assign #1 qm = magnitude & sign;
assign #1 qn = magnitude & sign;
endmodule
////////////////////////////////////
@ -191,45 +214,41 @@ module fsel2 (
input logic [`DIVLEN+3:0] C, S, SM,
output logic [`DIVLEN+3:0] F
);
logic [`DIVLEN+3:0] FP, FN;
logic [`DIVLEN+3:0] FP, FN, FZ;
// Generate for both positive and negative bits
assign FP = ~S & C;
assign FN = SM | (C & (~C << 2));
assign FP = ~(S << 1) & C;
assign FN = (SM << 1) | (C & (~C << 2));
assign FZ = '0;
// Choose which adder input will be used
assign F = sp ? FP : (sn ? FN : (`DIVLEN+4){1'b0});
always_comb
if (sp) F = FP;
else if (sn) F = FN;
else F = FZ;
// assign F = sp ? FP : (sn ? FN : FZ);
endmodule
///////////////////////////////////
// On-The-Fly Converter, Radix 2 //
///////////////////////////////////
module otfc2 #(parameter N=64) (
module otfc2 #(parameter N=66) (
input logic clk,
input logic Start,
input logic qp, qz, qm,
output logic [N-1:0] r
input logic qp, qz, qn,
output logic [N-3:0] Result
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-2 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
// bits to the quotient as they come.
// Use this otfc for division only.
logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [N+1:0] QR, QMR;
flopr #(N+3) Qreg(clk, Start, QNext, Q);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux);
mux2 #(`DIVLEN+3) Qmux(QMNext, {(`DIVLEN+3){1'b1}}, Start, QMMux);
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
always_comb begin
@ -241,35 +260,73 @@ module otfc2 #(parameter N=64) (
end else if (qz) begin
QNext = {QR, 1'b0};
QMNext = {QMR, 1'b1};
end else begin // If qp and qz are not true, then qm is
end else begin // If qp and qz are not true, then qn is
QNext = {QMR, 1'b1};
QMNext = {QMR, 1'b0};
end
end
assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
assign Result = Q[N] ? Q[N-1:2] : Q[N-2:1];
endmodule
///////////////////////////////
// Square Root OTFC, Radix 2 //
///////////////////////////////
module softc2(
input logic clk,
input logic Start,
input logic sp, sn,
output logic S,
module sotfc2(
input logic clk,
input logic Start,
input logic sp, sn,
input logic Sqrt,
input logic [`DIVLEN+3:0] C,
output logic [`DIVLEN+3:0] S, SM
);
// The on-the-fly converter transfers the square root
// bits to the quotient as they come.
// Use this otfc for division and square root.
logic [`DIVLEN+3:0] SNext, SMNext, SMux;
flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM);
mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux);
flop #(`DIVLEN+4) Sreg(clk, SMux, S);
always_comb begin
if (sp) begin
SNext = S | (C & ~(C << 1));
SMNext = S;
end else if (sn) begin
SNext = SM | (C & ~(C << 1));
SMNext = SM;
end else begin // If sp and sn are not true, then sz is
SNext = S;
SMNext = SM | (C & ~(C << 1));
end
end
endmodule
//////////////////////////
// C Register for SOTFC //
//////////////////////////
module creg(input logic clk,
input logic Start,
input logic Sqrt,
output logic [`DIVLEN+3:0] C
);
logic [`DIVLEN+3:0] CMux;
mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b1111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux);
flop #(`DIVLEN+4) cflop(clk, CMux, C);
endmodule
/////////////
// counter //
/////////////
module counter(input logic clk,
input logic req,
input logic [$clog2(`XLEN+1)-1:0] dur,
output logic done);
module srtcounter(input logic clk,
input logic req,
input logic [$clog2(`XLEN+1)-1:0] dur,
output logic done
);
logic [$clog2(`XLEN+1)-1:0] count;
logic [$clog2(`XLEN+1)-1:0] count;
// This block of control logic sequences the divider
// through its iterations. You may modify it if you
@ -332,22 +389,86 @@ module expcalc(
input logic Sqrt,
output logic [`NE-1:0] calcExp
);
logic [`NE-1:0] SExp, DExp, SXExp;
assign SXExp = XExp - (`NE)'(`BIAS);
assign SExp = {1'b0, SXExp[`NE-1:1]} + (`NE)'(`BIAS);
assign DExp = XExp - YExp + (`NE)'(`BIAS);
assign calcExp = Sqrt ? SExp : DExp;
logic [`NE+1:0] SExp, DExp, SXExp;
assign SXExp = {2'b00, XExp} - (`NE+2)'(`BIAS);
assign SExp = (SXExp >> 1) + (`NE+2)'(`BIAS);
assign DExp = {2'b00, XExp} - {2'b00, YExp} + (`NE+2)'(`BIAS);
assign calcExp = Sqrt ? SExp[`NE-1:0] : DExp[`NE-1:0];
endmodule
//////////////
// signcalc //
//////////////
module signcalc(
input logic XSign, YSign,
module srtpostproc(
input logic [`DIVLEN+3:0] WS, WC, X, D, S, SM,
input logic [$clog2(`XLEN+1)-1:0] dur, zeroCntD,
input logic XSign, YSign, Signed, Int, Mod,
output logic [`DIVLEN-1:0] Result,
output logic calcSign
);
logic [`DIVLEN+3:0] W, shiftRem, intRem, intS;
logic [`DIVLEN-1:0] floatRes, intRes;
logic WSign;
assign W = WS + WC;
assign WSign = W[`DIVLEN+3];
// Remainder handling
always_comb begin
if (zeroCntD == ($clog2(`XLEN+1))'(`XLEN)) begin
intRem = X;
intS = -1;
end
else if (~Signed) begin
if (WSign) begin
intRem = W + D;
intS = SM;
end else begin
intRem = W;
intS = S;
end
end
else case ({YSign, XSign, WSign})
3'b000: begin
intRem = W;
intS = S;
end
3'b001: begin
intRem = W + D;
intS = SM;
end
3'b010: begin
intRem = W - D;
intS = ~S;
end
3'b011: begin
intRem = W;
intS = ~SM;
end
3'b100: begin
intRem = W;
intS = ~SM;
end
3'b101: begin
intRem = W + D;
intS = ~SM + 1;
end
3'b110: begin
intRem = W - D;
intS = S + 1;
end
3'b111: begin
intRem = W;
intS = S;
end
endcase
end
assign floatRes = S[`DIVLEN] ? S[`DIVLEN:1] : S[`DIVLEN-1:0];
assign intRes = intS[`DIVLEN] ? intS[`DIVLEN:1] : intS[`DIVLEN-1:0];
assign shiftRem = (intRem >> (zeroCntD));
always_comb begin
if (Int) begin
if (Mod) Result = shiftRem[`DIVLEN-1:0];
else Result = intRes >> (`DIVLEN - dur);
end else Result = floatRes;
end
assign calcSign = XSign ^ YSign;
endmodule
endmodule

View File

@ -1,4 +1,4 @@
`define DIVLEN 64
`include "wally-config.vh"
/////////////
// counter //
@ -39,37 +39,26 @@ endmodule
// testbench //
//////////
module testbench;
logic clk;
logic req;
logic done;
logic Int;
logic [63:0] a, b;
logic [51:0] afrac, bfrac;
logic [10:0] aExp, bExp;
logic asign, bsign;
logic [51:0] r;
logic [63:0] rInt;
logic [`DIVLEN-1:0] Quot;
logic clk;
logic req;
logic done;
logic Int, Sqrt, Mod;
logic [`XLEN-1:0] a, b;
logic [`NF-1:0] afrac, bfrac;
logic [`NE-1:0] aExp, bExp;
logic asign, bsign;
logic [`NF-1:0] r;
logic [`XLEN-1:0] rInt;
logic [`DIVLEN-1:0] Quot;
// Test parameters
parameter MEM_SIZE = 40000;
parameter MEM_WIDTH = 64+64+64+64;
parameter MEM_WIDTH = 64+64+64;
// INT TEST SIZES
// `define memrem 63:0
// `define memr 127:64
// `define memb 191:128
// `define mema 255:192
// FLOAT TEST SIZES
// `define memr 63:0
// `define memb 127:64
// `define mema 191:128
// SQRT TEST SIZES
// Test sizes
`define memr 63:0
`define mema 127:64
`define memb 191:128
`define memb 127:64
`define mema 191:128
// Test logicisters
logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file
@ -80,8 +69,9 @@ module testbench;
logic rsign;
integer testnum, errors;
// Equip Int test or Sqrt test
assign Int = 1'b0;
// Equip Int, Sqrt, or IntMod test
assign Int = 1'b0;
assign Mod = 1'b0;
assign Sqrt = 1'b1;
// Divider
@ -91,8 +81,8 @@ module testbench;
.XSign(asign), .YSign(bsign), .rsign,
.SrcXFrac(afrac), .SrcYFrac(bfrac),
.SrcA(a), .SrcB(b), .Fmt(2'b00),
.W64(1'b1), .Signed(1'b0), .Int, .Sqrt,
.Quot, .Rem(), .Flags(), .done);
.W64(1'b1), .Signed(1'b0), .Int, .Mod, .Sqrt,
.Result(Quot), .Flags(), .done);
// Counter
// counter counter(clk, req, done);
@ -118,7 +108,7 @@ module testbench;
b = Vec[`memb];
{bsign, bExp, bfrac} = b;
nextr = Vec[`memr];
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
rInt = Quot;
req <= #5 1;
end
@ -126,10 +116,10 @@ module testbench;
// Apply directed test vectors read from file.
always @(posedge clk) begin
r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
rInt = Quot;
if (done) begin
if (~Int & ~Sqrt) begin
if (~Int & ~Sqrt) begin // This test case checks floating point division
req <= #5 1;
diffp = correctr[51:0] - r;
diffn = r - correctr[51:0];
@ -145,35 +135,32 @@ module testbench;
$display("%d Tests completed successfully", testnum);
$stop;
end
end else if (~Sqrt) begin
end else if (~Sqrt) begin // This test case works for both integer divide and integer modulo
req <= #5 1;
diffp = correctr[63:0] - rInt;
diffn = rInt - correctr[63:0];
if (($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
if (($signed(diffp) != 0) | (diffp === 64'bx)) // check if accurate to 1 ulp
begin
errors = errors+1;
$display("result was %h, should be %h %h %h\n", rInt, correctr, diffn, diffp);
$display("result was %h, should be %h %h\n", rInt, correctr, diffp);
$display("failed\n");
$stop;
end
if (afrac === 52'hxxxxxxxxxxxxx)
begin
$display("%d Tests completed successfully", testnum);
$display("%d Tests completed successfully", testnum - errors);
$stop;
end
end else begin
end else begin // This test case verifies square root
req <= #5 1;
diffp = correctr[51:0] - r;
diffn = r - correctr[51:0];
if (rExp !== correctr[62:52]) // check if accurate to 1 ulp
if ((rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
begin
errors = errors + 1;
$display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp);
$display("failed\n");
$stop;
end
if (afrac === 52'hxxxxxxxxxxxxx) begin
$display("%d Tests completed successfully", testnum);
$display("%d Tests completed successfully", testnum-errors);
$stop; end
end
end

View File

@ -66,9 +66,9 @@ module testbenchfp;
logic [`XLEN-1:0] IntRes, CmpRes; // Results from each unit
logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags
logic AnsNaN, ResNaN, NaNGood;
logic XSgn, YSgn, ZSgn; // sign of the inputs
logic [`NE-1:0] XExp, YExp, ZExp; // exponent of the inputs
logic [`NF:0] XMan, YMan, ZMan; // mantissas of the inputs
logic Xs, Ys, Zs; // sign of the inputs
logic [`NE-1:0] Xe, Ye, Ze; // exponent of the inputs
logic [`NF:0] Xm, Ym, Zm; // mantissas of the inputs
logic XNaN, YNaN, ZNaN; // is the input NaN
logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN
logic XDenorm, ZDenorm; // is the input denormalized
@ -80,24 +80,26 @@ module testbenchfp;
logic CvtResSgnE;
logic [`NE:0] CvtCalcExpE; // the calculated expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by
logic [`DIVLEN+2:0] Quot;
logic [`DIVb-(`RADIX/4):0] Quot;
logic CvtResDenormUfE;
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
logic [`DURLEN-1:0] EarlyTermShift;
logic DivStart, DivBusy;
logic reset = 1'b0;
logic [`DIVLEN-1:0] DivX;
logic [`DIVLEN-1:0] Dpreproc;
logic [`DIVLEN+3:0] WSN, WS;
logic [`DIVLEN+3:0] WCN, WC;
logic [`DIVLEN+3:0] NextWSN, WS;
logic [`DIVLEN+3:0] NextWCN, WC;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
logic [`DURLEN-1:0] Dur;
// in-between FMA signals
logic Mult;
logic Ss;
logic [`NE+1:0] Pe;
logic [`NE+1:0] Se;
logic ZmSticky;
logic KillProd;
logic [$clog2(3*`NF+7)-1:0] NCnt;
logic [$clog2(3*`NF+7)-1:0] SCnt;
logic [3*`NF+5:0] Sm;
logic InvA;
logic NegSum;
@ -254,16 +256,16 @@ module testbenchfp;
Fmt = {Fmt, 2'b11};
end
end
// if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
// // add the square-root tests/op-ctrls/unit/fmt
// Tests = {Tests, f128sqrt};
// OpCtrl = {OpCtrl, `SQRT_OPCTRL};
// WriteInt = {WriteInt, 1'b0};
// for(int i = 0; i<5; i++) begin
// Unit = {Unit, `DIVUNIT};
// Fmt = {Fmt, 2'b11};
// end
// end
if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
// add the square-root tests/op-ctrls/unit/fmt
Tests = {Tests, f128sqrt};
OpCtrl = {OpCtrl, `SQRT_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b11};
end
end
if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested
Tests = {Tests, f128fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
@ -381,16 +383,16 @@ module testbenchfp;
Fmt = {Fmt, 2'b01};
end
end
// if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted
// // add the correct tests/op-ctrls/unit/fmt to their lists
// Tests = {Tests, f64sqrt};
// OpCtrl = {OpCtrl, `SQRT_OPCTRL};
// WriteInt = {WriteInt, 1'b0};
// for(int i = 0; i<5; i++) begin
// Unit = {Unit, `DIVUNIT};
// Fmt = {Fmt, 2'b01};
// end
// end
if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f64sqrt};
OpCtrl = {OpCtrl, `SQRT_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b01};
end
end
if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
Tests = {Tests, f64fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
@ -492,16 +494,16 @@ module testbenchfp;
Fmt = {Fmt, 2'b00};
end
end
// if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
// // add the correct tests/op-ctrls/unit/fmt to their lists
// Tests = {Tests, f32sqrt};
// OpCtrl = {OpCtrl, `SQRT_OPCTRL};
// WriteInt = {WriteInt, 1'b0};
// for(int i = 0; i<5; i++) begin
// Unit = {Unit, `DIVUNIT};
// Fmt = {Fmt, 2'b00};
// end
// end
if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f32sqrt};
OpCtrl = {OpCtrl, `SQRT_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b00};
end
end
if (TEST === "fma" | TEST === "all") begin // if fma is being tested
Tests = {Tests, f32fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
@ -585,16 +587,16 @@ module testbenchfp;
Fmt = {Fmt, 2'b10};
end
end
// if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
// // add the correct tests/op-ctrls/unit/fmt to their lists
// Tests = {Tests, f16sqrt};
// OpCtrl = {OpCtrl, `SQRT_OPCTRL};
// WriteInt = {WriteInt, 1'b0};
// for(int i = 0; i<5; i++) begin
// Unit = {Unit, `DIVUNIT};
// Fmt = {Fmt, 2'b10};
// end
// end
if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f16sqrt};
OpCtrl = {OpCtrl, `SQRT_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b10};
end
end
if (TEST === "fma" | TEST === "all") begin // if fma is being tested
Tests = {Tests, f16fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
@ -648,14 +650,14 @@ module testbenchfp;
// extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA,
.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
.XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart,
.XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
.XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN),
.XDenormE(XDenorm), .ZDenormE(ZDenorm),
.XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
.XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf), .XExpMaxE(XExpMax),
.Xs, .Ys, .Zs, .Unit(UnitVal),
.Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal),
.Xm, .Ym, .Zm, .DivStart,
.XNaN, .YNaN, .ZNaN,
.XSNaN, .YSNaN, .ZSNaN,
.XDenorm, .ZDenorm,
.XZero, .YZero, .ZZero,
.XInf, .YInf, .ZInf, .XExpMax,
.X, .Y, .Z);
@ -671,35 +673,34 @@ module testbenchfp;
///////////////////////////////////////////////////////////////////////////////////////////////
// instantiate devices under test
fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn),
.Xe(XExp), .Ye(YExp), .Ze(ZExp),
.Xm(XMan), .Ym(YMan), .Zm(ZMan),
.XZero, .YZero, .ZZero,
.FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps,
fma fma(.Xs(Xs), .Ys(Ys), .Zs(Zs),
.Xe(Xe), .Ye(Ye), .Ze(Ze),
.Xm(Xm), .Ym(Ym), .Zm(Zm),
.XZero, .YZero, .ZZero, .Ss, .Se,
.OpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .SCnt, .As, .Ps,
.Pe, .ZmSticky, .KillProd);
postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]),
.Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp),
.Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky),
.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .DivNegSticky,
postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
.Ze(Ze), .ZDenorm(ZDenorm), .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
.Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss),
.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE),
.XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
.XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
.XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
.FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone,
.FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal),
.PostProcFlg(Flg), .W(FpRes), .FCvtIntRes(IntRes));
.FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, .FmaSe(Se),
.FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaSCnt(SCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal),
.PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal),
.XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero,
fcvt fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal),
.XZero(XZero), .XDenorm(XDenorm), .OpCtrl(OpCtrlVal), .IntZero,
.Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE));
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
.XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2));
srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
.Quot, .Rem(), .DivCalcExpM(DivCalcExp));
fcmp fcmp (.Fmt(ModFmt), .OpCtrl(OpCtrlVal), .Xs, .Ys, .Xe, .Ye,
.Xm, .Ym, .XZero, .YZero, .CmpIntRes(CmpRes),
.XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes));
divsqrt divsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
.XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart),
.StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .DivBusy, .QeM(DivCalcExp),
.EarlyTermShiftM(EarlyTermShift), .QmM(Quot), .DivDone);
assign CmpFlg[3:0] = 0;
@ -854,7 +855,7 @@ end
// check if result is correct
// - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~(DivBusy|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
errors += 1;
$display("There is an error in %s", Tests[TestNum]);
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@ -867,10 +868,10 @@ end
// Testfloat outputs 800... for both the largest integer values for both positive and negitive numbers but
// the riscv spec specifies 2^31-1 for positive values out of range and NaNs ie 7fff...
else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&(Res[`XLEN-1:0] === (`XLEN)'(0))) |
(WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) |
(WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) |
(~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin
else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&(Res[`XLEN-1:0] === (`XLEN)'(0))) |
(WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) |
(WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) |
(~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in %s", Tests[TestNum]);
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@ -923,18 +924,19 @@ module readvectors (
output logic [`FLEN-1:0] Ans,
output logic [`XLEN-1:0] SrcA,
output logic [4:0] AnsFlg,
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision)
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
output logic XDenormE, ZDenormE, // is XYZ denormalized
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
output logic XExpMaxE,
output logic Xs, Ys, Zs, // sign bits of XYZ
output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
output logic XNaN, YNaN, ZNaN, // is XYZ a NaN
output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
output logic XDenorm, ZDenorm, // is XYZ denormalized
output logic XZero, YZero, ZZero, // is XYZ zero
output logic XInf, YInf, ZInf, // is XYZ infinity
output logic XExpMax,
output logic DivStart,
output logic [`FLEN-1:0] X, Y, Z
);
logic XEn, YEn, ZEn;
// apply test vectors on rising edge of clk
// Format of vectors Inputs(1/2/3)_AnsFlg
@ -1005,40 +1007,72 @@ module readvectors (
end
endcase
`DIVUNIT:
case (Fmt)
2'b11: begin // quad
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
Ans = TestVector[8+(`Q_LEN-1):8];
if (~clk) #5;
DivStart = 1'b1; #10 // one clk cycle
DivStart = 1'b0;
end
2'b01: if (`D_SUPPORTED)begin // double
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
2'b00: if (`S_SUPPORTED)begin // single
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
2'b10: begin // half
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
endcase
if(OpCtrl[0])
case (Fmt)
2'b11: begin // quad
X = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
Ans = TestVector[8+(`Q_LEN-1):8];
if (~clk) #5;
DivStart = 1'b1; #10 // one clk cycle
DivStart = 1'b0;
end
2'b01: if (`D_SUPPORTED)begin // double
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
2'b00: if (`S_SUPPORTED)begin // single
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
2'b10: begin // half
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
endcase
else
case (Fmt)
2'b11: begin // quad
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
Ans = TestVector[8+(`Q_LEN-1):8];
if (~clk) #5;
DivStart = 1'b1; #10 // one clk cycle
DivStart = 1'b0;
end
2'b01: if (`D_SUPPORTED)begin // double
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
2'b00: if (`S_SUPPORTED)begin // single
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
2'b10: begin // half
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
endcase
`CMPUNIT:
case (Fmt)
2'b11: begin // quad
@ -1256,8 +1290,12 @@ module readvectors (
endcase
end
unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
.XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
.XDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
.XExpMaxE);
assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]);
assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)|((Unit == `DIVUNIT)&OpCtrl[0]));
assign ZEn = (Unit == `FMAUNIT);
unpack unpack(.X, .Y, .Z, .Fmt(ModFmt), .Xs, .Ys, .Zs, .Xe, .Ye, .Ze,
.Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN,
.XDenorm, .ZDenorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf,
.XEn, .YEn, .ZEn, .XExpMax);
endmodule

View File

@ -89,7 +89,8 @@ logic [3:0] dummy;
if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv};
else tests = {arch64c};
"arch64m": if (`M_SUPPORTED) tests = arch64m;
"arch64d": if (`D_SUPPORTED) tests = arch64d;
"arch64f": if (`F_SUPPORTED) tests = arch64f;
"arch64d": if (`D_SUPPORTED) tests = arch64d;
"imperas64i": tests = imperas64i;
"imperas64f": if (`F_SUPPORTED) tests = imperas64f;
"imperas64d": if (`D_SUPPORTED) tests = imperas64d;
@ -112,6 +113,7 @@ logic [3:0] dummy;
else tests = {arch32c};
"arch32m": if (`M_SUPPORTED) tests = arch32m;
"arch32f": if (`F_SUPPORTED) tests = arch32f;
"arch32d": if (`D_SUPPORTED) tests = arch32d;
"imperas32i": tests = imperas32i;
"imperas32f": if (`F_SUPPORTED) tests = imperas32f;
"imperas32m": if (`M_SUPPORTED) tests = imperas32m;
@ -123,6 +125,7 @@ logic [3:0] dummy;
"wally32priv": tests = wally32priv;
"wally32periph": tests = wally32periph;
"embench": tests = embench;
"coremark": tests = coremark;
endcase
end
if (tests.size() == 0) begin
@ -177,7 +180,7 @@ logic [3:0] dummy;
testadr = 0;
testadrNoBase = 0;
// riscof tests have a different signature, tests[0] == "1" refers to RiscvArchTests and tests[0] == "2" refers to WallyRiscvArchTests
riscofTest = tests[0] == "1"; // | tests[0] == "2";
riscofTest = tests[0] == "1" | tests[0] == "2";
// fill memory with defined values to reduce Xs in simulation
// Quick note the memory will need to be initialized. The C library does not
// guarantee the initialized reads. For example a strcmp can read 6 byte
@ -193,13 +196,19 @@ logic [3:0] dummy;
/* if (tests[0] == `IMPERASTEST)
pathname = tvpaths[0];
else pathname = tvpaths[1]; */
memfilename = {pathname, tests[test], ".elf.memfile"};
if (riscofTest) memfilename = {pathname, tests[test], "/ref/ref.elf.memfile"};
else memfilename = {pathname, tests[test], ".elf.memfile"};
if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.irom.ram.memory.RAM);
else $readmemh(memfilename, dut.uncore.ram.ram.memory.RAM);
if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.memory.RAM);
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
if (riscofTest) begin
ProgramAddrMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.lab"};
end else begin
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
end
// declare memory labels that interest us, the updateProgramAddrLabelArray task will find the addr of each label and fill the array
// to expand, add more elements to this array and initialize them to zero (also initilaize them to zero at the start of the next test)
updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray);
@ -239,7 +248,8 @@ logic [3:0] dummy;
// this contains instret and cycles for start and end of test run, used by embench python speed script to calculate embench speed score
// also begin_signature contains the results of the self checking mechanism, which will be read by the python script for error checking
$display("Embench Benchmark: %s is done.", tests[test]);
outputfile = {pathname, tests[test], ".sim.output"};
if (riscofTest) outputfile = {pathname, tests[test], "/ref/ref.sim.output"};
else outputfile = {pathname, tests[test], ".sim.output"};
outputFilePointer = $fopen(outputfile);
i = 0;
while ($unsigned(i) < $unsigned(5'd5)) begin
@ -254,7 +264,7 @@ logic [3:0] dummy;
for(i=0; i<SIGNATURESIZE; i=i+1) begin
sig32[i] = 'bx;
end
if (riscofTest) signame = {pathname, tests[test], "erence-sail_c_simulator.signature"};
if (riscofTest) signame = {pathname, tests[test], "/ref/Reference-sail_c_simulator.signature"};
else signame = {pathname, tests[test], ".signature.output"};
// read signature, reformat in 64 bits if necessary
$readmemh(signame, sig32);
@ -311,14 +321,20 @@ logic [3:0] dummy;
else begin
// If there are still additional tests to run, read in information for the next test
//pathname = tvpaths[tests[0]];
memfilename = {pathname, tests[test], ".elf.memfile"};
if (riscofTest) memfilename = {pathname, tests[test], "/ref/ref.elf.memfile"};
else memfilename = {pathname, tests[test], ".elf.memfile"};
//$readmemh(memfilename, dut.uncore.ram.ram.memory.RAM);
if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.irom.ram.memory.RAM);
else $readmemh(memfilename, dut.uncore.ram.ram.memory.RAM);
if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.memory.RAM);
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
if (riscofTest) begin
ProgramAddrMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.lab"};
end else begin
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
end
ProgramAddrLabelArray = '{ "begin_signature" : 0, "tohost" : 0 };
updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray);
$display("Read memfile %s", memfilename);
@ -427,8 +443,13 @@ module DCacheFlushFSM
localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES;
localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS;
localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN;
localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN;
localparam integer lognumlines = $clog2(numlines);
localparam integer linelen = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN;
localparam integer sramlen = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].SRAMLEN;
localparam integer cachesramwords = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].NUMSRAM;
//testbench.dut.core.lsu.bus.dcache.dcache.CacheWays.NUMSRAM;
localparam integer numwords = sramlen/`XLEN;
localparam integer lognumlines = $clog2(numlines);
localparam integer loglinebytelen = $clog2(linebytelen);
localparam integer lognumways = $clog2(numways);
localparam integer tagstart = lognumlines + loglinebytelen;
@ -436,16 +457,17 @@ module DCacheFlushFSM
genvar index, way, cacheWord;
logic [`XLEN-1:0] CacheData [numways-1:0] [numlines-1:0] [numwords-1:0];
logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0];
logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0];
logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0];
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0];
logic [sramlen-1:0] CacheData [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic [sramlen-1:0] cacheline;
logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic CacheValid [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic CacheDirty [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
for(index = 0; index < numlines; index++) begin
for(way = 0; way < numways; way++) begin
for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
for(cacheWord = 0; cacheWord < cachesramwords; cacheWord++) begin
copyShadow #(.tagstart(tagstart),
.loglinebytelen(loglinebytelen))
.loglinebytelen(loglinebytelen), .sramlen(sramlen))
copyShadow(.clk,
.start,
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
@ -463,18 +485,25 @@ module DCacheFlushFSM
end
end
integer i, j, k;
integer i, j, k, l;
always @(posedge clk) begin
if (start) begin #1
#1
for(i = 0; i < numlines; i++) begin
for(j = 0; j < numways; j++) begin
for(k = 0; k < numwords; k++) begin
if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
for(l = 0; l < cachesramwords; l++) begin
if (CacheValid[j][i][l] & CacheDirty[j][i][l]) begin
for(k = 0; k < numwords; k++) begin
//cacheline = CacheData[j][i][0];
// does not work with modelsim
// # ** Error: ../testbench/testbench.sv(483): Range must be bounded by constant expressions.
// see https://verificationacademy.com/forums/systemverilog/range-must-be-bounded-constant-expressions
//ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = cacheline[`XLEN*(k+1)-1:`XLEN*k];
ShadowRAM[(CacheAdr[j][i][l] >> $clog2(`XLEN/8)) + k] = CacheData[j][i][l][`XLEN*k +: `XLEN];
end
end
end
end
end
end
end
@ -484,15 +513,15 @@ module DCacheFlushFSM
endmodule
module copyShadow
#(parameter tagstart, loglinebytelen)
#(parameter tagstart, loglinebytelen, sramlen)
(input logic clk,
input logic start,
input logic [`PA_BITS-1:tagstart] tag,
input logic valid, dirty,
input logic [`XLEN-1:0] data,
input logic [sramlen-1:0] data,
input logic [32-1:0] index,
input logic [32-1:0] cacheWord,
output logic [`XLEN-1:0] CacheData,
output logic [sramlen-1:0] CacheData,
output logic [`PA_BITS-1:0] CacheAdr,
output logic [`XLEN-1:0] CacheTag,
output logic CacheValid,
@ -505,7 +534,7 @@ module copyShadow
CacheValid = valid;
CacheDirty = dirty;
CacheData = data;
CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8));
CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(sramlen/8));
end
end
@ -531,4 +560,4 @@ task automatic updateProgramAddrLabelArray;
end
$fclose(ProgramLabelMapFP);
$fclose(ProgramAddrMapFP);
endtask
endtask

Some files were not shown because too many files have changed in this diff Show More