This commit is contained in:
Madeleine Masser-Frye 2022-06-07 18:31:54 +00:00
commit c46fbf2260
18 changed files with 1797 additions and 1741 deletions

View File

@ -2,39 +2,56 @@
# Expanded and developed by dtorres@hmc.edu # Expanded and developed by dtorres@hmc.edu
# Compile Embench for Wally # Compile Embench for Wally
all: build sim all: sim size
allClean: clean all allClean: clean all
build: buildspeed buildsize build: buildspeed buildsize
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed
buildspeed: buildspeed:
../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles" ../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for size
buildsize: buildsize:
../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0" ../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
sim: modelSimBuild speed # builds dependencies, then launches modelsim and finally runs python wrapper script to present results
sim: modelsim_build_memfile modelsim_run speed
# vsim: # launches modelsim to simulate tests on wally
# cd ../../pipelined/regression/ modelsim_run:
# vsim -c -do "do wally-pipelined-batch.do rv32gc embench" (cd ../../pipelined/regression/ && vsim -c -do "do wally-pipelined-batch.do rv32gc embench")
# cd ../../benchmarks/embench/ cd ../../benchmarks/embench/
modelSimBuild: buildspeed objdump
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
size:
../../addins/embench-iot/benchmark_size.py --builddir=bd_size
speed:
../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1
# builds the objdump based on the compiled c elf files
objdump: buildspeed objdump: buildspeed
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
# build memfiles, objdump.lab and objdump.addr files
modelsim_build_memfile: objdump
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
# builds the tests for speed, runs them on spike and then launches python script to present results
# note that the speed python script benchmark_speed.py can get confused if there's both a .output file created from spike and modelsim
# you'll need to manually remove one of the two .output files, or run make clean
spike: buildspeed spikecmd speed
# command to run spike on all of the benchmarks
spike_run: buildspeed
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do spike --isa=rv32imac +signature=$$f.spike.output +signature-granularity=4 $$f; done
# python wrapper to present results of embench size benchmark
size: buildsize
../../addins/embench-iot/benchmark_size.py --builddir=bd_size
# python wrapper to present results of embench speed benchmark
speed:
../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1
# deletes all files
clean: clean:
rm -rf ../../addins/embench-iot/bd_speed/ rm -rf ../../addins/embench-iot/bd_speed/
rm -rf ../../addins/embench-iot/bd_size/ rm -rf ../../addins/embench-iot/bd_size/
@ -42,13 +59,4 @@ clean:
allclean: clean allclean: clean
rm -rf ../../addins/embench-iot/logs/ rm -rf ../../addins/embench-iot/logs/
# std: # riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
# ../../addins/embench-iot/build_all.py --builddir=bd_std --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-v -c -O2 -ffunction-sections -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -v -march=rv32imac -mabi=ilp32 ../../../../../benchmarks/embench/tohost.S -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --user-libs="-lm"
# riscv64-unknown-elf-objdump -D ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64.objdump
# --dummy-libs="libgcc libm libc"
# --cflags "-O2 -g -nostartfiles"
# ../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-c -Os -ffunction-sections -nostdlib -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -nostdlib -march=rv32imac -mabi=ilp32 -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --dummy-libs="libgcc libm libc"
# --user-libs="-lm"
# riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
# find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump.lab" | while read f; do grep -n "begin_signature" $f | cut -f1 -d:

View File

@ -327,7 +327,7 @@ connect_debug_port u_ila_0/probe72 [get_nets [list wallypipelinedsoc/core/hzu/BP
create_debug_port u_ila_0 probe create_debug_port u_ila_0 probe
set_property port_width 1 [get_debug_ports u_ila_0/probe73] set_property port_width 1 [get_debug_ports u_ila_0/probe73]
set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe73] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe73]
connect_debug_port u_ila_0/probe73 [get_nets [list wallypipelinedsoc/core/hzu/CSRWritePendingDEM ]] connect_debug_port u_ila_0/probe73 [get_nets [list wallypipelinedsoc/core/hzu/CSRWriteFencePendingDEM ]]
create_debug_port u_ila_0 probe create_debug_port u_ila_0 probe
set_property port_width 1 [get_debug_ports u_ila_0/probe74] set_property port_width 1 [get_debug_ports u_ila_0/probe74]
@ -402,7 +402,7 @@ connect_debug_port u_ila_0/probe87 [get_nets [list wallypipelinedsoc/core/hzu/Br
create_debug_port u_ila_0 probe create_debug_port u_ila_0 probe
set_property port_width 1 [get_debug_ports u_ila_0/probe88] set_property port_width 1 [get_debug_ports u_ila_0/probe88]
set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe88] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe88]
connect_debug_port u_ila_0/probe88 [get_nets [list wallypipelinedsoc/core/hzu/InvalidateICacheM ]] connect_debug_port u_ila_0/probe88 [get_nets [list {wallypipelinedsoc/uncore/uart.uart/u/RXerrIP} ]]
create_debug_port u_ila_0 probe create_debug_port u_ila_0 probe
set_property port_width 1 [get_debug_ports u_ila_0/probe89] set_property port_width 1 [get_debug_ports u_ila_0/probe89]
@ -433,7 +433,8 @@ connect_debug_port u_ila_0/probe93 [get_nets [list wallypipelinedsoc/core/hzu/St
create_debug_port u_ila_0 probe create_debug_port u_ila_0 probe
set_property port_width 1 [get_debug_ports u_ila_0/probe94] set_property port_width 1 [get_debug_ports u_ila_0/probe94]
set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe94] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe94]
connect_debug_port u_ila_0/probe94 [get_nets [list wallypipelinedsoc/core/hzu/FlushF ]] connect_debug_port u_ila_0/probe94 [get_nets [list {wallypipelinedsoc/uncore/uart.uart/u/RXerrIP} ]]
create_debug_port u_ila_0 probe create_debug_port u_ila_0 probe
set_property port_width 1 [get_debug_ports u_ila_0/probe95] set_property port_width 1 [get_debug_ports u_ila_0/probe95]
@ -835,8 +836,4 @@ set_property port_width 4 [get_debug_ports u_ila_0/probe171]
set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe171] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe171]
connect_debug_port u_ila_0/probe171 [get_nets [list {wallypipelinedsoc/uncore/uart.uart/u/rxfifotail[0]} {wallypipelinedsoc/uncore/uart.uart/u/rxfifotail[1]} {wallypipelinedsoc/uncore/uart.uart/u/rxfifotail[2]} {wallypipelinedsoc/uncore/uart.uart/u/rxfifotail[3]} ]] connect_debug_port u_ila_0/probe171 [get_nets [list {wallypipelinedsoc/uncore/uart.uart/u/rxfifotail[0]} {wallypipelinedsoc/uncore/uart.uart/u/rxfifotail[1]} {wallypipelinedsoc/uncore/uart.uart/u/rxfifotail[2]} {wallypipelinedsoc/uncore/uart.uart/u/rxfifotail[3]} ]]
create_debug_port u_ila_0 probe
set_property port_width 1 [get_debug_ports u_ila_0/probe172]
set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe172]
connect_debug_port u_ila_0/probe172 [get_nets [list {wallypipelinedsoc/uncore/uart.uart/u/RXerrIP} ]]

View File

@ -38,7 +38,8 @@
`define IEEE754 0 `define IEEE754 0
// MISA RISC-V configuration per specification // MISA RISC-V configuration per specification
`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 ) // ZYXWVUTSRQPONMLKJIHGFEDCBA
`define MISA 32'b0000000000101000001000100101101
`define ZICSR_SUPPORTED 1 `define ZICSR_SUPPORTED 1
`define ZIFENCEI_SUPPORTED 1 `define ZIFENCEI_SUPPORTED 1
`define COUNTERS 32 `define COUNTERS 32

View File

@ -51,43 +51,45 @@
`define PMPCFG_ENTRIES (`PMP_ENTRIES/8) `define PMPCFG_ENTRIES (`PMP_ENTRIES/8)
// Floating point constants for Quad, Double, Single, and Half precisions // Floating point constants for Quad, Double, Single, and Half precisions
`define Q_LEN 128 `define Q_LEN 32'd128
`define Q_NE 15 `define Q_NE 32'd15
`define Q_NF 112 `define Q_NF 32'd112
`define Q_BIAS 16383 `define Q_BIAS 32'd16383
`define D_LEN 64 `define D_LEN 32'd64
`define D_NE 11 `define D_NE 32'd11
`define D_NF 52 `define D_NF 32'd52
`define D_BIAS 1023 `define D_BIAS 32'd1023
`define S_LEN 32 `define D_FMT 32'd1
`define S_NE 8 `define S_LEN 32'd32
`define S_NF 23 `define S_NE 32'd8
`define S_BIAS 127 `define S_NF 32'd23
`define H_LEN 16 `define S_BIAS 32'd127
`define H_NE 5 `define S_FMT 32'd1
`define H_NF 10 `define H_LEN 32'd16
`define H_BIAS 15 `define H_NE 32'd5
`define H_NF 32'd10
`define H_BIAS 32'd15
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits // Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
`define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `F_SUPPORTED ? `S_LEN : `H_LEN) `define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `F_SUPPORTED ? `S_LEN : `H_LEN)
`define NE (`Q_SUPPORTED ? `Q_NE : `D_SUPPORTED ? `D_NE : `F_SUPPORTED ? `S_NE : `H_NE) `define NE (`Q_SUPPORTED ? `Q_NE : `D_SUPPORTED ? `D_NE : `F_SUPPORTED ? `S_NE : `H_NE)
`define NF (`Q_SUPPORTED ? `Q_NF : `D_SUPPORTED ? `D_NF : `F_SUPPORTED ? `S_NF : `H_NF) `define NF (`Q_SUPPORTED ? `Q_NF : `D_SUPPORTED ? `D_NF : `F_SUPPORTED ? `S_NF : `H_NF)
`define FMT (`Q_SUPPORTED ? 3 : `D_SUPPORTED ? 1 : `F_SUPPORTED ? 0 : 2) `define FMT (`Q_SUPPORTED ? 2'd3 : `D_SUPPORTED ? 2'd1 : `F_SUPPORTED ? 2'd0 : 2'd2)
`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS) `define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS)
// Floating point constants needed for FPU paramerterization // Floating point constants needed for FPU paramerterization
`define FPSIZES ((3)'(`Q_SUPPORTED)+(3)'(`D_SUPPORTED)+(3)'(`F_SUPPORTED)+(3)'(`ZFH_SUPPORTED)) `define FPSIZES ((32)'(`Q_SUPPORTED)+(32)'(`D_SUPPORTED)+(32)'(`F_SUPPORTED)+(32)'(`ZFH_SUPPORTED))
`define FMTBITS (((`FPSIZES==3'b011)|(`FPSIZES==3'b100)) ? 2 : 1) `define FMTBITS ((`FPSIZES>=3)+1)
`define LEN1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_LEN : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_LEN : `H_LEN) `define LEN1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_LEN : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_LEN : `H_LEN)
`define NE1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NE : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NE : `H_NE) `define NE1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NE : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NE : `H_NE)
`define NF1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NF : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NF : `H_NF) `define NF1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NF : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NF : `H_NF)
`define FMT1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? 1 : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? 0 : 2) `define FMT1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? 2'd1 : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? 2'd0 : 2'd2)
`define BIAS1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_BIAS : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_BIAS : `H_BIAS) `define BIAS1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_BIAS : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_BIAS : `H_BIAS)
`define LEN2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_LEN : `H_LEN) `define LEN2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_LEN : `H_LEN)
`define NE2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NE : `H_NE) `define NE2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NE : `H_NE)
`define NF2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NF : `H_NF) `define NF2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NF : `H_NF)
`define FMT2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 0 : 2) `define FMT2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 2'd0 : 2'd2)
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS) `define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
// Disable spurious Verilator warnings // Disable spurious Verilator warnings

View File

@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/
verilator=`which verilator` verilator=`which verilator`
basepath=$(dirname $0)/.. basepath=$(dirname $0)/..
for config in rv64fp rv32e rv64gc rv32gc rv32ic; do for config in rv64fp rv64fpquad rv32e rv64gc rv32gc rv32ic; do
echo "$config linting..." echo "$config linting..."
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then
echo "Exiting after $config lint due to errors or warnings" echo "Exiting after $config lint due to errors or warnings"

View File

@ -1,2 +1,2 @@
vsim -do "do wally-pipelined.do rv32e imperas64d" vsim -do "do wally-pipelined.do rv32gc arch32f"

View File

@ -122,7 +122,7 @@ module fctrl (
else if (`FPSIZES == 2)begin else if (`FPSIZES == 2)begin
logic [1:0] FmtTmp; logic [1:0] FmtTmp;
assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0]; assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtD = `FMT == FmtTmp; assign FmtD = (`FMT == FmtTmp);
end end
else if (`FPSIZES == 3|`FPSIZES == 4) else if (`FPSIZES == 3|`FPSIZES == 4)
assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0]; assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];

View File

@ -2,6 +2,7 @@
`include "wally-config.vh" `include "wally-config.vh"
// largest length in IEU/FPU // largest length in IEU/FPU
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF) `define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
module fcvt ( module fcvt (
input logic XSgnE, // input's sign input logic XSgnE, // input's sign
@ -42,7 +43,7 @@ module fcvt (
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder) logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder)
logic [`NE:0] CalcExp; // the calculated expoent logic [`NE:0] CalcExp; // the calculated expoent
logic [$clog2(`LGLEN+1)-1:0] ShiftAmt; // how much to shift by logic [`LOGLGLEN-1:0] ShiftAmt; // how much to shift by
logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted
logic ResDenormUf;// does the result underflow or is denormalized logic ResDenormUf;// does the result underflow or is denormalized
logic ResUf; // does the result underflow logic ResUf; // does the result underflow
@ -72,7 +73,7 @@ module fcvt (
logic Int64; // is the integer 64 bits? logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion? logic IntToFp; // is the opperation an int->fp conversion?
logic ToInt; // is the opperation an fp->int conversion? logic ToInt; // is the opperation an fp->int conversion?
logic [$clog2(`LGLEN+1)-1:0] ZeroCnt; // output from the LZC logic [`LOGLGLEN-1:0] ZeroCnt; // output from the LZC
// seperate OpCtrl for code readability // seperate OpCtrl for code readability
@ -143,9 +144,9 @@ module fcvt (
// - only shift fp -> fp if the intital value is denormalized // - only shift fp -> fp if the intital value is denormalized
// - this is a problem because the input to the lzc was the fraction rather than the mantissa // - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN+1)-1:0]&{$clog2(`LGLEN+1){~CalcExp[`NE]}} : assign ShiftAmt = ToInt ? CalcExp[`LOGLGLEN-1:0]&{`LOGLGLEN{~CalcExp[`NE]}} :
ResDenormUf&~IntToFp ? ($clog2(`LGLEN+1))'(`NF-1)+CalcExp[$clog2(`LGLEN+1)-1:0] : ResDenormUf&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CalcExp[`LOGLGLEN-1:0] :
(ZeroCnt+1)&{$clog2(`LGLEN+1){XDenormE|IntToFp}}; (ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
// shift // shift
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp // fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
@ -261,34 +262,34 @@ module fcvt (
// - shift left to normilize (-1-ZeroCnt) // - shift left to normilize (-1-ZeroCnt)
// - newBias to make the biased exponent // - newBias to make the biased exponent
// //
assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-$clog2(`LGLEN+1)+1{1'b0}}, (ZeroCnt&{$clog2(`LGLEN+1){XDenormE|IntToFp}})}; assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
// find if the result is dnormal or underflows // find if the result is dnormal or underflows
// - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
// - can't underflow an integer to Fp conversion // - can't underflow an integer to Fp conversion
assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp; assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp;
// choose the negative of the fraction size // choose the negative of the fraction size
if (`FPSIZES == 1) begin if (`FPSIZES == 1) begin
assign ResNegNF = -`NF; assign ResNegNF = -($clog2(`NF)+1)'(`NF);
end else if (`FPSIZES == 2) begin end else if (`FPSIZES == 2) begin
assign ResNegNF = OutFmt ? -`NF : -`NF1; assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
end else if (`FPSIZES == 3) begin end else if (`FPSIZES == 3) begin
always_comb always_comb
case (OutFmt) case (OutFmt)
`FMT: ResNegNF = -`NF; `FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
`FMT1: ResNegNF = -`NF1; `FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
`FMT2: ResNegNF = -`NF2; `FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
default: ResNegNF = 1'bx; default: ResNegNF = 1'bx;
endcase endcase
end else if (`FPSIZES == 4) begin end else if (`FPSIZES == 4) begin
always_comb always_comb
case (OutFmt) case (OutFmt)
2'h3: ResNegNF = -`Q_NF; 2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
2'h1: ResNegNF = -`D_NF; 2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
2'h0: ResNegNF = -`S_NF; 2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
2'h2: ResNegNF = -`H_NF; 2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
endcase endcase
end end
// determine if the result underflows ??? -> fp // determine if the result underflows ??? -> fp
@ -453,10 +454,10 @@ module fcvt (
// find the maximum exponent (the exponent and larger overflows) // find the maximum exponent (the exponent and larger overflows)
if (`FPSIZES == 1) begin if (`FPSIZES == 1) begin
assign MaxExp = ToInt ? Int64 ? 65 : 33 : {`NE{1'b1}}; assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
end else if (`FPSIZES == 2) begin end else if (`FPSIZES == 2) begin
assign MaxExp = ToInt ? Int64 ? 65 : 33 : assign MaxExp = ToInt ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}}; OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
end else if (`FPSIZES == 3) begin end else if (`FPSIZES == 3) begin
@ -476,7 +477,7 @@ module fcvt (
MaxExpFp = 1'bx; MaxExpFp = 1'bx;
end end
endcase endcase
assign MaxExp = ToInt ? Int64 ? 65 : 33 : MaxExpFp; assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
end else if (`FPSIZES == 4) begin end else if (`FPSIZES == 4) begin
logic [`NE-1:0] MaxExpFp; logic [`NE-1:0] MaxExpFp;
@ -495,7 +496,7 @@ module fcvt (
MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}}; MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
end end
endcase endcase
assign MaxExp = ToInt ? Int64 ? 65 : 33 : MaxExpFp; assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
end end
// if the result exponent is larger then the maximum possible exponent // if the result exponent is larger then the maximum possible exponent

View File

@ -81,7 +81,7 @@ module fma(
// E/M pipeline registers // E/M pipeline registers
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM, flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE}, {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE},
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM}); {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM});
@ -237,7 +237,7 @@ module align(
// - positive means the product is larger, so shift Z right // - positive means the product is larger, so shift Z right
// *** can we use ProdExpE instead of XExp/YExp to save an adder? DH 5/12/22 // *** can we use ProdExpE instead of XExp/YExp to save an adder? DH 5/12/22
// KP- yes we used ProdExpE originally but we did this for timing // KP- yes we used ProdExpE originally but we did this for timing
assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpE}; assign AlignCnt = XZeroE|YZeroE ? -(`NE+2)'($unsigned(1)) : {2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF)+3 - {2'b0, ZExpE};
// Defualt Addition without shifting // Defualt Addition without shifting
// | 54'b0 | 106'b(product) | 2'b0 | // | 54'b0 | 106'b(product) | 2'b0 |
@ -320,7 +320,7 @@ module add(
// Do the addition // Do the addition
// - calculate a positive and negitive sum in parallel // - calculate a positive and negitive sum in parallel
assign PreSum = AlignedAddendInv + {55'b0, ProdManKilled, 2'b0} + {{3*`NF+6{1'b0}}, InvZE}; assign PreSum = AlignedAddendInv + {{`NF+3{1'b0}}, ProdManKilled, 2'b0} + {{3*`NF+6{1'b0}}, InvZE};
assign NegPreSum = XZeroE|YZeroE|KillProdE ? {1'b0, AlignedAddendE} : {1'b0, AlignedAddendE} + {{`NF+3{1'b1}}, ~ProdManKilled, 2'b0} + {(3*`NF+7)'(4)}; assign NegPreSum = XZeroE|YZeroE|KillProdE ? {1'b0, AlignedAddendE} : {1'b0, AlignedAddendE} + {{`NF+3{1'b1}}, ~ProdManKilled, 2'b0} + {(3*`NF+7)'(4)};
// Is the sum negitive // Is the sum negitive
@ -543,7 +543,7 @@ module normalize(
assign SumZero = ~(|SumM); assign SumZero = ~(|SumM);
// calculate the sum's exponent // calculate the sum's exponent
assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, NormCntM} + 1 - (`NE+2)'(`NF+4));
//convert the sum's exponent into the propper percision //convert the sum's exponent into the propper percision
if (`FPSIZES == 1) begin if (`FPSIZES == 1) begin
@ -556,8 +556,8 @@ module normalize(
always_comb begin always_comb begin
case (FmtM) case (FmtM)
`FMT: SumExpTmp = SumExpTmpTmp; `FMT: SumExpTmp = SumExpTmpTmp;
`FMT1: SumExpTmp = (SumExpTmpTmp-`BIAS+`BIAS1)&{`NE+2{|SumExpTmpTmp}}; `FMT1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
`FMT2: SumExpTmp = (SumExpTmpTmp-`BIAS+`BIAS2)&{`NE+2{|SumExpTmpTmp}}; `FMT2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|SumExpTmpTmp}};
default: SumExpTmp = `NE+2'bx; default: SumExpTmp = `NE+2'bx;
endcase endcase
end end
@ -566,9 +566,9 @@ module normalize(
always_comb begin always_comb begin
case (FmtM) case (FmtM)
2'h3: SumExpTmp = SumExpTmpTmp; 2'h3: SumExpTmp = SumExpTmpTmp;
2'h1: SumExpTmp = (SumExpTmpTmp-`BIAS+`D_BIAS)&{`NE+2{|SumExpTmpTmp}}; 2'h1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|SumExpTmpTmp}};
2'h0: SumExpTmp = (SumExpTmpTmp-`BIAS+`S_BIAS)&{`NE+2{|SumExpTmpTmp}}; 2'h0: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|SumExpTmpTmp}};
2'h2: SumExpTmp = (SumExpTmpTmp-`BIAS+`H_BIAS)&{`NE+2{|SumExpTmpTmp}}; 2'h2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|SumExpTmpTmp}};
endcase endcase
end end
@ -674,7 +674,7 @@ module normalize(
// Determine sum's exponent // Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign SumExp = (SumExpTmp+{12'b0, LZAPlus1&~KillProdM}+{11'b0, LZAPlus2&~KillProdM, 1'b0}+{12'b0, ~ResultDenorm&PreResultDenorm&~KillProdM}+{12'b0, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}}; assign SumExp = (SumExpTmp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResultDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
// recalculate if the result is denormalized // recalculate if the result is denormalized
assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7]; assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];

View File

@ -107,7 +107,7 @@ module fpu (
logic FOpCtrlQ; logic FOpCtrlQ;
// result and flag signals // result and flag signals
logic [`FLEN-1:0] FDivResM, FDivResW; // divide/squareroot result logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [4:0] FDivFlgM; // divide/squareroot flags logic [4:0] FDivFlgM; // divide/squareroot flags
logic [`FLEN-1:0] FMAResM, FMAResW; // FMA/multiply result logic [`FLEN-1:0] FMAResM, FMAResW; // FMA/multiply result
logic [4:0] FMAFlgM; // FMA/multiply result logic [4:0] FMAFlgM; // FMA/multiply result
@ -125,7 +125,7 @@ module fpu (
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
// other signals // other signals
logic FDivSqrtDoneE; // is divide done logic FDivSqrtDoneE; // is divide done
logic [`FLEN-1:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
logic load_preload; // enable for FF on fpdivsqrt logic load_preload; // enable for FF on fpdivsqrt
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
@ -184,11 +184,11 @@ module fpu (
generate generate
if(`FPSIZES == 1) assign BoxedZeroE = 0; if(`FPSIZES == 1) assign BoxedZeroE = 0;
else if(`FPSIZES == 2) else if(`FPSIZES == 2)
mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`FLEN-`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
else if(`FPSIZES == 3 | `FPSIZES == 4) else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, (`FLEN-`S_LEN)'(0)}, mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}},
{{`FLEN-`D_LEN{1'b1}}, (`FLEN-`D_LEN)'(0)}, {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}},
{{`FLEN-`H_LEN{1'b1}}, (`FLEN-`H_LEN)'(0)}, {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}},
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
endgenerate endgenerate
@ -218,17 +218,27 @@ module fpu (
.FMAFlgM, .FMAResM); .FMAFlgM, .FMAResM);
// fpdivsqrt using Goldschmidt's iteration // fpdivsqrt using Goldschmidt's iteration
flopenrc #(`FLEN) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), if(`FLEN == 64) begin
flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E),
.clear(FDivSqrtDoneE), .en(load_preload), .clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk)); .reset(reset), .clk(clk));
flopenrc #(`FLEN) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E),
.clear(FDivSqrtDoneE), .en(load_preload), .clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk)); .reset(reset), .clk(clk));
flopenrc #(8+int'(`FMTBITS-1)) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE, FOpCtrlE[0]}), end
else if (`FLEN == 32) begin
flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
end
flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}),
.q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
.clear(FDivSqrtDoneE), .en(load_preload), .clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk)); .reset(reset), .clk(clk));
fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ),
.reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
.XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
@ -244,7 +254,8 @@ module fpu (
// data to be stored in memory - to IEU // data to be stored in memory - to IEU
// - FP uses NaN-blocking format // - FP uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s // - if there are any unsused bits the most significant bits are filled with 1s
assign FWriteDataE = FSrcYE[`XLEN-1:0]; if (`FLEN>`XLEN) assign FWriteDataE = FSrcYE[`XLEN-1:0];
else assign FWriteDataE = {{`XLEN-`FLEN{FSrcYE[`FLEN-1]}}, FSrcYE};
// NaN Block SrcA // NaN Block SrcA
generate generate
@ -262,8 +273,12 @@ module fpu (
mux4 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE); mux4 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
// select the result that may be written to the integer register - to IEU // select the result that may be written to the integer register - to IEU
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE, if (`FLEN>`XLEN)
CvtIntResE, FIntResSelE, FIntResE); mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE,
CvtIntResE, FIntResSelE, FIntResE);
else
mux4 #(`XLEN) IntResMux({{`XLEN-`FLEN{CmpResE[`FLEN-1:0]}}, CmpResE}, {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}, ClassResE,
CvtIntResE, FIntResSelE, FIntResE);
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok // *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
// *** make sure the fpu matches the chapter diagram // *** make sure the fpu matches the chapter diagram
@ -290,7 +305,7 @@ module fpu (
// M/W pipe registers // M/W pipe registers
flopenrc #(`FLEN) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); flopenrc #(`FLEN) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(`FLEN) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(`FLEN) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW); flopenrc #(`FLEN) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW, flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM}, {FRegWriteM, FResultSelM, FmtM},
@ -313,5 +328,6 @@ module fpu (
endgenerate endgenerate
// select the result to be written to the FP register // select the result to be written to the FP register
mux4 #(`FLEN) FPUResultMux (ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW); if(`FLEN>=64)
mux4 #(`FLEN) FPUResultMux (ReadResW, FMAResW, {{`FLEN-64{1'b0}},FDivResW}, FResW, FResultSelW, FPUResultW);
endmodule // fpu endmodule // fpu

View File

@ -1,11 +1,11 @@
`include "wally-config.vh" `include "wally-config.vh"
module unpack ( module unpack (
input logic [$signed(`FLEN)-$signed(1):0] X, Y, Z, // inputs from register file input logic [`FLEN-1:0] X, Y, Z, // inputs from register file
input logic [$signed(`FMTBITS)-$signed(1):0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
output logic [$signed(`NE)-$signed(1):0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
output logic [$signed(`NF):0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision)
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
output logic XDenormE, ZDenormE, // is XYZ denormalized output logic XDenormE, ZDenormE, // is XYZ denormalized
@ -14,7 +14,7 @@ module unpack (
output logic XExpMaxE // does X have the maximum exponent (NaN or Inf) output logic XExpMaxE // does X have the maximum exponent (NaN or Inf)
); );
logic [$signed(`NF)-$signed(1):0] XFracE, YFracE, ZFracE; //Fraction of XYZ logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
logic YExpMaxE, ZExpMaxE; // is the exponent all 1s logic YExpMaxE, ZExpMaxE; // is the exponent all 1s

View File

@ -17,7 +17,7 @@ if [file exists work] {
} }
vlib work vlib work
vlog +incdir+../config/rv64gc +incdir+../config/shared srt.sv testbench.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv vlog +incdir+../config/rv64gc +incdir+../config/shared srt.sv testbench.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
vopt +acc work.testbench -o workopt vopt +acc work.testbench -o workopt
vsim workopt vsim workopt

View File

@ -30,7 +30,11 @@
`include "wally-config.vh" `include "wally-config.vh"
module srt #(parameter Nf=52) ( `define DIVLEN ((`NF<(`XLEN+1)) ? (`XLEN + 1) : `NF)
`define EXTRAFRACBITS ((`NF<(`XLEN+1)) ? (`XLEN - `NF + 1) : 0)
`define EXTRAINTBITS ((`NF<(`XLEN+1)) ? 0 : (`NF - `XLEN))
module srt (
input logic clk, input logic clk,
input logic Start, input logic Start,
input logic Stall, // *** multiple pipe stages input logic Stall, // *** multiple pipe stages
@ -39,7 +43,7 @@ module srt #(parameter Nf=52) (
// later add exponents, signs, special cases // later add exponents, signs, special cases
input logic XSign, YSign, input logic XSign, YSign,
input logic [`NE-1:0] XExp, YExp, input logic [`NE-1:0] XExp, YExp,
input logic [Nf-1:0] SrcXFrac, SrcYFrac, input logic [`NF-1:0] SrcXFrac, SrcYFrac,
input logic [`XLEN-1:0] SrcA, SrcB, input logic [`XLEN-1:0] SrcA, SrcB,
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
input logic W64, // 32-bit ints on XLEN=64 input logic W64, // 32-bit ints on XLEN=64
@ -47,7 +51,7 @@ module srt #(parameter Nf=52) (
input logic Int, // Choose integer inputs input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide input logic Sqrt, // perform square root, not divide
output logic rsign, output logic rsign,
output logic [Nf-1:0] Quot, Rem, QuotOTFC, // *** later handle integers output logic [`DIVLEN-1:0] Quot, Rem, QuotOTFC, // *** later handle integers
output logic [`NE-1:0] rExp, output logic [`NE-1:0] rExp,
output logic [3:0] Flags output logic [3:0] Flags
); );
@ -55,38 +59,40 @@ module srt #(parameter Nf=52) (
logic qp, qz, qm; // quotient is +1, 0, or -1 logic qp, qz, qm; // quotient is +1, 0, or -1
logic [`NE-1:0] calcExp; logic [`NE-1:0] calcExp;
logic calcSign; logic calcSign;
logic [Nf-1:0] X, Dpreproc; logic [`DIVLEN-1:0] X, Dpreproc;
logic [Nf+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
logic [Nf+2:0] rp, rm; logic [`DIVLEN+2:0] rp, rm;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign;
srtpreproc #(Nf) preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc); srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
// Top Muxes and Registers // Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider. // When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder // Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration. // is fed back for the next iteration.
mux2 #(Nf+4) wsmux({WSA[54:0], 1'b0}, {4'b0001, X}, Start, WSN); mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+2:0], 1'b0}, {4'b0001, X}, Start, WSN);
flop #(Nf+4) wsflop(clk, WSN, WS); flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
mux2 #(Nf+4) wcmux({WCA[54:0], 1'b0}, 56'b0, Start, WCN); mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+2:0], 1'b0}, {(`DIVLEN+4){1'b0}}, Start, WCN);
flop #(Nf+4) wcflop(clk, WCN, WC); flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
flopen #(Nf+4) dflop(clk, Start, {4'b0001, Dpreproc}, D); flopen #(`DIVLEN+4) dflop(clk, Start, {4'b0001, Dpreproc}, D);
// Quotient Selection logic // Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
// Accumulate quotient digits in a shift register qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm);
qsel #(Nf) qsel(WS[55:52], WC[55:52], qp, qz, qm); // Accumulate quotient digits in a shift register (now done in OTFC)
qacc #(Nf+3) qacc(clk, Start, qp, qz, qm, rp, rm); qacc #(`DIVLEN+3) qacc(clk, Start, qp, qz, qm, rp, rm);
flopen #(`NE) expflop(clk, Start, calcExp, rExp); flopen #(`NE) expflop(clk, Start, calcExp, rExp);
flopen #(1) signflop(clk, Start, calcSign, rsign); flopen #(1) signflop(clk, Start, calcSign, rsign);
// Divisor Selection logic // Divisor Selection logic
inv dinv(D, Db); inv dinv(D, Db);
mux3onehot divisorsel(Db, 56'b0, D, qp, qz, qm, Dsel); mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel);
// Partial Product Generation // Partial Product Generation
csa csa(WS, WC, Dsel, qp, WSA, WCA); csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
otfc2 otfc2(clk, Start, qp, qz, qm, QuotOTFC); otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, QuotOTFC);
expcalc expcalc(.XExp, .YExp, .calcExp); expcalc expcalc(.XExp, .YExp, .calcExp);
@ -95,70 +101,60 @@ module srt #(parameter Nf=52) (
srtpostproc postproc(rp, rm, Quot); srtpostproc postproc(rp, rm, Quot);
endmodule endmodule
module srtpostproc #(parameter N=52) ( ////////////////
input [N+2:0] rp, rm, // Submodules //
output [N-1:0] Quot ////////////////
);
//assign Quot = rp - rm; ///////////////////
finaladd finaladd(rp, rm, Quot); // Preprocessing //
endmodule ///////////////////
module srtpreproc (
module srtpreproc #(parameter Nf=52) (
input logic [`XLEN-1:0] SrcA, SrcB, input logic [`XLEN-1:0] SrcA, SrcB,
input logic [Nf-1:0] SrcXFrac, SrcYFrac, input logic [`NF-1:0] SrcXFrac, SrcYFrac,
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
input logic W64, // 32-bit ints on XLEN=64 input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputss input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide input logic Sqrt, // perform square root, not divide
output logic [Nf-1:0] X, D output logic [`DIVLEN-1:0] X, D,
output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
output logic intSign // Quotient integer sign
); );
// Initial: just pass X and Y through for simple fp division logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
assign X = SrcXFrac; logic [`XLEN-1:0] PosA, PosB;
assign D = SrcYFrac; logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
lzc #(`XLEN) lzcA (PosA, zeroCntA);
lzc #(`XLEN) lzcB (PosB, zeroCntB);
assign ExtraA = {1'b0, PosA, {`EXTRAINTBITS{1'b0}}};
assign ExtraB = {1'b0, PosB, {`EXTRAINTBITS{1'b0}}};
assign PreprocA = ExtraA << zeroCntA;
assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
assign X = Int ? PreprocA : PreprocX;
assign D = Int ? PreprocB : PreprocY;
assign intExp = zeroCntB - zeroCntA + 1;
assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
endmodule endmodule
/* /////////////////////////////////
// Quotient Selection, Radix 2 //
////////// /////////////////////////////////
// mux2 // module qsel2 ( // *** eventually just change to 4 bits
////////// input logic [`DIVLEN+3:`DIVLEN] ps, pc,
module mux2(input logic [55:0] in0, in1,
input logic sel,
output logic [55:0] out);
assign #1 out = sel ? in1 : in0;
endmodule
//////////
// flop //
//////////
module flop(clk, in, out);
input clk;
input [55:0] in;
output [55:0] out;
logic [55:0] state;
always @(posedge clk)
state <= #1 in;
assign #1 out = state;
endmodule
*/
//////////
// qsel //
//////////
module qsel #(parameter Nf=52) ( // *** eventually just change to 4 bits
input logic [Nf+3:Nf] ps, pc,
output logic qp, qz, qm output logic qp, qz, qm
); );
logic [Nf+3:Nf] p, g; logic [`DIVLEN+3:`DIVLEN] p, g;
logic magnitude, sign, cout; logic magnitude, sign, cout;
// The quotient selection logic is presented for simplicity, not // The quotient selection logic is presented for simplicity, not
@ -169,9 +165,9 @@ module qsel #(parameter Nf=52) ( // *** eventually just change to 4 bits
assign p = ps ^ pc; assign p = ps ^ pc;
assign g = ps & pc; assign g = ps & pc;
assign #1 magnitude = ~(&p[54:52]); assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
assign #1 cout = g[54] | (p[54] & (g[53] | p[53] & g[52])); assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
assign #1 sign = p[55] ^ cout; assign #1 sign = p[`DIVLEN+3] ^ cout;
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & /* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
(ps[52]^pc[52])); (ps[52]^pc[52]));
assign #1 sign = (ps[55]^pc[55])^ assign #1 sign = (ps[55]^pc[55])^
@ -188,15 +184,16 @@ endmodule
////////// //////////
// qacc // // qacc //
////////// //////////
module qacc #(parameter N=55) ( // To be replaced by OTFC
module qacc #(parameter N=68) (
input logic clk, input logic clk,
input logic req, input logic req,
input logic qp, qz, qm, input logic qp, qz, qm,
output logic [N-1:0] rp, rm output logic [N-1:0] rp, rm
); );
flopr #(N) rmreg(clk, req, {rm[53:0], qm}, rm); flopr #(N) rmreg(clk, req, {rm[N-2:0], qm}, rm);
flopr #(N) rpreg(clk, req, {rp[53:0], qp}, rp); flopr #(N) rpreg(clk, req, {rp[N-2:0], qp}, rp);
/* always @(posedge clk) /* always @(posedge clk)
begin begin
if (req) if (req)
@ -212,11 +209,10 @@ module qacc #(parameter N=55) (
end */ end */
endmodule endmodule
////////// ///////////////////////////////////
// otfc // // On-The-Fly Converter, Radix 2 //
////////// ///////////////////////////////////
module otfc2 #(parameter N=65) (
module otfc2 #(parameter N=52) (
input logic clk, input logic clk,
input logic Start, input logic Start,
input logic qp, qz, qm, input logic qp, qz, qm,
@ -255,16 +251,15 @@ module otfc2 #(parameter N=52) (
QMNext = {QMR, 1'b0}; QMNext = {QMR, 1'b0};
end end
end end
assign r = Q[54] ? Q[53:2] : Q[52:1]; assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
endmodule endmodule
///////// /////////
// inv // // inv //
///////// /////////
module inv(input logic [`DIVLEN+3:0] in,
module inv(input logic [55:0] in, output logic [`DIVLEN+3:0] out);
output logic [55:0] out);
assign #1 out = ~in; assign #1 out = ~in;
endmodule endmodule
@ -272,14 +267,11 @@ endmodule
////////// //////////
// mux3 // // mux3 //
////////// //////////
module mux3onehot(in0, in1, in2, sel0, sel1, sel2, out); module mux3onehot #(parameter N=65) (
input [55:0] in0; input logic [N+3:0] in0, in1, in2,
input [55:0] in1; input logic sel0, sel1, sel2,
input [55:0] in2; output logic [N+3:0] out
input sel0; );
input sel1;
input sel2;
output [55:0] out;
// lazy inspection of the selects // lazy inspection of the selects
// really we should make sure selects are mutually exclusive // really we should make sure selects are mutually exclusive
@ -290,7 +282,7 @@ endmodule
///////// /////////
// csa // // csa //
///////// /////////
module csa #(parameter N=56) ( module csa #(parameter N=69) (
input logic [N-1:0] in1, in2, in3, input logic [N-1:0] in1, in2, in3,
input logic cin, input logic cin,
output logic [N-1:0] out1, out2 output logic [N-1:0] out1, out2
@ -305,28 +297,26 @@ module csa #(parameter N=56) (
// insert cin. // insert cin.
assign #1 out1 = in1 ^ in2 ^ in3; assign #1 out1 = in1 ^ in2 ^ in3;
assign #1 out2 = {in1[54:0] & (in2[54:0] | in3[54:0]) | assign #1 out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
(in2[54:0] & in3[54:0]), cin}; (in2[N-2:0] & in3[N-2:0]), cin};
endmodule endmodule
////////////// //////////////
// expcalc // // expcalc //
////////////// //////////////
module expcalc( module expcalc(
input logic [`NE-1:0] XExp, YExp, input logic [`NE-1:0] XExp, YExp,
output logic [`NE-1:0] calcExp output logic [`NE-1:0] calcExp
); );
assign calcExp = XExp - YExp + 11'b01111111111; assign calcExp = XExp - YExp + (`NE)'(`BIAS);
endmodule endmodule
////////////// //////////////
// signcalc // // signcalc //
////////////// //////////////
module signcalc( module signcalc(
input logic XSign, YSign, input logic XSign, YSign,
output logic calcSign output logic calcSign
@ -336,15 +326,27 @@ module signcalc(
endmodule endmodule
////////////////////
// Postprocessing //
////////////////////
module srtpostproc (
input [`DIVLEN+2:0] rp, rm,
output [`DIVLEN-1:0] Quot
);
//assign Quot = rp - rm;
finaladd #(`DIVLEN+3) finaladd(rp, rm, Quot);
endmodule
////////////// //////////////
// finaladd // // finaladd //
////////////// //////////////
module finaladd( module finaladd #(parameter N=68) (
input logic [54:0] rp, rm, input logic [N-1:0] rp, rm,
output logic [51:0] r output logic [N-4:0] r
); );
logic [54:0] diff; logic [N-1:0] diff;
// this magic block performs the final addition for you // this magic block performs the final addition for you
// to convert the positive and negative quotient digits // to convert the positive and negative quotient digits
@ -359,6 +361,6 @@ module finaladd(
// The checker ignores such an error. // The checker ignores such an error.
assign #1 diff = rp - rm; assign #1 diff = rp - rm;
assign #1 r = diff[54] ? diff[53:2] : diff[52:1]; assign #1 r = diff[N-1] ? diff[N-2:2] : diff[N-3:1];
endmodule endmodule

View File

@ -1,3 +1,5 @@
`define DIVLEN 65
///////////// /////////////
// counter // // counter //
///////////// /////////////
@ -37,15 +39,16 @@ endmodule
// testbench // // testbench //
////////// //////////
module testbench; module testbench;
logic clk; logic clk;
logic req; logic req;
logic done; logic done;
logic [63:0] a, b; logic [63:0] a, b;
logic [51:0] afrac, bfrac; logic [51:0] afrac, bfrac;
logic [10:0] aExp, bExp; logic [10:0] aExp, bExp;
logic asign, bsign; logic asign, bsign;
logic [51:0] r, rOTFC; logic [51:0] r, rOTFC;
logic [54:0] rp, rm; // positive quotient digits logic [`DIVLEN-1:0] Quot, QuotOTFC;
logic [54:0] rp, rm; // positive quotient digits
// Test parameters // Test parameters
parameter MEM_SIZE = 40000; parameter MEM_SIZE = 40000;
@ -65,14 +68,14 @@ module testbench;
integer testnum, errors; integer testnum, errors;
// Divider // Divider
srt #(52) srt(.clk, .Start(req), srt srt(.clk, .Start(req),
.Stall(1'b0), .Flush(1'b0), .Stall(1'b0), .Flush(1'b0),
.XExp(aExp), .YExp(bExp), .rExp, .XExp(aExp), .YExp(bExp), .rExp,
.XSign(asign), .YSign(bsign), .rsign, .XSign(asign), .YSign(bsign), .rsign,
.SrcXFrac(afrac), .SrcYFrac(bfrac), .SrcXFrac(afrac), .SrcYFrac(bfrac),
.SrcA('0), .SrcB('0), .Fmt(2'b00), .SrcA('0), .SrcB('0), .Fmt(2'b00),
.W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0),
.Quot(r), .QuotOTFC(rOTFC), .Rem(), .Flags()); .Quot, .QuotOTFC, .Rem(), .Flags());
// Counter // Counter
counter counter(clk, req, done); counter counter(clk, req, done);
@ -98,6 +101,8 @@ module testbench;
b = Vec[`memb]; b = Vec[`memb];
{bsign, bExp, bfrac} = b; {bsign, bExp, bfrac} = b;
nextr = Vec[`memr]; nextr = Vec[`memr];
r = Quot[`DIVLEN:`DIVLEN - 52];
rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52];
req <= #5 1; req <= #5 1;
end end

View File

@ -261,26 +261,26 @@ module testbenchfp;
Fmt = {Fmt, 2'b11}; Fmt = {Fmt, 2'b11};
end end
end end
if (TEST === "div" | TEST === "all") begin // if division is being tested // if (TEST === "div" | TEST === "all") begin // if division is being tested
// add the divide tests/op-ctrls/unit/fmt // // add the divide tests/op-ctrls/unit/fmt
Tests = {Tests, f128div}; // Tests = {Tests, f128div};
OpCtrl = {OpCtrl, `DIV_OPCTRL}; // OpCtrl = {OpCtrl, `DIV_OPCTRL};
WriteInt = {WriteInt, 1'b0}; // WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin // for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT}; // Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b11}; // Fmt = {Fmt, 2'b11};
end // end
end // end
if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested // if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
// add the square-root tests/op-ctrls/unit/fmt // // add the square-root tests/op-ctrls/unit/fmt
Tests = {Tests, f128sqrt}; // Tests = {Tests, f128sqrt};
OpCtrl = {OpCtrl, `SQRT_OPCTRL}; // OpCtrl = {OpCtrl, `SQRT_OPCTRL};
WriteInt = {WriteInt, 1'b0}; // WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin // for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT}; // Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b11}; // Fmt = {Fmt, 2'b11};
end // end
end // end
if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested
// add each rounding mode to it's own list of tests // add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel // - fma tests are very long, so run all rounding modes in parallel
@ -390,26 +390,26 @@ module testbenchfp;
Fmt = {Fmt, 2'b01}; Fmt = {Fmt, 2'b01};
end end
end end
if (TEST === "div" | TEST === "all") begin // if division is being tested // if (TEST === "div" | TEST === "all") begin // if division is being tested
// add the correct tests/op-ctrls/unit/fmt to their lists // // add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f64div}; // Tests = {Tests, f64div};
OpCtrl = {OpCtrl, `DIV_OPCTRL}; // OpCtrl = {OpCtrl, `DIV_OPCTRL};
WriteInt = {WriteInt, 1'b0}; // WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin // for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT}; // Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b01}; // Fmt = {Fmt, 2'b01};
end // end
end // end
if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted // if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted
// add the correct tests/op-ctrls/unit/fmt to their lists // // add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f64sqrt}; // Tests = {Tests, f64sqrt};
OpCtrl = {OpCtrl, `SQRT_OPCTRL}; // OpCtrl = {OpCtrl, `SQRT_OPCTRL};
WriteInt = {WriteInt, 1'b0}; // WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin // for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT}; // Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b01}; // Fmt = {Fmt, 2'b01};
end // end
end // end
if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
// add each rounding mode to it's own list of tests // add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel // - fma tests are very long, so run all rounding modes in parallel
@ -502,26 +502,26 @@ module testbenchfp;
Fmt = {Fmt, 2'b00}; Fmt = {Fmt, 2'b00};
end end
end end
if (TEST === "div" | TEST === "all") begin // if division is being tested // if (TEST === "div" | TEST === "all") begin // if division is being tested
// add the correct tests/op-ctrls/unit/fmt to their lists // // add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f32div}; // Tests = {Tests, f32div};
OpCtrl = {OpCtrl, `DIV_OPCTRL}; // OpCtrl = {OpCtrl, `DIV_OPCTRL};
WriteInt = {WriteInt, 1'b0}; // WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin // for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT}; // Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b00}; // Fmt = {Fmt, 2'b00};
end // end
end // end
if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested // if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
// add the correct tests/op-ctrls/unit/fmt to their lists // // add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f32sqrt}; // Tests = {Tests, f32sqrt};
OpCtrl = {OpCtrl, `SQRT_OPCTRL}; // OpCtrl = {OpCtrl, `SQRT_OPCTRL};
WriteInt = {WriteInt, 1'b0}; // WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin // for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT}; // Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b00}; // Fmt = {Fmt, 2'b00};
end // end
end // end
if (TEST === "fma" | TEST === "all") begin // if fma is being tested if (TEST === "fma" | TEST === "all") begin // if fma is being tested
// add each rounding mode to it's own list of tests // add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel // - fma tests are very long, so run all rounding modes in parallel
@ -596,26 +596,26 @@ module testbenchfp;
Fmt = {Fmt, 2'b10}; Fmt = {Fmt, 2'b10};
end end
end end
if (TEST === "div" | TEST === "all") begin // if division is being tested // if (TEST === "div" | TEST === "all") begin // if division is being tested
// add the correct tests/op-ctrls/unit/fmt to their lists // // add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f16div}; // Tests = {Tests, f16div};
OpCtrl = {OpCtrl, `DIV_OPCTRL}; // OpCtrl = {OpCtrl, `DIV_OPCTRL};
WriteInt = {WriteInt, 1'b0}; // WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin // for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT}; // Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b10}; // Fmt = {Fmt, 2'b10};
end // end
end // end
if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested // if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
// add the correct tests/op-ctrls/unit/fmt to their lists // // add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f16sqrt}; // Tests = {Tests, f16sqrt};
OpCtrl = {OpCtrl, `SQRT_OPCTRL}; // OpCtrl = {OpCtrl, `SQRT_OPCTRL};
WriteInt = {WriteInt, 1'b0}; // WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin // for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT}; // Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b10}; // Fmt = {Fmt, 2'b10};
end // end
end // end
if (TEST === "fma" | TEST === "all") begin // if fma is being tested if (TEST === "fma" | TEST === "all") begin // if fma is being tested
// add each rounding mode to it's own list of tests // add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel // - fma tests are very long, so run all rounding modes in parallel
@ -673,10 +673,10 @@ module testbenchfp;
// - 1 for the larger precision // - 1 for the larger precision
// - 0 for the smaller precision // - 0 for the smaller precision
always_comb begin always_comb begin
if(`FMTBITS == 2) ModFmt = FmtVal; if(`FMTBITS == 1) ModFmt = FmtVal == `FMT;
else ModFmt = FmtVal === `FMT; else ModFmt = FmtVal;
if(`FMTBITS == 2) FmaModFmt = FmaFmtVal; if(`FMTBITS == 1) FmaModFmt = FmaFmtVal == `FMT;
else FmaModFmt = FmaFmtVal === `FMT; else FmaModFmt = FmaFmtVal;
end end
// extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
@ -1028,111 +1028,111 @@ end
// - the sign of the NaN does not matter for the opperations being tested // - the sign of the NaN does not matter for the opperations being tested
// - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter // - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter
case (FmaFmtVal) case (FmaFmtVal)
4'b11: FmaRneNaNGood =(((`IEEE754==0)&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | 4'b11: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | (FmaRneAnsFlg[4]&(FmaRneRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneX[`Q_LEN-2:`Q_NF],1'b1,FmaRneX[`Q_NF-2:0]})) | (FmaRneXNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneX[`Q_LEN-2:`Q_NF],1'b1,FmaRneX[`Q_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneY[`Q_LEN-2:`Q_NF],1'b1,FmaRneY[`Q_NF-2:0]})) | (FmaRneYNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneY[`Q_LEN-2:`Q_NF],1'b1,FmaRneY[`Q_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneZ[`Q_LEN-2:`Q_NF],1'b1,FmaRneZ[`Q_NF-2:0]}))); (FmaRneZNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneZ[`Q_LEN-2:`Q_NF],1'b1,FmaRneZ[`Q_NF-2:0]})));
4'b01: FmaRneNaNGood =(((`IEEE754==0)&(FmaRneRes === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | 4'b01: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | (FmaRneAnsFlg[4]&(FmaRneRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneX[`D_LEN-2:`D_NF],1'b1,FmaRneX[`D_NF-2:0]})) | (FmaRneXNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneX[`D_LEN-2:`D_NF],1'b1,FmaRneX[`D_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneY[`D_LEN-2:`D_NF],1'b1,FmaRneY[`D_NF-2:0]})) | (FmaRneYNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneY[`D_LEN-2:`D_NF],1'b1,FmaRneY[`D_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneZ[`D_LEN-2:`D_NF],1'b1,FmaRneZ[`D_NF-2:0]}))); (FmaRneZNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneZ[`D_LEN-2:`D_NF],1'b1,FmaRneZ[`D_NF-2:0]})));
4'b00: FmaRneNaNGood =(((`IEEE754==0)&(FmaRneRes === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | 4'b00: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | (FmaRneAnsFlg[4]&(FmaRneRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneX[`S_LEN-2:`S_NF],1'b1,FmaRneX[`S_NF-2:0]})) | (FmaRneXNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneX[`S_LEN-2:`S_NF],1'b1,FmaRneX[`S_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneY[`S_LEN-2:`S_NF],1'b1,FmaRneY[`S_NF-2:0]})) | (FmaRneYNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneY[`S_LEN-2:`S_NF],1'b1,FmaRneY[`S_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneZ[`S_LEN-2:`S_NF],1'b1,FmaRneZ[`S_NF-2:0]}))); (FmaRneZNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneZ[`S_LEN-2:`S_NF],1'b1,FmaRneZ[`S_NF-2:0]})));
4'b10: FmaRneNaNGood =(((`IEEE754==0)&(FmaRneRes === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | 4'b10: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | (FmaRneAnsFlg[4]&(FmaRneRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneX[`H_LEN-2:`H_NF],1'b1,FmaRneX[`H_NF-2:0]})) | (FmaRneXNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneX[`H_LEN-2:`H_NF],1'b1,FmaRneX[`H_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneY[`H_LEN-2:`H_NF],1'b1,FmaRneY[`H_NF-2:0]})) | (FmaRneYNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneY[`H_LEN-2:`H_NF],1'b1,FmaRneY[`H_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneZ[`H_LEN-2:`H_NF],1'b1,FmaRneZ[`H_NF-2:0]}))); (FmaRneZNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneZ[`H_LEN-2:`H_NF],1'b1,FmaRneZ[`H_NF-2:0]})));
endcase endcase
case (FmaFmtVal) case (FmaFmtVal)
4'b11: FmaRzNaNGood = (((`IEEE754==0)&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | 4'b11: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | (FmaRzAnsFlg[4]&(FmaRzRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzX[`Q_LEN-2:`Q_NF],1'b1,FmaRzX[`Q_NF-2:0]})) | (FmaRzXNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzX[`Q_LEN-2:`Q_NF],1'b1,FmaRzX[`Q_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzY[`Q_LEN-2:`Q_NF],1'b1,FmaRzY[`Q_NF-2:0]})) | (FmaRzYNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzY[`Q_LEN-2:`Q_NF],1'b1,FmaRzY[`Q_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzZ[`Q_LEN-2:`Q_NF],1'b1,FmaRzZ[`Q_NF-2:0]}))); (FmaRzZNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzZ[`Q_LEN-2:`Q_NF],1'b1,FmaRzZ[`Q_NF-2:0]})));
4'b01: FmaRzNaNGood = (((`IEEE754==0)&(FmaRneRes === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | 4'b01: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | (FmaRzAnsFlg[4]&(FmaRzRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzX[`D_LEN-2:`D_NF],1'b1,FmaRzX[`D_NF-2:0]})) | (FmaRzXNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzX[`D_LEN-2:`D_NF],1'b1,FmaRzX[`D_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzY[`D_LEN-2:`D_NF],1'b1,FmaRzY[`D_NF-2:0]})) | (FmaRzYNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzY[`D_LEN-2:`D_NF],1'b1,FmaRzY[`D_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzZ[`D_LEN-2:`D_NF],1'b1,FmaRzZ[`D_NF-2:0]}))); (FmaRzZNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzZ[`D_LEN-2:`D_NF],1'b1,FmaRzZ[`D_NF-2:0]})));
4'b00: FmaRzNaNGood = (((`IEEE754==0)&(FmaRneRes === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | 4'b00: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | (FmaRzAnsFlg[4]&(FmaRzRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzX[`S_LEN-2:`S_NF],1'b1,FmaRzX[`S_NF-2:0]})) | (FmaRzXNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzX[`S_LEN-2:`S_NF],1'b1,FmaRzX[`S_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzY[`S_LEN-2:`S_NF],1'b1,FmaRzY[`S_NF-2:0]})) | (FmaRzYNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzY[`S_LEN-2:`S_NF],1'b1,FmaRzY[`S_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzZ[`S_LEN-2:`S_NF],1'b1,FmaRzZ[`S_NF-2:0]}))); (FmaRzZNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzZ[`S_LEN-2:`S_NF],1'b1,FmaRzZ[`S_NF-2:0]})));
4'b10: FmaRzNaNGood = (((`IEEE754==0)&(FmaRneRes === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | 4'b10: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | (FmaRzAnsFlg[4]&(FmaRzRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzX[`H_LEN-2:`H_NF],1'b1,FmaRzX[`H_NF-2:0]})) | (FmaRzXNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzX[`H_LEN-2:`H_NF],1'b1,FmaRzX[`H_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzY[`H_LEN-2:`H_NF],1'b1,FmaRzY[`H_NF-2:0]})) | (FmaRzYNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzY[`H_LEN-2:`H_NF],1'b1,FmaRzY[`H_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzZ[`H_LEN-2:`H_NF],1'b1,FmaRzZ[`H_NF-2:0]}))); (FmaRzZNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzZ[`H_LEN-2:`H_NF],1'b1,FmaRzZ[`H_NF-2:0]})));
endcase endcase
case (FmaFmtVal) case (FmaFmtVal)
4'b11: FmaRuNaNGood = (((`IEEE754==0)&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | 4'b11: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | (FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuX[`Q_LEN-2:`Q_NF],1'b1,FmaRuX[`Q_NF-2:0]})) | (FmaRuXNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuX[`Q_LEN-2:`Q_NF],1'b1,FmaRuX[`Q_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuY[`Q_LEN-2:`Q_NF],1'b1,FmaRuY[`Q_NF-2:0]})) | (FmaRuYNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuY[`Q_LEN-2:`Q_NF],1'b1,FmaRuY[`Q_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuZ[`Q_LEN-2:`Q_NF],1'b1,FmaRuZ[`Q_NF-2:0]}))); (FmaRuZNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuZ[`Q_LEN-2:`Q_NF],1'b1,FmaRuZ[`Q_NF-2:0]})));
4'b01: FmaRuNaNGood = (((`IEEE754==0)&(FmaRneRes === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | 4'b01: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | (FmaRuAnsFlg[4]&(FmaRuRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF{1'b0}}})) | (FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuX[`D_LEN-2:`D_NF],1'b1,FmaRuX[`D_NF-2:0]})) | (FmaRuXNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuX[`D_LEN-2:`D_NF],1'b1,FmaRuX[`D_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuY[`D_LEN-2:`D_NF],1'b1,FmaRuY[`D_NF-2:0]})) | (FmaRuYNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuY[`D_LEN-2:`D_NF],1'b1,FmaRuY[`D_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuZ[`D_LEN-2:`D_NF],1'b1,FmaRuZ[`D_NF-2:0]}))); (FmaRuZNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuZ[`D_LEN-2:`D_NF],1'b1,FmaRuZ[`D_NF-2:0]})));
4'b00: FmaRuNaNGood = (((`IEEE754==0)&(FmaRneRes === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | 4'b00: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | (FmaRuAnsFlg[4]&(FmaRuRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuX[`S_LEN-2:`S_NF],1'b1,FmaRuX[`S_NF-2:0]})) | (FmaRuXNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuX[`S_LEN-2:`S_NF],1'b1,FmaRuX[`S_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuY[`S_LEN-2:`S_NF],1'b1,FmaRuY[`S_NF-2:0]})) | (FmaRuYNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuY[`S_LEN-2:`S_NF],1'b1,FmaRuY[`S_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuZ[`S_LEN-2:`S_NF],1'b1,FmaRuZ[`S_NF-2:0]}))); (FmaRuZNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuZ[`S_LEN-2:`S_NF],1'b1,FmaRuZ[`S_NF-2:0]})));
4'b10: FmaRuNaNGood = (((`IEEE754==0)&(FmaRneRes === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | 4'b10: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | (FmaRuAnsFlg[4]&(FmaRuRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuX[`H_LEN-2:`H_NF],1'b1,FmaRuX[`H_NF-2:0]})) | (FmaRuXNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuX[`H_LEN-2:`H_NF],1'b1,FmaRuX[`H_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuY[`H_LEN-2:`H_NF],1'b1,FmaRuY[`H_NF-2:0]})) | (FmaRuYNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuY[`H_LEN-2:`H_NF],1'b1,FmaRuY[`H_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuZ[`H_LEN-2:`H_NF],1'b1,FmaRuZ[`H_NF-2:0]}))); (FmaRuZNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuZ[`H_LEN-2:`H_NF],1'b1,FmaRuZ[`H_NF-2:0]})));
endcase endcase
case (FmaFmtVal) case (FmaFmtVal)
4'b11: FmaRdNaNGood = (((`IEEE754==0)&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | 4'b11: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | (FmaRdAnsFlg[4]&(FmaRdRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdX[`Q_LEN-2:`Q_NF],1'b1,FmaRdX[`Q_NF-2:0]})) | (FmaRdXNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdX[`Q_LEN-2:`Q_NF],1'b1,FmaRdX[`Q_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdY[`Q_LEN-2:`Q_NF],1'b1,FmaRdY[`Q_NF-2:0]})) | (FmaRdYNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdY[`Q_LEN-2:`Q_NF],1'b1,FmaRdY[`Q_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdZ[`Q_LEN-2:`Q_NF],1'b1,FmaRdZ[`Q_NF-2:0]}))); (FmaRdZNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdZ[`Q_LEN-2:`Q_NF],1'b1,FmaRdZ[`Q_NF-2:0]})));
4'b01: FmaRdNaNGood = (((`IEEE754==0)&(FmaRneRes === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | 4'b01: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | (FmaRdAnsFlg[4]&(FmaRdRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdX[`D_LEN-2:`D_NF],1'b1,FmaRdX[`D_NF-2:0]})) | (FmaRdXNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdX[`D_LEN-2:`D_NF],1'b1,FmaRdX[`D_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdY[`D_LEN-2:`D_NF],1'b1,FmaRdY[`D_NF-2:0]})) | (FmaRdYNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdY[`D_LEN-2:`D_NF],1'b1,FmaRdY[`D_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdZ[`D_LEN-2:`D_NF],1'b1,FmaRdZ[`D_NF-2:0]}))); (FmaRdZNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdZ[`D_LEN-2:`D_NF],1'b1,FmaRdZ[`D_NF-2:0]})));
4'b00: FmaRdNaNGood = (((`IEEE754==0)&(FmaRneRes === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | 4'b00: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | (FmaRdAnsFlg[4]&(FmaRdRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdX[`S_LEN-2:`S_NF],1'b1,FmaRdX[`S_NF-2:0]})) | (FmaRdXNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdX[`S_LEN-2:`S_NF],1'b1,FmaRdX[`S_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdY[`S_LEN-2:`S_NF],1'b1,FmaRdY[`S_NF-2:0]})) | (FmaRdYNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdY[`S_LEN-2:`S_NF],1'b1,FmaRdY[`S_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdZ[`S_LEN-2:`S_NF],1'b1,FmaRdZ[`S_NF-2:0]}))); (FmaRdZNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdZ[`S_LEN-2:`S_NF],1'b1,FmaRdZ[`S_NF-2:0]})));
4'b10: FmaRdNaNGood = (((`IEEE754==0)&(FmaRneRes === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | 4'b10: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | (FmaRdAnsFlg[4]&(FmaRdRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdX[`H_LEN-2:`H_NF],1'b1,FmaRdX[`H_NF-2:0]})) | (FmaRdXNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdX[`H_LEN-2:`H_NF],1'b1,FmaRdX[`H_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdY[`H_LEN-2:`H_NF],1'b1,FmaRdY[`H_NF-2:0]})) | (FmaRdYNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdY[`H_LEN-2:`H_NF],1'b1,FmaRdY[`H_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdZ[`H_LEN-2:`H_NF],1'b1,FmaRdZ[`H_NF-2:0]}))); (FmaRdZNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdZ[`H_LEN-2:`H_NF],1'b1,FmaRdZ[`H_NF-2:0]})));
endcase endcase
case (FmaFmtVal) case (FmaFmtVal)
4'b11: FmaRnmNaNGood =(((`IEEE754==0)&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | 4'b11: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | (FmaRnmAnsFlg[4]&(FmaRnmRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmX[`Q_LEN-2:`Q_NF],1'b1,FmaRnmX[`Q_NF-2:0]})) | (FmaRnmXNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmX[`Q_LEN-2:`Q_NF],1'b1,FmaRnmX[`Q_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmY[`Q_LEN-2:`Q_NF],1'b1,FmaRnmY[`Q_NF-2:0]})) | (FmaRnmYNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmY[`Q_LEN-2:`Q_NF],1'b1,FmaRnmY[`Q_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmZ[`Q_LEN-2:`Q_NF],1'b1,FmaRnmZ[`Q_NF-2:0]}))); (FmaRnmZNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmZ[`Q_LEN-2:`Q_NF],1'b1,FmaRnmZ[`Q_NF-2:0]})));
4'b01: FmaRnmNaNGood =(((`IEEE754==0)&(FmaRneRes === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | 4'b01: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | (FmaRnmAnsFlg[4]&(FmaRnmRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmX[`D_LEN-2:`D_NF],1'b1,FmaRnmX[`D_NF-2:0]})) | (FmaRnmXNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmX[`D_LEN-2:`D_NF],1'b1,FmaRnmX[`D_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmY[`D_LEN-2:`D_NF],1'b1,FmaRnmY[`D_NF-2:0]})) | (FmaRnmYNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmY[`D_LEN-2:`D_NF],1'b1,FmaRnmY[`D_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmZ[`D_LEN-2:`D_NF],1'b1,FmaRnmZ[`D_NF-2:0]}))); (FmaRnmZNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmZ[`D_LEN-2:`D_NF],1'b1,FmaRnmZ[`D_NF-2:0]})));
4'b00: FmaRnmNaNGood =(((`IEEE754==0)&(FmaRneRes === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | 4'b00: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | (FmaRnmAnsFlg[4]&(FmaRnmRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmX[`S_LEN-2:`S_NF],1'b1,FmaRnmX[`S_NF-2:0]})) | (FmaRnmXNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmX[`S_LEN-2:`S_NF],1'b1,FmaRnmX[`S_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmY[`S_LEN-2:`S_NF],1'b1,FmaRnmY[`S_NF-2:0]})) | (FmaRnmYNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmY[`S_LEN-2:`S_NF],1'b1,FmaRnmY[`S_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmZ[`S_LEN-2:`S_NF],1'b1,FmaRnmZ[`S_NF-2:0]}))); (FmaRnmZNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmZ[`S_LEN-2:`S_NF],1'b1,FmaRnmZ[`S_NF-2:0]})));
4'b10: FmaRnmNaNGood =(((`IEEE754==0)&(FmaRneRes === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | 4'b10: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | (FmaRnmAnsFlg[4]&(FmaRnmRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmX[`H_LEN-2:`H_NF],1'b1,FmaRnmX[`H_NF-2:0]})) | (FmaRnmXNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmX[`H_LEN-2:`H_NF],1'b1,FmaRnmX[`H_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmY[`H_LEN-2:`H_NF],1'b1,FmaRnmY[`H_NF-2:0]})) | (FmaRnmYNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmY[`H_LEN-2:`H_NF],1'b1,FmaRnmY[`H_NF-2:0]})) |
@ -1140,22 +1140,22 @@ end
endcase endcase
if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT) if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT)
case (FmtVal) case (FmtVal)
4'b11: NaNGood = (((`IEEE754==0)&(Res === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | 4'b11: NaNGood = (((`IEEE754==0)&AnsNaN&(Res === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(AnsFlg[4]&(Res[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(XNaN&(Res[`Q_LEN-2:0] === {X[`Q_LEN-2:`Q_NF],1'b1,X[`Q_NF-2:0]})) | (XNaN&(Res[`Q_LEN-2:0] === {X[`Q_LEN-2:`Q_NF],1'b1,X[`Q_NF-2:0]})) |
(YNaN&(Res[`Q_LEN-2:0] === {Y[`Q_LEN-2:`Q_NF],1'b1,Y[`Q_NF-2:0]})) | (YNaN&(Res[`Q_LEN-2:0] === {Y[`Q_LEN-2:`Q_NF],1'b1,Y[`Q_NF-2:0]})) |
(ZNaN&(Res[`Q_LEN-2:0] === {Z[`Q_LEN-2:`Q_NF],1'b1,Z[`Q_NF-2:0]}))); (ZNaN&(Res[`Q_LEN-2:0] === {Z[`Q_LEN-2:`Q_NF],1'b1,Z[`Q_NF-2:0]})));
4'b01: NaNGood = (((`IEEE754==0)&(Res === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | 4'b01: NaNGood = (((`IEEE754==0)&AnsNaN&(Res[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(AnsFlg[4]&(Res[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(XNaN&(Res[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) | (XNaN&(Res[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) |
(YNaN&(Res[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]})) | (YNaN&(Res[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]})) |
(ZNaN&(Res[`D_LEN-2:0] === {Z[`D_LEN-2:`D_NF],1'b1,Z[`D_NF-2:0]}))); (ZNaN&(Res[`D_LEN-2:0] === {Z[`D_LEN-2:`D_NF],1'b1,Z[`D_NF-2:0]})));
4'b00: NaNGood = (((`IEEE754==0)&(Res === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | 4'b00: NaNGood = (((`IEEE754==0)&AnsNaN&(Res[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(AnsFlg[4]&(Res[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(XNaN&(Res[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) | (XNaN&(Res[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) |
(YNaN&(Res[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]})) | (YNaN&(Res[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]})) |
(ZNaN&(Res[`S_LEN-2:0] === {Z[`S_LEN-2:`S_NF],1'b1,Z[`S_NF-2:0]}))); (ZNaN&(Res[`S_LEN-2:0] === {Z[`S_LEN-2:`S_NF],1'b1,Z[`S_NF-2:0]})));
4'b10: NaNGood = (((`IEEE754==0)&(Res === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | 4'b10: NaNGood = (((`IEEE754==0)&AnsNaN&(Res[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(AnsFlg[4]&(Res[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(XNaN&(Res[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) | (XNaN&(Res[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) |
(YNaN&(Res[`H_LEN-2:0] === {Y[`H_LEN-2:`H_NF],1'b1,Y[`H_NF-2:0]})) | (YNaN&(Res[`H_LEN-2:0] === {Y[`H_LEN-2:`H_NF],1'b1,Y[`H_NF-2:0]})) |
@ -1163,22 +1163,22 @@ end
endcase endcase
else if (UnitVal === `CVTFPUNIT) // if converting from floating point to floating point OpCtrl contains the final FP format else if (UnitVal === `CVTFPUNIT) // if converting from floating point to floating point OpCtrl contains the final FP format
case (OpCtrlVal[1:0]) case (OpCtrlVal[1:0])
2'b11: NaNGood = (((`IEEE754==0)&(Res === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | 2'b11: NaNGood = (((`IEEE754==0)&AnsNaN&(Res === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(AnsFlg[4]&(Res[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(AnsNaN&(Res[`Q_LEN-2:0] === Ans[`Q_LEN-2:0])) | (AnsNaN&(Res[`Q_LEN-2:0] === Ans[`Q_LEN-2:0])) |
(XNaN&(Res[`Q_LEN-2:0] === {X[`Q_LEN-2:`Q_NF],1'b1,X[`Q_NF-2:0]})) | (XNaN&(Res[`Q_LEN-2:0] === {X[`Q_LEN-2:`Q_NF],1'b1,X[`Q_NF-2:0]})) |
(YNaN&(Res[`Q_LEN-2:0] === {Y[`Q_LEN-2:`Q_NF],1'b1,Y[`Q_NF-2:0]}))); (YNaN&(Res[`Q_LEN-2:0] === {Y[`Q_LEN-2:`Q_NF],1'b1,Y[`Q_NF-2:0]})));
2'b01: NaNGood = (((`IEEE754==0)&(Res === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | 2'b01: NaNGood = (((`IEEE754==0)&AnsNaN&(Res[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(AnsFlg[4]&(Res[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(AnsNaN&(Res[`D_LEN-2:0] === Ans[`D_LEN-2:0])) | (AnsNaN&(Res[`D_LEN-2:0] === Ans[`D_LEN-2:0])) |
(XNaN&(Res[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) | (XNaN&(Res[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) |
(YNaN&(Res[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]}))); (YNaN&(Res[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]})));
2'b00: NaNGood = (((`IEEE754==0)&(Res === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | 2'b00: NaNGood = (((`IEEE754==0)&AnsNaN&(Res[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(AnsFlg[4]&(Res[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(AnsNaN&(Res[`S_LEN-2:0] === Ans[`S_LEN-2:0])) | (AnsNaN&(Res[`S_LEN-2:0] === Ans[`S_LEN-2:0])) |
(XNaN&(Res[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) | (XNaN&(Res[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) |
(YNaN&(Res[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]}))); (YNaN&(Res[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]})));
2'b10: NaNGood = (((`IEEE754==0)&(Res === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | 2'b10: NaNGood = (((`IEEE754==0)&AnsNaN&(Res[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(AnsFlg[4]&(Res[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(AnsNaN&(Res[`H_LEN-2:0] === Ans[`H_LEN-2:0])) | (AnsNaN&(Res[`H_LEN-2:0] === Ans[`H_LEN-2:0])) |
(XNaN&(Res[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) | (XNaN&(Res[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) |
@ -1452,14 +1452,14 @@ module readvectors (
case (Fmt) case (Fmt)
2'b11: begin // quad 2'b11: begin // quad
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, `Q_NF'h0}; if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
Ans = TestVector[8+(`Q_LEN-1):8]; Ans = TestVector[8+(`Q_LEN-1):8];
end end
2'b01: begin // double 2'b01: begin // double
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, `D_NF'h0}; else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}}; if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}};
else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
@ -1467,7 +1467,7 @@ module readvectors (
2'b00: begin // single 2'b00: begin // single
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]}; if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, `S_NF'h0}; else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}}; if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}};
else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]}; else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
@ -1475,7 +1475,7 @@ module readvectors (
2'b10: begin // half 2'b10: begin // half
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, `H_NF'h0}; else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}}; if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}};
else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};

View File

@ -666,7 +666,7 @@ module testbench;
// turn on waves // turn on waves
if (AttemptedInstructionCount == INSTR_WAVEON) $stop; if (AttemptedInstructionCount == INSTR_WAVEON) $stop;
// end sim // end sim
if ((AttemptedInstructionCount == INSTR_LIMIT) & (INSTR_LIMIT!=0)) $stop; if ((AttemptedInstructionCount == INSTR_LIMIT) & (INSTR_LIMIT!=0)) begin $stop; $stop; end
fault = 0; fault = 0;
if (`DEBUG_TRACE >= 1) begin if (`DEBUG_TRACE >= 1) begin
`checkEQ("PCW",PCW,ExpectedPCW) `checkEQ("PCW",PCW,ExpectedPCW)

View File

@ -127,7 +127,7 @@ logic [3:0] dummy;
end end
string signame, memfilename, pathname, objdumpfilename, adrstr, outputfile; string signame, memfilename, pathname, objdumpfilename, adrstr, outputfile;
integer outputFilePointer; integer outputFilePointer, ProgramLabelMap, ProgramAddrMap;
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn; logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
logic UARTSin, UARTSout; logic UARTSin, UARTSout;
@ -215,8 +215,31 @@ logic [3:0] dummy;
// Termination condition (i.e. we finished running current test) // Termination condition (i.e. we finished running current test)
if (DCacheFlushDone) begin if (DCacheFlushDone) begin
// Gets the memory location of begin_signature // Gets the memory location of begin_signature
testadr = (`RAM_BASE+tests[test+1].atohex())/(`XLEN/8); adrstr = "0";
testadrNoBase = (tests[test+1].atohex())/(`XLEN/8); ProgramLabelMap = $fopen(ProgramLabelMapFile, "r");
ProgramAddrMap = $fopen(ProgramAddrMapFile, "r");
if (ProgramLabelMap & ProgramAddrMap) begin // check we found both files
while (!$feof(ProgramLabelMap)) begin
string addr, label;
integer returncode;
returncode = $fgets(label, ProgramLabelMap);
returncode = $fgets(addr, ProgramAddrMap);
if (label == "begin_signature\n") begin
adrstr = addr[1:7];
if (adrstr=="0000000") // if running on rv64 we get the address at a later
adrstr = addr[9:15];
if (DEBUG) $display("%s begin_signature adrstr: %s", TEST, adrstr);
end
end
end
if (adrstr == "0") begin
$display("begin_signature addr not found in %s", ProgramLabelMapFile);
end
$fclose(ProgramLabelMap);
$fclose(ProgramAddrMap);
testadr = (`RAM_BASE+adrstr.atohex())/(`XLEN/8);
testadrNoBase = (adrstr.atohex())/(`XLEN/8);
#600; // give time for instructions in pipeline to finish #600; // give time for instructions in pipeline to finish
if (TEST == "embench") begin if (TEST == "embench") begin
// Writes contents of begin_signature to .sim.output file // Writes contents of begin_signature to .sim.output file
@ -294,7 +317,8 @@ logic [3:0] dummy;
end end
end end
// move onto the next test, check to see if we're done // move onto the next test, check to see if we're done
test = test + 2; // test = test + 2;
test = test + 1;
if (test == tests.size()) begin if (test == tests.size()) begin
if (totalerrors == 0) $display("SUCCESS! All tests ran without failures."); if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
else $display("FAIL: %d test programs had errors", totalerrors); else $display("FAIL: %d test programs had errors", totalerrors);

File diff suppressed because it is too large Load Diff