forked from Github_Repos/cvw
Merge branch 'main' into cacheburstmode
This commit is contained in:
commit
284e0395a0
4
.gitignore
vendored
4
.gitignore
vendored
@ -68,6 +68,8 @@ synthDC/alib-52
|
||||
synthDC/*.log
|
||||
synthDC/*.svf
|
||||
synthDC/runs/
|
||||
synthDC/plots/
|
||||
synthDC/runArchive
|
||||
synthDC/hdl
|
||||
/pipelined/regression/power.saif
|
||||
tests/fp/vectors/*.tv
|
||||
@ -104,3 +106,5 @@ pipelined/config/rv64ic_noPriv
|
||||
pipelined/config/rv64ic_orig
|
||||
synthDC/Summary.csv
|
||||
pipelined/srt/exptestgen
|
||||
pipelined/srt/testgen
|
||||
pipelined/srt/qst2
|
||||
|
Binary file not shown.
@ -1 +1 @@
|
||||
Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
|
||||
Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
|
@ -1,37 +1,62 @@
|
||||
# Makefile added 1/20/22 David_Harris@hmc.edu
|
||||
# Expanded and developed by dtorres@hmc.edu
|
||||
# Compile Embench for Wally
|
||||
|
||||
all: build sim
|
||||
all: sim size
|
||||
|
||||
allClean: clean all
|
||||
|
||||
build:
|
||||
../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles"
|
||||
../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib" --cflags="-nostdlib" --dummy-libs="libgcc libm libc crt0"
|
||||
build: buildspeed buildsize
|
||||
|
||||
sim: size speed
|
||||
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed
|
||||
buildspeed:
|
||||
../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
|
||||
find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
|
||||
|
||||
size:
|
||||
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for size
|
||||
buildsize:
|
||||
../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
|
||||
|
||||
# builds dependencies, then launches modelsim and finally runs python wrapper script to present results
|
||||
sim: modelsim_build_memfile modelsim_run speed
|
||||
|
||||
# launches modelsim to simulate tests on wally
|
||||
modelsim_run:
|
||||
(cd ../../pipelined/regression/ && vsim -c -do "do wally-pipelined-batch.do rv32gc embench")
|
||||
cd ../../benchmarks/embench/
|
||||
|
||||
# builds the objdump based on the compiled c elf files
|
||||
objdump: buildspeed
|
||||
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
|
||||
|
||||
# build memfiles, objdump.lab and objdump.addr files
|
||||
modelsim_build_memfile: objdump
|
||||
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
|
||||
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
|
||||
|
||||
# builds the tests for speed, runs them on spike and then launches python script to present results
|
||||
# note that the speed python script benchmark_speed.py can get confused if there's both a .output file created from spike and modelsim
|
||||
# you'll need to manually remove one of the two .output files, or run make clean
|
||||
# prerequisite is spike_run (the rule below that actually invokes spike); no spikecmd rule exists in this Makefile
spike: buildspeed spike_run speed
|
||||
|
||||
# command to run spike on all of the benchmarks
|
||||
spike_run: buildspeed
|
||||
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do spike --isa=rv32imac +signature=$$f.spike.output +signature-granularity=4 $$f; done
|
||||
|
||||
# python wrapper to present results of embench size benchmark
|
||||
size: buildsize
|
||||
../../addins/embench-iot/benchmark_size.py --builddir=bd_size
|
||||
|
||||
# python wrapper to present results of embench speed benchmark
|
||||
speed:
|
||||
../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=50
|
||||
|
||||
objdump:
|
||||
riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_speed/src/aha-mont64/aha-mont64.objdump
|
||||
riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/cubic/cubic > ../../addins/embench-iot/bd_speed/src/cubic/cubic.objdump
|
||||
riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/md5sum/md5sum > ../../addins/embench-iot/bd_speed/src/md5sum/md5sum.objdump
|
||||
riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/statemate/statemate > ../../addins/embench-iot/bd_speed/src/statemate/statemate.objdump
|
||||
../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1
|
||||
|
||||
# deletes all files
|
||||
clean:
|
||||
rm -rf ../../addins/embench-iot/bd_speed/
|
||||
rm -rf ../../addins/embench-iot/bd_size/
|
||||
|
||||
# std:
|
||||
# ../../addins/embench-iot/build_all.py --builddir=bd_std --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-v -c -O2 -ffunction-sections -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -v -march=rv32imac -mabi=ilp32 ../../../../../benchmarks/embench/tohost.S -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --user-libs="-lm"
|
||||
# riscv64-unknown-elf-objdump -D ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64.objdump
|
||||
# --dummy-libs="libgcc libm libc"
|
||||
# --cflags "-O2 -g -nostartfiles"
|
||||
# ../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-c -Os -ffunction-sections -nostdlib -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -nostdlib -march=rv32imac -mabi=ilp32 -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --dummy-libs="libgcc libm libc"
|
||||
# --user-libs="-lm"
|
||||
# riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
|
||||
allclean: clean
|
||||
rm -rf ../../addins/embench-iot/logs/
|
||||
|
||||
# riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
|
@ -11,9 +11,6 @@ work/coremark.bare.riscv.elf.memfile: work/coremark.bare.riscv
|
||||
riscv64-unknown-elf-elf2hex --bit-width 64 --input $< --output $@
|
||||
extractFunctionRadix.sh $<.elf.objdump
|
||||
|
||||
work/coremark.bare.riscv.objdump: work/coremark.bare.riscv
|
||||
riscv64-unknown-elf-objdump -D work/coremark.bare.riscv > work/coremark.bare.riscv.objdump
|
||||
|
||||
work/coremark.bare.riscv: $(sources) Makefile
|
||||
# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta"
|
||||
# These flags were used by WD on CoreMark
|
||||
|
@ -327,7 +327,7 @@ connect_debug_port u_ila_0/probe72 [get_nets [list wallypipelinedsoc/core/hzu/BP
|
||||
create_debug_port u_ila_0 probe
|
||||
set_property port_width 1 [get_debug_ports u_ila_0/probe73]
|
||||
set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe73]
|
||||
connect_debug_port u_ila_0/probe73 [get_nets [list wallypipelinedsoc/core/hzu/CSRWritePendingDEM ]]
|
||||
connect_debug_port u_ila_0/probe73 [get_nets [list wallypipelinedsoc/core/hzu/CSRWriteFencePendingDEM ]]
|
||||
|
||||
create_debug_port u_ila_0 probe
|
||||
set_property port_width 1 [get_debug_ports u_ila_0/probe74]
|
||||
@ -402,7 +402,7 @@ connect_debug_port u_ila_0/probe87 [get_nets [list wallypipelinedsoc/core/hzu/Br
|
||||
create_debug_port u_ila_0 probe
|
||||
set_property port_width 1 [get_debug_ports u_ila_0/probe88]
|
||||
set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe88]
|
||||
connect_debug_port u_ila_0/probe88 [get_nets [list wallypipelinedsoc/core/hzu/InvalidateICacheM ]]
|
||||
connect_debug_port u_ila_0/probe88 [get_nets [list {wallypipelinedsoc/uncore/uart.uart/u/RXerrIP} ]]
|
||||
|
||||
create_debug_port u_ila_0 probe
|
||||
set_property port_width 1 [get_debug_ports u_ila_0/probe89]
|
||||
@ -433,7 +433,8 @@ connect_debug_port u_ila_0/probe93 [get_nets [list wallypipelinedsoc/core/hzu/St
|
||||
create_debug_port u_ila_0 probe
|
||||
set_property port_width 1 [get_debug_ports u_ila_0/probe94]
|
||||
set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe94]
|
||||
connect_debug_port u_ila_0/probe94 [get_nets [list wallypipelinedsoc/core/hzu/FlushF ]]
|
||||
connect_debug_port u_ila_0/probe94 [get_nets [list {wallypipelinedsoc/uncore/uart.uart/u/RXerrIP} ]]
|
||||
|
||||
|
||||
create_debug_port u_ila_0 probe
|
||||
set_property port_width 1 [get_debug_ports u_ila_0/probe95]
|
||||
@ -835,8 +836,4 @@ set_property port_width 4 [get_debug_ports u_ila_0/probe171]
|
||||
set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe171]
|
||||
connect_debug_port u_ila_0/probe171 [get_nets [list {wallypipelinedsoc/uncore/uart.uart/u/rxfifotail[0]} {wallypipelinedsoc/uncore/uart.uart/u/rxfifotail[1]} {wallypipelinedsoc/uncore/uart.uart/u/rxfifotail[2]} {wallypipelinedsoc/uncore/uart.uart/u/rxfifotail[3]} ]]
|
||||
|
||||
create_debug_port u_ila_0 probe
|
||||
set_property port_width 1 [get_debug_ports u_ila_0/probe172]
|
||||
set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe172]
|
||||
connect_debug_port u_ila_0/probe172 [get_nets [list {wallypipelinedsoc/uncore/uart.uart/u/RXerrIP} ]]
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -35,11 +35,11 @@
|
||||
`define XLEN 64
|
||||
|
||||
// IEEE 754 compliance
|
||||
`define IEEE754 1
|
||||
`define IEEE754 0
|
||||
|
||||
// MISA RISC-V configuration per specification
|
||||
//16 - quad 3 - double 5 - single
|
||||
`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 0 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 )
|
||||
// ZYXWVUTSRQPONMLKJIHGFEDCBA
|
||||
`define MISA 32'b0000000000101000001000100101101
|
||||
`define ZICSR_SUPPORTED 1
|
||||
`define ZIFENCEI_SUPPORTED 1
|
||||
`define COUNTERS 32
|
||||
@ -52,9 +52,11 @@
|
||||
`define UARCH_SINGLECYCLE 0
|
||||
`define DMEM `MEM_CACHE
|
||||
`define IMEM `MEM_CACHE
|
||||
`define DBUS 1
|
||||
`define IBUS 1
|
||||
`define VIRTMEM_SUPPORTED 1
|
||||
`define VECTORED_INTERRUPTS_SUPPORTED 1
|
||||
`define BIGENDIAN_SUPPORTED 0
|
||||
`define BIGENDIAN_SUPPORTED 1
|
||||
|
||||
// TLB configuration. Entries should be a power of 2
|
||||
`define ITLB_ENTRIES 32
|
||||
@ -82,13 +84,13 @@
|
||||
// Bus Interface width
|
||||
`define AHBW 64
|
||||
|
||||
// WFI Timeout Wait
|
||||
`define WFI_TIMEOUT_BIT 16
|
||||
|
||||
// Peripheral Physical Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
// WFI Timeout Wait
|
||||
`define WFI_TIMEOUT_BIT 16
|
||||
|
||||
// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file?
|
||||
`define BOOTROM_SUPPORTED 1'b1
|
||||
`define BOOTROM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
@ -130,13 +132,12 @@
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt"
|
||||
`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/shared/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
`define BPRED_SIZE 10
|
||||
|
||||
|
||||
`define REPLAY 0
|
||||
`define HPTW_WRITES_SUPPORTED 0
|
||||
|
@ -32,33 +32,34 @@
|
||||
`define DESIGN_COMPILER 0
|
||||
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
`define XLEN 32
|
||||
`define XLEN 64
|
||||
|
||||
// IEEE 754 compliance
|
||||
`define IEEE754 0
|
||||
|
||||
// E
|
||||
`define MISA (32'h00000010)
|
||||
`define ZICSR_SUPPORTED 0
|
||||
`define ZIFENCEI_SUPPORTED 0
|
||||
`define COUNTERS 0
|
||||
`define ZICOUNTERS_SUPPORTED 0
|
||||
// MISA RISC-V configuration per specification
|
||||
`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 )
|
||||
`define ZICSR_SUPPORTED 1
|
||||
`define ZIFENCEI_SUPPORTED 1
|
||||
`define COUNTERS 32
|
||||
`define ZICOUNTERS_SUPPORTED 1
|
||||
`define ZFH_SUPPORTED 1
|
||||
|
||||
// Microarchitectural Features
|
||||
/// Microarchitectural Features
|
||||
`define UARCH_PIPELINED 1
|
||||
`define UARCH_SUPERSCALR 0
|
||||
`define UARCH_SINGLECYCLE 0
|
||||
// *** replace with MEM_BUS
|
||||
`define DMEM `MEM_NONE
|
||||
`define IMEM `MEM_NONE
|
||||
`define DMEM `MEM_CACHE
|
||||
`define IMEM `MEM_CACHE
|
||||
`define DBUS 1
|
||||
`define IBUS 1
|
||||
`define VIRTMEM_SUPPORTED 0
|
||||
`define VECTORED_INTERRUPTS_SUPPORTED 0
|
||||
`define VIRTMEM_SUPPORTED 1
|
||||
`define VECTORED_INTERRUPTS_SUPPORTED 1
|
||||
`define BIGENDIAN_SUPPORTED 1
|
||||
|
||||
// TLB configuration. Entries should be a power of 2
|
||||
`define ITLB_ENTRIES 0
|
||||
`define DTLB_ENTRIES 0
|
||||
`define ITLB_ENTRIES 32
|
||||
`define DTLB_ENTRIES 32
|
||||
|
||||
// Cache configuration. Sizes should be a power of two
|
||||
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
|
||||
@ -71,44 +72,49 @@
|
||||
|
||||
// Integer Divider Configuration
|
||||
// DIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
`define DIV_BITSPERCYCLE 1
|
||||
`define DIV_BITSPERCYCLE 4
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 0
|
||||
`define PMP_ENTRIES 64
|
||||
|
||||
// Address space
|
||||
`define RESET_VECTOR 32'h80000000
|
||||
|
||||
// Peripheral Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
`define BOOTROM_SUPPORTED 1'b1
|
||||
`define BOOTROM_BASE 34'h00001000
|
||||
`define BOOTROM_RANGE 34'h00000FFF
|
||||
`define RAM_SUPPORTED 1'b1
|
||||
`define RAM_BASE 34'h80000000
|
||||
`define RAM_RANGE 34'h07FFFFFF
|
||||
`define EXT_MEM_SUPPORTED 1'b0
|
||||
`define EXT_MEM_BASE 34'h80000000
|
||||
`define EXT_MEM_RANGE 34'h07FFFFFF
|
||||
`define CLINT_SUPPORTED 1'b0
|
||||
`define CLINT_BASE 34'h02000000
|
||||
`define CLINT_RANGE 34'h0000FFFF
|
||||
`define GPIO_SUPPORTED 1'b0
|
||||
`define GPIO_BASE 34'h10060000
|
||||
`define GPIO_RANGE 34'h000000FF
|
||||
`define UART_SUPPORTED 1'b0
|
||||
`define UART_BASE 34'h10000000
|
||||
`define UART_RANGE 34'h00000007
|
||||
`define PLIC_SUPPORTED 1'b0
|
||||
`define PLIC_BASE 34'h0C000000
|
||||
`define PLIC_RANGE 34'h03FFFFFF
|
||||
`define SDC_SUPPORTED 1'b0
|
||||
`define SDC_BASE 34'h00012100
|
||||
`define SDC_RANGE 34'h0000001F
|
||||
`define RESET_VECTOR 64'h0000000080000000
|
||||
|
||||
// Bus Interface width
|
||||
`define AHBW 32
|
||||
`define AHBW 64
|
||||
|
||||
// WFI Timeout Wait
|
||||
`define WFI_TIMEOUT_BIT 16
|
||||
|
||||
// Peripheral Physical Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file?
|
||||
`define BOOTROM_SUPPORTED 1'b1
|
||||
`define BOOTROM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
`define BOOTROM_RANGE 56'h00000FFF
|
||||
`define RAM_SUPPORTED 1'b1
|
||||
`define RAM_BASE 56'h80000000
|
||||
`define RAM_RANGE 56'h7FFFFFFF
|
||||
`define EXT_MEM_SUPPORTED 1'b0
|
||||
`define EXT_MEM_BASE 56'h80000000
|
||||
`define EXT_MEM_RANGE 56'h07FFFFFF
|
||||
`define CLINT_SUPPORTED 1'b1
|
||||
`define CLINT_BASE 56'h02000000
|
||||
`define CLINT_RANGE 56'h0000FFFF
|
||||
`define GPIO_SUPPORTED 1'b1
|
||||
`define GPIO_BASE 56'h10060000
|
||||
`define GPIO_RANGE 56'h000000FF
|
||||
`define UART_SUPPORTED 1'b1
|
||||
`define UART_BASE 56'h10000000
|
||||
`define UART_RANGE 56'h00000007
|
||||
`define PLIC_SUPPORTED 1'b1
|
||||
`define PLIC_BASE 56'h0C000000
|
||||
`define PLIC_RANGE 56'h03FFFFFF
|
||||
`define SDC_SUPPORTED 1'b0
|
||||
`define SDC_BASE 56'h00012100
|
||||
`define SDC_RANGE 56'h0000001F
|
||||
|
||||
// Test modes
|
||||
|
||||
@ -119,17 +125,18 @@
|
||||
`define UART_PRESCALE 1
|
||||
|
||||
// Interrupt configuration
|
||||
`define PLIC_NUM_SRC 10
|
||||
`define PLIC_NUM_SRC 10
|
||||
// comment out the following if >=32 sources
|
||||
`define PLIC_NUM_SRC_LT_32
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 0
|
||||
`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/shared/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
`define BPRED_SIZE 10
|
||||
|
||||
`define REPLAY 0
|
||||
`define HPTW_WRITES_SUPPORTED 0
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -51,42 +51,45 @@
|
||||
`define PMPCFG_ENTRIES (`PMP_ENTRIES/8)
|
||||
|
||||
// Floating point constants for Quad, Double, Single, and Half precisions
|
||||
`define Q_LEN 128
|
||||
`define Q_NE 15
|
||||
`define Q_NF 112
|
||||
`define Q_BIAS 16383
|
||||
`define D_LEN 64
|
||||
`define D_NE 11
|
||||
`define D_NF 52
|
||||
`define D_BIAS 1023
|
||||
`define S_LEN 32
|
||||
`define S_NE 8
|
||||
`define S_NF 23
|
||||
`define S_BIAS 127
|
||||
`define H_LEN 16
|
||||
`define H_NE 5
|
||||
`define H_NF 10
|
||||
`define H_BIAS 15
|
||||
`define Q_LEN 32'd128
|
||||
`define Q_NE 32'd15
|
||||
`define Q_NF 32'd112
|
||||
`define Q_BIAS 32'd16383
|
||||
`define D_LEN 32'd64
|
||||
`define D_NE 32'd11
|
||||
`define D_NF 32'd52
|
||||
`define D_BIAS 32'd1023
|
||||
`define D_FMT 32'd1
|
||||
`define S_LEN 32'd32
|
||||
`define S_NE 32'd8
|
||||
`define S_NF 32'd23
|
||||
`define S_BIAS 32'd127
|
||||
`define S_FMT 32'd0 // single precision is format code 0 (double is 1, half is 2), consistent with the FMT, FMT1 and FMT2 selections in this file
|
||||
`define H_LEN 32'd16
|
||||
`define H_NE 32'd5
|
||||
`define H_NF 32'd10
|
||||
`define H_BIAS 32'd15
|
||||
|
||||
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
|
||||
`define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `F_SUPPORTED ? `S_LEN : `H_LEN)
|
||||
`define NE (`Q_SUPPORTED ? `Q_NE : `D_SUPPORTED ? `D_NE : `F_SUPPORTED ? `S_NE : `H_NE)
|
||||
`define NF (`Q_SUPPORTED ? `Q_NF : `D_SUPPORTED ? `D_NF : `F_SUPPORTED ? `S_NF : `H_NF)
|
||||
`define FMT (`Q_SUPPORTED ? 3 : `D_SUPPORTED ? 1 : `F_SUPPORTED ? 0 : 2)
|
||||
`define FMT (`Q_SUPPORTED ? 2'd3 : `D_SUPPORTED ? 2'd1 : `F_SUPPORTED ? 2'd0 : 2'd2)
|
||||
`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS)
|
||||
|
||||
// Floating point constants needed for FPU parameterization
|
||||
`define FPSIZES (`Q_SUPPORTED+`D_SUPPORTED+`F_SUPPORTED+`ZFH_SUPPORTED)
|
||||
`define LEN1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_LEN : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_LEN : `H_LEN)
|
||||
`define NE1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NE : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NE : `H_NE)
|
||||
`define NF1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NF : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NF : `H_NF)
|
||||
`define FMT1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? 1 : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? 0 : 2)
|
||||
`define BIAS1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_BIAS : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_BIAS : `H_BIAS)
|
||||
`define LEN2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_LEN : `H_LEN)
|
||||
`define FPSIZES ((32)'(`Q_SUPPORTED)+(32)'(`D_SUPPORTED)+(32)'(`F_SUPPORTED)+(32)'(`ZFH_SUPPORTED))
|
||||
`define FMTBITS ((`FPSIZES>=3)+1)
|
||||
`define LEN1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_LEN : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_LEN : `H_LEN)
|
||||
`define NE1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NE : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NE : `H_NE)
|
||||
`define NF1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NF : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NF : `H_NF)
|
||||
`define FMT1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? 2'd1 : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? 2'd0 : 2'd2)
|
||||
`define BIAS1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_BIAS : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_BIAS : `H_BIAS)
|
||||
`define LEN2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_LEN : `H_LEN)
|
||||
`define NE2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NE : `H_NE)
|
||||
`define NF2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NF : `H_NF)
|
||||
`define FMT2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 0 : 2)
|
||||
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
|
||||
`define NF2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NF : `H_NF)
|
||||
`define FMT2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 2'd0 : 2'd2)
|
||||
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
|
||||
|
||||
// Disable spurious Verilator warnings
|
||||
|
||||
|
@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/
|
||||
verilator=`which verilator`
|
||||
|
||||
basepath=$(dirname $0)/..
|
||||
for config in rv32e rv64gc rv32gc rv32ic ; do
|
||||
for config in rv64fp rv64fpquad rv32e rv64gc rv32gc rv32ic; do
|
||||
echo "$config linting..."
|
||||
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then
|
||||
echo "Exiting after $config lint due to errors or warnings"
|
||||
|
@ -46,7 +46,7 @@ configs = [
|
||||
]
|
||||
def getBuildrootTC(short):
|
||||
INSTR_LIMIT = 4000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
|
||||
MAX_EXPECTED = 246000000
|
||||
MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt.
|
||||
if short:
|
||||
BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0\n!"
|
||||
BRgrepstr=str(INSTR_LIMIT)+" instructions"
|
||||
|
@ -1 +0,0 @@
|
||||
vsim -do wally-fp64.do
|
@ -1,3 +0,0 @@
|
||||
vsim -c <<!
|
||||
do wally-fp64-batch.do rv64gc imperas64d
|
||||
!
|
@ -3,9 +3,10 @@
|
||||
# cvtfp - test floating-point conversion unit (fcvtfp)
|
||||
# cmp - test comparison unit's LT, LE, EQ operations (fcmp)
|
||||
# add - test addition
|
||||
# fma - test fma
|
||||
# sub - test subtraction
|
||||
# div - test division
|
||||
# sqrt - test square root
|
||||
# all - test everything
|
||||
|
||||
vsim -do "do fp.do rv64fp cmp"
|
||||
vsim -do "do testfloat.do rv64fpquad cmp"
|
@ -7,4 +7,4 @@
|
||||
# sqrt - test square root
|
||||
# all - test everything
|
||||
|
||||
vsim -c -do "do fp.do rv64fp fma"
|
||||
vsim -c -do "do testfloat.do rv64fpquad all"
|
@ -1,2 +1,2 @@
|
||||
vsim -do "do wally-pipelined.do rv64gc imperas64d"
|
||||
vsim -do "do wally-pipelined.do rv32gc arch32f"
|
||||
|
||||
|
@ -32,7 +32,7 @@ vlib work
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
# $num = the added words after the call
|
||||
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv -suppress 2583,7063,8607,2697
|
||||
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697
|
||||
|
||||
vsim -voptargs=+acc work.testbenchfp -G TEST=$2
|
||||
|
@ -1,50 +0,0 @@
|
||||
# wally-pipelined-batch.do
|
||||
#
|
||||
# Modification by Oklahoma State University & Harvey Mudd College
|
||||
# Use with Testbench
|
||||
# James Stine, 2008; David Harris 2021
|
||||
# Go Cowboys!!!!!!
|
||||
#
|
||||
# Takes 1:10 to run RV64IC tests using gui
|
||||
|
||||
# Usage: do wally-pipelined-batch.do <config> <testcases>
|
||||
# Example: do wally-pipelined-batch.do rv32ic imperas-32i
|
||||
|
||||
# Use this wally-pipelined-batch.do file to run this example.
|
||||
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
|
||||
# do wally-pipelined-batch.do
|
||||
# or, to run from a shell, type the following at the shell prompt:
|
||||
# vsim -do wally-pipelined-batch.do -c
|
||||
# (omit the "-c" to see the GUI while running from the shell)
|
||||
|
||||
onbreak {resume}
|
||||
|
||||
# create library
|
||||
if [file exists work_${1}_${2}] {
|
||||
vdel -lib work_${1}_${2} -all
|
||||
}
|
||||
vlib work_${1}_${2}
|
||||
|
||||
# compile source files
|
||||
# suppress spurious warnngs about
|
||||
# "Extra checking for conflicts with always_comb done at vopt time"
|
||||
# because vsim will run vopt
|
||||
|
||||
# default to config/rv64ic, but allow this to be overridden at the command line. For example:
|
||||
# do wally-pipelined-batch.do ../config/rv32ic rv32ic
|
||||
vlog -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-f64.sv ../testbench/common/*.sv ../src/*/*.sv -suppress 2583
|
||||
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
vopt work_${1}_${2}.testbench -work work_${1}_${2} -G TEST=$2 -o testbenchopt
|
||||
vsim -lib work_${1}_${2} testbenchopt
|
||||
# Adding coverage increases runtime from 2:00 to 4:29. Can't run it all the time
|
||||
#vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf
|
||||
#vsim -coverage -lib work_$2 workopt_$2
|
||||
|
||||
run -all
|
||||
#coverage report -file wally-pipelined-coverage.txt
|
||||
# These aren't doing anything helpful
|
||||
#coverage report -memory
|
||||
#profile report -calltree -file wally-pipelined-calltree.rpt -cutoff 2
|
||||
quit
|
@ -1,54 +0,0 @@
|
||||
# wally-pipelined.do
|
||||
#
|
||||
# Modification by Oklahoma State University & Harvey Mudd College
|
||||
# Use with Testbench
|
||||
# James Stine, 2008; David Harris 2021
|
||||
# Go Cowboys!!!!!!
|
||||
#
|
||||
# Takes 1:10 to run RV64IC tests using gui
|
||||
|
||||
# run with vsim -do "do wally-pipelined.do rv64ic riscvarchtest-64m"
|
||||
|
||||
# Use this wally-pipelined.do file to run this example.
|
||||
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
|
||||
# do wally-pipelined.do
|
||||
# or, to run from a shell, type the following at the shell prompt:
|
||||
# vsim -do wally-pipelined.do -c
|
||||
# (omit the "-c" to see the GUI while running from the shell)
|
||||
|
||||
onbreak {resume}
|
||||
|
||||
# create library
|
||||
if [file exists work] {
|
||||
vdel -all
|
||||
}
|
||||
vlib work
|
||||
|
||||
# compile source files
|
||||
# suppress spurious warnngs about
|
||||
# "Extra checking for conflicts with always_comb done at vopt time"
|
||||
# because vsim will run vopt
|
||||
|
||||
# default to config/rv64ic, but allow this to be overridden at the command line. For example:
|
||||
# do wally-pipelined.do ../config/rv32ic
|
||||
#switch $argc {
|
||||
# 0 {vlog +incdir+../config/rv64ic +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv -suppress 2583}
|
||||
# 1 {vlog +incdir+$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv -suppress 2583}
|
||||
#}
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
vlog +incdir+../config/rv64gc +incdir+../config/shared ../testbench/testbench-f64.sv ../testbench/common/*.sv ../src/*/*.sv -suppress 2583
|
||||
vopt +acc work.testbench -G TEST=imperas64d -o workopt
|
||||
vsim workopt
|
||||
|
||||
view wave
|
||||
-- display input and output signals as hexidecimal values
|
||||
do ./wave-dos/generic.do
|
||||
|
||||
-- Run the Simulation
|
||||
#run 3600
|
||||
run -all
|
||||
#quit
|
||||
#noview ../testbench/testbench-imperas.sv
|
||||
noview ../testbench/testbench.sv
|
||||
view wave
|
@ -35,7 +35,7 @@ vlib wkdir/work_${1}_${2}
|
||||
if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
|
||||
vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
|
||||
# start and run simulation
|
||||
vopt wkdir/work_${1}_${2}.testbench -work wkdir/work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -G CHECKPOINT=$6 -G DEBUG_TRACE=1 -o testbenchopt
|
||||
vopt wkdir/work_${1}_${2}.testbench -work wkdir/work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -G CHECKPOINT=$6 -o testbenchopt
|
||||
vsim -lib wkdir/work_${1}_${2} testbenchopt -suppress 8852,12070,3084
|
||||
|
||||
run -all
|
||||
|
@ -34,7 +34,7 @@ vlib work
|
||||
if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
|
||||
vlog -lint -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
|
||||
# start and run simulation
|
||||
vopt +acc work_${1}_${2}.testbench -work work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -G CHECKPOINT=$6 -G DEBUG_TRACE=1 -o testbenchopt
|
||||
vopt +acc work_${1}_${2}.testbench -work work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -G CHECKPOINT=$6 -G NO_SPOOFING=0 -o testbenchopt
|
||||
vsim -lib work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3829
|
||||
|
||||
#-- Run the Simulation
|
||||
@ -48,7 +48,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
|
||||
} elseif {$2 eq "buildroot-no-trace"} {
|
||||
vlog -lint -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
|
||||
# start and run simulation
|
||||
vopt +acc work_${1}_${2}.testbench -work work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=0 -G INSTR_WAVEON=0 -G CHECKPOINT=0 -G NO_IE_MTIME_CHECKPOINT=1 -G DEBUG_TRACE=0 -o testbenchopt
|
||||
vopt +acc work_${1}_${2}.testbench -work work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=0 -G INSTR_WAVEON=0 -G CHECKPOINT=0 -G NO_SPOOFING=1 -o testbenchopt
|
||||
vsim -lib work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3829
|
||||
|
||||
#-- Run the Simulation
|
||||
|
@ -76,8 +76,6 @@ add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/PMPCFG_ARRAY_RE
|
||||
add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/SATP_REGW
|
||||
add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/SCOUNTEREN_REGW
|
||||
add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/SEPC_REGW
|
||||
add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/SIE_REGW
|
||||
add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/SIP_REGW
|
||||
add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/SSTATUS_REGW
|
||||
add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/STVEC_REGW
|
||||
add wave -noupdate -group Bpred -color Orange /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHR
|
||||
@ -420,8 +418,6 @@ add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HRESPCLINT
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADYCLINT
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/TimerIntM
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/SwIntM
|
||||
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HCLK
|
||||
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HRESETn
|
||||
add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HSELUART
|
||||
|
@ -1,23 +0,0 @@
|
||||
# Makefile
# Builds every C source in this directory into a same-named executable,
# linked against the riscv-isa-sim build of SoftFloat.

CC     = gcc
CFLAGS = -O3
LFLAGS = -L.
# Link against the riscv-isa-sim version of SoftFloat rather than
# the regular version to get RISC-V NaN behavior
IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat
# libm is listed after the archive so it is still linked; the previous
# separate "LIBS = -lm" line was overwritten by this assignment.
LIBS   = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a -lm
#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
#LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
SRCS   = $(wildcard *.c)

PROGS  = $(patsubst %.c,%,$(SRCS))

# all and clean are commands, not files
.PHONY: all clean

all: $(PROGS)

# one executable per C source
%: %.c
	$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)

clean:
	rm -f $(PROGS)
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,23 +0,0 @@
|
||||
# fma.do
#
# Simulator script for the fma16 testbench: compile, optimize, load,
# add the testbench-level signals to the wave window, and run.
#
# run with vsim -do "do fma.do"
# add -c before -do for batch simulation

onbreak {resume}

# create library
vlib worklib

# compile both sources, optimize the testbench top, and load it
vlog -lint -sv -work worklib fma16.v testbench.v
vopt +acc worklib.testbench_fma16 -work worklib -o testbenchopt
vsim -lib worklib testbenchopt

# waves are added in this fixed order
foreach sig {clk reset x y z result rexpected} {
    add wave sim:/testbench_fma16/$sig
}

run -all
|
@ -1,268 +0,0 @@
|
||||
// fma16.sv
|
||||
// David_Harris@hmc.edu 26 February 2022
|
||||
// 16-bit floating-point multiply-accumulate
|
||||
|
||||
// Operation: general purpose multiply, add, fma, with optional negation
|
||||
// If mul=1, p = x * y. Else p = x.
|
||||
// If add=1, result = p + z. Else result = p.
|
||||
// If negr or negz = 1, negate result or z to handle negations and subtractions
|
||||
// fadd: mul = 0, add = 1, negr = negz = 0
|
||||
// fsub: mul = 0, add = 1, negr = 0, negz = 1
|
||||
// fmul: mul = 1, add = 0, negr = 0, negz = 0
|
||||
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
|
||||
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
|
||||
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
|
||||
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
|
||||
|
||||
// Half-precision format parameters
`define FFLEN 16
`define Nf 10
`define Ne 5
`define BIAS 15
`define EMIN (-(2**(`Ne-1)-1))
`define EMAX (2**(`Ne-1)-1)

// Canonical special values. `INF is 15 bits wide: it is concatenated
// with a separate sign bit where it is used.
`define NaN 16'h7E00
`define INF 15'h7C00

// rounding modes: 2-bit encodings, matching the width of the [1:0] roundmode port
`define RZ 2'b00
`define RNE 2'b01
`define RM 2'b10
`define RP 2'b11
|
||||
|
||||
// fma16: top level of the 16-bit floating-point multiply-accumulate unit.
// Unpacks x, y, z, forms the product p (mult16), adds z (add16), and
// post-processes into the packed result (postproc16).
// NOTE(review): negr is not read anywhere in this module yet.
module fma16(
  input  logic [`FFLEN-1:0] x, y, z,
  input  logic              mul, add, negr, negz,
  input  logic [1:0]        roundmode, // 00: rz, 01: rne, 10: rm, 11: rp (per the `RZ/`RNE/`RM/`RP defines)
  output logic [`FFLEN-1:0] result);

  logic [`Nf:0]     xm, ym, zm; // U1.Nf
  logic [`Ne-1:0]   xe, ye, ze; // B_Ne
  logic             xs, ys, zs;
  logic             zs1;        // sign before optional negation
  logic [2*`Nf+1:0] pm;         // U2.2Nf
  logic [`Ne:0]     pe;         // B_Ne+1
  logic             ps;         // sign of product
  logic [22:0]      rm;
  logic [`Ne+1:0]   re;
  logic             rs;
  logic             xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
  logic [`Ne+1:0]   re2;

  unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan); // unpack inputs

  // signadj16 is disabled, so zs had no driver and reached add16/postproc16
  // as X. Pass the unpacked sign through unchanged: add16 applies negz itself
  // in its effective-subtraction term. Remove this assign if signadj16 is
  // re-enabled, since it drives zs too.
  //signadj16 signadj(negr, negz, xs, ys, zs1, ps, zs); // handle negations
  assign zs = zs1;

  mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps); // p = x * y
  add16 add16(add, pm, zm, pe, ze, ps, zs, negz, rm, re, re2, rs); // r = z + p
  postproc16 post(roundmode, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result); // normalize, round, pack
endmodule
|
||||
|
||||
// mult16: product stage. When mul=1, p = x * y; when mul=0, p = x.
//   pm : product significand, U2.2Nf
//   pe : product exponent, bias retained
//   ps : product sign
module mult16(
  input  logic             mul,
  input  logic [`Nf:0]     xm, ym,
  input  logic [`Ne-1:0]   xe, ye,
  input  logic             xs, ys,
  output logic [2*`Nf+1:0] pm,
  output logic [`Ne:0]     pe,
  output logic             ps);

  // only multiply if mul = 1
  assign pm = mul ? xm * ym : {1'b0, xm, 10'b0};  // multiply mantissas, or place x in product format
  assign pe = mul ? xe + ye - `BIAS : {1'b0, xe}; // add exponents, account for bias
  // Sign follows the same rule as pm/pe: with mul=0 the product is x itself,
  // so y's sign must not leak into it.
  assign ps = mul ? xs ^ ys : xs;
endmodule
|
||||
|
||||
// add16: aligns z against the product p, adds or subtracts the significands,
// and starts normalization.
// NOTE(review): this module is unfinished in the original: rm is never
// assigned, add is never read, and rnormed/zsticky are computed but unused.
module add16(
  input  logic             add,
  input  logic [2*`Nf+1:0] pm,  // U2.2Nf
  input  logic [`Nf:0]     zm,  // U1.Nf
  input  logic [`Ne:0]     pe,  // B_Ne+1
  input  logic [`Ne-1:0]   ze,  // B_Ne
  input  logic             ps, zs,
  input  logic             negz,
  output logic [22:0]      rm,
  output logic [`Ne+1:0]   re,  // B_Ne+2
  output logic [`Ne+1:0]   re2,
  output logic             rs);

  logic [`Nf*3+7:0]   paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
  logic signed [`Ne:0] ExpDiff;  // Q(Ne+2).0
  logic [`Ne:0]       AlignCnt;  // U(Ne+3) bits to right shift Z for alignment *** check size.
  logic [`Nf-1:0]     prezsticky;
  logic               zsticky;
  logic               effectivesub;
  logic               rs0;
  logic [`Ne:0]       leadingzeros, NormCnt; // *** should paramterize size
  logic [`Ne:0]       re1;

  // Alignment shift
  assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
  assign ExpDiff = pe - {1'b0, ze}; // Compute exponent difference as signed number
  always_comb // AlignCount mux; see Muller page 254
    if (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7; re = {1'b0, pe}; end
    else if (ExpDiff <= 2) begin AlignCnt = `Nf + 4 - ExpDiff; re = {1'b0, pe}; end
    else if (ExpDiff <= `Nf+3) begin AlignCnt = `Nf + 4 - ExpDiff; re = {2'b0, ze}; end
    else begin AlignCnt = 0; re = {2'b0, ze}; end
  // Shift Zm right by AlignCnt. Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
  assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; //Right shift
  assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1

  // Effective subtraction
  assign effectivesub = ps ^ zs ^ negz; // subtract |z| from |p|
  assign zalignedaddsub = effectivesub ? ~zaligned : zaligned; // invert zaligned for subtraction

  // Adder
  assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
  assign rs0 = r[`Nf*3+7]; // sign of the initial result
  assign r2 = rs0 ? ~r+1 : r; // invert sum if negative; could optimize with end-around carry?

  // Sign Logic
  assign rs = ps ^ rs0; // flip the sign if necessary

  // Leading zero counter
  lzc lzc(r2, leadingzeros); // count number of leading zeros in 2Nf+5 lower digits of r2
  assign re1 = pe +2 - leadingzeros; // *** declare, # of bits

  // Normalization shift
  // NOTE(review): re1 is unsigned while `EMIN is a negative constant; confirm
  // the comparison below behaves as intended under mixed signedness.
  always_comb // NormCount mux
    if (ExpDiff < 3) begin
      if (re1 >= `EMIN) begin NormCnt = `Nf + 3 + leadingzeros; re2 = {1'b0, re1}; end
      else begin NormCnt = `Nf + 5 + pe - `EMIN; re2 = `EMIN; end
    // This branch previously wrote re, which the alignment mux above already
    // drives with the same value for ExpDiff >= 3, and left re2 unassigned.
    end else begin NormCnt = AlignCnt; re2 = {2'b00, ze}; end
  assign rnormed = r2 << NormCnt; // *** update sticky
/* temporarily comment out to start synth

  // One-bit secondary normalization
  if (ExpDiff <= 2) begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
  else begin // *** handle sticky
    if (rnormed[***]) begin rnormed2 = rnormed >> 1; re2 = re+1; end
    else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re; end
    else begin rnormed2 = rnormed << 1; re2 = re-1; end
  end

  // round
  assign l = rnormed2[***]; // least significant bit
  assign r = rnormed2[***-1]; // rounding bit
  assign s = ***; // sticky bit
  always_comb
    case (roundmode)
      RZ: roundup = 0;
      RP: roundup = ~rs & (r | s);
      RM: roundup = rs & (r | s);
      RNE: roundup = r & (s | l);
      default: roundup = 0;
    endcase
  assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
*/

  // *** need to handle rounding to MAXNUM vs. INFINITY

  // add or pass product through
/* assign rm = add ? arm : {1'b0, pm};
  assign re = add ? are : {1'b0, pe};
  assign rs = add ? ars : ps; */
endmodule
|
||||
|
||||
// lzc: leading-zero counter used by add16's normalization shift.
// The module body was empty, leaving leadingzeros undriven. This implements
// the behaviour stated at the add16 instantiation site: count the leading
// zeros in the lower 2Nf+5 bits of r2. Bits above that window are ignored.
// An all-zero window yields 2Nf+5.
// NOTE(review): confirm the window and the all-zero result against the
// intended normalization scheme.
module lzc(
  input  logic [`Nf*3+7:0] r2,
  output logic [`Ne:0]     leadingzeros
);

  logic found; // set once the most significant 1 in the window has been seen

  always_comb begin
    leadingzeros = '0;
    found = 1'b0;
    for (int i = 2*`Nf+4; i >= 0; i--) // scan the window from its MSB downward
      if (!found) begin
        if (r2[i]) found = 1'b1;
        else       leadingzeros = leadingzeros + 1'b1;
      end
  end
endmodule
|
||||
|
||||
|
||||
// postproc16: chooses the packed 16-bit result from the special-case flags
// (NaN / infinity / zero inputs, exponent at or above `EMAX) or from the
// rs / re / rm fields, and separately computes a normalized ue/uf pair.
// NOTE(review): result is assigned both in the first always_comb below and
// by the continuous assign near the end of the module; only one of the two
// drivers can remain.
// NOTE(review): roundmode and zzero are not read in this module, invalid is
// set but not output, and ueb is computed but never read.
module postproc16(
|
||||
input logic [1:0] roundmode,
|
||||
input logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
|
||||
input logic [22:0] rm,
|
||||
input logic [`Nf:0] zm, // U1.Nf
|
||||
input logic [6:0] re,
|
||||
input logic [`Ne-1:0] ze, // B_Ne
|
||||
input logic rs, zs, ps,
|
||||
input logic [`Ne+1:0] re2,
|
||||
output logic [15:0] result);
|
||||
|
||||
logic [9:0] uf, uff;
|
||||
logic [6:0] ue;
|
||||
logic [6:0] ueb, uebiased;
|
||||
logic invalid;
|
||||
|
||||
// Special cases
|
||||
// *** not handling signaling NaN
|
||||
// *** also add overflow/underflow/inexact
|
||||
// Priority chain: the first matching condition decides result and invalid.
always_comb begin
|
||||
if (xnan | ynan | znan) begin result = `NaN; invalid = 0; end // propagate NANs
|
||||
else if ((xinf | yinf) & zinf & (ps ^ zs)) begin result = `NaN; invalid = 1; end // infinity - infinity
|
||||
else if (xzero & yinf | xinf & yzero) begin result = `NaN; invalid = 1; end // zero times infinity
|
||||
else if (xinf | yinf) begin result = {ps, `INF}; invalid = 0; end // X or Y
|
||||
else if (zinf) begin result = {zs, `INF}; invalid = 0; end // infinite Z
|
||||
// zero product: the result is z repacked from its sign, exponent and fraction
else if (xzero | yzero) begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end
|
||||
// exponent at or above `EMAX: return signed infinity
else if (re2 >= `EMAX) begin result = {rs, `INF}; invalid = 0; end
|
||||
// default: pack sign, low Ne exponent bits, low Nf significand bits
else begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end
|
||||
end
|
||||
|
||||
// Select exponent ue and 10-bit fraction uf according to rm[21].
always_comb
|
||||
if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
|
||||
ue = re + 7'b1;
|
||||
uf = rm[20:11];
|
||||
end else begin // no normalization shift needed
|
||||
ue = re;
|
||||
uf = rm[19:10];
|
||||
end
|
||||
|
||||
// overflow
|
||||
// NOTE(review): both assignments in the overflow branch are commented out,
// so uebiased and uff are not assigned on that path.
always_comb begin
|
||||
ueb = ue-7'd15;
|
||||
if (ue >= 7'd46) begin // overflow
|
||||
/* uebiased = 7'd30;
|
||||
uff = 10'h3ff; */
|
||||
end else begin
|
||||
uebiased = ue-7'd15;
|
||||
uff = uf;
|
||||
end
|
||||
end
|
||||
|
||||
// Second driver of result (see the note at the top of the module).
assign result = {rs, uebiased[4:0], uff};
|
||||
|
||||
// add special case handling for zeros, NaN, Infinity
|
||||
endmodule
|
||||
|
||||
// signadj16: derives the product sign and the (optionally negated) addend sign.
// negr is accepted but not read.
module signadj16(
  input  logic negr, negz,
  input  logic xs, ys, zs1,
  output logic ps, zs);

  always_comb begin
    ps = xs ^ ys;     // product is negative when exactly one factor is negative
    zs = negz ^ zs1;  // negz flips the addend's sign
  end
endmodule
|
||||
|
||||
// unpack16: splits the three packed operands into significand, exponent,
// sign and zero/inf/nan flags, using one unpacknum16 per operand.
module unpack16(
  input  logic [15:0] x, y, z,
  output logic [10:0] xm, ym, zm,
  output logic [4:0]  xe, ye, ze,
  output logic        xs, ys, zs,
  output logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);

  unpacknum16 upx(.num(x), .m(xm), .e(xe), .s(xs), .zero(xzero), .inf(xinf), .nan(xnan));
  unpacknum16 upy(.num(y), .m(ym), .e(ye), .s(ys), .zero(yzero), .inf(yinf), .nan(ynan));
  unpacknum16 upz(.num(z), .m(zm), .e(ze), .s(zs), .zero(zzero), .inf(zinf), .nan(znan));
endmodule
|
||||
|
||||
// unpacknum16: splits one 16-bit word into sign (bit 15), 5-bit exponent
// (bits 14:10) and 10-bit fraction (bits 9:0), and classifies it.
// A leading 1 is always prepended to the fraction, and the exponent is
// passed through with its bias still applied.
module unpacknum16(
  input  logic [15:0] num,
  output logic [10:0] m,
  output logic [4:0]  e,
  output logic        s,
  output logic        zero, inf, nan);

  logic [9:0] f;  // fraction without leading 1
  logic [4:0] eb; // biased exponent

  // bit-field extraction
  assign s  = num[15];
  assign eb = num[14:10];
  assign f  = num[9:0];

  assign m = {1'b1, f}; // significand with the leading 1
  assign e = eb;        // bias left in place ***

  // classification from the exponent and fraction fields
  assign zero = (f == 0 && e == 0);
  assign inf  = (f == 0 && e == 31);
  assign nan  = (f != 0 && e == 31);
endmodule
|
||||
|
||||
|
@ -1,24 +0,0 @@
|
||||
// fma16.sv
|
||||
// David_Harris@hmc.edu 26 February 2022
|
||||
// 16-bit floating-point multiply-accumulate
|
||||
|
||||
// Operation: general purpose multiply, add, fma, with optional negation
|
||||
// If mul=1, p = x * y. Else p = x.
|
||||
// If add=1, result = p + z. Else result = p.
|
||||
// If negr or negz = 1, negate result or z to handle negations and subtractions
|
||||
// fadd: mul = 0, add = 1, negr = negz = 0
|
||||
// fsub: mul = 0, add = 1, negr = 0, negz = 1
|
||||
// fmul: mul = 1, add = 0, negr = 0, negz = 0
|
||||
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
|
||||
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
|
||||
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
|
||||
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
|
||||
|
||||
// Empty fma16 skeleton: the port list only, with no logic, so result is
// left undriven.
// NOTE(review): the roundmode comment below lists 10 as rp and 11 as rn.
// Confirm the intended encoding and update the comment if it is stale.
module fma16(
|
||||
input logic [15:0] x, y, z,
|
||||
input logic mul, add, negr, negz,
|
||||
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rp, 11: rn
|
||||
output logic [15:0] result);
|
||||
|
||||
endmodule
|
||||
|
@ -1,240 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include "softfloat.h"
|
||||
#include "softfloat_types.h"
|
||||
|
||||
// Overlays a SoftFloat float32_t and a native float on the same storage, so
// a value written through one member can be read back through the other.
typedef union sp {
|
||||
float32_t v;
|
||||
float f;
|
||||
} sp;
|
||||
|
||||
// lists of exponents and fractions for the test generators, each terminated with 0x8000; infs[] and nans[] below are not terminated
|
||||
uint16_t easyExponents[] = {15, 0x8000};
|
||||
uint16_t medExponents[] = {1, 14, 15, 16, 20, 30, 0x8000};
|
||||
uint16_t allExponents[] = {1, 15, 16, 30, 31, 0x8000};
|
||||
uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
|
||||
uint16_t medFracts[] = {0, 0x200, 0x001, 0x3FF, 0x8000};
|
||||
uint16_t zeros[] = {0x0000, 0x8000};
|
||||
uint16_t infs[] = {0x7C00, 0xFC00};
|
||||
uint16_t nans[] = {0x7D00, 0x7D01};
|
||||
|
||||
void softfloatInit(void) {
|
||||
softfloat_roundingMode = softfloat_round_minMag;
|
||||
softfloat_exceptionFlags = 0;
|
||||
softfloat_detectTininess = softfloat_tininess_beforeRounding;
|
||||
}
|
||||
|
||||
/* Convert a SoftFloat half-precision value to a native float by widening it
   to float32_t and reinterpreting those bits through the sp union. */
float convFloat(float16_t f16) {
  sp bits;

  bits.v = f16_to_f32(f16);
  return bits.f;
}
|
||||
|
||||
/* Emit one test vector line to fptr.
 *
 * x, y, z                  operands; y is forced to 1.0 when !mul, z to +0 when !add
 * mul, add, negp, negz     operation controls, packed with roundingMode into the op byte
 * roundingMode             written into the op byte only; the SoftFloat rounding mode
 *                          itself is whatever the caller has set globally
 * zeroAllowed, infAllowed, nanAllowed
 *                          when 0, lines whose result falls in that class are
 *                          prefixed with a "// skip" comment marker
 *
 * Line format: x_y_z_op_result_flags // <readable calculation> <flag summary>
 */
void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
  float16_t result, resultmag, smallest;
  int op, flagVals;
  char calc[80], flags[80];
  float xf, yf, zf, rf;

  if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
  if (!add) z.v = 0x0000; // force z to 0 to avoid add
  if (negp) x.v ^= 0x8000; // flip sign of x to negate p
  if (negz) z.v ^= 0x8000; // flip sign of z to negate z
  op = roundingMode << 4 | mul<<3 | add<<2 | negp<<1 | negz;
// printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);
  softfloat_exceptionFlags = 0; // clear exceptions
  result = f16_mulAdd(x, y, z);

  snprintf(flags, sizeof flags, "NV: %d OF: %d UF: %d NX: %d",
           (softfloat_exceptionFlags >> 4) % 2,
           (softfloat_exceptionFlags >> 2) % 2,
           (softfloat_exceptionFlags >> 1) % 2,
           (softfloat_exceptionFlags) % 2);
  // pack these four flags into one nibble, discarding DZ flag
  flagVals = (softfloat_exceptionFlags & 0x7) | ((softfloat_exceptionFlags >> 1) & 0x8);

  // convert to floats for printing
  xf = convFloat(x);
  yf = convFloat(y);
  zf = convFloat(z);
  rf = convFloat(result);
  if (mul) {
    if (add) snprintf(calc, sizeof calc, "%f * %f + %f = %f", xf, yf, zf, rf);
    else     snprintf(calc, sizeof calc, "%f * %f = %f", xf, yf, rf);
  } else {
    snprintf(calc, sizeof calc, "%f + %f = %f", xf, zf, rf);
  }

  // omit denorms, which aren't required for this project
  smallest.v = 0x0400;
  resultmag = result;
  resultmag.v &= 0x7FFF; // take absolute value
  if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
  if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
  if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed) fprintf(fptr, "// Skip inf: ");
  if (resultmag.v > 0x7C00 && !nanAllowed) fprintf(fptr, "// Skip NaN: ");
  fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
}
|
||||
|
||||
/* Write the description line to fptr and fill cases[] with every
 * exponent/fraction combination.
 *
 * e, f       lists terminated by 0x8000
 * testName   unused here; kept so the callers' argument list stays the same
 * desc       written verbatim, followed by a newline
 * cases      output array; the caller must provide room for
 *            (#exponents * #fractions) entries
 * numCases   receives the number of entries written
 */
void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases,
               FILE *fptr, int *numCases) {
  int i, j;

  (void) testName;

  // desc is data, not a format string: print it through "%s"
  fprintf(fptr, "%s\n", desc);
  *numCases=0;
  for (i=0; e[i] != 0x8000; i++)
    for (j=0; f[j] != 0x8000; j++) {
      cases[*numCases].v = f[j] | e[i]<<10; // exponent field sits above the 10 fraction bits
      *numCases = *numCases + 1;
    }
}
|
||||
|
||||
void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
z.v = 0x0000;
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
y.v = cases[j].v;
|
||||
for (k=0; k<=sgn; k++) {
|
||||
y.v ^= (k<<15);
|
||||
genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
void genAddTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
y.v = 0x0000;
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
z.v = cases[j].v;
|
||||
for (k=0; k<=sgn; k++) {
|
||||
z.v ^= (k<<15);
|
||||
genCase(fptr, x, y, z, 0, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
|
||||
void genFMATests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, l, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
y.v = cases[j].v;
|
||||
for (k=0; k<numCases; k++) {
|
||||
z.v = cases[k].v;
|
||||
for (l=0; l<=sgn; l++) {
|
||||
z.v ^= (l<<15);
|
||||
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
void genSpecialTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, sx, sy, sz, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
cases[numCases].v = 0x0000; // add +0 case
|
||||
cases[numCases+1].v = 0x8000; // add -0 case
|
||||
numCases += 2;
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
y.v = cases[j].v;
|
||||
for (k=0; k<numCases; k++) {
|
||||
z.v = cases[k].v;
|
||||
for (sx=0; sx<=sgn; sx++) {
|
||||
x.v ^= (sx<<15);
|
||||
for (sy=0; sy<=sgn; sy++) {
|
||||
y.v ^= (sy<<15);
|
||||
for (sz=0; sz<=sgn; sz++) {
|
||||
z.v ^= (sz<<15);
|
||||
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
softfloatInit(); // configure softfloat modes
|
||||
|
||||
// Test cases: multiplication
|
||||
genMulTests(easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
|
||||
genMulTests(medExponents, medFracts, 0, "fmul_1", "// Multiply with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
|
||||
genMulTests(medExponents, medFracts, 1, "fmul_2", "// Multiply with various exponents and signed fractions, RZ", 0, 0, 0, 0);
|
||||
|
||||
// Test cases: addition
|
||||
genAddTests(easyExponents, easyFracts, 0, "fadd_0", "// Add with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
|
||||
genAddTests(medExponents, medFracts, 0, "fadd_1", "// Add with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
|
||||
genAddTests(medExponents, medFracts, 1, "fadd_2", "// Add with various exponents and signed fractions, RZ", 0, 0, 0, 0);
|
||||
|
||||
// Test cases: FMA
|
||||
genFMATests(easyExponents, easyFracts, 0, "fma_0", "// FMA with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
|
||||
genFMATests(medExponents, medFracts, 0, "fma_1", "// FMA with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
|
||||
genFMATests(medExponents, medFracts, 1, "fma_2", "// FMA with various exponents and signed fractions, RZ", 0, 0, 0, 0);
|
||||
|
||||
// Test cases: Zero, Infinity, NaN
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rz", "// FMA with special cases, RZ", 0, 1, 1, 1);
|
||||
|
||||
// Full test cases with other rounding modes
|
||||
softfloat_roundingMode = softfloat_round_near_even;
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rne", "// FMA with special cases, RNE", 1, 1, 1, 1);
|
||||
softfloat_roundingMode = softfloat_round_min;
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rm", "// FMA with special cases, RM", 2, 1, 1, 1);
|
||||
softfloat_roundingMode = softfloat_round_max;
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rp", "// FMA with special cases, RP", 3, 1, 1, 1);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,8 +0,0 @@
|
||||
#!/bin/bash
# check for warnings in Verilog code
# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
export PATH=$PATH:/usr/local/bin/

# Locate verilator and stop with a clear message if it is missing; otherwise
# the empty variable would make the shell try to run "--lint-only" itself.
verilator=$(command -v verilator)
if [ -z "$verilator" ]; then
    echo "Error: verilator not found on PATH" >&2
    exit 1
fi

basepath=$(dirname "$0")/..
"$verilator" --lint-only --top-module fma16 fma16.v
|
@ -1,2 +0,0 @@
|
||||
vsim -do "do fma.do"
|
||||
|
@ -1 +0,0 @@
|
||||
vsim -c -do "do fma.do"
|
@ -1 +0,0 @@
|
||||
make -C ../../../synthDC synth DESIGN=fma16
|
@ -1,52 +0,0 @@
|
||||
/* verilator lint_off STMTDLY */
|
||||
// testbench_fma16: applies vectors from work/fmul_0.tv to the fma16 DUT
// and compares the result with the expected value.
// Each 76-bit vector holds, MSB first: x, y, z (16 bits each), ctrl (8),
// rexpected (16), flagsexpected (4).
module testbench_fma16;
  reg         clk, reset;
  reg  [15:0] x, y, z, rexpected;
  wire [15:0] result;
  reg  [7:0]  ctrl;
  reg  [3:0]  flagsexpected;
  reg         mul, add, negp, negz;
  reg  [1:0]  roundmode;
  reg  [31:0] vectornum, errors;
  reg  [75:0] testvectors[10000:0];

  // device under test
  fma16 dut(x, y, z, mul, add, negp, negz, roundmode, result);

  // clock: high for 5 time units, low for 5
  always begin
    clk = 1; #5;
    clk = 0; #5;
  end

  // load the vectors, clear the counters, and hold reset for 22 time units
  initial begin
    $readmemh("work/fmul_0.tv", testvectors);
    vectornum = 0;
    errors = 0;
    reset = 1;
    #22;
    reset = 0;
  end

  // one time unit after each rising edge, unpack the current vector and
  // decode the low six control bits
  always @(posedge clk) begin
    #1;
    {x, y, z, ctrl, rexpected, flagsexpected} = testvectors[vectornum];
    roundmode = ctrl[5:4];
    mul       = ctrl[3];
    add       = ctrl[2];
    negp      = ctrl[1];
    negz      = ctrl[0];
  end

  // on each falling edge outside reset: compare, advance, and stop at the
  // first all-x (unloaded) vector
  always @(negedge clk)
    if (~reset) begin
      if (result !== rexpected) begin // *** should also add tests on flags eventually
        $display("Error: inputs %h * %h + %h", x, y, z);
        $display(" result = %h (%h expected)", result, rexpected);
        errors = errors + 1;
      end
      vectornum = vectornum + 1;
      if (testvectors[vectornum] === 'x) begin
        $display("%d tests completed with %d errors",
                 vectornum, errors);
        $stop;
      end
    end
endmodule
|
File diff suppressed because it is too large
Load Diff
@ -1,130 +0,0 @@
|
||||
#!/usr/bin/perl -w
# torturegen.pl
# David_Harris@hmc.edu 19 April 2022
# Convert TestFloat cases into format for fma16 project torture test
# Strip out cases involving denorms

use strict;

# One TestFloat vector file per (operation, rounding mode) pair, operation-major.
my @names = ();
for my $base ("add", "mul", "mulAdd") {
    for my $mode ("rz", "rd", "ru", "rne") {
        push(@names, "f16_${base}_$mode.tv");
    }
}

open(my $torture, ">", "work/torture.tv") || die("Can't write torture.tv");
my $stamp = localtime();
print {$torture} "// Torture tests generated $stamp by $0\n";

for my $tv (@names) {
    open(my $in, "<", "work/$tv") || die("Can't read $tv");
    my $type = getType($tv);   # mul, add or mulAdd
    my $rm   = getRm($tv);     # rounding mode code
    my $isFma = ($type eq "mulAdd");

    # the same banner goes to the torture file and to stdout
    my $banner = "\n////////// Testcases from $tv of type $type rounding mode $rm\n";
    print {$torture} $banner;
    print $banner;

    # keep one line in every $density; babyTorture selects a sparser sample
    my $babyTorture = 0;
    my $density = $babyTorture ? ($isFma ? 50000 : 100)
                               : ($isFma ? 500   : 10);

    my $linecount = 0;
    while (my $line = <$in>) {
        $linecount++;
        next if (($linecount + $rm) % $density) != 0;   # too many tests to use
        chomp($line);
        my @parts = split(/_/, $line);

        # operand fields: add files carry x_z, mul files x_y, mulAdd files x_y_z
        my $x = $parts[0];
        my $y = ($type eq "add") ? "0000" : $parts[1];
        my $z = ($type eq "mul") ? "3CFF"
              : ($type eq "add") ? $parts[1]
              :                    $parts[2];

        # op byte: rounding mode in the high nibble, mul = 8, add = 4
        my $op = $rm << 4;
        $op += 8 if ($type eq "mul" || $isFma);
        $op += 4 if ($type eq "add" || $isFma);
        my $opname = sprintf("%02x", $op);

        # expected result and flags follow the operands
        my ($w, $flags) = $isFma ? @parts[3, 4] : @parts[2, 3];
        $flags = substr($flags, -1);                    # take last character
        $w = "7e00" if (fpval($w) eq "NaN");            # single NaN encoding for all NaN results

        my $vec = "${x}_${y}_${z}_${opname}_${w}_${flags}";
        my $skip = (isdenorm($x) || isdenorm($y) || isdenorm($z) || isdenorm($w))
                 ? "Skipped denorm" : "";
        my $summary = summary($x, $y, $z, $w, $type);

        if ($skip ne "") {
            print {$torture} "// $skip $tv line $linecount $line $summary\n";
        } else {
            print {$torture} "$vec // $tv line $linecount $line $summary\n";
        }
    }
    close($in);
}
close($torture);
|
||||
|
||||
# fpval(hexstring) -> readable description of a half-precision value.
# Returns "NaN" (never signed; the main script compares against exactly
# that string), or "INF", 0, "Denorm", or "1.<10 fraction bits> x 2^<exp>",
# with a leading "-" when the sign bit is set.
sub fpval {
    my $val = shift;
    $val = hex($val); # convert hex string to number
    my $frac = $val & 0x3FF;
    my $exp = ($val >> 10) & 0x1F;
    my $sign = $val >> 15;

    my $res;
    if ($exp == 31 && $frac != 0) { return "NaN"; }
    elsif ($exp == 31) { $res = "INF"; }
    # zero is decided from exponent and fraction only, so -0 (0x8000) is
    # reported as "-0" rather than "-Denorm"
    elsif ($exp == 0 && $frac == 0) { $res = 0; }
    elsif ($exp == 0) { $res = "Denorm"; }
    # the fraction field is 10 bits wide, so print exactly 10 binary digits
    else { $res = sprintf("1.%010b x 2^%d", $frac, $exp-15); }

    if ($sign == 1) { $res = "-$res"; }
    return $res;
}
|
||||
|
||||
# summary(x, y, z, w, type) -> readable equation for a test vector's comment.
# For "add" the y operand is omitted; for "mul" the z operand is omitted.
sub summary {
    my ($x, $y, $z, $w, $type) = @_;
    my ($xv, $yv, $zv, $wv) = map { fpval($_) } ($x, $y, $z, $w);

    return "$xv + $zv = $wv" if ($type eq "add");
    return "$xv * $yv = $wv" if ($type eq "mul");
    return "$xv * $yv + $zv = $wv";
}
|
||||
|
||||
# getType(filename) -> "mulAdd", "mul" or "add".
# "mulAdd" is tested first because such names also contain "mul";
# anything matching neither is treated as "add".
sub getType {
    my ($tv) = @_;

    return "mulAdd" if ($tv =~ /mulAdd/);
    return "mul"    if ($tv =~ /mul/);
    return "add";
}
|
||||
|
||||
# getRm(filename) -> rounding-mode code taken from the file name:
# rz = 0, rne = 1, rd = 2, ru = 3, or the string "bad" when none matches.
# The patterns are tried in that order and the first match wins.
sub getRm {
    my ($tv) = @_;
    my @codes = ([qr/rz/, 0], [qr/rne/, 1], [qr/rd/, 2], [qr/ru/, 3]);

    for my $entry (@codes) {
        my ($pattern, $code) = @$entry;
        return $code if ($tv =~ $pattern);
    }
    return "bad";
}
|
||||
|
||||
# isdenorm(hexstring) -> 1 if the half-precision value is denormal
# (exponent field 0 and fraction field nonzero), else 0.
# The fraction is tested rather than the whole word, so negative zero
# (0x8000) is no longer misclassified as a denorm and skipped.
sub isdenorm {
    my $fp = shift;
    my $val = hex($fp);
    my $expv = ($val >> 10) & 0x1F;   # 5-bit exponent field
    my $frac = $val & 0x3FF;          # 10-bit fraction field
    my $denorm = 0;
    if ($expv == 0 && $frac != 0) { $denorm = 1; }
    return $denorm;
}
|
@ -1,62 +0,0 @@
|
||||
# Waveform window setup for testbench_fma16.
# Adds the testbench and DUT signals in display order, restores the
# cursors, applies the wave-window layout options and restores the zoom.
onerror {resume}
quietly WaveActivateNextPane {} 0

# Signals shown with the default radix, relative to /testbench_fma16.
foreach sig {
    clk reset x y z result rexpected
    dut/x dut/y dut/z dut/mul dut/add dut/negr dut/negz dut/roundmode dut/result
    dut/XManE dut/YManE dut/ZManE
    dut/XExpE dut/YExpE dut/ZExpE dut/PExpE
    dut/Ne dut/upOneExt
    dut/XSgnE dut/YSgnE dut/ZSgnE dut/PSgnE
    dut/ProdManE dut/NfracS dut/ProdManAl dut/ZManExt dut/ZManAl dut/Nfrac dut/res
} {
    add wave -noupdate /testbench_fma16/$sig
}

# AlignCnt is the only signal displayed in decimal.
add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt

foreach sig {dut/NSamt dut/ZExpGreater dut/ACLess dut/upOne dut/KillProd} {
    add wave -noupdate /testbench_fma16/$sig
}

TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
quietly wave cursor active 2

# Wave-window layout options, applied one at a time in the original order.
foreach {option value} {
    -namecolwidth    237
    -valuecolwidth   64
    -justifyvalue    left
    -signalnamewidth 0
    -snapdistance    10
    -datasetprefix   0
    -rowmargin       4
    -childrowmargin  2
    -gridoffset      0
    -gridperiod      1
    -griddelta       40
    -timeline        0
    -timelineunits   ns
} {
    configure wave $option $value
}

update
WaveRestoreZoom {4083 ns} {4235 ns}
|
@ -1,758 +0,0 @@
|
||||
// The following modules make up the basic building blocks that
|
||||
// are used by the cla64, cla_sub64, and cla52.
|
||||
|
||||
// INVBLOCK: single-bit inverter; GOUT is the complement of GIN.
module INVBLOCK (
   input  GIN,
   output GOUT
);

   assign GOUT = ~GIN;

endmodule // INVBLOCK
|
||||
|
||||
|
||||
// XXOR1: SUM is the XNOR of A and B, XORed with GIN.
module XXOR1 (
   input  A,
   input  B,
   input  GIN,
   output SUM
);

   wire ab_xnor = A ~^ B;      // complement of (A xor B)

   assign SUM = ab_xnor ^ GIN;

endmodule // XXOR1
|
||||
|
||||
|
||||
// BLOCK0: first-level cell. POUT is the NOR of A and B,
// GOUT is the NAND of A and B.
module BLOCK0 (
   input  A,
   input  B,
   output POUT,
   output GOUT
);

   assign POUT = ~A & ~B;      // NOR, written in De Morgan form
   assign GOUT = ~A | ~B;      // NAND, written in De Morgan form

endmodule // BLOCK0
|
||||
|
||||
|
||||
// BLOCK1: combining cell.
//   POUT = NOR(PIN1, PIN2)
//   GOUT = NOT(GIN2 AND (PIN2 OR GIN1))
module BLOCK1 (
   input  PIN1,
   input  PIN2,
   input  GIN1,
   input  GIN2,
   output POUT,
   output GOUT
);

   wire or_term = PIN2 | GIN1;

   assign POUT = ~PIN1 & ~PIN2;          // NOR in De Morgan form
   assign GOUT = ~(GIN2 & or_term);

endmodule // BLOCK1
|
||||
|
||||
|
||||
// BLOCK2: combining cell.
//   POUT = NAND(PIN1, PIN2)
//   GOUT = NOT(GIN2 OR (PIN2 AND GIN1))
module BLOCK2 (
   input  PIN1,
   input  PIN2,
   input  GIN1,
   input  GIN2,
   output POUT,
   output GOUT
);

   wire and_term = PIN2 & GIN1;

   assign POUT = ~PIN1 | ~PIN2;          // NAND in De Morgan form
   assign GOUT = ~(GIN2 | and_term);

endmodule // BLOCK2
|
||||
|
||||
|
||||
// BLOCK1A: GOUT-only form of BLOCK1.
//   GOUT = NOT(GIN2 AND (PIN2 OR GIN1))
module BLOCK1A (
   input  PIN2,
   input  GIN1,
   input  GIN2,
   output GOUT
);

   wire or_term = PIN2 | GIN1;

   assign GOUT = ~(GIN2 & or_term);

endmodule // BLOCK1A
|
||||
|
||||
|
||||
// BLOCK2A: GOUT-only form of BLOCK2.
//   GOUT = NOT(GIN2 OR (PIN2 AND GIN1))
module BLOCK2A (
   input  PIN2,
   input  GIN1,
   input  GIN2,
   output GOUT
);

   wire and_term = PIN2 & GIN1;

   assign GOUT = ~(GIN2 | and_term);

endmodule // BLOCK2A
|
||||
//***KEP all 0:63, 0:64, etc. changed due to lint warning
|
||||
// PRESTAGE_64: first stage for 64-bit operands.
//   GOUT[0]   = inverted CIN (INVBLOCK)
//   POUT[i]   and GOUT[i+1] come from a BLOCK0 fed by A[i] and B[i],
//   for i = 0..63.
module PRESTAGE_64 (
   input  [63:0] A,
   input  [63:0] B,
   input         CIN,
   output [63:0] POUT,
   output [64:0] GOUT
);

   INVBLOCK U2 (CIN, GOUT[0]);

   // One BLOCK0 per operand bit; its GOUT lands one position higher
   // because GOUT[0] is reserved for the carry-in.
   genvar i;
   generate
      for (i = 0; i < 64; i = i + 1) begin : bit_slice
         BLOCK0 u_blk0 (A[i], B[i], POUT[i], GOUT[i+1]);
      end
   endgenerate

endmodule // PRESTAGE_64
|
||||
|
||||
|
||||
// DBLC_0_64: combining level with a stride of 1 over 65 GIN positions.
//   i = 0      : GOUT[i] = inverted GIN[i]                       (INVBLOCK)
//   i = 1      : BLOCK1A on PIN[i-1], GIN[i-1], GIN[i]           -> GOUT[i]
//   i = 2..64  : BLOCK1  on PIN[i-2], PIN[i-1], GIN[i-1], GIN[i] -> POUT[i-2], GOUT[i]
module DBLC_0_64 (
   input  [63:0] PIN,
   input  [64:0] GIN,
   output [62:0] POUT,
   output [64:0] GOUT
);

   localparam STRIDE = 1;   // distance between the two positions each cell combines

   genvar i;
   generate
      for (i = 0; i <= 64; i = i + 1) begin : bit_slice
         if (i < STRIDE) begin : pass
            INVBLOCK u_inv (GIN[i], GOUT[i]);
         end
         else if (i < 2*STRIDE) begin : g_only
            BLOCK1A u_g (PIN[i-STRIDE], GIN[i-STRIDE], GIN[i], GOUT[i]);
         end
         else begin : pg
            BLOCK1 u_pg (PIN[i-2*STRIDE], PIN[i-STRIDE],
                         GIN[i-STRIDE],   GIN[i],
                         POUT[i-2*STRIDE], GOUT[i]);
         end
      end
   endgenerate

endmodule // DBLC_0_64
|
||||
|
||||
|
||||
// DBLC_1_64: combining level with a stride of 2 over 65 GIN positions.
//   i = 0..1   : GOUT[i] = inverted GIN[i]                       (INVBLOCK)
//   i = 2..3   : BLOCK2A on PIN[i-2], GIN[i-2], GIN[i]           -> GOUT[i]
//   i = 4..64  : BLOCK2  on PIN[i-4], PIN[i-2], GIN[i-2], GIN[i] -> POUT[i-4], GOUT[i]
module DBLC_1_64 (
   input  [62:0] PIN,
   input  [64:0] GIN,
   output [60:0] POUT,
   output [64:0] GOUT
);

   localparam STRIDE = 2;   // distance between the two positions each cell combines

   genvar i;
   generate
      for (i = 0; i <= 64; i = i + 1) begin : bit_slice
         if (i < STRIDE) begin : pass
            INVBLOCK u_inv (GIN[i], GOUT[i]);
         end
         else if (i < 2*STRIDE) begin : g_only
            BLOCK2A u_g (PIN[i-STRIDE], GIN[i-STRIDE], GIN[i], GOUT[i]);
         end
         else begin : pg
            BLOCK2 u_pg (PIN[i-2*STRIDE], PIN[i-STRIDE],
                         GIN[i-STRIDE],   GIN[i],
                         POUT[i-2*STRIDE], GOUT[i]);
         end
      end
   endgenerate

endmodule // DBLC_1_64
|
||||
|
||||
|
||||
// DBLC_2_64: combining level with a stride of 4 over 65 GIN positions.
//   i = 0..3   : GOUT[i] = inverted GIN[i]                       (INVBLOCK)
//   i = 4..7   : BLOCK1A on PIN[i-4], GIN[i-4], GIN[i]           -> GOUT[i]
//   i = 8..64  : BLOCK1  on PIN[i-8], PIN[i-4], GIN[i-4], GIN[i] -> POUT[i-8], GOUT[i]
module DBLC_2_64 (
   input  [60:0] PIN,
   input  [64:0] GIN,
   output [56:0] POUT,
   output [64:0] GOUT
);

   localparam STRIDE = 4;   // distance between the two positions each cell combines

   genvar i;
   generate
      for (i = 0; i <= 64; i = i + 1) begin : bit_slice
         if (i < STRIDE) begin : pass
            INVBLOCK u_inv (GIN[i], GOUT[i]);
         end
         else if (i < 2*STRIDE) begin : g_only
            BLOCK1A u_g (PIN[i-STRIDE], GIN[i-STRIDE], GIN[i], GOUT[i]);
         end
         else begin : pg
            BLOCK1 u_pg (PIN[i-2*STRIDE], PIN[i-STRIDE],
                         GIN[i-STRIDE],   GIN[i],
                         POUT[i-2*STRIDE], GOUT[i]);
         end
      end
   endgenerate

endmodule // DBLC_2_64
|
||||
|
||||
|
||||
// DBLC_3_64: combining level with a stride of 8 over 65 GIN positions.
//   i = 0..7   : GOUT[i] = inverted GIN[i]                        (INVBLOCK)
//   i = 8..15  : BLOCK2A on PIN[i-8], GIN[i-8], GIN[i]            -> GOUT[i]
//   i = 16..64 : BLOCK2  on PIN[i-16], PIN[i-8], GIN[i-8], GIN[i] -> POUT[i-16], GOUT[i]
module DBLC_3_64 (
   input  [56:0] PIN,
   input  [64:0] GIN,
   output [48:0] POUT,
   output [64:0] GOUT
);

   localparam STRIDE = 8;   // distance between the two positions each cell combines

   genvar i;
   generate
      for (i = 0; i <= 64; i = i + 1) begin : bit_slice
         if (i < STRIDE) begin : pass
            INVBLOCK u_inv (GIN[i], GOUT[i]);
         end
         else if (i < 2*STRIDE) begin : g_only
            BLOCK2A u_g (PIN[i-STRIDE], GIN[i-STRIDE], GIN[i], GOUT[i]);
         end
         else begin : pg
            BLOCK2 u_pg (PIN[i-2*STRIDE], PIN[i-STRIDE],
                         GIN[i-STRIDE],   GIN[i],
                         POUT[i-2*STRIDE], GOUT[i]);
         end
      end
   endgenerate

endmodule // DBLC_3_64
|
||||
|
||||
|
||||
// DBLC_4_64: stage 4 of the 64-bit DBLC tree (instantiated as U_4 in
// DBLCTREE_64). Every cell in this stage pairs a bit with the bit 16
// positions below it.
//
//   PIN  [48:0] : propagate inputs from the previous stage
//   GIN  [64:0] : generate inputs from the previous stage
//   POUT [32:0] : propagate outputs to the next stage
//   GOUT [64:0] : generate outputs to the next stage
//
// NOTE(review): INVBLOCK, BLOCK1A and BLOCK1 are defined outside this
// module; their exact logic (and polarity) should be confirmed there.
module DBLC_4_64 ( PIN, GIN, POUT, GOUT );

   input  [48:0] PIN;
   input  [64:0] GIN;

   output [32:0] POUT;
   output [64:0] GOUT;

   genvar i;

   generate
      // Bits 0..15: generate bit goes through a single INVBLOCK.
      for (i = 0; i < 16; i = i + 1) begin : inv
         INVBLOCK U1 (GIN[i], GOUT[i]);
      end

      // Bits 16..31: BLOCK1A combines PIN[i], GIN[i] and GIN[i+16];
      // no propagate output is produced for these bits.
      for (i = 0; i < 16; i = i + 1) begin : gen_only
         BLOCK1A U2 (PIN[i], GIN[i], GIN[i+16], GOUT[i+16]);
      end

      // Bits 32..64: BLOCK1 combines the pair (i, i+16) and produces both
      // a propagate (POUT[i]) and a generate (GOUT[i+32]) output.
      for (i = 0; i <= 32; i = i + 1) begin : prop_gen
         BLOCK1 U3 (PIN[i], PIN[i+16], GIN[i+16], GIN[i+32], POUT[i], GOUT[i+32]);
      end
   endgenerate

endmodule // DBLC_4_64
|
||||
|
||||
|
||||
// DBLC_5_64: stage 5 (last stage) of the 64-bit DBLC tree (instantiated as
// U_5 in DBLCTREE_64). Every cell in this stage pairs a bit with the bit 32
// positions below it.
//
//   PIN  [32:0] : propagate inputs from the previous stage
//   GIN  [64:0] : generate inputs from the previous stage
//   POUT [0:0]  : single remaining propagate output
//   GOUT [64:0] : generate outputs of the tree
//
// NOTE(review): INVBLOCK, BLOCK2A and BLOCK2 are defined outside this
// module; their exact logic (and polarity) should be confirmed there.
module DBLC_5_64 ( PIN, GIN, POUT, GOUT );

   input  [32:0] PIN;
   input  [64:0] GIN;

   output [0:0]  POUT;
   output [64:0] GOUT;

   genvar i;

   generate
      // Bits 0..31: generate bit goes through a single INVBLOCK.
      for (i = 0; i < 32; i = i + 1) begin : inv
         INVBLOCK U1 (GIN[i], GOUT[i]);
      end

      // Bits 32..63: BLOCK2A combines PIN[i], GIN[i] and GIN[i+32];
      // no propagate output is produced for these bits.
      for (i = 0; i < 32; i = i + 1) begin : gen_only
         BLOCK2A U2 (PIN[i], GIN[i], GIN[i+32], GOUT[i+32]);
      end
   endgenerate

   // Bit 64: the only cell of this stage that still emits a propagate output.
   BLOCK2 U364 (PIN[0], PIN[32], GIN[32], GIN[64], POUT[0], GOUT[64]);

endmodule // DBLC_5_64
|
||||
|
||||
|
||||
// XORSTAGE_64: final stage of the 64-bit adder. One XXOR1 cell per bit
// combines the operand bits with the matching carry bit to form SUM, and a
// single BLOCK1A cell forms COUT from PBIT, CARRY[0] and CARRY[64].
//
//   A, B  [63:0] : adder operands
//   PBIT         : propagate bit coming from the carry tree
//   CARRY [64:0] : per-bit carries coming from the carry tree
//   SUM   [63:0] : sum bits
//   COUT         : carry out
//
// NOTE(review): XXOR1 and BLOCK1A are defined outside this module; confirm
// their logic and the polarity they expect on CARRY there.
module XORSTAGE_64 ( A, B, PBIT, CARRY, SUM, COUT );

   input  [63:0] A;
   input  [63:0] B;
   input         PBIT;
   input  [64:0] CARRY;

   output [63:0] SUM;
   output        COUT;

   genvar k;

   generate
      // One sum cell per bit position.
      for (k = 0; k < 64; k = k + 1) begin : sumbit
         XXOR1 U2 (A[k], B[k], CARRY[k], SUM[k]);
      end
   endgenerate

   // Carry out of the whole adder.
   BLOCK1A U1 (PBIT, CARRY[0], CARRY[64], COUT);

endmodule // XORSTAGE_64
|
||||
|
||||
|
||||
// DBLCTREE_64: carry tree of the 64-bit adder. Chains the six DBLC stages
// (DBLC_0_64 .. DBLC_5_64); each stage consumes the propagate/generate
// vectors of the previous one. The propagate vector narrows from stage to
// stage (63, 61, 57, 49, 33 bits, then the single POUT bit) while the
// generate vector stays 65 bits wide.
//
//   PIN  [63:0] : per-bit propagate inputs
//   GIN  [64:0] : per-bit generate inputs
//   GOUT [64:0] : generate outputs of the last stage
//   POUT [0:0]  : propagate output of the last stage
module DBLCTREE_64 ( PIN, GIN, GOUT, POUT );

   input  [63:0] PIN;
   input  [64:0] GIN;

   output [64:0] GOUT;
   output [0:0]  POUT;

   // Propagate vectors between stages.
   wire [62:0] INTPROP_0;
   wire [60:0] INTPROP_1;
   wire [56:0] INTPROP_2;
   wire [48:0] INTPROP_3;
   wire [32:0] INTPROP_4;

   // Generate vectors between stages.
   wire [64:0] INTGEN_0, INTGEN_1, INTGEN_2, INTGEN_3, INTGEN_4;

   DBLC_0_64 U_0 (
      .PIN  (PIN),
      .GIN  (GIN),
      .POUT (INTPROP_0),
      .GOUT (INTGEN_0)
   );

   DBLC_1_64 U_1 (
      .PIN  (INTPROP_0),
      .GIN  (INTGEN_0),
      .POUT (INTPROP_1),
      .GOUT (INTGEN_1)
   );

   DBLC_2_64 U_2 (
      .PIN  (INTPROP_1),
      .GIN  (INTGEN_1),
      .POUT (INTPROP_2),
      .GOUT (INTGEN_2)
   );

   DBLC_3_64 U_3 (
      .PIN  (INTPROP_2),
      .GIN  (INTGEN_2),
      .POUT (INTPROP_3),
      .GOUT (INTGEN_3)
   );

   DBLC_4_64 U_4 (
      .PIN  (INTPROP_3),
      .GIN  (INTGEN_3),
      .POUT (INTPROP_4),
      .GOUT (INTGEN_4)
   );

   DBLC_5_64 U_5 (
      .PIN  (INTPROP_4),
      .GIN  (INTGEN_4),
      .POUT (POUT),
      .GOUT (GOUT)
   );

endmodule // DBLCTREE_64
|
||||
|
||||
|
||||
// DBLCADDER_64_64: top level of the 64-bit adder. Three stages in series:
//   U1 PRESTAGE_64 : takes OPA, OPB and CIN, drives INTPROP / INTGEN
//   U2 DBLCTREE_64 : turns INTPROP / INTGEN into CARRY and PBIT
//   U3 XORSTAGE_64 : forms SUM and COUT from the operands, CARRY and PBIT
//
//   OPA, OPB [63:0] : operands
//   CIN             : carry in
//   SUM      [63:0] : sum
//   COUT            : carry out
module DBLCADDER_64_64 ( OPA, OPB, CIN, SUM, COUT );

   input  [63:0] OPA;
   input  [63:0] OPB;
   input         CIN;

   output [63:0] SUM;
   output        COUT;

   wire [63:0] INTPROP;   // propagate vector out of the pre-stage
   wire [64:0] INTGEN;    // generate vector out of the pre-stage
   wire [64:0] CARRY;     // carries out of the tree
   wire [0:0]  PBIT;      // propagate bit out of the tree

   // PRESTAGE_64 is declared outside this view, so its ports stay positional.
   PRESTAGE_64 U1 (OPA, OPB, CIN, INTPROP, INTGEN);

   DBLCTREE_64 U2 (
      .PIN  (INTPROP),
      .GIN  (INTGEN),
      .GOUT (CARRY),
      .POUT (PBIT)
   );

   XORSTAGE_64 U3 (
      .A     (OPA[63:0]),
      .B     (OPB[63:0]),
      .PBIT  (PBIT[0]),
      .CARRY (CARRY[64:0]),
      .SUM   (SUM),
      .COUT  (COUT)
   );

endmodule // DBLCADDER_64_64
|
@ -1,332 +0,0 @@
|
||||
// cla12: 12-bit carry lookahead adder, used for rounding in the floating
// point adder. It is a thin wrapper that zero-extends both operands and
// feeds them to the shared 64-bit DBLCADDER_64_64 core.
//
//   X, Y [11:0] : addends
//   S    [11:0] : low 12 bits of the core's result
//   CO          : bit 12 of the core's result
module cla12 (S, CO, X, Y);

   input  [11:0] X;
   input  [11:0] Y;

   output [11:0] S;
   output        CO;

   wire [63:0] A, B, Q;   // 64-bit operands and result of the core
   wire        CIN;       // carry in, tied low
   wire        CO_64;     // carry out of the 64-bit core; not used here

   assign CIN = 1'b0;

   // Zero-extend the 12-bit operands to the 64-bit width of the core.
   assign A = {52'b0, X};
   assign B = {52'b0, Y};

   DBLCADDER_64_64 U1 (A, B, CIN, Q, CO_64);

   // Bit 12 of both operands is zero, so (assuming the core is a plain
   // binary adder) Q[12] carries the overflow out of the low 12 bits.
   assign S  = Q[11:0];
   assign CO = Q[12];

endmodule // cla12
|
||||
|
||||
// cla_sub12: 12-bit carry lookahead subtractor, used for rounding in the
// floating point adder. Computes S = X - Y as X + ~Y + 1 on the shared
// 64-bit DBLCADDER_64_64 core: both operands are zero-extended, the
// extended Y is inverted across all 64 bits, and the carry in is tied high.
//
//   X [11:0] : minuend
//   Y [11:0] : subtrahend
//   S [11:0] : low 12 bits of the core's result
//
// The former internal signal CO_12 (= Q[12]) was never read or exported and
// has been removed.
module cla_sub12 (S, X, Y);

   input  [11:0] X;
   input  [11:0] Y;

   output [11:0] S;

   wire [63:0] A, B, Q, Bbar;   // core operands, result, and inverted B
   wire        CO;              // carry out of the 64-bit core; not used here
   wire        VDD;             // constant 1, used as the carry in

   assign VDD = 1'b1;

   // Zero-extend the 12-bit operands to the 64-bit width of the core.
   assign A = {52'b0, X};
   assign B = {52'b0, Y};

   // Inverting after the zero-extension sets Bbar[63:12] to all ones.
   assign Bbar = ~B;

   DBLCADDER_64_64 U1 (A, Bbar, VDD, Q, CO);

   assign S = Q[11:0];

endmodule // cla_sub12
|
@ -1,409 +0,0 @@
|
||||
// cla52: 52-bit carry lookahead adder, used for rounding in the floating
// point adder. It is a thin wrapper that zero-extends both operands and
// feeds them to the shared 64-bit DBLCADDER_64_64 core.
//
//   X, Y [51:0] : addends
//   S    [51:0] : low 52 bits of the core's result
//   CO          : bit 52 of the core's result
module cla52 (S, CO, X, Y);

   input  [51:0] X;
   input  [51:0] Y;

   output [51:0] S;
   output        CO;

   wire [63:0] A, B, Q;   // 64-bit operands and result of the core
   wire        CIN;       // carry in, tied low
   wire        CO_64;     // carry out of the 64-bit core; not used here

   assign CIN = 1'b0;

   // Zero-extend the 52-bit operands to the 64-bit width of the core.
   assign A = {12'b0, X};
   assign B = {12'b0, Y};

   DBLCADDER_64_64 U1 (A, B, CIN, Q, CO_64);

   // Bit 52 of both operands is zero, so (assuming the core is a plain
   // binary adder) Q[52] carries the overflow out of the low 52 bits.
   assign S  = Q[51:0];
   assign CO = Q[52];

endmodule // cla52
|
||||
|
||||
// This module implements a 52-bit carry lookahead subtractor. It is used
|
||||
// for rounding in the floating point adder.
|
||||
|
||||
module cla_sub52 (S, X, Y);

   input  [51:0] X;
   input  [51:0] Y;

   output [51:0] S;

   // The 52-bit operands are zero-extended to the 64-bit width of the
   // DBLCADDER_64_64 core; only the low 52 result bits are returned.
   wire [63:0]   A, B, Q, Bbar;
   wire          CIN;
   wire          CO_52;   // bit 52 of the core result; not driven to any port
   wire          CO_64;   // carry-out of the 64-bit core; not used

   assign A    = {12'b0, X};
   assign B    = {12'b0, Y};
   assign Bbar = ~B;      // note: bits [63:52] of Bbar are therefore all ones

   // NOTE(review): the carry-in is tied to 0 here, whereas cla_sub64 ties
   // it to 1.  If U1 is a plain adder with carry-in, this produces
   // X + ~Y rather than X - Y -- confirm that this is intended.
   assign CIN  = 1'b0;

   DBLCADDER_64_64 U1 (A, Bbar, CIN, Q, CO_64);

   assign S     = Q[51:0];
   assign CO_52 = Q[52];

endmodule //cla_sub52
|
@ -1,420 +0,0 @@
|
||||
// This module implements a 64-bit carry lookahead adder/subtractor.
|
||||
// It is used to perform the primary addition in the floating point
|
||||
// adder
|
||||
|
||||
module cla64 (S, X, Y, Sub);

   input  [63:0] X;
   input  [63:0] Y;
   input         Sub;     // 0: Y is passed through; 1: Y is inverted and carry-in is 1
   output [63:0] S;

   wire          CO;      // carry-out of the adder core; not used
   wire [63:0]   Bbar;    // Y, bitwise-inverted when Sub is set

   // XOR with the replicated Sub bit conditionally complements Y.  Sub is
   // also the third argument of U1 (presumably the carry-in), which
   // together with the inversion would form the two's complement of Y.
   assign Bbar = Y ^ {64{Sub}};

   // X and S connect straight to the core; no per-bit copies are needed.
   DBLCADDER_64_64 U1 (X, Bbar, Sub, S, CO);

endmodule // cla64
|
||||
|
||||
// This module performs 64-bit subtraction. It is used to get the two's complement
|
||||
// of main addition or subtraction in the floating point adder.
|
||||
|
||||
module cla_sub64 (S, X, Y);

   input  [63:0] X;
   input  [63:0] Y;

   output [63:0] S;

   wire          CO;             // carry-out of the adder core; not used
   wire          VDD = 1'b1;     // constant one, passed as U1's third argument
   wire [63:0]   Bbar;           // bitwise complement of Y

   assign Bbar = ~Y;

   // U1 receives X, ~Y and a constant 1 (presumably its carry-in), i.e.
   // the operands of the two's-complement subtraction X - Y.
   DBLCADDER_64_64 U1 (X, Bbar, VDD, S, CO);

endmodule // cla_sub64
|
@ -1,83 +0,0 @@
|
||||
// Exception logic for the floating point adder. Note: We may
|
||||
// actually want to move to where the result is computed.
|
||||
|
||||
module exception (

   input logic [2:0]   op_type,            // Function opcode
   input logic         XSgnE, YSgnE,       // Operand sign bits (1 = negative)
   input logic         XDenormE, YDenormE, // Operand is denormalized
   input logic         XNormE, YNormE,     // Not referenced inside this module
   input logic         XZeroE, YZeroE,     // Operand is zero
   input logic         XInfE, YInfE,       // Operand is infinite
   input logic         XNaNE, YNaNE,       // Operand is a NaN
   input logic         XSNaNE, YSNaNE,     // Operand is a signalling NaN
   output logic [3:0]  Ztype,              // Indicates type of result (Z)
   output logic        Invalid,            // Invalid operation exception
   output logic        Denorm,             // Denormalized input
   output logic        Sub                 // The effective operation is subtraction
   );

   wire                ZQNaN;        // '1' if result Z is a quiet NaN
   wire                ZPInf;        // '1' if result Z is positive infinity
   wire                ZNInf;        // '1' if result Z is negative infinity
   wire                add_sub;      // '1' if operation is add or subtract
   wire                converts;     // '1' if operation is a conversion
   wire                YEffSgn;      // sign of Y after applying add/subtract
   wire                SgnDiffer;    // X and effective Y have opposite signs
   wire                BothZero;     // X and Y are both zero

   // op_type[1] distinguishes conversions (1) from add/subtract (0);
   // op_type[0] selects subtraction when the operation is add/subtract.
   assign converts  = op_type[1];
   assign add_sub   = ~op_type[1];

   assign YEffSgn   = YSgnE ^ op_type[0];
   assign SgnDiffer = XSgnE ^ YEffSgn;
   assign BothZero  = XZeroE & YZeroE;

   // An "Invalid Operation" exception occurs if (X or Y is a signalling NaN)
   // or (X and Y are both infinite and the "effective operation" is
   // subtraction).  It is never raised for conversions.
   assign Invalid = (XSNaNE | YSNaNE | (add_sub & XInfE & YInfE & SgnDiffer)) & ~converts;

   // The Denorm flag is set if (X is denormalized) or (Y is denormalized
   // and the operation is addition or subtraction).
   assign Denorm = XDenormE | (YDenormE & add_sub);

   // The result is a quiet NaN if (an "Invalid Operation" exception occurs)
   // or (X is a NaN) or (Y is a NaN and the operation uses Y).
   assign ZQNaN = Invalid | XNaNE | (YNaNE & add_sub);

   // The result is +Inf if ((X is +Inf) or (Y is -Inf and the operation is
   // subtraction) or (Y is +Inf and the operation is addition)) and (the
   // result is not a quiet NaN).
   // Fix: the X term previously tested XSgnE (i.e. X = -Inf), which
   // contradicted both this description and the polarity of the Y term.
   assign ZPInf = ((XInfE & ~XSgnE) | (add_sub & YInfE & ~YEffSgn)) & ~ZQNaN;

   // The result is -Inf if ((X is -Inf) or (Y is +Inf and the operation is
   // subtraction) or (Y is -Inf and the operation is addition)) and the
   // result is not a quiet NaN.
   // Fix: the X term previously tested ~XSgnE (i.e. X = +Inf).
   assign ZNInf = ((XInfE & XSgnE) | (add_sub & YInfE & YEffSgn)) & ~ZQNaN;

   // Set the type of the result as follows:
   //        Ztype          Result
   //        0000           Normal
   //        0001           Quiet NaN
   //        0010           Negative Infinity
   //        0011           Positive Infinity
   //        0100           Both operands zero, X and effective Y both positive
   //        0101           Both operands zero, X and effective Y of opposite sign
   //        0110           Both operands zero, X and effective Y both negative
   //        1000           Convert SP to DP (and vice-versa)
   // The zero encodings are produced for add/subtract only.

   assign Ztype[0] = (ZQNaN | ZPInf) |
                     (BothZero & SgnDiffer & ~converts);
   assign Ztype[1] = (ZNInf | ZPInf) |
                     (BothZero & XSgnE & YEffSgn & ~converts);
   assign Ztype[2] = BothZero & add_sub;
   assign Ztype[3] = op_type[1] & ~op_type[0];

   // Determine if the effective operation is subtraction
   assign Sub = add_sub & SgnDiffer;

endmodule // exception
|
||||
|
@ -1,426 +0,0 @@
|
||||
//
|
||||
// File name : fpadd
|
||||
// Title : Floating-Point Adder/Subtractor
|
||||
// project : FPU
|
||||
// Library : fpadd
|
||||
// Author(s) : James E. Stine, Jr., Brett Mathis
|
||||
// Purpose : definition of main unit to floating-point add/sub
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
// Copyright AFRL
|
||||
//
|
||||
// Basic and Denormalized Operations
|
||||
//
|
||||
// Step 1: Load operands, set flags, and convert SP to DP
|
||||
// Step 2: Check for special inputs ( +/- Infinity, NaN)
|
||||
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
|
||||
// or if (exp1 = exp2 AND mnt1 < mnt2)
|
||||
// Step 4: Shift the mantissa corresponding to the smaller exponent,
|
||||
// and extend precision by three bits to the right.
|
||||
// Step 5: Add or subtract the mantissas.
|
||||
// Step 6: Normalize the result.//
|
||||
// Shift left until normalized. Normalized when the value to the
|
||||
// left of the binary point is 1.
|
||||
// Step 7: Round the result.//
|
||||
// Step 8: Put sum onto output.
|
||||
//
|
||||
|
||||
module faddcvt(
   input logic         clk,
   input logic         reset,
   input logic         FlushM,             // flush the memory stage
   input logic         StallM,             // stall the memory stage
   input logic [63:0]  FSrcXE,             // 1st input operand (A); not referenced in this wrapper
   input logic [63:0]  FSrcYE,             // 2nd input operand (B); not referenced in this wrapper
   input logic [2:0]   FOpCtrlE, FOpCtrlM, // Function opcode
   // NOTE(review): this port was documented as "0 for double, 1 for single"
   // while fpuaddcvt1 documents FmtE with the opposite polarity -- confirm.
   input logic         FmtE, FmtM,         // Result precision select
   input logic [2:0]   FrmM,               // Rounding mode
   input logic         XSgnE, YSgnE,
   input logic [52:0]  XManE, YManE,
   input logic [10:0]  XExpE, YExpE,
   input logic         XSgnM, YSgnM,
   input logic [52:0]  XManM, YManM,
   input logic [10:0]  XExpM, YExpM,
   input logic         XDenormE, YDenormE,
   input logic         XNormE, YNormE,
   input logic         XNormM, YNormM,
   input logic         XZeroE, YZeroE,
   input logic         XInfE, YInfE,
   input logic         XNaNE, YNaNE,
   input logic         XSNaNE, YSNaNE,
   output logic [63:0] FAddResM,           // Result of operation
   output logic [4:0]  FAddFlgM);          // IEEE exception flags

   // Execute-stage results of fpuaddcvt1 and their memory-stage copies.
   logic [63:0] AddSumE,        AddSumM;
   logic [63:0] AddSumTcE,      AddSumTcM;
   logic [10:0] AddExpPostSumE, AddExpPostSumM;
   logic [11:0] AddExp1DenormE, AddExp1DenormM;
   logic [11:0] AddExp2DenormE, AddExp2DenormM;
   logic [10:0] AddExponentE,   AddExponentM;
   logic [3:0]  AddSelInvE,     AddSelInvM;
   logic        AddCorrSignE,   AddCorrSignM;
   logic        AddOpANormE,    AddOpANormM;
   logic        AddOpBNormE,    AddOpBNormM;
   logic        AddInvalidE,    AddInvalidM;
   logic        AddDenormInE,   AddDenormInM;
   logic        AddSwapE,       AddSwapM;
   logic        AddSignAE,      AddSignAM;

   // Fourth argument of every E/M register below: high whenever the
   // memory stage is not stalled.
   logic        EnM;
   assign EnM = ~StallM;

   // First half of the adder/converter (execute stage).
   fpuaddcvt1 fpadd1 (
      // control
      .FOpCtrlE, .FmtE,
      // unpacked operands and their classification
      .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE,
      .XDenormE, .YDenormE, .XNormE, .YNormE, .XZeroE, .YZeroE,
      .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
      // results handed to the memory stage
      .AddExponentE, .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE,
      .AddSumE, .AddSumTcE, .AddSelInvE,
      .AddCorrSignE, .AddSignAE, .AddOpANormE, .AddOpBNormE,
      .AddInvalidE, .AddDenormInE, .AddSwapE);

   // E/M pipeline registers
   flopenrc #(64) EMRegAdd1(clk, reset, FlushM, EnM, AddSumE,        AddSumM);
   flopenrc #(64) EMRegAdd2(clk, reset, FlushM, EnM, AddSumTcE,      AddSumTcM);
   flopenrc #(11) EMRegAdd3(clk, reset, FlushM, EnM, AddExpPostSumE, AddExpPostSumM);
   flopenrc #(12) EMRegAdd6(clk, reset, FlushM, EnM, AddExp1DenormE, AddExp1DenormM);
   flopenrc #(12) EMRegAdd7(clk, reset, FlushM, EnM, AddExp2DenormE, AddExp2DenormM);
   flopenrc #(11) EMRegAdd8(clk, reset, FlushM, EnM, AddExponentE,   AddExponentM);
   // The 4-bit select plus seven single-bit flags share one 11-bit register.
   flopenrc #(11) EMRegAdd9(clk, reset, FlushM, EnM,
      {AddSelInvE, AddCorrSignE, AddOpANormE, AddOpBNormE,
       AddInvalidE, AddDenormInE, AddSwapE, AddSignAE},
      {AddSelInvM, AddCorrSignM, AddOpANormM, AddOpBNormM,
       AddInvalidM, AddDenormInM, AddSwapM, AddSignAM});

   // Second half of the adder/converter (memory stage).
   fpuaddcvt2 fpadd2 (
      // control
      .FrmM, .FOpCtrlM, .FmtM,
      // unpacked operands
      .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM, .XNormM, .YNormM,
      // registered execute-stage results
      .AddSumM, .AddSumTcM,
      .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM,
      .AddSelInvM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM,
      .AddSignAM, .AddCorrSignM, .AddSwapM,
      // final result and flags
      .FAddResM, .FAddFlgM);

endmodule
|
||||
|
||||
module fpuaddcvt1 (
|
||||
input logic [2:0] FOpCtrlE, // Function opcode
|
||||
input logic FmtE, // Result Precision (1 for double, 0 for single)
|
||||
input logic XSgnE, YSgnE,
|
||||
input logic [10:0] XExpE, YExpE,
|
||||
input logic [52:0] XManE, YManE,
|
||||
input logic XDenormE, YDenormE,
|
||||
input logic XNormE, YNormE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic XSNaNE, YSNaNE,
|
||||
|
||||
output logic [10:0] AddExponentE,
|
||||
output logic [10:0] AddExpPostSumE,
|
||||
output logic [11:0] AddExp1DenormE, AddExp2DenormE,//KEP used to be [10:0]
|
||||
output logic [63:0] AddSumE, AddSumTcE,
|
||||
output logic [3:0] AddSelInvE,
|
||||
output logic AddCorrSignE,
|
||||
output logic AddSignAE,
|
||||
output logic AddOpANormE, AddOpBNormE,
|
||||
output logic AddInvalidE,
|
||||
output logic AddDenormInE,
|
||||
output logic AddSwapE
|
||||
);
|
||||
|
||||
logic [5:0] ZP_mantissaA;
|
||||
logic [5:0] ZP_mantissaB;
|
||||
wire ZV_mantissaA;
|
||||
wire ZV_mantissaB;
|
||||
|
||||
wire P;
|
||||
assign P = ~(FmtE^FOpCtrlE[1]);
|
||||
|
||||
wire [63:0] IntValue;
|
||||
wire [11:0] exp1, exp2;
|
||||
wire [11:0] exp_diff1, exp_diff2;
|
||||
wire [11:0] exp_shift;
|
||||
wire [51:0] mantissaA;
|
||||
wire [56:0] mantissaA1;
|
||||
wire [63:0] mantissaA3;
|
||||
wire [51:0] mantissaB;
|
||||
wire [56:0] mantissaB1, mantissaB2;
|
||||
wire [63:0] mantissaB3;
|
||||
wire exp_gt63;
|
||||
wire Sticky_out;
|
||||
wire sub;
|
||||
wire zeroB;
|
||||
wire [5:0] align_shift;
|
||||
|
||||
// Test for exceptions and return the "Invalid Operation" and
|
||||
// "Denormalized" Input Flags. The "AddSelInvE" is used in
|
||||
// the third pipeline stage to select the result. Also, AddOp1NormE
|
||||
// and AddOp2NormE are one if FSrcXE and FSrcYE are not zero or denormalized.
|
||||
// sub is one if the effective operation is subtaction.
|
||||
|
||||
exception exc1 (.Ztype(AddSelInvE), .Invalid(AddInvalidE), .Denorm(AddDenormInE), .Sub(sub),
|
||||
.XSgnE, .YSgnE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
|
||||
.op_type(FOpCtrlE));
|
||||
|
||||
// Perform Exponent Subtraction (used for alignment). For performance
|
||||
// both exponent subtractions are performed in parallel. This was
|
||||
// changed to a behavior level to allow the tools to try to optimize
|
||||
// the two parallel additions. The input values are zero-extended to 12
|
||||
// bits prior to performing the addition.
|
||||
|
||||
assign exp1 = {1'b0, XExpE};
|
||||
assign exp2 = {1'b0, YExpE};
|
||||
assign exp_diff1 = exp1 - exp2;
|
||||
assign exp_diff2 = AddDenormInE ? ({YSgnE, YExpE} - {XSgnE, XExpE}): exp2 - exp1;
|
||||
|
||||
// The second operand (B) should be set to zero, if FOpCtrlE does not
|
||||
// specify addition or subtraction
|
||||
assign zeroB = FOpCtrlE[1];
|
||||
|
||||
// Swapped operands if zeroB is not one and exp1 < exp2.
|
||||
// Swapping causes exp2 to be used for the result exponent.
|
||||
// Only the exponent of the larger operand is used to determine
|
||||
// the final result.
|
||||
assign AddSwapE = exp_diff1[11] & ~zeroB;
|
||||
assign AddExponentE = AddSwapE ? YExpE : XExpE;
|
||||
assign AddExpPostSumE = AddSwapE ? YExpE : XExpE;
|
||||
assign mantissaA = AddSwapE ? YManE[51:0] : XManE[51:0];
|
||||
assign mantissaB = AddSwapE ? XManE[51:0] : YManE[51:0];
|
||||
assign AddSignAE = AddSwapE ? YSgnE : XSgnE;
|
||||
|
||||
// Leading-Zero Detector. Determine the size of the shift needed for
|
||||
// normalization. If sum_corrected is all zeros, the exp_valid is
|
||||
// zero; otherwise, it is one.
|
||||
// modified to 52 bits to detect leading zeroes on denormalized mantissas
|
||||
// lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
|
||||
// lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
|
||||
logic [8:0] i;
|
||||
logic [8:0] j;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~mantissaA[52-i] & $unsigned(i) <= $unsigned(52)) i = i+1; // search for leading one
|
||||
ZP_mantissaA = i;
|
||||
end
|
||||
always_comb begin
|
||||
j = 0;
|
||||
while (~mantissaB[52-j] & $unsigned(j) <= $unsigned(52)) j = j+1; // search for leading one
|
||||
ZP_mantissaB = j;
|
||||
end
|
||||
|
||||
// Denormalized exponents created by subtracting the leading zeroes from the original exponents
|
||||
assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
|
||||
assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
|
||||
|
||||
// Determine the alignment shift and limit it to 63. If any bit from
|
||||
// exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
|
||||
assign exp_shift = AddSwapE ? exp_diff2 : exp_diff1;
|
||||
assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
|
||||
| exp_shift[8] | exp_shift[7] | exp_shift[6];
|
||||
assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift
|
||||
|
||||
// Unpack the 52-bit mantissas to 57-bit numbers of the form.
|
||||
// 001.M[51]M[50] ... M[1]M[0]00
|
||||
// Unless the number has an exponent of zero, in which case it
|
||||
// is unpacked as
|
||||
// 000.00 ... 00
|
||||
// This effectively flushes denormalized values to zero.
|
||||
// The three bits of to the left of the binary point prevent overflow
|
||||
// and loss of sign information. The two bits to the right of the
|
||||
// original mantissa form the "guard" and "round" bits that are used
|
||||
// to round the result.
|
||||
assign AddOpANormE = AddSwapE ? YNormE : XNormE;
|
||||
assign AddOpBNormE = AddSwapE ? XNormE : YNormE;
|
||||
assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0};
|
||||
assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0};
|
||||
|
||||
// Perform mantissa alignment using a 57-bit barrel shifter
|
||||
// If any of the bits shifted out are one, Sticky_out is set.
|
||||
// The size of the barrel shifter could be reduced by two bits
|
||||
// by not adding the leading two zeros until after the shift.
|
||||
barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);
|
||||
|
||||
// Place either the sign-extended 32-bit value or the original 64-bit value
|
||||
// into IntValue (to be used for integer to floating point conversion)
|
||||
// assign IntValue [31:0] = FSrcXE[31:0];
|
||||
// assign IntValue [63:32] = FOpCtrlE[0] ? {32{FSrcXE[31]}} : FSrcXE[63:32];
|
||||
|
||||
// If doing an integer to floating point conversion, mantissaA3 is set to
|
||||
// IntVal and the prenormalized exponent is set to 1084. Otherwise,
|
||||
// mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero,
|
||||
// and the exponent value is left unchanged.
|
||||
// Under denormalized cases, the exponent before the rounder is set to 1
|
||||
// if the normal shift value is 11.
|
||||
assign mantissaA3 = AddDenormInE ? ({12'h0, mantissaA}) : {mantissaA1, 7'h0};
|
||||
|
||||
// Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
|
||||
// 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
|
||||
// zeros.
|
||||
assign mantissaB3[63:7] = AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}};
|
||||
assign mantissaB3[6] = AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB;
|
||||
assign mantissaB3[5:0] = AddDenormInE ? mantissaB[5:0] : 6'h0;
|
||||
|
||||
// The sign of the result needs to be corrected if the true
|
||||
// operation is subtraction and the input operands were swapped.
|
||||
assign AddCorrSignE = ~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
|
||||
|
||||
// 64-bit Mantissa Adder/Subtractor
|
||||
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder
|
||||
|
||||
// 64-bit Mantissa Subtractor - to get the two's complement of the
|
||||
// result when the sign from the adder/subtractor is negative.
|
||||
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder
|
||||
|
||||
// Finds normal underflow result to determine whether to round final exponent down
|
||||
//***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
|
||||
// assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);
|
||||
|
||||
endmodule // fpadd
|
||||
|
||||
|
||||
//
|
||||
// File name : fpadd
|
||||
// Title : Floating-Point Adder/Subtractor
|
||||
// project : FPU
|
||||
// Library : fpadd
|
||||
// Author(s) : James E. Stine, Jr., Brett Mathis
|
||||
// Purpose : definition of main unit to floating-point add/sub
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
// Copyright AFRL
|
||||
//
|
||||
// Basic and Denormalized Operations
|
||||
//
|
||||
// Step 1: Load operands, set flags, and convert SP to DP
|
||||
// Step 2: Check for special inputs ( +/- Infinity, NaN)
|
||||
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
|
||||
// or if (exp1 = exp2 AND mnt1 < mnt2)
|
||||
// Step 4: Shift the mantissa corresponding to the smaller exponent,
|
||||
// and extend precision by three bits to the right.
|
||||
// Step 5: Add or subtract the mantissas.
|
||||
// Step 6: Normalize the result.
|
||||
// Shift left until normalized. Normalized when the value to the
|
||||
// left of the binary point is 1.
|
||||
// Step 7: Round the result.
|
||||
// Step 8: Put AddSumM onto output.
|
||||
//
|
||||
|
||||
|
||||
// fpuaddcvt2: back half of the floating-point add/sub datapath.
// Takes the raw adder outputs (AddSumM, AddSumTcM), selects the one to
// use (sum_corr), counts its leading zeros (lz64), normalizes it
// (barrel_shifter_l64) and hands it to the rounder, whose Result and
// Flags outputs drive FAddResM and FAddFlgM.
// Only wires, continuous assigns and submodule instances appear here;
// lz64, barrel_shifter_l64 and rounder are defined elsewhere.
module fpuaddcvt2 (
|
||||
input logic [2:0] FrmM, // Rounding mode - specify values
|
||||
input logic [2:0] FOpCtrlM, // Function opcode
|
||||
input logic FmtM, // Result Precision (0 for double, 1 for single)
|
||||
input logic [63:0] AddSumM, AddSumTcM,
|
||||
input logic [11:0] AddExp1DenormM, AddExp2DenormM,
|
||||
input logic [10:0] AddExponentM, AddExpPostSumM,
|
||||
input logic [3:0] AddSelInvM,
|
||||
input logic XSgnM, YSgnM,
|
||||
input logic [52:0] XManM, YManM,
|
||||
input logic [10:0] XExpM, YExpM,
|
||||
input logic XNormM, YNormM,
|
||||
input logic AddOpANormM, AddOpBNormM,
|
||||
input logic AddInvalidM,
|
||||
input logic AddDenormInM,
|
||||
input logic AddSignAM,
|
||||
input logic AddCorrSignM,
|
||||
input logic AddSwapM,
|
||||
|
||||
output logic [63:0] FAddResM, // Result of operation
|
||||
output logic [4:0] FAddFlgM // IEEE exception flags
|
||||
);
|
||||
wire AddDenormM; // AddDenormM on input or output
|
||||
|
||||
wire P;
|
||||
// P is the XNOR of FmtM and FOpCtrlM[1]; it is only consumed by the rounder's P port.
assign P = ~(FmtM^FOpCtrlM[1]);
|
||||
|
||||
wire [10:0] exp_pre;
|
||||
wire [63:0] Result;
|
||||
wire [63:0] sum_norm, sum_norm_w_bypass;
|
||||
wire [5:0] norm_shift, norm_shift_denorm;
|
||||
wire exp_valid;
|
||||
wire DenormIO;
|
||||
wire [4:0] FlagsIn;
|
||||
// NOTE(review): Sticky_out and zeroB are declared but not referenced anywhere else in this module.
wire Sticky_out;
|
||||
wire sign_corr;
|
||||
wire zeroB;
|
||||
wire mantissa_comp;
|
||||
wire mantissa_comp_sum;
|
||||
wire mantissa_comp_sum_tc;
|
||||
wire Float1_sum_comp;
|
||||
wire Float2_sum_comp;
|
||||
wire Float1_sum_tc_comp;
|
||||
wire Float2_sum_tc_comp;
|
||||
wire normal_underflow;
|
||||
wire [63:0] sum_corr;
|
||||
logic AddNormOvflowM;
|
||||
|
||||
|
||||
logic AddOvEnM; // Overflow trap enabled
|
||||
logic AddUnEnM; // Underflow trap enabled
|
||||
|
||||
// Both trap enables are tied high before being passed to the rounder.
assign AddOvEnM = 1'b1;
|
||||
assign AddUnEnM = 1'b1;
|
||||
// AddExponentM value pre-rounding with considerations for denormalized
|
||||
// cases/conversion cases.
// With a denormalized input, a normalization shift of 11 (6'b001011)
// forces the exponent to 1; otherwise the denormal-adjusted exponent
// chosen by AddSwapM is used.
|
||||
assign exp_pre = AddDenormInM ?
|
||||
((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
|
||||
: AddExponentM;
|
||||
|
||||
|
||||
// Finds normal underflow result to determine whether to round final AddExponentM down
|
||||
// Comparison between each float and the resulting AddSumM of the primary cla adder/subtractor and cla subtractor
|
||||
assign Float1_sum_comp = ~(XManM[51:0] > AddSumM[51:0]);
|
||||
assign Float2_sum_comp = ~(YManM[51:0] > AddSumM[51:0]);
|
||||
assign Float1_sum_tc_comp = ~(XManM[51:0] > AddSumTcM[51:0]);
|
||||
assign Float2_sum_tc_comp = ~(YManM[51:0] > AddSumTcM[51:0]);
|
||||
|
||||
// Determines the correct Float value to compare based on AddSwapM result
|
||||
assign mantissa_comp_sum = AddSwapM ? Float2_sum_comp : Float1_sum_comp;
|
||||
assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp;
|
||||
|
||||
// Determines the correct comparison result based on operation and sign of resulting AddSumM
|
||||
assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
|
||||
|
||||
// If the signs are different and both operands aren't denormalized
|
||||
// the normal underflow bit is needed and therefore updated.
|
||||
assign normal_underflow = ((XSgnM ^ YSgnM) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0;
|
||||
|
||||
// Determine the correct sign of the result
|
||||
assign sign_corr = (AddCorrSignM ^ AddSignAM) ^ AddSumM[63];
|
||||
|
||||
// If the AddSumM is negative, use its two's complement instead.
|
||||
// This value has to be 64-bits to correctly handle the
|
||||
// case 10...00
// The selection is inverted when a denormalized input is combined with
// at least one normal operand and the first condition below holds.
|
||||
assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (XSgnM ~^ YSgnM) & FOpCtrlM[0] ) | ((XSgnM ^ YSgnM) & ~FOpCtrlM[0]) ))
|
||||
? (AddSumM[63] ? AddSumM : AddSumTcM) : (AddSumM[63] ? AddSumTcM : AddSumM);
|
||||
|
||||
// Finds normal underflow result to determine whether to round final AddExponentM down
|
||||
// KEP: used to be (AddSumM == 16'h0); not sure what it is supposed to be
|
||||
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
|
||||
|
||||
// Leading-Zero Detector. Determine the size of the shift needed for
|
||||
// normalization. If sum_corrected is all zeros, the exp_valid is
|
||||
// zero; otherwise, it is one.
|
||||
lz64 lzd1 (norm_shift, exp_valid, sum_corr);
|
||||
|
||||
// Suppress the normalization shift (force it to 0) when a denormalized
// input is present and either both operands are denormal or
// normal_underflow is set.
assign norm_shift_denorm = (AddDenormInM & ( (~AddOpANormM & ~AddOpBNormM) | normal_underflow)) ? (6'h00) : (norm_shift);
|
||||
|
||||
// Barrel shifter used for normalization. It takes as inputs the
|
||||
// corrected AddSumM and the amount by which the AddSumM should
|
||||
// be shifted. It outputs the normalized AddSumM.
// NOTE(review): this comment used to say "right shifted", but the
// instance is barrel_shifter_l64 and the amount comes from the
// leading-zero detector, so this is presumably a left shift - confirm.
|
||||
barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
|
||||
|
||||
// No bypass is applied here: sum_norm is passed straight through.
assign sum_norm_w_bypass = sum_norm;
|
||||
|
||||
// Round the mantissa to a 52-bit value, with the leading one
|
||||
// removed. If the result is a single precision number, the actual
|
||||
// mantissa is in the upper 23 bits and the lower 29 bits are zero.
|
||||
// At this point, normalization has already been performed, so we know
|
||||
// exactly where the rounding point is. The rounding unit also
|
||||
// handles special cases and sets the exception flags.
|
||||
|
||||
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to
|
||||
// help in processor reservation station detection of load/stores. In
|
||||
// other words, the processor would like to know ahead of time that
|
||||
// if the result is an exception then don't load or store.
|
||||
rounder round1 (.Result, .DenormIO, .Flags(FlagsIn), .rm(FrmM), .P, .OvEn(AddOvEnM), .UnEn(AddUnEnM), .exp_valid,
|
||||
.sel_inv(AddSelInvM), .Invalid(AddInvalidM), .DenormIn(AddDenormInM), .Asign(sign_corr), .Aexp(exp_pre), .norm_shift, .A(sum_norm_w_bypass),
|
||||
.exponent_postsum(AddExpPostSumM), .A_Norm(XNormM), .B_Norm(YNormM), .exp_A_unmodified({XSgnM, XExpM}), .exp_B_unmodified({YSgnM, YExpM}),
|
||||
.normal_overflow(AddNormOvflowM), .normal_underflow, .swap(AddSwapM), .op_type(FOpCtrlM), .sum(AddSumM));
|
||||
|
||||
// Drive the final result and the exception flags onto the outputs
// (continuous assigns; no registers are instantiated in this module).
|
||||
assign FAddResM = Result;
|
||||
assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn};
|
||||
|
||||
endmodule // fpuaddcvt2
|
||||
|
||||
|
@ -5,18 +5,18 @@ module fclassify (
|
||||
input logic XSgnE, // sign bit
|
||||
input logic XNaNE, // is NaN
|
||||
input logic XSNaNE, // is signaling NaN
|
||||
input logic XNormE, // is normal
|
||||
input logic XDenormE, // is denormal
|
||||
input logic XZeroE, // is zero
|
||||
input logic XInfE, // is infinity
|
||||
output logic [63:0] ClassResE // classify result
|
||||
output logic [`XLEN-1:0] ClassResE // classify result
|
||||
);
|
||||
|
||||
logic PInf, PZero, PNorm, PDenorm;
|
||||
logic NInf, NZero, NNorm, NDenorm;
|
||||
|
||||
logic XNormE;
|
||||
|
||||
// determine the sub categories
|
||||
assign XNormE = ~(XNaNE | XInfE | XDenormE | XZeroE);
|
||||
assign PInf = ~XSgnE&XInfE;
|
||||
assign NInf = XSgnE&XInfE;
|
||||
assign PNorm = ~XSgnE&XNormE;
|
||||
@ -37,6 +37,6 @@ module fclassify (
|
||||
// bit 7 - +Inf
|
||||
// bit 8 - signaling NaN
|
||||
// bit 9 - quiet NaN
|
||||
assign ClassResE = {{54{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
|
||||
assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
|
||||
|
||||
endmodule
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
|
||||
module fcmp (
|
||||
input logic [`FPSIZES/3:0] FmtE, // precision 1 = double 0 = single
|
||||
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
|
||||
input logic [2:0] FOpCtrlE, // see above table
|
||||
input logic XSgnE, YSgnE, // input signs
|
||||
input logic [`NE-1:0] XExpE, YExpE, // input exponents
|
||||
|
@ -1,3 +1,4 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fctrl (
|
||||
input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision
|
||||
@ -13,7 +14,7 @@ module fctrl (
|
||||
output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit
|
||||
output logic [1:0] FResSelD, // select one of the results done in the memory stage
|
||||
output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
|
||||
output logic FmtD, // precision - single-0 double-1
|
||||
output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1
|
||||
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
output logic FWriteIntD // is the result written to the integer register
|
||||
);
|
||||
@ -72,14 +73,12 @@ module fctrl (
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.s.wu wu->s
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.s.l l->s
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.s.lu lu->s
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1100000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.s s->w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.s s->wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.s s->l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.s s->lu
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_00_00_0_0; // fmv.w.x
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_11_000_11_00_0_0; // fcvt.s.d
|
||||
@ -88,14 +87,12 @@ module fctrl (
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.d.wu wu->d
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.d.l l->d
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.d.lu lu->d
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1100001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.d d->w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.d d->wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.d d->l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.d d->lu
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_00_00_0_0; // fmv.d.x
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_11_001_11_00_0_0; // fcvt.d.s
|
||||
@ -119,8 +116,18 @@ module fctrl (
|
||||
// Precision
|
||||
// 0-single
|
||||
// 1-double
|
||||
assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
|
||||
|
||||
if (`FPSIZES == 1)
|
||||
assign FmtD = 0;
|
||||
else if (`FPSIZES == 2)begin
|
||||
logic [1:0] FmtTmp;
|
||||
assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
assign FmtD = (`FMT == FmtTmp);
|
||||
end
|
||||
else if (`FPSIZES == 3|`FPSIZES == 4)
|
||||
assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
|
||||
// assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
|
||||
// FResultSel:
|
||||
// 000 - ReadRes - load
|
||||
// 001 - FMARes - FMA and multiply
|
||||
|
@ -2,6 +2,7 @@
|
||||
`include "wally-config.vh"
|
||||
// largest length in IEU/FPU
|
||||
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
|
||||
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
|
||||
|
||||
module fcvt (
|
||||
input logic XSgnE, // input's sign
|
||||
@ -11,15 +12,15 @@ module fcvt (
|
||||
input logic [2:0] FOpCtrlE, // choose which opperation (look below for values)
|
||||
input logic FWriteIntE, // is fp->int (since it's writting to the integer register)
|
||||
input logic XZeroE, // is the input zero
|
||||
input logic XOrigDenormE, // is the input denormalized
|
||||
input logic XDenormE, // is the input denormalized
|
||||
input logic XInfE, // is the input infinity
|
||||
input logic XNaNE, // is the input a NaN
|
||||
input logic XSNaNE, // is the input a signaling NaN
|
||||
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FPSIZES/3:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half)
|
||||
output logic [`FLEN-1:0] CvtResE, // the fp to fp conversion's result
|
||||
output logic [`XLEN-1:0] CvtIntResE, // the fp to fp conversion's result
|
||||
output logic [4:0] CvtFlgE // the fp to fp conversion's flags
|
||||
input logic [`FMTBITS-1:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half)
|
||||
output logic [`FLEN-1:0] CvtResE, // the fp conversion result
|
||||
output logic [`XLEN-1:0] CvtIntResE, // the int conversion result
|
||||
output logic [4:0] CvtFlgE // the conversion's flags
|
||||
);
|
||||
|
||||
// OpCtrls:
|
||||
@ -37,11 +38,12 @@ module fcvt (
|
||||
// (FI) fp -> int coversion signals
|
||||
|
||||
|
||||
logic [`FPSIZES/3:0] OutFmt; // format of the output
|
||||
logic [`FMTBITS-1:0] OutFmt; // format of the output
|
||||
logic [`XLEN-1:0] PosInt; // the positive integer input
|
||||
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
|
||||
logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`NE:0] CalcExp; // the calculated expoent
|
||||
logic [$clog2(`LGLEN):0] ShiftAmt; // how much to shift by
|
||||
logic [`LOGLGLEN-1:0] ShiftAmt; // how much to shift by
|
||||
logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted
|
||||
logic ResDenormUf;// does the result underflow or is denormalized
|
||||
logic ResUf; // does the result underflow
|
||||
@ -71,6 +73,7 @@ module fcvt (
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
logic ToInt; // is the opperation an fp->int conversion?
|
||||
logic [`LOGLGLEN-1:0] ZeroCnt; // output from the LZC
|
||||
|
||||
|
||||
// seperate OpCtrl for code readability
|
||||
@ -82,23 +85,20 @@ module fcvt (
|
||||
// choose the output format depending on the operation
|
||||
// - fp -> fp: OpCtrl contains the precision of the output
|
||||
// - int -> fp: FmtE contains the precision of the output
|
||||
assign OutFmt = IntToFp ? FmtE : (FOpCtrlE[1:0] == `FMT);
|
||||
if (`FPSIZES == 2)
|
||||
assign OutFmt = IntToFp ? FmtE : (FOpCtrlE[1:0] == `FMT);
|
||||
else if (`FPSIZES == 3 | `FPSIZES == 4)
|
||||
assign OutFmt = IntToFp ? FmtE : FOpCtrlE[1:0];
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// negation
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// negate the input if the input is a negitive singed integer
|
||||
// - remove leading ones if the input is a unsigned 32-bit integer
|
||||
//
|
||||
// Negitive input
|
||||
// 64-bit input : negate the input
|
||||
// 32-bit input : trim to 32-bits and negate the input
|
||||
// Positive input
|
||||
// 64-bit input : do nothing
|
||||
// 32-bit input : trim to 32-bits
|
||||
// 1) negate the input if the input is a negative signed integer
|
||||
// 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
|
||||
|
||||
assign PosInt = ResSgn ? Int64 ? -ForwardedSrcAE : {{`XLEN-32{1'b0}}, -ForwardedSrcAE[31:0]} :
|
||||
Int64 ? ForwardedSrcAE : {{`XLEN-32{1'b0}}, ForwardedSrcAE[31:0]};
|
||||
assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE;
|
||||
assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// lzc
|
||||
@ -107,40 +107,15 @@ module fcvt (
|
||||
// choose the input to the leading zero counter i.e. priority encoder
|
||||
// int -> fp : | positive integer | 00000... (if needed) |
|
||||
// fp -> fp : | fraction | 00000... (if needed) |
|
||||
assign LzcIn = IntToFp ? {PosInt, {`LGLEN-`XLEN{1'b0}}} : // I->F
|
||||
{XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}}; // F->F
|
||||
assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
|
||||
{XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
|
||||
|
||||
// lglen is the largest possible value of ZeroCnt (NF or XLEN) hence normcnt must be log2(lglen) bits
|
||||
logic [$clog2(`LGLEN):0] i, ZeroCnt;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~LzcIn[`LGLEN-1-i] & i <= `LGLEN-1) i = i+1; // search for leading one
|
||||
ZeroCnt = i;
|
||||
end
|
||||
lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// shifter
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// F->F shift so the fraction is not denormalized
|
||||
// Large->Small Denrom -> Norm Frac:
|
||||
//
|
||||
// | Frac | `NF zeros| << ShiftCnt
|
||||
//
|
||||
// Small->Large Norm -> Denorm Frac:
|
||||
// - shift right so that the new-bias exponet = 1
|
||||
// - so shift right by new-bias - 1 exponent
|
||||
// - ie shift left by NF - 1 + new-bias exponent (if this is negitive then 0 is selected as a result later)
|
||||
// - new-bias exponent is negitive
|
||||
//
|
||||
// | `NF-1 zeros |1| Frac | << NF + new-bias exponent
|
||||
// | keep |
|
||||
//
|
||||
// Int -> Fp :
|
||||
// | Int | `NF zeros| << ShiftCnt
|
||||
// Fp -> Int :
|
||||
// | `XLEN zeros | Man | << CalcExp
|
||||
|
||||
|
||||
// select the input to the shifter
|
||||
// fp -> int:
|
||||
@ -150,12 +125,13 @@ module fcvt (
|
||||
// - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
|
||||
// - ex: for the case 0010000.... (double)
|
||||
// ??? -> fp:
|
||||
// - if result is denormalized or underflowed then we want to normalize the result:
|
||||
// | `NF zeros | Mantissa | 0's if nessisary |
|
||||
// - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
|
||||
// | `NF-1 zeros | Mantissa | 0's if nessisary |
|
||||
// - otherwise:
|
||||
// | lzcIn | 0's if nessisary |
|
||||
assign ShiftIn = ToInt ? {{`XLEN{1'b0}}, XManE[`NF]&~CalcExp[`NE], XManE[`NF-1]|(CalcExp[`NE]&XManE[`NF]), XManE[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
|
||||
ResDenormUf ? {{`NF-1{1'b0}}, XManE, {`LGLEN-`NF+1{1'b0}}} : {LzcIn, {`NF+1{1'b0}}};
|
||||
ResDenormUf ? {{`NF-1{1'b0}}, XManE, {`LGLEN-`NF+1{1'b0}}} :
|
||||
{LzcIn, {`NF+1{1'b0}}};
|
||||
// kill the shift if it's negative
|
||||
// select the amount to shift by
|
||||
// fp -> int:
|
||||
@ -168,17 +144,50 @@ module fcvt (
|
||||
// - only shift fp -> fp if the intital value is denormalized
|
||||
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
|
||||
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
|
||||
assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN):0]&{$clog2(`LGLEN)+1{~CalcExp[`NE]}} :
|
||||
ResDenormUf&~IntToFp ? ($clog2(`LGLEN)+1)'(`NF-1)+CalcExp[$clog2(`LGLEN):0] : (ZeroCnt+1)&{$clog2(`LGLEN)+1{XOrigDenormE|IntToFp}};
|
||||
assign ShiftAmt = ToInt ? CalcExp[`LOGLGLEN-1:0]&{`LOGLGLEN{~CalcExp[`NE]}} :
|
||||
ResDenormUf&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CalcExp[`LOGLGLEN-1:0] :
|
||||
(ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
|
||||
|
||||
// shift
|
||||
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
|
||||
// process:
|
||||
// - start - CalcExp = 1 + XExp - Largest Bias
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary |
|
||||
//
|
||||
// - shift left 1 (1)
|
||||
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
|
||||
// . <- binary point
|
||||
//
|
||||
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
|
||||
// | 0's | Mantissa | 0's if nessisary |
|
||||
// | keep |
|
||||
//
|
||||
// fp -> fp:
|
||||
// - if result is denormalized or underflowed:
|
||||
// | `NF-1 zeros | Mantissa | 0's if nessisary | << NF+CalcExp-1
|
||||
// process:
|
||||
// - start
|
||||
// | mantissa | 0's |
|
||||
//
|
||||
// - shift right by NF-1 (NF-1)
|
||||
// | `NF-1 zeros | mantissa | 0's |
|
||||
//
|
||||
// - shift left by CalcExp = XExp - Largest bias + new bias
|
||||
// | 0's | mantissa | 0's |
|
||||
// | keep |
|
||||
//
|
||||
// - if the input is denormalized:
|
||||
// | lzcIn | 0's if nessisary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
//
|
||||
// int -> fp: | lzcIn | 0's if nessisary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
|
||||
assign Shifted = ShiftIn << ShiftAmt;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// exp calculations
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// fp -> int
|
||||
// CalcExp = 1 - largest bias + 1 -
|
||||
|
||||
|
||||
// *** possible optimizations:
|
||||
@ -192,10 +201,35 @@ module fcvt (
|
||||
|
||||
// Select the bias of the output
|
||||
// fp -> int : select 1
|
||||
// ??? -> fp : pick the new bias depending on the output format
|
||||
// ??? -> fp : pick the new bias depending on the output format
|
||||
if (`FPSIZES == 1) begin
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS);
|
||||
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic [`NE-2:0] NewBiasToFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: NewBiasToFp = (`NE-1)'(`BIAS);
|
||||
`FMT1: NewBiasToFp = (`NE-1)'(`BIAS1);
|
||||
`FMT2: NewBiasToFp = (`NE-1)'(`BIAS2);
|
||||
default: NewBiasToFp = 1'bx;
|
||||
endcase
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic [`NE-2:0] NewBiasToFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: NewBiasToFp = (`NE-1)'(`Q_BIAS);
|
||||
2'h1: NewBiasToFp = (`NE-1)'(`D_BIAS);
|
||||
2'h0: NewBiasToFp = (`NE-1)'(`S_BIAS);
|
||||
2'h2: NewBiasToFp = (`NE-1)'(`H_BIAS);
|
||||
endcase
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
|
||||
end
|
||||
// select the old exponent
|
||||
// int -> fp : largest bias + XLEN
|
||||
// fp -> ??? : XExp
|
||||
@ -203,22 +237,76 @@ module fcvt (
|
||||
|
||||
// calculate CalcExp
|
||||
// fp -> fp :
|
||||
// - XExp - Largest bias + new bias
|
||||
// fp -> int : XExp
|
||||
// int -> fp : largest bias + XLEN
|
||||
// the -XOrigDenorm is to take into account the correction (which had a plus 1)
|
||||
assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XOrigDenormE|IntToFp} - {{`NE-$clog2(`LGLEN){1'b0}}, (ZeroCnt&{$clog2(`LGLEN)+1{XOrigDenormE|IntToFp}})};
|
||||
// if result is 0 or negitive
|
||||
assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE;
|
||||
assign ResNegNF = (FOpCtrlE[1:0] == `FMT) ? -`NF : -`NF1;
|
||||
// if the reuslt underflows and somthing is shifted out set the sticky bit
|
||||
assign ResUf = ($signed(CalcExp) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroE;
|
||||
// - XExp - Largest bias + new bias - (ZeroCnt+1)
|
||||
// only do ^ if the input was denormalized
|
||||
// - convert the exponent to the final precision (Exp - oldBias + newBias)
|
||||
// - correct the exponent when there is a normalization shift ( + ZeroCnt+1)
|
||||
// fp -> int : XExp - Largest Bias + 1 - (ZeroCnt+1)
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
|
||||
// process:
|
||||
// - start
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary |
|
||||
//
|
||||
// - shift left 1 (1)
|
||||
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
|
||||
// . <- binary point
|
||||
//
|
||||
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
|
||||
// | 0's | Mantissa | 0's if nessisary |
|
||||
// | keep |
|
||||
//
|
||||
// - if the input is denormalized then we dont shift... so the "- (ZeroCnt+1)" is just leftovers from other options
|
||||
// int -> fp : largest bias + XLEN - Largest bias + new bias - 1 - ZeroCnt = XLEN + NewBias - 1 - ZeroCnt
|
||||
// Process:
|
||||
// - shifted right by XLEN (XLEN)
|
||||
// - shift left to normalize (-1-ZeroCnt)
|
||||
// - newBias to make the biased exponent
|
||||
//
|
||||
assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
|
||||
// find if the result is denormal or underflows
|
||||
// - if the calculated exponent is 0 or negative (and the input/result is not exactly 0)
|
||||
// - can't underflow an integer to Fp conversion
|
||||
assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp;
|
||||
// choose the negative of the fraction size
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
|
||||
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
|
||||
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
|
||||
default: ResNegNF = 1'bx;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
|
||||
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
|
||||
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
|
||||
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
|
||||
endcase
|
||||
end
|
||||
// determine if the result underflows ??? -> fp
|
||||
// - if the first 1 is shifted out of the result then the result underflows
|
||||
// - can't underflow an integer to fp conversions
|
||||
assign ResUf = ($signed(CalcExp) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroE&~IntToFp;
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// sign
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// determine the sign of the result
|
||||
// - if int -> fp
|
||||
// - if 64-bit : check the msb of the 64-bit integer input and if it's signed
|
||||
// - if 32-bit : check the msb of the 32-bit integer input and if it's signed
|
||||
// - otherwise: the floating point input's sign
|
||||
assign ResSgn = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@ -241,17 +329,80 @@ module fcvt (
|
||||
// {Guard, Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 1x - Plus1
|
||||
// ResUf is used when a fp->fp result underflows but all the bits get shifted out, which leaves nothing for the sticky bit
|
||||
if (`FPSIZES == 1) begin
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : |Shifted[`LGLEN+`NF-`NF-1:0]|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : Shifted[`LGLEN+`NF-`NF];
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : Shifted[`LGLEN+`NF-`NF+1];
|
||||
|
||||
// ResUf is used when a fp->fp result underflows but all the bits get shifted out, leaving nothing for the sticky bit
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] :
|
||||
(OutFmt ? |Shifted[`LGLEN+`NF-`NF-1:0] : |Shifted[`LGLEN+`NF-`NF1-1:0])|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] :
|
||||
OutFmt ? Shifted[`LGLEN+`NF-`NF] : |Shifted[`LGLEN+`NF-`NF1];
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] :
|
||||
OutFmt ? Shifted[`LGLEN+`NF-`NF+1] : Shifted[`LGLEN+`NF-`NF1+1];
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] :
|
||||
(OutFmt ? |Shifted[`LGLEN+`NF-`NF-1:0] : |Shifted[`LGLEN+`NF-`NF1-1:0])|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] :
|
||||
OutFmt ? Shifted[`LGLEN+`NF-`NF] : Shifted[`LGLEN+`NF-`NF1];
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] :
|
||||
OutFmt ? Shifted[`LGLEN+`NF-`NF+1] : Shifted[`LGLEN+`NF-`NF1+1];
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`NF-`NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`NF-`NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF+1];
|
||||
end
|
||||
`FMT1: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`NF-`NF1-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`NF-`NF1];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF1+1];
|
||||
end
|
||||
`FMT2: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`NF-`NF2-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`NF-`NF2];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF2+1];
|
||||
end
|
||||
default: begin
|
||||
ToFpSticky = 1'bx;
|
||||
ToFpRound = 1'bx;
|
||||
ToFpLSBFrac = 1'bx;
|
||||
end
|
||||
endcase
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
|
||||
|
||||
always_comb begin // ***remove guard bit
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`Q_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`Q_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`Q_NF+1];
|
||||
end
|
||||
2'h1: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`D_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`D_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`D_NF+1];
|
||||
end
|
||||
2'h0: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`S_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`S_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`S_NF+1];
|
||||
end
|
||||
2'h2: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`H_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`H_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`H_NF+1];
|
||||
end
|
||||
endcase
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
|
||||
end
|
||||
|
||||
always_comb
|
||||
// Determine if you add 1
|
||||
case (FrmE)
|
||||
3'b000: CalcPlus1 = Round & (Sticky | LSBFrac);//round to nearest even
|
||||
@ -261,32 +412,98 @@ module fcvt (
|
||||
3'b100: CalcPlus1 = Round;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
end
|
||||
assign Plus1 = CalcPlus1&(Round|Sticky);
|
||||
assign ShiftedPlus1 = OutFmt ? {{`FLEN-1{1'b0}},Plus1} : {{`NE+`NF1{1'b0}}, Plus1, {`FLEN-`NE-`NF1-1{1'b0}}};
|
||||
|
||||
// dont round if exact
|
||||
assign Plus1 = CalcPlus1&(Round|Sticky);
|
||||
|
||||
// shift the 1 to the proper position for rounding
|
||||
// - don't add the rounding bit here if converting to integer
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ShiftedPlus1 = OutFmt ? {{`FLEN-1{1'b0}},Plus1&~ToInt} : {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
|
||||
`FMT1: ShiftedPlus1 = {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
|
||||
`FMT2: ShiftedPlus1 = {{`NE+`NF2{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF2-1{1'b0}}};
|
||||
default: ShiftedPlus1 = 0;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: ShiftedPlus1 = {{`Q_LEN-1{1'b0}},Plus1&~ToInt};
|
||||
2'h1: ShiftedPlus1 = {{`Q_NE+`D_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`D_NF-1{1'b0}}};
|
||||
2'h0: ShiftedPlus1 = {{`Q_NE+`S_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`S_NF-1{1'b0}}};
|
||||
2'h2: ShiftedPlus1 = {{`Q_NE+`H_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`H_NF-1{1'b0}}};
|
||||
endcase
|
||||
end
|
||||
// kill calcExp if the result is denormalized
|
||||
assign {FullResExp, ResFrac} = {CalcExp&{`NE+1{~ResDenormUf}}, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`NF]} + ShiftedPlus1;
|
||||
// trim the result's exponent to size
|
||||
assign ResExp = FullResExp[`NE-1:0];
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// flags
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate the flags
|
||||
// dont set underflow overflow or inexact flags if result is NaN
|
||||
assign MaxExp = ToInt ? Int64 ? 65 : 33 :
|
||||
OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
|
||||
// if the exponent is larger than or equal to the maximum and it's not negative
|
||||
// F->F if the input is inf then the output is also Inf ie exact, so dont set the underflow flag
|
||||
|
||||
// find the maximum exponent (the exponent and larger overflows)
|
||||
if (`FPSIZES == 1) begin
|
||||
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign MaxExp = ToInt ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
|
||||
OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic [`NE-1:0] MaxExpFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
MaxExpFp = {`NE{1'b1}};
|
||||
end
|
||||
`FMT1: begin
|
||||
MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
|
||||
end
|
||||
`FMT2: begin
|
||||
MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
|
||||
end
|
||||
default: begin
|
||||
MaxExpFp = 1'bx;
|
||||
end
|
||||
endcase
|
||||
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic [`NE-1:0] MaxExpFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
MaxExpFp = {`Q_NE{1'b1}};
|
||||
end
|
||||
2'h1: begin
|
||||
MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
|
||||
end
|
||||
2'h0: begin
|
||||
MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
|
||||
end
|
||||
2'h2: begin
|
||||
MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
|
||||
end
|
||||
endcase
|
||||
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
|
||||
end
|
||||
|
||||
// if the result exponent is larger than or equal to the maximum possible exponent
|
||||
// | and the exponent is positive
|
||||
// | | and the input is not NaN or Infinity
|
||||
// | | |
|
||||
assign Overflow = ((ResExp >= MaxExp)&~CalcExp[`NE]&~(XNaNE|XInfE));
|
||||
// only set the underflow flag if not-exact
|
||||
// set the underflow flag if the result is denormal or underflowed
|
||||
// can't underflow during to-integer conversions
|
||||
assign Overflow = ((ResExp >= MaxExp)&~CalcExp[`NE]&(~(XNaNE|XInfE)|IntToFp));
|
||||
|
||||
// if the result is denormalized or underflowed
|
||||
// | and the result did not round into normal values
|
||||
@ -294,18 +511,24 @@ module fcvt (
|
||||
// | | | and the result isn't NaN
|
||||
// | | | |
|
||||
assign Underflow = ResDenormUf & ~(ResExp==1 & CalcExp == 0) & (Sticky|Round)&~(XNaNE);
|
||||
|
||||
// we are using the IEEE convertToIntegerExact operations (rather than the non-exact ones), which do signal the inexact flag
|
||||
// if there were bits thrown away
|
||||
// | if overflowed or underflowed
|
||||
// | | and if not a NaN
|
||||
// | | |
|
||||
assign FpInexact = (Sticky|Round|Underflow|Overflow)&(~XNaNE|IntToFp);
|
||||
|
||||
// if the result is too small to be represented and not 0
|
||||
// | and if the result is not invalid (outside the integer bounds)
|
||||
// | |
|
||||
assign IntInexact = ((CalcExp[`NE]&~XZeroE)|Sticky|Round)&~Invalid;
|
||||
|
||||
// select the inexact flag to output
|
||||
assign Inexact = ToInt ? IntInexact : FpInexact;
|
||||
|
||||
// if an input was a signaling NaN (and we're using a FP input)
|
||||
// |
|
||||
assign FpInvalid = (XSNaNE&~IntToFp);
|
||||
|
||||
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
|
||||
@ -320,54 +543,262 @@ module fcvt (
|
||||
assign IntInvalid = XNaNE|XInfE|Overflow|((XSgnE&~Signed)&(~((CalcExp[`NE]|(~|CalcExp))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
|
||||
// |
|
||||
// or when the positive result rounds up out of range
|
||||
// select the invalid flag to output
|
||||
assign Invalid = ToInt ? IntInvalid : FpInvalid;
|
||||
// pack the flags together and choose the result based on the opperation
|
||||
// don't set the overflow or underflow flags if converting to integer
|
||||
// pack the flags together
|
||||
// - fp -> int does not set the overflow or underflow flags
|
||||
assign CvtFlgE = {Invalid, 1'b0, Overflow&~ToInt, Underflow&~ToInt, Inexact};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// result selection
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// when the input is zero for F->F the exponent is not calculated as 0 so combine with underflow result
|
||||
|
||||
//logic [$clog2(`NF)-1:0] MinDenormExp;
|
||||
//assign MinDenormExp = FOpCtrlE[1:0] == `FMT ? -`NE : -`NE1;
|
||||
assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|PosInt&IntToFp));
|
||||
//assign NaNRes = FOpCtrlE[1:0] == `FMT ? {1'b0, {`NE+1{1'b1}}, (`NF-1)'(0)} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
|
||||
|
||||
if(`IEEE754) begin
|
||||
assign NaNRes = FOpCtrlE[1:0] == `FMT ? {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
|
||||
end else begin
|
||||
assign NaNRes = FOpCtrlE[1:0] == `FMT ? {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
|
||||
// determine if you should kill the result
|
||||
// - do so if the result underflows, the fp input is zero (the exp doesn't calculate correctly), or the integer input is 0
|
||||
// - dont set to zero if fp input is zero but not using the fp input
|
||||
// - dont set to zero if int input is zero but not using the int input
|
||||
assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp));
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
assign NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
|
||||
end else begin
|
||||
assign NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
assign InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
|
||||
// result for when the result is killed i.e. underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
assign UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
assign Res = {ResSgn, ResExp, ResFrac};
|
||||
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
|
||||
end else begin
|
||||
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
assign InfRes = OutFmt ? (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
|
||||
(~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
assign Res = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
|
||||
end else begin
|
||||
NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
|
||||
// result for when the result is killed i.e. underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
`FMT1: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
|
||||
end else begin
|
||||
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
end
|
||||
`FMT2: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, XManE[`NF-2:`NF-`NF2]};
|
||||
end else begin
|
||||
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, {`NF2-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
|
||||
end
|
||||
default: begin
|
||||
NaNRes = 1'bx;
|
||||
InfRes = 1'bx;
|
||||
UfRes = 1'bx;
|
||||
Res = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, XManE[`Q_NF-2:0]};
|
||||
end else begin
|
||||
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`Q_NE-1{1'b1}}, 1'b0, {`Q_NF{1'b1}}} : {ResSgn, {`Q_NE{1'b1}}, {`Q_NF{1'b0}}};
|
||||
|
||||
// result for when the result is killed i.e. underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {ResSgn, (`Q_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
2'h1: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`D_NF]};
|
||||
end else begin
|
||||
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`D_NF]};
|
||||
end
|
||||
2'h0: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`S_NF]};
|
||||
end else begin
|
||||
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`S_NF]};
|
||||
end
|
||||
2'h2: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`H_NF]};
|
||||
end else begin
|
||||
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`H_NF]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
// assign InfRes = FOpCtrlE[1:0] == `FMT ? {ResSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
// output one less then the maximum value if rounding down (RZ RU RD)
|
||||
// if infinitiy output infinity
|
||||
assign InfRes = FOpCtrlE[1:0] == `FMT ? ~XInfE&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
|
||||
~XInfE&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
// if RU/RD then round the underflowed result if needed
|
||||
// integer zero's exponent is not calculated correctly so go through underflow result
|
||||
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
|
||||
assign Res = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
|
||||
|
||||
// choose the floating point result
|
||||
// - if the input is NaN (and using the NaN input) output the NaN result
|
||||
// - if the input is infinity or the output overflows
|
||||
// - kill the InfE signal if the input isn't a floating point value
|
||||
// - if killing the result output the underflow result
|
||||
// - otherwise output the normal result
|
||||
assign CvtResE = XNaNE&~IntToFp ? NaNRes :
|
||||
(XInfE|Overflow)&~IntToFp ? InfRes :
|
||||
(XInfE&~IntToFp)|Overflow ? InfRes :
|
||||
KillRes ? UfRes :
|
||||
Res;
|
||||
// *** probably can optimize the negation
|
||||
// NaNs should output the same as a positive infinity
|
||||
// a 32-bit unsigned result should be sign extended (as if it were a signed number)
|
||||
// select the overflow integer result
|
||||
// - negative infinity and out of range negative input
|
||||
// | int | long |
|
||||
// signed | -2^31 | -2^63 |
|
||||
// unsigned | 0 | 0 |
|
||||
//
|
||||
// - positive infinity and out of range positive input and NaNs
|
||||
// | int | long |
|
||||
// signed | 2^31-1 | 2^63-1 |
|
||||
// unsigned | 2^32-1 | 2^64-1 |
|
||||
//
|
||||
// other: 32 bit unsigned result should be sign extended as if it were a signed number
|
||||
assign OfIntRes = Signed ? XSgnE&~XNaNE ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
|
||||
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
|
||||
XSgnE&~XNaNE ? {`XLEN{1'b0}} : // unsigned negitive
|
||||
{`XLEN{1'b1}};// unsigned positive
|
||||
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
|
||||
XSgnE&~XNaNE ? {`XLEN{1'b0}} : // unsigned negitive
|
||||
{`XLEN{1'b1}};// unsigned positive
|
||||
|
||||
|
||||
assign NegRes = XSgnE ? -({2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}},Plus1}) : {2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}},Plus1};
|
||||
// round and negate the positive result if needed
|
||||
assign NegRes = XSgnE ? -({2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
|
||||
// select the integer output
|
||||
// - if the input is invalid (out of bounds NaN or Inf) then output overflow result
|
||||
// - if the input underflows
|
||||
// - if rounding and signed operation and negative input, output -1
|
||||
// - otherwise output a rounded 0
|
||||
// - otherwise output the normal result (trimmed and sign extended if necessary)
|
||||
assign CvtIntResE = Invalid ? OfIntRes :
|
||||
CalcExp[`NE] ? XSgnE&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
|
||||
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
|
||||
CalcExp[`NE] ? XSgnE&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
|
||||
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
|
||||
|
||||
endmodule
|
@ -1,190 +0,0 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
module fcvtfp (
|
||||
input logic [10:0] XExpE, // input's exponent
|
||||
input logic [52:0] XManE, // input's mantissa
|
||||
input logic XSgnE, // input's sign
|
||||
input logic XZeroE, // is the input zero
|
||||
input logic XDenormE, // is the input denormalized
|
||||
input logic XInfE, // is the input infinity
|
||||
input logic XNaNE, // is the input a NaN
|
||||
input logic XSNaNE, // is the input a signaling NaN
|
||||
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic FmtE, // the input's precision (1 = double 0 = single)
|
||||
output logic [63:0] CvtFpResE, // the fp to fp conversion's result
|
||||
output logic [4:0] CvtFpFlgE); // the fp to fp conversion's flags
|
||||
|
||||
logic [12:0] DSExp; // double to single precision exponent
|
||||
logic Denorm; // is the double to single precision result denormalized
|
||||
logic Shift; // do you shift the double precision exponent (if single precision result is denormalized)
|
||||
logic [51:0] SDFrac; // single to double precision fraction
|
||||
logic [25:0] DSFrac; // double to single precision fraction
|
||||
logic [77:0] DSFracShifted; // single precision fraction shifted for double precision
|
||||
logic Sticky, UfSticky, Guard, Round, LSBFrac, UfGuard, UfRound, UfLSBFrac; // rounding bits
|
||||
logic CalcPlus1, UfCalcPlus1, Plus1, UfPlus1; // do you add one to the result
|
||||
logic [12:0] DSExpFull; // full double to single exponent
|
||||
logic [22:0] DSResFrac; // final double to single fraction
|
||||
logic [7:0] DSResExp; // final double to single exponent
|
||||
logic [10:0] SDExp; // final single to double precision exponent
|
||||
logic Overflow, Underflow, Inexact; // flags
|
||||
logic [31:0] DSRes; // double to single precision result
|
||||
|
||||
|
||||
// add support for all formats
|
||||
// consider reordering code blocks so upconverting is in one region of the file
|
||||
// and downconverting is in the other region.
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// LZC: Leading Zero Counter
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// *** consider sharing this with fcvtint
|
||||
// *** emphasize parallel structure between the two
|
||||
// *** add a priorityencoder module to generic (similar to priorityonehot) and use it
|
||||
|
||||
// LZC - find the first 1 in the input's mantissa
|
||||
logic [8:0] i,NormCnt;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~XManE[52-i] & i <= 52) i = i+1; // search for leading one
|
||||
NormCnt = i;
|
||||
end
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Exponents
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// convert the double precision exponent to single precision.
|
||||
// - subtract the double precision exponent (1023) and add the
|
||||
// single precision exponent (127)
|
||||
// - if the input is zero then kill the exponent
|
||||
|
||||
assign DSExp = ({2'b0,XExpE}-13'd1023+13'd127)&{13{~XZeroE}};
|
||||
|
||||
// is the converted double to single precision exponent in the denormalized range
|
||||
assign Denorm = $signed(DSExp) <= 0 & $signed(DSExp) > $signed(-(13'd23));
|
||||
|
||||
|
||||
// calculate the final single to double precision exponent
|
||||
// - subtract the single precision bias (127) and add the double
|
||||
// precision bias (1023)
|
||||
// - if the result is zero or denormalized, kill the exponent
|
||||
assign SDExp = XExpE-({2'b0,NormCnt&{9{~XZeroE}}})+({11{XDenormE}}&1024-127); //*** seems ineffecient
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Fraction
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// normalize the single precision fraction for double precision
|
||||
// - needed for denormal single precision values
|
||||
assign SDFrac = XManE[51:0] << NormCnt;
|
||||
|
||||
// check if the double precision mantissa needs to be shifted
|
||||
// - the mantissa needs to be shifted if the single precision result is denormal
|
||||
assign Shift = Denorm | (($signed(DSExp) > $signed(-(13'd25))) & DSExp[12]);
|
||||
// shift the mantissa
|
||||
assign DSFracShifted = {XManE, 25'b0} >> ((-DSExp+1)&{13{Shift}}); //***might be some optimization here
|
||||
assign DSFrac = DSFracShifted[76:51];
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounder
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfSticky = |DSFracShifted[50:0];
|
||||
assign UfGuard = DSFrac[1];
|
||||
assign UfRound = DSFrac[0];
|
||||
assign UfLSBFrac = DSFrac[2];
|
||||
|
||||
|
||||
assign Sticky = UfSticky | UfRound;
|
||||
assign Guard = DSFrac[2];
|
||||
assign Round = DSFrac[1];
|
||||
assign LSBFrac = DSFrac[3];
|
||||
|
||||
|
||||
always_comb begin // ***remove guard bit
|
||||
// Determine if you add 1
|
||||
case (FrmE)
|
||||
3'b000: CalcPlus1 = Guard & (Round | (Sticky) | (~Round&~Sticky&LSBFrac));//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = XSgnE;//round down
|
||||
3'b011: CalcPlus1 = ~XSgnE;//round up
|
||||
3'b100: CalcPlus1 = (Guard & (Round | (Sticky) | (~Round&~Sticky)));//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you add 1 (for underflow flag)
|
||||
case (FrmE)
|
||||
3'b000: UfCalcPlus1 = UfGuard & (UfRound | UfSticky | (~UfRound&~UfSticky&UfLSBFrac));//round to nearest even
|
||||
3'b001: UfCalcPlus1 = 0;//round to zero
|
||||
3'b010: UfCalcPlus1 = XSgnE;//round down
|
||||
3'b011: UfCalcPlus1 = ~XSgnE;//round up
|
||||
3'b100: UfCalcPlus1 = (UfGuard & (UfRound | UfSticky | (~UfRound&~UfSticky)));//round to nearest max magnitude
|
||||
default: UfCalcPlus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// if an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (Sticky | UfGuard | Guard | Round);
|
||||
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard);
|
||||
|
||||
|
||||
|
||||
// round the double to single precision result
|
||||
assign {DSExpFull, DSResFrac} = {DSExp&{13{~Denorm}}, DSFrac[25:3]} + {35'b0,Plus1};
|
||||
assign DSResExp = DSExpFull[7:0];
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate the flags
|
||||
// - overflow, underflow and inexact can only be set by the double to single precision opperation
|
||||
// - don't set underflow or overflow if the input is NaN or Infinity
|
||||
// - don't set the inexact flag if the input is NaN
|
||||
assign Overflow = $signed(DSExpFull) >= $signed({5'b0, {8{1'b1}}}) & ~(XNaNE|XInfE);
|
||||
assign Underflow = (($signed(DSExpFull) <= 0) & ((Sticky|Guard|Round) | (XManE[52]&~|DSFrac) | (|DSFrac&~Denorm)) | ((DSExpFull == 1) & Denorm & ~(UfPlus1&UfLSBFrac))) & ~(XNaNE|XInfE);
|
||||
assign Inexact = (Sticky|Guard|Round|Underflow|Overflow) &~(XNaNE);
|
||||
|
||||
// pack the flags together and choose the result based on the opperation
|
||||
assign CvtFpFlgE = FmtE ? {XSNaNE, 1'b0, Overflow, Underflow, Inexact} : {XSNaNE, 4'b0};
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Result Selection
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
if(`IEEE754) begin
|
||||
// select the double to single precision result
|
||||
assign DSRes = XNaNE ? {XSgnE, {8{1'b1}}, 1'b1, XManE[50:29]} :
|
||||
Underflow & ~Denorm ? {XSgnE, 30'b0, CalcPlus1&(|FrmE[1:0]|Shift)} :
|
||||
Overflow | XInfE ? ((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~XSgnE) | (FrmE[1:0]==2'b11&XSgnE)) & ~XInfE ? {XSgnE, 8'hfe, {23{1'b1}}} :
|
||||
{XSgnE, 8'hff, 23'b0} :
|
||||
{XSgnE, DSResExp, DSResFrac};
|
||||
|
||||
// select the final result based on the opperation
|
||||
//*** in al units before putting into : ? put in a seperate signal
|
||||
assign CvtFpResE = FmtE ? {{32{1'b1}},DSRes} : {XSgnE, SDExp, SDFrac[51]|XNaNE, SDFrac[50:0]};
|
||||
end else begin
|
||||
// select the double to single precision result
|
||||
assign DSRes = XNaNE ? {1'b0, {8{1'b1}}, 1'b1, 22'b0} :
|
||||
Underflow & ~Denorm ? {XSgnE, 30'b0, CalcPlus1&(|FrmE[1:0]|Shift)} :
|
||||
Overflow | XInfE ? ((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~XSgnE) | (FrmE[1:0]==2'b11&XSgnE)) & ~XInfE ? {XSgnE, 8'hfe, {23{1'b1}}} :
|
||||
{XSgnE, 8'hff, 23'b0} :
|
||||
{XSgnE, DSResExp, DSResFrac};
|
||||
|
||||
// select the final result based on the opperation
|
||||
assign CvtFpResE = FmtE ? {{32{1'b1}},DSRes} : {XSgnE&~XNaNE, SDExp, SDFrac[51]|XNaNE, SDFrac[50:0]&{51{~XNaNE}}};
|
||||
end
|
||||
endmodule // fpadd
|
||||
|
||||
|
@ -1,190 +0,0 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
// `include "../../config/rv64icfd/wally-config.vh"
|
||||
// `define XLEN 64
|
||||
module fcvtint ( // converts between floating point and integer; FOpCtrlE[0] selects the fp -> int result (1) or the int -> fp result (0)
|
||||
input logic XSgnE, // X's sign
|
||||
input logic [10:0] XExpE, // X's exponent
|
||||
input logic [52:0] XManE, // X's mantissa (53 bits)
|
||||
input logic XZeroE, // is X zero
|
||||
input logic XNaNE, // is X NaN
|
||||
input logic XInfE, // is X infinity
|
||||
input logic XDenormE, // is X denormalized
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, // integer input
|
||||
input logic [2:0] FOpCtrlE, // chooses which instruction is done (full list below)
|
||||
input logic [2:0] FrmE, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
output logic [63:0] CvtResE, // convert final result
|
||||
output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
|
||||
logic ResSgn; // FP result's sign
|
||||
logic [10:0] ResExp,TmpExp; // FP result's exponent
|
||||
logic [51:0] ResFrac; // FP result's fraction
|
||||
logic [6:0] LZResP; // lz output
|
||||
logic [7:0] Bits; // how many bits are in the integer result
|
||||
logic [7:0] SubBits; // subtract these bits from the exponent (FP result)
|
||||
logic [64+51:0] ShiftedManTmp; // Shifted mantissa
|
||||
logic [64+51:0] ShiftVal; // value being shifted (to int - XMan, to FP - |integer input|)
|
||||
logic [64+1:0] ShiftedMan; // shifted mantissa truncated
|
||||
logic [64:0] RoundedTmp; // full size rounded result - in case of overflow
|
||||
logic [63:0] Rounded; // rounded result
|
||||
logic [12:0] ExpVal; // unbiased X exponent
|
||||
logic [12:0] ShiftCnt; // how much is the mantissa shifted
|
||||
logic [64-1:0] IntIn; // trimmed integer input
|
||||
logic [64-1:0] PosInt; // absolute value of the integer input
|
||||
logic [63:0] CvtIntRes; // integer result from the fp -> int instructions
|
||||
logic [63:0] CvtFPRes; // floating point result from the int -> fp instructions
|
||||
logic Of, Uf; // did the integer result underflow or overflow
|
||||
logic Guard, Round, LSB, Sticky; // bits used to determine rounding
|
||||
logic Plus1,CalcPlus1; // do you add one for rounding
|
||||
logic SgnRes; // set when the result is a 32-bit integer (fp -> int, not long)
|
||||
logic Res64, In64; // is the result or input 64 bits
|
||||
logic RoundMSB; // most significant bit of the fraction
|
||||
logic RoundSgn; // sign of the rounded result
|
||||
logic Invalid, Inexact; // flags
|
||||
|
||||
// FOpCtrlE:
|
||||
// fcvt.w.s = 001
|
||||
// fcvt.wu.s = 011
|
||||
// fcvt.s.w = 000
|
||||
// fcvt.s.wu = 010
|
||||
// fcvt.l.s = 101
|
||||
// fcvt.lu.s = 111
|
||||
// fcvt.s.l = 100
|
||||
// fcvt.s.lu = 110
|
||||
// fcvt.w.d = 001
|
||||
// fcvt.wu.d = 011
|
||||
// fcvt.d.w = 000
|
||||
// fcvt.d.wu = 010
|
||||
// fcvt.l.d = 101
|
||||
// fcvt.lu.d = 111
|
||||
// fcvt.d.l = 100
|
||||
// fcvt.d.lu = 110
|
||||
// bit order: {long, unsigned, to int}
|
||||
|
||||
// *** revisit this module, explain in more depth
|
||||
// should the int to fp and fp to int paths be separated?
|
||||
// add support for all formats
|
||||
|
||||
// calculate signals based off the input and output's size
|
||||
assign Res64 = (FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&~FOpCtrlE[0]); // 64-bit result: fp -> long, or int -> double
|
||||
assign In64 = (~FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&FOpCtrlE[0]); // 64-bit input: long -> fp, or double -> int
|
||||
assign SubBits = In64 ? 8'd64 : 8'd32;
|
||||
assign Bits = Res64 ? 8'd64 : 8'd32;
|
||||
|
||||
// calculate the unbiased exponent
|
||||
assign ExpVal = {1'b0,XExpE} - {1'b0, (11)'(`BIAS)} + {12'b0, XDenormE};
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
|
||||
// position the input in the most significant bits
|
||||
assign IntIn = FOpCtrlE[2] ? {ForwardedSrcAE, {64-`XLEN{1'b0}}} : {ForwardedSrcAE[31:0], 32'b0};
|
||||
// make the integer positive
|
||||
assign PosInt = IntIn[64-1]&~FOpCtrlE[1] ? -IntIn : IntIn;
|
||||
// determine the integer's sign
|
||||
assign ResSgn = ~FOpCtrlE[1]&IntIn[64-1];
|
||||
|
||||
// Leading one detector
|
||||
logic [8:0] i;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~PosInt[64-1-i] & i < `XLEN) i = i+1; // search for leading one; i stops at `XLEN if none is found
|
||||
LZResP = i[5:0]+1; // compute shift count
|
||||
end
|
||||
|
||||
// if no one was found set to zero otherwise calculate the exponent
|
||||
assign TmpExp = i==`XLEN ? 0 : FmtE ? 11'd1023 + {3'b0, SubBits} - {4'b0, LZResP} : 11'd127 + {3'b0, SubBits} - {4'b0, LZResP};
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////
|
||||
|
||||
|
||||
// select the shift value and amount based on operation (to fp or int)
|
||||
assign ShiftCnt = FOpCtrlE[0] ? ExpVal : {6'b0, LZResP};
|
||||
assign ShiftVal = FOpCtrlE[0] ? {{64-1{1'b0}}, XManE} : {PosInt, 52'b0};
|
||||
|
||||
// if shift = -1 then shift one bit right for guard bit (right shifting twice never rounds)
|
||||
// if the shift is negative add a bit for sticky bit calculation
|
||||
// otherwise shift left
|
||||
assign ShiftedManTmp = &ShiftCnt ? {{64{1'b0}}, XManE[52:1]} : ShiftCnt[12] ? {{64+51{1'b0}}, ~XZeroE} : ShiftVal << ShiftCnt;
|
||||
|
||||
// truncate the shifted mantissa
|
||||
assign ShiftedMan = ShiftedManTmp[64+51:50];
|
||||
|
||||
// calculate sticky bit
|
||||
// - take into account the possible right shift from before
|
||||
// - the sticky bit calculation covers three different sizes depending on the operation
|
||||
assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XManE[0] | (~FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (~FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
|
||||
|
||||
|
||||
// determine guard, round, and least significant bit of the result
|
||||
assign Guard = FOpCtrlE[0] ? ShiftedMan[1] : FmtE ? ShiftedMan[13] : ShiftedMan[42];
|
||||
assign Round = FOpCtrlE[0] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41];
|
||||
assign LSB = FOpCtrlE[0] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43];
|
||||
|
||||
always_comb begin//*** remove guard bit
|
||||
// Determine if you add 1
|
||||
case (FrmE)
|
||||
3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[0]) | (ResSgn&~FOpCtrlE[0]);//round down
|
||||
3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[0]) | (~ResSgn&~FOpCtrlE[0]);//round up
|
||||
3'b100: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky));//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
// don't round if the result is exact
|
||||
assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[0]);
|
||||
|
||||
// round the shifted mantissa
|
||||
assign RoundedTmp = ShiftedMan[64+1:2] + {64'b0, Plus1};
|
||||
assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + {62'b0, Plus1} : {{TmpExp, ShiftedMan[64+1:43]} + {33'b0,Plus1}, 29'b0} ;
|
||||
|
||||
// fit the rounded result into the appropriate size and take the 2's complement if needed
|
||||
assign Rounded = Res64 ? XSgnE&FOpCtrlE[0] ? -RoundedTmp[63:0] : RoundedTmp[63:0] :
|
||||
XSgnE ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
|
||||
|
||||
// extract the MSB and Sign for later use (will be used to determine underflow and overflow)
|
||||
assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32];
|
||||
assign RoundSgn = Res64 ? Rounded[63] : Rounded[31];
|
||||
|
||||
|
||||
// check if the result overflows
|
||||
assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed({{5{Bits[7]}}, Bits}))) | (~XSgnE&RoundSgn&~FOpCtrlE[1]) | (RoundMSB&(ShiftCnt==({{5{Bits[7]}}, Bits}-1))) | (~XSgnE&XInfE) | XNaNE;
|
||||
|
||||
// check if the result underflows (this calculation changes if the result is signed or unsigned)
|
||||
assign Uf = FOpCtrlE[1] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed({{5{Bits[7]}}, Bits}))) | (XSgnE&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgnE | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
|
||||
|
||||
// SgnRes is set for fp -> int conversions that are not long (32-bit integer result)
|
||||
assign SgnRes = ~FOpCtrlE[2] & FOpCtrlE[0];
|
||||
|
||||
// select the integer result
|
||||
assign CvtIntRes = Of ? FOpCtrlE[1] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} :
|
||||
Uf ? FOpCtrlE[1] ? {63'b0, Plus1&~XSgnE} : SgnRes ? {{33{1'b1}}, 31'b0} : {1'b1, 63'b0} :
|
||||
|RoundedTmp ? Rounded[64-1:0] : 64'b0;
|
||||
|
||||
// select the floating point result
|
||||
assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {{32{1'b1}}, ResSgn, ResExp[7:0], ResFrac[51:29]};
|
||||
|
||||
// select the result
|
||||
assign CvtResE = FOpCtrlE[0] ? CvtIntRes : CvtFPRes;
|
||||
|
||||
// calculate the flags
|
||||
// - only set invalid flag for out-of-range values
|
||||
// - set inexact if in representable range and not exact
|
||||
|
||||
if(`IEEE754) begin // checks before rounding
|
||||
assign Invalid = (Of | Uf)&FOpCtrlE[0];
|
||||
assign Inexact = (Guard|Round|Sticky)&~(&FOpCtrlE[1:0]&(XSgnE|Of))&~((Of|Uf)&~FOpCtrlE[1]&FOpCtrlE[0]);
|
||||
assign CvtFlgE = {Invalid&~Inexact, 3'b0, Inexact};
|
||||
end else begin // RISC-V checks if the result is in range after rounding
|
||||
assign Invalid = (Of | Uf)&FOpCtrlE[0];
|
||||
assign Inexact = (Guard|Round|Sticky)&~(&FOpCtrlE[1:0]&((XSgnE&~(ShiftCnt[12]&~Plus1))|Of))&~((Of|Uf)&~FOpCtrlE[1]&FOpCtrlE[0]);
|
||||
assign CvtFlgE = {Invalid&~Inexact, 3'b0, Inexact};
|
||||
end
|
||||
endmodule // fcvtint
|
||||
|
||||
|
@ -34,7 +34,7 @@ module fma(
|
||||
input logic reset,
|
||||
input logic FlushM, // flush the memory stage
|
||||
input logic StallM, // stall memory stage
|
||||
input logic [`FPSIZES/3:0] FmtE, FmtM, // precision 1 = double 0 = single
|
||||
input logic [`FMTBITS-1:0] FmtE, FmtM, // precision 1 = double 0 = single
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic XSgnE, YSgnE, ZSgnE, // input signs - execute stage
|
||||
@ -43,8 +43,7 @@ module fma(
|
||||
input logic XSgnM, YSgnM, // input signs - memory stage
|
||||
input logic [`NE-1:0] ZExpM, // input exponents - memory stage
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage
|
||||
input logic ZOrigDenormE, // is the original precision denormalized
|
||||
input logic XDenormE, YDenormE, ZDenormE, // is denorm
|
||||
input logic ZDenormE, // is denorm
|
||||
input logic XZeroE, YZeroE, ZZeroE, // is zero - execute stage
|
||||
input logic XNaNM, YNaNM, ZNaNM, // is NaN
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // is signaling NaN
|
||||
@ -73,21 +72,21 @@ module fma(
|
||||
logic PSgnE, PSgnM;
|
||||
logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM;
|
||||
logic Mult;
|
||||
logic ZOrigDenormM;
|
||||
logic ZDenormM;
|
||||
|
||||
fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
|
||||
.XZeroE, .YZeroE, .ZZeroE,
|
||||
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE);
|
||||
|
||||
// E/M pipeline registers
|
||||
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
|
||||
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZOrigDenormE},
|
||||
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZOrigDenormM});
|
||||
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE},
|
||||
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM});
|
||||
|
||||
fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZOrigDenormM,
|
||||
fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
|
||||
.FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
|
||||
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult,
|
||||
.FMAResM, .FMAFlgM);
|
||||
@ -101,10 +100,9 @@ module fma1(
|
||||
input logic XSgnE, YSgnE, ZSgnE, // input's signs
|
||||
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
|
||||
input logic [`NF:0] XManE, YManE, ZManE, // fractions in U(0.NF) format
|
||||
input logic XDenormE, YDenormE, ZDenormE, // is the input denormal
|
||||
input logic XZeroE, YZeroE, ZZeroE, // is the input zero
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic [`FPSIZES/3:0] FmtE, // precision 1 = double 0 = single
|
||||
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
|
||||
output logic [`NE+1:0] ProdExpE, // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
|
||||
output logic AddendStickyE, // sticky bit that is calculated during alignment
|
||||
output logic KillProdE, // set the product to zero before addition if the product is too small to matter
|
||||
@ -116,13 +114,11 @@ module fma1(
|
||||
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift cnt
|
||||
);
|
||||
|
||||
logic [`NE-1:0] Denorm; // value of a denormaized number based on precision
|
||||
logic [2*`NF+1:0] ProdManE; // 1.X frac * 1.Y frac in U(2.2Nf) format
|
||||
logic [3*`NF+5:0] AlignedAddendE; // Z aligned for addition in U(NF+5.2NF+1)
|
||||
logic [3*`NF+6:0] AlignedAddendInv; // aligned addend possibly inverted
|
||||
logic [2*`NF+1:0] ProdManKilled; // the product's mantissa possibly killed
|
||||
logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum
|
||||
logic [`NE-1:0] XExpVal, YExpVal; // exponent value after taking into accound denormals
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Calculate the product
|
||||
// - When multipliying two fp numbers, add the exponents
|
||||
@ -133,8 +129,8 @@ module fma1(
|
||||
|
||||
|
||||
// calculate the product's exponent
|
||||
expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE, .XDenormE, .YDenormE, .XExpVal, .YExpVal,
|
||||
.Denorm, .ProdExpE);
|
||||
expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE,
|
||||
.ProdExpE);
|
||||
|
||||
// multiplication of the mantissa's
|
||||
mult mult(.XManE, .YManE, .ProdManE);
|
||||
@ -143,7 +139,7 @@ module fma1(
|
||||
// Alignment shifter
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
align align(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm, .XExpVal, .YExpVal,
|
||||
align align(.ZExpE, .ZManE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .XExpE, .YExpE,
|
||||
.AlignedAddendE, .AddendStickyE, .KillProdE);
|
||||
|
||||
// calculate the signs and take the opperation into account
|
||||
@ -165,53 +161,14 @@ endmodule
|
||||
|
||||
|
||||
module expadd(
|
||||
input logic [`FPSIZES/3:0] FmtE, // precision
|
||||
input logic [`FMTBITS-1:0] FmtE, // precision
|
||||
input logic [`NE-1:0] XExpE, YExpE, // input exponents
|
||||
input logic XDenormE, YDenormE, // are the inputs denormalized
|
||||
input logic XZeroE, YZeroE, // are the inputs zero
|
||||
output logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals
|
||||
output logic [`NE-1:0] Denorm, // value of denormalized exponent
|
||||
output logic [`NE+1:0] ProdExpE // product's exponent B^(1023)NE+2
|
||||
);
|
||||
|
||||
|
||||
// denormalized numbers have diffrent values depending on which precison it is.
|
||||
// FLEN - 1
|
||||
// Other - BIAS - other bias + 1
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
assign Denorm = 1;
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign Denorm = FmtE ? (`NE)'(1) : (`NE)'(`BIAS)-(`NE)'(`BIAS1)+(`NE)'(1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtE)
|
||||
`FMT: Denorm = 1;
|
||||
`FMT1: Denorm = `BIAS-`BIAS1+1;
|
||||
`FMT2: Denorm = `BIAS-`BIAS2+1;
|
||||
default: Denorm = 1'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtE)
|
||||
2'h3: Denorm = 1;
|
||||
2'h1: Denorm = `BIAS-`D_BIAS+1;
|
||||
2'h0: Denorm = `BIAS-`S_BIAS+1;
|
||||
2'h2: Denorm = `BIAS-`H_BIAS+1;
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
// pick denormalized value or exponent
|
||||
assign XExpVal = XDenormE ? Denorm : XExpE;
|
||||
assign YExpVal = YDenormE ? Denorm : YExpE;
|
||||
// kill the exponent if the product is zero - either X or Y is 0
|
||||
assign ProdExpE = ({2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};
|
||||
assign ProdExpE = ({2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};
|
||||
|
||||
endmodule
|
||||
|
||||
@ -258,13 +215,10 @@ endmodule
|
||||
|
||||
|
||||
module align(
|
||||
input logic [`NE-1:0] ZExpE, // biased exponents in B(NE.0) format
|
||||
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
|
||||
input logic [`NF:0] ZManE, // fractions in U(0.NF) format]
|
||||
input logic ZDenormE, // is the input denormal
|
||||
input logic XZeroE, YZeroE, ZZeroE, // is the input zero
|
||||
input logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals
|
||||
input logic [`NE+1:0] ProdExpE, // the product's exponent
|
||||
input logic [`NE-1:0] Denorm, // the biased value of a denormalized number
|
||||
output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
|
||||
output logic AddendStickyE, // Sticky bit calculated from the aliged addend
|
||||
output logic KillProdE // should the product be set to zero
|
||||
@ -273,7 +227,6 @@ module align(
|
||||
logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
|
||||
logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
|
||||
logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter U(NF+5.3NF+1)
|
||||
logic [`NE-1:0] ZExpVal; // Exponent value after taking into account denormals
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Alignment shifter
|
||||
@ -282,11 +235,9 @@ module align(
|
||||
// determine the shift count for alignment
|
||||
// - negitive means Z is larger, so shift Z left
|
||||
// - positive means the product is larger, so shift Z right
|
||||
// - Denormal numbers have a diffrent exponent value depending on the precision
|
||||
assign ZExpVal = ZDenormE ? Denorm : ZExpE;
|
||||
// assign AlignCnt = ProdExpE - {2'b0, ZExpVal} + (`NF+3);
|
||||
// *** can we use ProdExpE instead of XExp/YExp to save an adder? DH 5/12/22
|
||||
assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpVal};
|
||||
// KP- yes we used ProdExpE originally but we did this for timing
|
||||
assign AlignCnt = XZeroE|YZeroE ? -(`NE+2)'($unsigned(1)) : {2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF)+3 - {2'b0, ZExpE};
|
||||
|
||||
// Defualt Addition without shifting
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
@ -369,7 +320,7 @@ module add(
|
||||
|
||||
// Do the addition
|
||||
// - calculate a positive and negitive sum in parallel
|
||||
assign PreSum = AlignedAddendInv + {55'b0, ProdManKilled, 2'b0} + {{3*`NF+6{1'b0}}, InvZE};
|
||||
assign PreSum = AlignedAddendInv + {{`NF+3{1'b0}}, ProdManKilled, 2'b0} + {{3*`NF+6{1'b0}}, InvZE};
|
||||
assign NegPreSum = XZeroE|YZeroE|KillProdE ? {1'b0, AlignedAddendE} : {1'b0, AlignedAddendE} + {{`NF+3{1'b1}}, ~ProdManKilled, 2'b0} + {(3*`NF+7)'(4)};
|
||||
|
||||
// Is the sum negitive
|
||||
@ -409,22 +360,10 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
|
||||
|
||||
|
||||
|
||||
lzc lzc(.f, .NormCntE);
|
||||
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE));
|
||||
|
||||
endmodule
|
||||
|
||||
module lzc( // leading zero counter: counts the 0 bits of f above its first 1, scanning down from the MSB
|
||||
input logic [3*`NF+6:0] f, // vector to scan (MSB is bit 3*`NF+6)
|
||||
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift
|
||||
);
|
||||
|
||||
logic [$clog2(3*`NF+7)-1:0] i; // running count of leading zeros
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned($clog2(3*`NF+7)'(3)*($clog2(3*`NF+7))'(`NF)+($clog2(3*`NF+7))'(6))) i = i+1; // search for leading one; the second term bounds the search at index 3*`NF+6
|
||||
NormCntE = i;
|
||||
end
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
@ -439,7 +378,7 @@ module fma2(
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
@ -450,7 +389,7 @@ module fma2(
|
||||
input logic [3*`NF+5:0] SumM, // the positive sum
|
||||
input logic NegSumM, // was the sum negitive
|
||||
input logic InvZM, // do you invert Z
|
||||
input logic ZOrigDenormM, // is the original precision denormalized
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZSgnEffM, // the modified Z sign - depends on instruction
|
||||
input logic PSgnM, // the product's sign
|
||||
input logic Mult, // multiply opperation
|
||||
@ -465,7 +404,7 @@ module fma2(
|
||||
logic ResultSgn, ResultSgnTmp; // Result sign
|
||||
logic [`NE+1:0] SumExp; // exponent of the normalized sum
|
||||
logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow
|
||||
logic [`NF+2:0] NormSum; // normalized sum
|
||||
logic [`NF+1:0] NormSum; // normalized sum
|
||||
logic NormSumSticky; // sticky bit calulated from the normalized sum
|
||||
logic SumZero; // is the sum zero
|
||||
logic ResultDenorm; // is the result denormalized
|
||||
@ -486,7 +425,7 @@ module fma2(
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
|
||||
.ZOrigDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
|
||||
.ZDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
|
||||
|
||||
|
||||
|
||||
@ -533,7 +472,7 @@ module fma2(
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZOrigDenormM,
|
||||
resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM,
|
||||
.FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
|
||||
.ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow,
|
||||
.ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
|
||||
@ -578,11 +517,11 @@ module normalize(
|
||||
input logic [`NE-1:0] ZExpM, // exponent of Z
|
||||
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // normalization shift count
|
||||
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic KillProdM, // is the product set to zero
|
||||
input logic ZOrigDenormM,
|
||||
input logic ZDenormM,
|
||||
input logic AddendStickyM, // the sticky bit caclulated from the aligned addend
|
||||
output logic [`NF+2:0] NormSum, // normalized sum
|
||||
output logic [`NF+1:0] NormSum, // normalized sum
|
||||
output logic SumZero, // is the sum zero
|
||||
output logic NormSumSticky, UfSticky, // sticky bits
|
||||
output logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
@ -599,12 +538,12 @@ module normalize(
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//*** insert bias-bias simplification in fcvt.sv/phone pictures/ whiteboard... if still there
|
||||
//*** insert bias-bias simplification in fcvt.sv/phone pictures
|
||||
// Determine if the sum is zero
|
||||
assign SumZero = ~(|SumM);
|
||||
|
||||
// calculate the sum's exponent
|
||||
assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZOrigDenormM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
|
||||
assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, NormCntM} + 1 - (`NE+2)'(`NF+4));
|
||||
|
||||
//convert the sum's exponent into the propper percision
|
||||
if (`FPSIZES == 1) begin
|
||||
@ -617,8 +556,8 @@ module normalize(
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: SumExpTmp = SumExpTmpTmp;
|
||||
`FMT1: SumExpTmp = (SumExpTmpTmp-`BIAS+`BIAS1)&{`NE+2{|SumExpTmpTmp}};
|
||||
`FMT2: SumExpTmp = (SumExpTmpTmp-`BIAS+`BIAS2)&{`NE+2{|SumExpTmpTmp}};
|
||||
`FMT1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
|
||||
`FMT2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|SumExpTmpTmp}};
|
||||
default: SumExpTmp = `NE+2'bx;
|
||||
endcase
|
||||
end
|
||||
@ -627,9 +566,9 @@ module normalize(
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: SumExpTmp = SumExpTmpTmp;
|
||||
2'h1: SumExpTmp = (SumExpTmpTmp-`BIAS+`D_BIAS)&{`NE+2{|SumExpTmpTmp}};
|
||||
2'h0: SumExpTmp = (SumExpTmpTmp-`BIAS+`S_BIAS)&{`NE+2{|SumExpTmpTmp}};
|
||||
2'h2: SumExpTmp = (SumExpTmpTmp-`BIAS+`H_BIAS)&{`NE+2{|SumExpTmpTmp}};
|
||||
2'h1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|SumExpTmpTmp}};
|
||||
2'h0: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|SumExpTmpTmp}};
|
||||
2'h2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|SumExpTmpTmp}};
|
||||
endcase
|
||||
end
|
||||
|
||||
@ -707,27 +646,27 @@ module normalize(
|
||||
assign LZAPlus2 = SumShifted[3*`NF+8];
|
||||
// the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through a sum, so we can leave the bottom statement alone
|
||||
assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
|
||||
assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3];
|
||||
assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4];
|
||||
|
||||
// Calculate the sticky bit
|
||||
if (`FPSIZES == 1) begin
|
||||
assign NormSumSticky = |CorrSumShifted[2*`NF+2:0];
|
||||
assign NormSumSticky = |CorrSumShifted[2*`NF+3:0];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// 3*NF+5 - NF1 - 3
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) |
|
||||
(|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&~FmtM);
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
|
||||
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) |
|
||||
(|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&((FmtM==`FMT1)|(FmtM==`FMT2))) |
|
||||
(|CorrSumShifted[3*`NF+2-`NF2:3*`NF+3-`NF1]&(FmtM==`FMT2));
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
|
||||
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) |
|
||||
(|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2));
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) |
|
||||
(|CorrSumShifted[3*`NF+2-`D_NF:2*`NF+3]&((FmtM==1)|(FmtM==0)|(FmtM==2))) |
|
||||
(|CorrSumShifted[3*`NF+2-`S_NF:3*`NF+3-`D_NF]&((FmtM==0)|(FmtM==2))) |
|
||||
(|CorrSumShifted[3*`NF+2-`H_NF:3*`NF+3-`S_NF]&(FmtM==2));
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
|
||||
(|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) |
|
||||
(|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) |
|
||||
(|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2));
|
||||
|
||||
end
|
||||
|
||||
@ -735,17 +674,17 @@ module normalize(
|
||||
|
||||
// Determine sum's exponent
|
||||
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
|
||||
assign SumExp = (SumExpTmp+{12'b0, LZAPlus1&~KillProdM}+{11'b0, LZAPlus2&~KillProdM, 1'b0}+{12'b0, ~ResultDenorm&PreResultDenorm&~KillProdM}+{12'b0, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
|
||||
assign SumExp = (SumExpTmp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResultDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
|
||||
// recalculate if the result is denormalized
|
||||
assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
|
||||
|
||||
endmodule
|
||||
|
||||
module fmaround(
|
||||
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [2:0] FrmM, // rounding mode
|
||||
input logic UfSticky, // sticky bit for underlow calculation
|
||||
input logic [`NF+2:0] NormSum, // normalized sum
|
||||
input logic [`NF+1:0] NormSum, // normalized sum
|
||||
input logic AddendStickyM, // addend's sticky bit
|
||||
input logic NormSumSticky, // normalized sum's sticky bit
|
||||
input logic ZZeroM, // is Z zero
|
||||
@ -799,83 +738,53 @@ module fmaround(
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
assign Guard = NormSum[2];
|
||||
assign Round = NormSum[1];
|
||||
assign LSBNormSum = NormSum[3];
|
||||
assign LSBNormSum = NormSum[2];
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfGuard = NormSum[1];
|
||||
assign UfRound = NormSum[0];
|
||||
assign UfLSBNormSum = NormSum[2];
|
||||
|
||||
// determine sticky
|
||||
assign Sticky = UfSticky | NormSum[0];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// \/-------------NF---------------,
|
||||
// | NF1 | 3 | |
|
||||
// | NF1 | 2 | |
|
||||
// '-------NF1------^
|
||||
|
||||
// determine guard, round, and least significant bit of the result
|
||||
assign Guard = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
|
||||
assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
|
||||
assign LSBNormSum = FmtM ? NormSum[3] : NormSum[`NF-`NF1+3];
|
||||
assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfGuard = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
|
||||
assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
|
||||
assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
|
||||
|
||||
// determine sticky
|
||||
assign Sticky = UfSticky | (FmtM ? NormSum[0] : NormSum[`NF-`NF1]);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[2];
|
||||
Round = NormSum[1];
|
||||
LSBNormSum = NormSum[3];
|
||||
LSBNormSum = NormSum[2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[1];
|
||||
UfRound = NormSum[0];
|
||||
UfLSBNormSum = NormSum[2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[0];
|
||||
end
|
||||
`FMT1: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[`NF-`NF1+2];
|
||||
Round = NormSum[`NF-`NF1+1];
|
||||
LSBNormSum = NormSum[`NF-`NF1+3];
|
||||
LSBNormSum = NormSum[`NF-`NF1+2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[`NF-`NF1+1];
|
||||
UfRound = NormSum[`NF-`NF1];
|
||||
UfLSBNormSum = NormSum[`NF-`NF1+2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[`NF-`NF1];
|
||||
end
|
||||
`FMT2: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[`NF-`NF2+2];
|
||||
Round = NormSum[`NF-`NF2+1];
|
||||
LSBNormSum = NormSum[`NF-`NF2+3];
|
||||
LSBNormSum = NormSum[`NF-`NF2+2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[`NF-`NF2+1];
|
||||
UfRound = NormSum[`NF-`NF2];
|
||||
UfLSBNormSum = NormSum[`NF-`NF2+2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[`NF-`NF2];
|
||||
end
|
||||
default: begin
|
||||
Guard = 1'bx;
|
||||
Round = 1'bx;
|
||||
LSBNormSum = 1'bx;
|
||||
UfGuard = 1'bx;
|
||||
UfRound = 1'bx;
|
||||
UfLSBNormSum = 1'bx;
|
||||
Sticky = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
@ -885,56 +794,40 @@ module fmaround(
|
||||
case (FmtM)
|
||||
2'h3: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[2];
|
||||
Round = NormSum[1];
|
||||
LSBNormSum = NormSum[3];
|
||||
LSBNormSum = NormSum[2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[1];
|
||||
UfRound = NormSum[0];
|
||||
UfLSBNormSum = NormSum[2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[0];
|
||||
end
|
||||
2'h1: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[`NF-`D_NF+2];
|
||||
Round = NormSum[`NF-`D_NF+1];
|
||||
LSBNormSum = NormSum[`NF-`D_NF+3];
|
||||
LSBNormSum = NormSum[`NF-`D_NF+2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[`NF-`D_NF+1];
|
||||
UfRound = NormSum[`NF-`D_NF];
|
||||
UfLSBNormSum = NormSum[`NF-`D_NF+2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[`NF-`D_NF];
|
||||
end
|
||||
2'h0: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[`NF-`S_NF+2];
|
||||
Round = NormSum[`NF-`S_NF+1];
|
||||
LSBNormSum = NormSum[`NF-`S_NF+3];
|
||||
LSBNormSum = NormSum[`NF-`S_NF+2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[`NF-`S_NF+1];
|
||||
UfRound = NormSum[`NF-`S_NF];
|
||||
UfLSBNormSum = NormSum[`NF-`S_NF+2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[`NF-`S_NF];
|
||||
end
|
||||
2'h2: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[`NF-`H_NF+2];
|
||||
Round = NormSum[`NF-`H_NF+1];
|
||||
LSBNormSum = NormSum[`NF-`H_NF+3];
|
||||
LSBNormSum = NormSum[`NF-`H_NF+2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[`NF-`H_NF+1];
|
||||
UfRound = NormSum[`NF-`H_NF];
|
||||
UfLSBNormSum = NormSum[`NF-`H_NF+2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[`NF-`H_NF];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
// used to determine underflow flag
|
||||
assign UfLSBNormSum = Round;
|
||||
// determine sticky
|
||||
assign Sticky = UfSticky | UfRound;
|
||||
|
||||
|
||||
// Determine if a small number was supposed to be subtracted
|
||||
@ -944,28 +837,28 @@ module fmaround(
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (FrmM)
|
||||
3'b000: CalcPlus1 = Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&LSBNormSum&~SubBySmallNum));//round to nearest even
|
||||
3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round down
|
||||
3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round up
|
||||
3'b100: CalcPlus1 = (Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&~SubBySmallNum)));//round to nearest max magnitude
|
||||
3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down
|
||||
3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up
|
||||
3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you add 1 (for underflow flag)
|
||||
case (FrmM)
|
||||
3'b000: UfCalcPlus1 = UfGuard & (UfRound | (UfSticky&UfRound|~UfSubBySmallNum) | (~Sticky&UfLSBNormSum&~UfSubBySmallNum));//round to nearest even
|
||||
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even
|
||||
3'b001: UfCalcPlus1 = 0;//round to zero
|
||||
3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round down
|
||||
3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round up
|
||||
3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (UfSticky&~(~UfRound&UfSubBySmallNum)) | (~Sticky&~UfSubBySmallNum)));//round to nearest max magnitude
|
||||
3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down
|
||||
3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up
|
||||
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
|
||||
default: UfCalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you subtract 1
|
||||
case (FrmM)
|
||||
3'b000: CalcMinus1 = 0;//round to nearest even
|
||||
3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero
|
||||
3'b010: CalcMinus1 = ~ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round down
|
||||
3'b011: CalcMinus1 = ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round up
|
||||
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
|
||||
3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down
|
||||
3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up
|
||||
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
|
||||
default: CalcMinus1 = 1'bx;
|
||||
endcase
|
||||
@ -973,9 +866,9 @@ module fmaround(
|
||||
end
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (Sticky | Guard | Round);
|
||||
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard);//UfRound is part of sticky
|
||||
assign Minus1 = CalcMinus1 & (Sticky | Guard | Round);
|
||||
assign Plus1 = CalcPlus1 & (Sticky | Round);
|
||||
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky
|
||||
assign Minus1 = CalcMinus1 & (Sticky | Round);
|
||||
|
||||
// Compute rounded result
|
||||
if (`FPSIZES == 1) begin
|
||||
@ -1011,7 +904,7 @@ module fmaround(
|
||||
|
||||
end
|
||||
|
||||
assign NormSumTruncated = NormSum[`NF+2:3];
|
||||
assign NormSumTruncated = NormSum[`NF+1:2];
|
||||
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
|
||||
assign ResultExp = FullResultExp[`NE-1:0];
|
||||
|
||||
@ -1027,7 +920,7 @@ module fmaflags(
|
||||
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
input logic ZSgnEffM, PSgnM, // the product and modified Z signs
|
||||
input logic Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding
|
||||
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
output logic Invalid, Overflow, Underflow, // flags used to select the result
|
||||
output logic [4:0] FMAFlgM // FMA flags
|
||||
);
|
||||
@ -1083,12 +976,12 @@ module fmaflags(
|
||||
// Set Underflow flag if the number is too small to be represented in normal numbers
|
||||
// - Don't set the underflow flag if the result is exact
|
||||
|
||||
assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
// exp is negative | result is denorm | exp was denorm but rounded to norm, and if given an unbounded exponent it would stay denormal
|
||||
assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
// Set Inexact flag if the result is different from what would be output given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed result isn't output
|
||||
assign Inexact = (Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
assign Inexact = (Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
|
||||
// Combine flags
|
||||
// - FMA can't set the Divide by zero flag
|
||||
@ -1103,12 +996,13 @@ module resultselect(
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic ZOrigDenormM, // is the original precision denormalized
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZZeroM,
|
||||
input logic ZSgnEffM, // the modified Z sign - depends on instruction
|
||||
input logic PSgnM, // the product's sign
|
||||
input logic ResultSgn, // the result's sign
|
||||
@ -1134,7 +1028,7 @@ module resultselect(
|
||||
end
|
||||
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
|
||||
assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
|
||||
assign NormResult = {ResultSgn, ResultExp, ResultFrac};
|
||||
@ -1153,7 +1047,7 @@ module resultselect(
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
|
||||
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
|
||||
@ -1173,7 +1067,7 @@ module resultselect(
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
|
||||
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
|
||||
NormResult = {ResultSgn, ResultExp, ResultFrac};
|
||||
@ -1189,7 +1083,7 @@ module resultselect(
|
||||
end
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
|
||||
@ -1206,7 +1100,7 @@ module resultselect(
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
|
||||
{{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
|
||||
KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
|
||||
@ -1244,7 +1138,7 @@ module resultselect(
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
|
||||
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
|
||||
NormResult = {ResultSgn, ResultExp, ResultFrac};
|
||||
@ -1260,7 +1154,7 @@ module resultselect(
|
||||
end
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
|
||||
{{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
|
||||
KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
|
||||
@ -1277,7 +1171,7 @@ module resultselect(
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
|
||||
{{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
|
||||
KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
|
||||
@ -1295,7 +1189,7 @@ module resultselect(
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
|
||||
{{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
|
||||
KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
|
||||
KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
|
||||
|
@ -65,7 +65,7 @@ module fpu (
|
||||
// control signals
|
||||
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
|
||||
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
|
||||
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
|
||||
logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
|
||||
logic FDivStartD, FDivStartE; // Start division or squareroot
|
||||
logic FWriteIntD; // Write to integer register
|
||||
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
|
||||
@ -77,25 +77,25 @@ module fpu (
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
|
||||
|
||||
// regfile signals
|
||||
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [63:0] FSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
|
||||
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
|
||||
|
||||
// unpacking signals
|
||||
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
|
||||
logic XSgnM, YSgnM; // input's sign - memory stage
|
||||
logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
|
||||
logic [10:0] ZExpM; // input's exponent - memory stage
|
||||
logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage
|
||||
logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage
|
||||
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
|
||||
logic [`NE-1:0] ZExpM; // input's exponent - memory stage
|
||||
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage
|
||||
logic [`NF:0] XManM, YManM, ZManM; // input's fraction - memory stage
|
||||
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
|
||||
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
|
||||
logic XNaNQ, YNaNQ; // is the input a NaN - divide
|
||||
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
|
||||
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
|
||||
logic XDenormE, YDenormE, ZDenormE; // is the input denormalized
|
||||
logic XDenormE, ZDenormE; // is the input denormalized
|
||||
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
|
||||
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
|
||||
logic XZeroQ, YZeroQ; // is the input zero - divide
|
||||
@ -103,34 +103,33 @@ module fpu (
|
||||
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
|
||||
logic XInfQ, YInfQ; // is the input infinity - divide
|
||||
logic XExpMaxE; // is the exponent all ones (max value)
|
||||
logic XNormE; // is normal
|
||||
logic ZOrigDenormE, XOrigDenormE;
|
||||
logic FmtQ;
|
||||
logic FOpCtrlQ;
|
||||
|
||||
// result and flag signals
|
||||
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
|
||||
logic [4:0] FDivFlgM; // divide/squareroot flags
|
||||
logic [63:0] FMAResM, FMAResW; // FMA/multiply result
|
||||
logic [`FLEN-1:0] FMAResM, FMAResW; // FMA/multiply result
|
||||
logic [4:0] FMAFlgM; // FMA/multiply result
|
||||
logic [63:0] ReadResW; // read result (load instruction)
|
||||
logic [63:0] CvtResE; // FP <-> int convert result
|
||||
logic [`FLEN-1:0] ReadResW; // read result (load instruction)
|
||||
logic [`FLEN-1:0] CvtResE; // FP <-> int convert result
|
||||
logic [`XLEN-1:0] CvtIntResE; // FP <-> int convert result
|
||||
logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this
|
||||
logic [63:0] ClassResE; // classify result
|
||||
logic [63:0] CmpResE; // compare result
|
||||
logic [`XLEN-1:0] ClassResE; // classify result
|
||||
logic [`FLEN-1:0] CmpResE; // compare result
|
||||
logic CmpNVE; // compare invalid flag (Not Valid)
|
||||
logic [63:0] SgnResE; // sign injection result
|
||||
logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
|
||||
logic [`FLEN-1:0] SgnResE; // sign injection result
|
||||
logic [`FLEN-1:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
|
||||
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
|
||||
logic [`XLEN-1:0] FIntResE;
|
||||
logic [63:0] FPUResultW; // final FP result being written to the FP register
|
||||
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
|
||||
// other signals
|
||||
logic FDivSqrtDoneE; // is divide done
|
||||
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
|
||||
logic load_preload; // enable for FF on fpdivsqrt
|
||||
logic [63:0] AlignedSrcAE; // align SrcA to the floating point format
|
||||
logic [63:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
|
||||
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
|
||||
// DECODE STAGE
|
||||
|
||||
@ -146,12 +145,12 @@ module fpu (
|
||||
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
|
||||
|
||||
// D/E pipeline registers
|
||||
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
|
||||
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
|
||||
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
|
||||
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
|
||||
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
{Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(16) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
flopenrc #(16+int'(`FMTBITS-1)) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
|
||||
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
|
||||
|
||||
@ -162,24 +161,46 @@ module fpu (
|
||||
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
|
||||
|
||||
// forwarding muxs
|
||||
mux3 #(64) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
|
||||
mux3 #(64) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
|
||||
mux3 #(64) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
|
||||
mux3 #(64) fyaddmux (FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0},
|
||||
{2'b0, {10{1'b1}}, 52'b0},
|
||||
{FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01)},
|
||||
FSrcYE); // Force Z to be 0 for multiply instructions
|
||||
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
|
||||
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
|
||||
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
|
||||
|
||||
|
||||
generate
|
||||
if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)};
|
||||
else if(`FPSIZES == 2)
|
||||
mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
|
||||
else if(`FPSIZES == 3 | `FPSIZES == 4)
|
||||
mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)},
|
||||
{{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)},
|
||||
{{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)},
|
||||
{2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
|
||||
endgenerate
|
||||
|
||||
|
||||
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), FSrcYE); // Force Z to be 0 for multiply instructions
|
||||
|
||||
// Force Z to be 0 for multiply instructions
|
||||
mux2 #(64) fmulzeromux (64'hFFFFFFFF00000000, 64'b0, FmtE, BoxedZeroE); // NaN boxing for 32-bit zero
|
||||
mux3 #(64) fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
|
||||
|
||||
generate
|
||||
if(`FPSIZES == 1) assign BoxedZeroE = 0;
|
||||
else if(`FPSIZES == 2)
|
||||
mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
|
||||
else if(`FPSIZES == 3 | `FPSIZES == 4)
|
||||
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}},
|
||||
{{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}},
|
||||
{{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}},
|
||||
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
|
||||
endgenerate
|
||||
|
||||
mux3 #(`FLEN) fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
|
||||
|
||||
// unpack unit
|
||||
// - splits FP inputs into their various parts
|
||||
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity)
|
||||
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, .ZOrigDenormE, .XOrigDenormE,
|
||||
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
|
||||
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
|
||||
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE,
|
||||
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
|
||||
|
||||
// FMA
|
||||
// - two stage FMA
|
||||
@ -188,26 +209,36 @@ module fpu (
|
||||
// - handles FMA and multiply instructions
|
||||
fma fma (.clk, .reset, .FlushM, .StallM,
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
|
||||
.ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
|
||||
.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM,
|
||||
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
|
||||
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
|
||||
.FOpCtrlE, .ZOrigDenormE,
|
||||
.FOpCtrlE,
|
||||
.FmtE, .FmtM, .FrmM,
|
||||
.FMAFlgM, .FMAResM);
|
||||
|
||||
// fpdivsqrt using Goldschmidt's iteration
|
||||
flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E),
|
||||
if(`FLEN == 64) begin
|
||||
flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E),
|
||||
.clear(FDivSqrtDoneE), .en(load_preload),
|
||||
.reset(reset), .clk(clk));
|
||||
flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
|
||||
flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E),
|
||||
.clear(FDivSqrtDoneE), .en(load_preload),
|
||||
.reset(reset), .clk(clk));
|
||||
flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE, FOpCtrlE[0]}),
|
||||
end
|
||||
else if (`FLEN == 32) begin
|
||||
flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E),
|
||||
.clear(FDivSqrtDoneE), .en(load_preload),
|
||||
.reset(reset), .clk(clk));
|
||||
flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E),
|
||||
.clear(FDivSqrtDoneE), .en(load_preload),
|
||||
.reset(reset), .clk(clk));
|
||||
end
|
||||
flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}),
|
||||
.q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
|
||||
.clear(FDivSqrtDoneE), .en(load_preload),
|
||||
.reset(reset), .clk(clk));
|
||||
fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlQ),
|
||||
fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ),
|
||||
.reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
|
||||
.XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
|
||||
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
|
||||
@ -215,40 +246,55 @@ module fpu (
|
||||
// other FP execution units
|
||||
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
|
||||
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE);
|
||||
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
|
||||
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
|
||||
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XOrigDenormE,
|
||||
fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
|
||||
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
|
||||
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE,
|
||||
.XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE);
|
||||
|
||||
// data to be stored in memory - to IEU
|
||||
// - FP uses NaN-boxing format
|
||||
// - if there are any unused bits the most significant bits are filled with 1s
|
||||
assign FWriteDataE = FSrcYE[`XLEN-1:0];
|
||||
|
||||
// Align SrcA to MSB when single precision
|
||||
mux2 #(64) SrcAMux({{32{1'b1}}, ForwardedSrcAE[31:0]}, {{64-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
|
||||
if (`FLEN>`XLEN) assign FWriteDataE = FSrcYE[`XLEN-1:0];
|
||||
else assign FWriteDataE = {{`XLEN-`FLEN{FSrcYE[`FLEN-1]}}, FSrcYE};
|
||||
|
||||
// NaN box SrcA
|
||||
generate
|
||||
if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE};
|
||||
else if(`FPSIZES == 2)
|
||||
mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
|
||||
else if(`FPSIZES == 3 | `FPSIZES == 4)
|
||||
mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ForwardedSrcAE[`S_LEN-1:0]},
|
||||
{{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]},
|
||||
{{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]},
|
||||
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
|
||||
endgenerate
|
||||
// select a result that may be written to the FP register
|
||||
mux4 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
|
||||
mux4 #(`FLEN) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
|
||||
mux4 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
|
||||
|
||||
// select the result that may be written to the integer register - to IEU
|
||||
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0],
|
||||
CvtIntResE, FIntResSelE, FIntResE);
|
||||
if (`FLEN>`XLEN)
|
||||
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE,
|
||||
CvtIntResE, FIntResSelE, FIntResE);
|
||||
else
|
||||
mux4 #(`XLEN) IntResMux({{`XLEN-`FLEN{CmpResE[`FLEN-1:0]}}, CmpResE}, {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}, ClassResE,
|
||||
CvtIntResE, FIntResSelE, FIntResE);
|
||||
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
|
||||
// *** make sure the fpu matches the chapter diagram
|
||||
|
||||
// E/M pipe registers
|
||||
|
||||
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
|
||||
flopenrc #(54) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
|
||||
flopenrc #(54) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
|
||||
flopenrc #(64) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
|
||||
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
|
||||
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
|
||||
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
|
||||
flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM,
|
||||
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
|
||||
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
|
||||
flopenrc #(64) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM);
|
||||
flopenrc #(`FLEN) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM);
|
||||
flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
|
||||
flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
|
||||
flopenrc #(7) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
flopenrc #(7+int'(`FMTBITS-1)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResultSelE, FrmE, FmtE},
|
||||
{FRegWriteM, FResultSelM, FrmM, FmtM});
|
||||
|
||||
@ -258,10 +304,10 @@ module fpu (
|
||||
mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM);
|
||||
|
||||
// M/W pipe registers
|
||||
flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
|
||||
flopenrc #(`FLEN) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
|
||||
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
|
||||
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
|
||||
flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
flopenrc #(`FLEN) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
|
||||
flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FRegWriteM, FResultSelM, FmtM},
|
||||
{FRegWriteW, FResultSelW, FmtW});
|
||||
|
||||
@ -270,8 +316,18 @@ module fpu (
|
||||
// put ReadData into NaN-boxing format
|
||||
// - if there are any unused bits the most significant bits are filled with 1s
|
||||
// - for load instruction
|
||||
mux2 #(64) ReadResMux ({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
|
||||
generate
|
||||
if(`FPSIZES == 1) assign ReadResW = {{`FLEN-`XLEN{1'b1}}, ReadDataW};
|
||||
else if(`FPSIZES == 2)
|
||||
mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ReadDataW[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
|
||||
else if(`FPSIZES == 3 | `FPSIZES == 4)
|
||||
mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ReadDataW[`S_LEN-1:0]},
|
||||
{{`FLEN-`D_LEN{1'b1}}, ReadDataW[`D_LEN-1:0]},
|
||||
{{`FLEN-`H_LEN{1'b1}}, ReadDataW[`H_LEN-1:0]},
|
||||
{{`FLEN-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); // NaN boxing zeroes
|
||||
endgenerate
|
||||
|
||||
// select the result to be written to the FP register
|
||||
mux4 #(64) FPUResultMux (ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW);
|
||||
if(`FLEN>=64)
|
||||
mux4 #(`FLEN) FPUResultMux (ReadResW, FMAResW, {{`FLEN-64{1'b0}},FDivResW}, FResW, FResultSelW, FPUResultW);
|
||||
endmodule // fpu
|
||||
|
@ -33,10 +33,10 @@ module fregfile (
|
||||
input logic clk, reset,
|
||||
input logic we4,
|
||||
input logic [4:0] a1, a2, a3, a4,
|
||||
input logic [63:0] wd4,
|
||||
output logic [63:0] rd1, rd2, rd3);
|
||||
input logic [`FLEN-1:0] wd4,
|
||||
output logic [`FLEN-1:0] rd1, rd2, rd3);
|
||||
|
||||
logic [63:0] rf[31:0];
|
||||
logic [`FLEN-1:0] rf[31:0];
|
||||
integer i;
|
||||
|
||||
// three ported register file
|
||||
|
@ -1,29 +0,0 @@
|
||||
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
|
||||
|
||||
module fsgn (
    input  logic        XSgnE, YSgnE,  // sign bits of the X and Y operands
    input  logic [63:0] FSrcXE,        // X operand; every result bit except the sign comes from here
    input  logic        FmtE,          // precision: 1 = double, 0 = single
    input  logic [1:0]  SgnOpCodeE,    // operation select, see the table below
    output logic [63:0] SgnResE        // X with the injected sign, NaN-boxed for single precision
);

    logic ResSgn;   // sign bit injected into the result

    // SgnOpCodeE encoding (bit 1 alone selects the XOR form):
    //   00 - fsgnj  : result sign = sign of Y
    //   01 - fsgnjn : result sign = inverted sign of Y
    //   1x - fsgnjx : result sign = sign of X XOR sign of Y
    assign ResSgn = SgnOpCodeE[1] ? (XSgnE ^ YSgnE) : (YSgnE ^ SgnOpCodeE[0]);

    // Assemble the result.
    //   double: the sign sits in bit 63, bits 62:0 are taken from FSrcXE.
    //   single: the sign sits in bit 31, bits 30:0 are taken from FSrcXE, and
    //           bits 63:32 are forced to all ones so the result is always
    //           NaN-boxed, even when the upper word of FSrcXE is not all ones.
    // NOTE(review): the low 31 bits are passed through unchecked; whether an
    // improperly boxed input should be replaced by a canonical NaN here is
    // left to the surrounding datapath - confirm against the unpack stage.
    assign SgnResE = FmtE ? {ResSgn, FSrcXE[62:0]}
                          : {{32{1'b1}}, ResSgn, FSrcXE[30:0]};

endmodule
|
80
pipelined/src/fpu/fsgninj.sv
Executable file
80
pipelined/src/fpu/fsgninj.sv
Executable file
@ -0,0 +1,80 @@
|
||||
///////////////////////////////////////////
|
||||
//
|
||||
// Written: Katherine Parry
|
||||
// Modified: 6/23/2021
|
||||
//
|
||||
// Purpose: FPU Sign Injection instructions
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fsgninj (
    input  logic                XSgnE, YSgnE,  // sign bits of the X and Y operands
    input  logic [`FLEN-1:0]    FSrcXE,        // X operand; supplies every result bit except the sign
    input  logic [`FMTBITS-1:0] FmtE,          // format select; width and encoding depend on `FPSIZES
    input  logic [1:0]          SgnOpCodeE,    // operation select, see the table below
    output logic [`FLEN-1:0]    SgnResE        // X with the injected sign, NaN-boxed to `FLEN bits
);

    logic InjSgn;   // sign bit that replaces the sign of X

    // SgnOpCodeE encoding (bit 1 alone selects the XOR form):
    //   00 - fsgnj  : injected sign = sign of Y
    //   01 - fsgnjn : injected sign = inverted sign of Y
    //   1x - fsgnjx : injected sign = sign of X XOR sign of Y
    // Y's sign is XORed with either X's sign or opcode bit 0.
    assign InjSgn = YSgnE ^ (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]);

    // Build the result for whichever set of formats is configured.
    // For a format narrower than `FLEN the sign goes into that format's top
    // bit and every bit above it is filled with ones (NaN boxing).
    generate
        if (`FPSIZES == 1) begin : onefmt
            // single supported format: it occupies the whole register
            assign SgnResE = {InjSgn, FSrcXE[`FLEN-2:0]};

        end else if (`FPSIZES == 2) begin : twofmt
            // FmtE nonzero selects the `FLEN-wide format, zero the `LEN1-wide one
            logic [`FLEN-1:0] WideRes, NarrowRes;

            assign WideRes   = {InjSgn, FSrcXE[`FLEN-2:0]};
            assign NarrowRes = {{`FLEN-`LEN1{1'b1}}, InjSgn, FSrcXE[`LEN1-2:0]};
            assign SgnResE   = FmtE ? WideRes : NarrowRes;

        end else if (`FPSIZES == 3) begin : threefmt
            // three formats, selected by the `FMT / `FMT1 / `FMT2 codes
            always_comb
                case (FmtE)
                    `FMT:    SgnResE = {InjSgn, FSrcXE[`FLEN-2:0]};
                    `FMT1:   SgnResE = {{`FLEN-`LEN1{1'b1}}, InjSgn, FSrcXE[`LEN1-2:0]};
                    `FMT2:   SgnResE = {{`FLEN-`LEN2{1'b1}}, InjSgn, FSrcXE[`LEN2-2:0]};
                    default: SgnResE = '0;   // unused format code
                endcase

        end else if (`FPSIZES == 4) begin : fourfmt
            // all four formats: 0 = single, 1 = double, 2 = half, 3 = quad
            always_comb
                case (FmtE)
                    2'h0: SgnResE = {{`Q_LEN-`S_LEN{1'b1}}, InjSgn, FSrcXE[`S_LEN-2:0]};
                    2'h1: SgnResE = {{`Q_LEN-`D_LEN{1'b1}}, InjSgn, FSrcXE[`D_LEN-2:0]};
                    2'h2: SgnResE = {{`Q_LEN-`H_LEN{1'b1}}, InjSgn, FSrcXE[`H_LEN-2:0]};
                    2'h3: SgnResE = {InjSgn, FSrcXE[`Q_LEN-2:0]};
                endcase
        end
    endgenerate

endmodule
|
@ -2,555 +2,35 @@
|
||||
|
||||
module unpack (
|
||||
input logic [`FLEN-1:0] X, Y, Z, // inputs from register file
|
||||
input logic [`FPSIZES/3:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half
|
||||
input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half
|
||||
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
|
||||
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
|
||||
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision)
|
||||
output logic XNormE, // is X a normalized number
|
||||
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN
|
||||
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
|
||||
output logic XDenormE, YDenormE, ZDenormE, // is XYZ denormalized
|
||||
output logic XDenormE, ZDenormE, // is XYZ denormalized
|
||||
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
|
||||
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
|
||||
output logic XOrigDenormE, ZOrigDenormE, // is the original precision denormalized
|
||||
output logic XExpMaxE // does X have the maximum exponent (NaN or Inf)
|
||||
);
|
||||
|
||||
logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
|
||||
logic XExpNonzero, YExpNonzero, ZExpNonzero; // is the exponent of XYZ non-zero
|
||||
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
|
||||
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
|
||||
logic XExpZero, YExpZero, ZExpZero; // is the exponent zero
|
||||
logic YExpMaxE, ZExpMaxE; // is the exponent all 1s
|
||||
|
||||
if (`FPSIZES == 1) begin // if there is only one floating point format supported
|
||||
unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE),
|
||||
.NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero),
|
||||
.Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero));
|
||||
|
||||
// sign bit
|
||||
assign XSgnE = X[`FLEN-1];
|
||||
assign YSgnE = Y[`FLEN-1];
|
||||
assign ZSgnE = Z[`FLEN-1];
|
||||
|
||||
// exponent
|
||||
assign XExpE = X[`FLEN-2:`NF];
|
||||
assign YExpE = Y[`FLEN-2:`NF];
|
||||
assign ZExpE = Z[`FLEN-2:`NF];
|
||||
|
||||
// fraction (no assumed 1)
|
||||
assign XFracE = X[`NF-1:0];
|
||||
assign YFracE = Y[`NF-1:0];
|
||||
assign ZFracE = Z[`NF-1:0];
|
||||
|
||||
// is the exponent non-zero
|
||||
assign XExpNonzero = |XExpE;
|
||||
assign YExpNonzero = |YExpE;
|
||||
assign ZExpNonzero = |ZExpE;
|
||||
|
||||
// is the exponent all 1's
|
||||
assign XExpMaxE = &XExpE;
|
||||
assign YExpMaxE = &YExpE;
|
||||
assign ZExpMaxE = &ZExpE;
|
||||
|
||||
assign XOrigDenormE = 1'b0;
|
||||
assign ZOrigDenormE = 1'b0;
|
||||
|
||||
|
||||
end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported
|
||||
//***need better names for these constants
|
||||
// largest format | smaller format
|
||||
//----------------------------------
|
||||
// `FLEN | `LEN1 length of floating point number
|
||||
// `NE | `NE1 length of exponent
|
||||
// `NF | `NF1 length of fraction
|
||||
// `BIAS | `BIAS1 exponent's bias value
|
||||
// `FMT | `FMT1 precision's format value - Q=11 D=01 S=00 H=10
|
||||
|
||||
// Possible combinations specified by spec:
|
||||
// double and single
|
||||
// single and half
|
||||
|
||||
// Not needed but can also handle:
|
||||
// quad and double
|
||||
// quad and single
|
||||
// quad and half
|
||||
// double and half
|
||||
|
||||
logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed
|
||||
logic YOrigDenormE; // the original value of XYZ is denormalized
|
||||
|
||||
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN
|
||||
assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
|
||||
assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
|
||||
assign ZLen1 = &Z[`FLEN-1:`LEN1] ? Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
|
||||
|
||||
// choose sign bit depending on format - 1=larger precision 0=smaller precision
|
||||
assign XSgnE = FmtE ? X[`FLEN-1] : XLen1[`LEN1-1];
|
||||
assign YSgnE = FmtE ? Y[`FLEN-1] : YLen1[`LEN1-1];
|
||||
assign ZSgnE = FmtE ? Z[`FLEN-1] : ZLen1[`LEN1-1];
|
||||
|
||||
// example double to single conversion:
|
||||
// 1023 = 0011 1111 1111
|
||||
// 127 = 0000 0111 1111 (subtract this)
|
||||
// 896 = 0011 1000 0000
|
||||
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
|
||||
// dexp = 0bdd dbbb bbbb
|
||||
// also need to take into account possible zero/denorm/inf/NaN values
|
||||
|
||||
// extract the exponent, converting the smaller exponent into the larger precision if necessary
|
||||
// - if the original precision had a denormal number convert the exponent value 1
|
||||
assign XExpE = FmtE ? X[`FLEN-2:`NF] : XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]};
|
||||
assign YExpE = FmtE ? Y[`FLEN-2:`NF] : YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]};
|
||||
assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]};
|
||||
|
||||
// is the input (in its original format) denormalized
|
||||
assign XOrigDenormE = FmtE ? 0 : ~|XLen1[`LEN1-2:`NF1] & ~XFracZero;
|
||||
assign YOrigDenormE = FmtE ? 0 : ~|YLen1[`LEN1-2:`NF1] & ~YFracZero;
|
||||
assign ZOrigDenormE = FmtE ? 0 : ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero;
|
||||
|
||||
// extract the fraction, add trailing zeroes to the mantissa if necessary
|
||||
assign XFracE = FmtE ? X[`NF-1:0] : {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
assign YFracE = FmtE ? Y[`NF-1:0] : {YLen1[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
assign ZFracE = FmtE ? Z[`NF-1:0] : {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
|
||||
// is the exponent non-zero
|
||||
assign XExpNonzero = FmtE ? |X[`FLEN-2:`NF] : |XLen1[`LEN1-2:`NF1];
|
||||
assign YExpNonzero = FmtE ? |Y[`FLEN-2:`NF] : |YLen1[`LEN1-2:`NF1];
|
||||
assign ZExpNonzero = FmtE ? |Z[`FLEN-2:`NF] : |ZLen1[`LEN1-2:`NF1];
|
||||
|
||||
// is the exponent all 1's
|
||||
assign XExpMaxE = FmtE ? &X[`FLEN-2:`NF] : &XLen1[`LEN1-2:`NF1];
|
||||
assign YExpMaxE = FmtE ? &Y[`FLEN-2:`NF] : &YLen1[`LEN1-2:`NF1];
|
||||
assign ZExpMaxE = FmtE ? &Z[`FLEN-2:`NF] : &ZLen1[`LEN1-2:`NF1];
|
||||
|
||||
|
||||
end else if (`FPSIZES == 3) begin // three floating point precsions supported
|
||||
|
||||
//***need better names for these constants
|
||||
// largest format | larger format | smallest format
|
||||
//---------------------------------------------------
|
||||
// `FLEN | `LEN1 | `LEN2 length of floating point number
|
||||
// `NE | `NE1 | `NE2 length of exponent
|
||||
// `NF | `NF1 | `NF2 length of fraction
|
||||
// `BIAS | `BIAS1 | `BIAS2 exponent's bias value
|
||||
// `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 S=00 H=10
|
||||
|
||||
// Possible combinantions specified by spec:
|
||||
// quad and double and single
|
||||
// double and single and half
|
||||
|
||||
// Not needed but can also handle:
|
||||
// quad and double and half
|
||||
// quad and single and half
|
||||
|
||||
logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision
|
||||
logic [`LEN2-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision
|
||||
logic YOrigDenormE; // the original value of XYZ is denormalized
|
||||
|
||||
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision
|
||||
assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
|
||||
assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
|
||||
assign ZLen1 = &Z[`FLEN-1:`LEN1] ? Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
|
||||
|
||||
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for smaller precision
|
||||
assign XLen2 = &X[`FLEN-1:`LEN2] ? X[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
|
||||
assign YLen2 = &Y[`FLEN-1:`LEN2] ? Y[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
|
||||
assign ZLen2 = &Z[`FLEN-1:`LEN2] ? Z[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
|
||||
|
||||
// There are 2 case statements
|
||||
// - one for other signals and one for sgn/exp/frac
|
||||
// - need two for the dependencies in the exponent calculation
|
||||
always_comb begin
|
||||
case (FmtE)
|
||||
`FMT: begin // if input is largest precision (`FLEN - ie quad or double)
|
||||
|
||||
// This is the original format so set OrigDenorm to 0
|
||||
XOrigDenormE = 1'b0;
|
||||
YOrigDenormE = 1'b0;
|
||||
ZOrigDenormE = 1'b0;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |X[`FLEN-2:`NF];
|
||||
YExpNonzero = |Y[`FLEN-2:`NF];
|
||||
ZExpNonzero = |Z[`FLEN-2:`NF];
|
||||
|
||||
// is the exponent all 1's
|
||||
XExpMaxE = &X[`FLEN-2:`NF];
|
||||
YExpMaxE = &Y[`FLEN-2:`NF];
|
||||
ZExpMaxE = &Z[`FLEN-2:`NF];
|
||||
end
|
||||
`FMT1: begin // if input is larger precsion (`LEN1 - double or single)
|
||||
|
||||
// is the input (in it's original format) denormalized
|
||||
XOrigDenormE = ~|XLen1[`LEN1-2:`NF1] & ~XFracZero;
|
||||
YOrigDenormE = ~|YLen1[`LEN1-2:`NF1] & ~YFracZero;
|
||||
ZOrigDenormE = ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |XLen1[`LEN1-2:`NF1];
|
||||
YExpNonzero = |YLen1[`LEN1-2:`NF1];
|
||||
ZExpNonzero = |ZLen1[`LEN1-2:`NF1];
|
||||
|
||||
// is the exponent all 1's
|
||||
XExpMaxE = &XLen1[`LEN1-2:`NF1];
|
||||
YExpMaxE = &YLen1[`LEN1-2:`NF1];
|
||||
ZExpMaxE = &ZLen1[`LEN1-2:`NF1];
|
||||
end
|
||||
`FMT2: begin // if input is smallest precsion (`LEN2 - single or half)
|
||||
|
||||
// is the input (in it's original format) denormalized
|
||||
XOrigDenormE = ~|XLen2[`LEN2-2:`NF2] & ~XFracZero;
|
||||
YOrigDenormE = ~|YLen2[`LEN2-2:`NF2] & ~YFracZero;
|
||||
ZOrigDenormE = ~|ZLen2[`LEN2-2:`NF2] & ~ZFracZero;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |XLen2[`LEN2-2:`NF2];
|
||||
YExpNonzero = |YLen2[`LEN2-2:`NF2];
|
||||
ZExpNonzero = |ZLen2[`LEN2-2:`NF2];
|
||||
|
||||
// is the exponent all 1's
|
||||
XExpMaxE = &XLen2[`LEN2-2:`NF2];
|
||||
YExpMaxE = &YLen2[`LEN2-2:`NF2];
|
||||
ZExpMaxE = &ZLen2[`LEN2-2:`NF2];
|
||||
end
|
||||
default: begin
|
||||
XOrigDenormE = 0;
|
||||
YOrigDenormE = 0;
|
||||
ZOrigDenormE = 0;
|
||||
XExpNonzero = 0;
|
||||
YExpNonzero = 0;
|
||||
ZExpNonzero = 0;
|
||||
XExpMaxE = 0;
|
||||
YExpMaxE = 0;
|
||||
ZExpMaxE = 0;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
always_comb begin
|
||||
case (FmtE)
|
||||
`FMT: begin // if input is largest precision (`FLEN - ie quad or double)
|
||||
// extract the sign bit
|
||||
XSgnE = X[`FLEN-1];
|
||||
YSgnE = Y[`FLEN-1];
|
||||
ZSgnE = Z[`FLEN-1];
|
||||
|
||||
// extract the exponent
|
||||
XExpE = X[`FLEN-2:`NF];
|
||||
YExpE = Y[`FLEN-2:`NF];
|
||||
ZExpE = Z[`FLEN-2:`NF];
|
||||
|
||||
// extract the fraction
|
||||
XFracE = X[`NF-1:0];
|
||||
YFracE = Y[`NF-1:0];
|
||||
ZFracE = Z[`NF-1:0];
|
||||
end
|
||||
`FMT1: begin // if input is larger precsion (`LEN1 - double or single)
|
||||
|
||||
// extract the sign bit
|
||||
XSgnE = XLen1[`LEN1-1];
|
||||
YSgnE = YLen1[`LEN1-1];
|
||||
ZSgnE = ZLen1[`LEN1-1];
|
||||
|
||||
// example double to single conversion:
|
||||
// 1023 = 0011 1111 1111
|
||||
// 127 = 0000 0111 1111 (subtract this)
|
||||
// 896 = 0011 1000 0000
|
||||
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
|
||||
// dexp = 0bdd dbbb bbbb
|
||||
// also need to take into account possible zero/denorm/inf/NaN values
|
||||
|
||||
// convert the larger precision's exponent to use the largest precision's bias
|
||||
XExpE = XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]};
|
||||
YExpE = YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]};
|
||||
ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]};
|
||||
|
||||
// extract the fraction and add the nessesary trailing zeros
|
||||
XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
YFracE = {YLen1[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
ZFracE = {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
end
|
||||
`FMT2: begin // if input is smallest precsion (`LEN2 - single or half)
|
||||
|
||||
// exctract the sign bit
|
||||
XSgnE = XLen2[`LEN2-1];
|
||||
YSgnE = YLen2[`LEN2-1];
|
||||
ZSgnE = ZLen2[`LEN2-1];
|
||||
|
||||
// example double to single conversion:
|
||||
// 1023 = 0011 1111 1111
|
||||
// 127 = 0000 0111 1111 (subtract this)
|
||||
// 896 = 0011 1000 0000
|
||||
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
|
||||
// dexp = 0bdd dbbb bbbb
|
||||
// also need to take into account possible zero/denorm/inf/NaN values
|
||||
|
||||
// convert the smallest precision's exponent to use the largest precision's bias
|
||||
XExpE = XOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]&~XExpZero|XExpMaxE}}, XLen2[`LEN2-3:`NF2]};
|
||||
YExpE = YOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]&~YExpZero|YExpMaxE}}, YLen2[`LEN2-3:`NF2]};
|
||||
ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]};
|
||||
|
||||
// extract the fraction and add the nessesary trailing zeros
|
||||
XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)};
|
||||
YFracE = {YLen2[`NF2-1:0], (`NF-`NF2)'(0)};
|
||||
ZFracE = {ZLen2[`NF2-1:0], (`NF-`NF2)'(0)};
|
||||
end
|
||||
default: begin
|
||||
XSgnE = 0;
|
||||
YSgnE = 0;
|
||||
ZSgnE = 0;
|
||||
XExpE = 0;
|
||||
YExpE = 0;
|
||||
ZExpE = 0;
|
||||
XFracE = 0;
|
||||
YFracE = 0;
|
||||
ZFracE = 0;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin // if all precsisons are supported - quad, double, single, and half
|
||||
|
||||
// quad | double | single | half
|
||||
//-------------------------------------------------------------------
|
||||
// `Q_LEN | `D_LEN | `S_LEN | `H_LEN length of floating point number
|
||||
// `Q_NE | `D_NE | `S_NE | `H_NE length of exponent
|
||||
// `Q_NF | `D_NF | `S_NF | `H_NF length of fraction
|
||||
// `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value
|
||||
// `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 S=00 H=10
|
||||
|
||||
|
||||
logic [`D_LEN-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision
|
||||
logic [`S_LEN-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision
|
||||
logic [`H_LEN-1:0] XLen3, YLen3, ZLen3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision
|
||||
logic YOrigDenormE; // the original value of XYZ is denormalized
|
||||
|
||||
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision
|
||||
assign XLen1 = &X[`Q_LEN-1:`D_LEN] ? X[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
|
||||
assign YLen1 = &Y[`Q_LEN-1:`D_LEN] ? Y[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
|
||||
assign ZLen1 = &Z[`Q_LEN-1:`D_LEN] ? Z[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
|
||||
|
||||
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for single precision
|
||||
assign XLen2 = &X[`Q_LEN-1:`S_LEN] ? X[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
|
||||
assign YLen2 = &Y[`Q_LEN-1:`S_LEN] ? Y[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
|
||||
assign ZLen2 = &Z[`Q_LEN-1:`S_LEN] ? Z[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
|
||||
|
||||
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for half precision
|
||||
assign XLen3 = &X[`Q_LEN-1:`H_LEN] ? X[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
|
||||
assign YLen3 = &Y[`Q_LEN-1:`H_LEN] ? Y[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
|
||||
assign ZLen3 = &Z[`Q_LEN-1:`H_LEN] ? Z[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
|
||||
|
||||
|
||||
// There are 2 case statements
|
||||
// - one for other signals and one for sgn/exp/frac
|
||||
// - need two for the dependencies in the exponent calculation
|
||||
always_comb begin
|
||||
case (FmtE)
|
||||
2'b11: begin // if input is quad percision
|
||||
|
||||
// This is the original format so set OrigDenorm to 0
|
||||
XOrigDenormE = 1'b0;
|
||||
YOrigDenormE = 1'b0;
|
||||
ZOrigDenormE = 1'b0;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |X[`Q_LEN-2:`Q_NF];
|
||||
YExpNonzero = |Y[`Q_LEN-2:`Q_NF];
|
||||
ZExpNonzero = |Z[`Q_LEN-2:`Q_NF];
|
||||
|
||||
// is the exponent all 1's
|
||||
XExpMaxE = &X[`Q_LEN-2:`Q_NF];
|
||||
YExpMaxE = &Y[`Q_LEN-2:`Q_NF];
|
||||
ZExpMaxE = &Z[`Q_LEN-2:`Q_NF];
|
||||
end
|
||||
2'b01: begin // if input is double percision
|
||||
|
||||
// is the exponent all 1's
|
||||
XExpMaxE = &XLen1[`D_LEN-2:`D_NF];
|
||||
YExpMaxE = &YLen1[`D_LEN-2:`D_NF];
|
||||
ZExpMaxE = &ZLen1[`D_LEN-2:`D_NF];
|
||||
|
||||
// is the input (in its original format) denormalized
|
||||
XOrigDenormE = ~|XLen1[`D_LEN-2:`D_NF] & ~XFracZero;
|
||||
YOrigDenormE = ~|YLen1[`D_LEN-2:`D_NF] & ~YFracZero;
|
||||
ZOrigDenormE = ~|ZLen1[`D_LEN-2:`D_NF] & ~ZFracZero;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |XLen1[`D_LEN-2:`D_NF];
|
||||
YExpNonzero = |YLen1[`D_LEN-2:`D_NF];
|
||||
ZExpNonzero = |ZLen1[`D_LEN-2:`D_NF];
|
||||
end
|
||||
2'b00: begin // if input is single percision
|
||||
|
||||
// is the exponent all 1's
|
||||
XExpMaxE = &XLen2[`S_LEN-2:`S_NF];
|
||||
YExpMaxE = &YLen2[`S_LEN-2:`S_NF];
|
||||
ZExpMaxE = &ZLen2[`S_LEN-2:`S_NF];
|
||||
|
||||
// is the input (in it's original format) denormalized
|
||||
XOrigDenormE = ~|XLen2[`S_LEN-2:`S_NF] & ~XFracZero;
|
||||
YOrigDenormE = ~|YLen2[`S_LEN-2:`S_NF] & ~YFracZero;
|
||||
ZOrigDenormE = ~|ZLen2[`S_LEN-2:`S_NF] & ~ZFracZero;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |XLen2[`S_LEN-2:`S_NF];
|
||||
YExpNonzero = |YLen2[`S_LEN-2:`S_NF];
|
||||
ZExpNonzero = |ZLen2[`S_LEN-2:`S_NF];
|
||||
end
|
||||
2'b10: begin // if input is half percision
|
||||
|
||||
// is the exponent all 1's
|
||||
XExpMaxE = &XLen3[`H_LEN-2:`H_NF];
|
||||
YExpMaxE = &YLen3[`H_LEN-2:`H_NF];
|
||||
ZExpMaxE = &ZLen3[`H_LEN-2:`H_NF];
|
||||
|
||||
// is the input (in it's original format) denormalized
|
||||
XOrigDenormE = ~|XLen3[`H_LEN-2:`H_NF] & ~XFracZero;
|
||||
YOrigDenormE = ~|YLen3[`H_LEN-2:`H_NF] & ~YFracZero;
|
||||
ZOrigDenormE = ~|ZLen3[`H_LEN-2:`H_NF] & ~ZFracZero;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |XLen3[`H_LEN-2:`H_NF];
|
||||
YExpNonzero = |YLen3[`H_LEN-2:`H_NF];
|
||||
ZExpNonzero = |ZLen3[`H_LEN-2:`H_NF];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
case (FmtE)
|
||||
2'b11: begin // if input is quad percision
|
||||
// extract sign bit
|
||||
XSgnE = X[`Q_LEN-1];
|
||||
YSgnE = Y[`Q_LEN-1];
|
||||
ZSgnE = Z[`Q_LEN-1];
|
||||
|
||||
// extract the exponent
|
||||
XExpE = X[`Q_LEN-2:`Q_NF];
|
||||
YExpE = Y[`Q_LEN-2:`Q_NF];
|
||||
ZExpE = Z[`Q_LEN-2:`Q_NF];
|
||||
|
||||
// extract the fraction
|
||||
XFracE = X[`Q_NF-1:0];
|
||||
YFracE = Y[`Q_NF-1:0];
|
||||
ZFracE = Z[`Q_NF-1:0];
|
||||
end
|
||||
2'b01: begin // if input is double percision
|
||||
// extract sign bit
|
||||
XSgnE = XLen1[`D_LEN-1];
|
||||
YSgnE = YLen1[`D_LEN-1];
|
||||
ZSgnE = ZLen1[`D_LEN-1];
|
||||
|
||||
// example double to single conversion:
|
||||
// 1023 = 0011 1111 1111
|
||||
// 127 = 0000 0111 1111 (subtract this)
|
||||
// 896 = 0011 1000 0000
|
||||
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
|
||||
// dexp = 0bdd dbbb bbbb
|
||||
// also need to take into account possible zero/denorm/inf/NaN values
|
||||
|
||||
// convert the double precision exponent into quad precision
|
||||
|
||||
XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]&~XExpZero|XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]};
|
||||
YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]&~YExpZero|YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]};
|
||||
ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]};
|
||||
|
||||
// extract the fraction and add the necessary trailing zeros
|
||||
XFracE = {XLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
|
||||
YFracE = {YLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
|
||||
ZFracE = {ZLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
|
||||
end
|
||||
2'b00: begin // if input is single percision
|
||||
// extract sign bit
|
||||
XSgnE = XLen2[`S_LEN-1];
|
||||
YSgnE = YLen2[`S_LEN-1];
|
||||
ZSgnE = ZLen2[`S_LEN-1];
|
||||
|
||||
// example double to single conversion:
|
||||
// 1023 = 0011 1111 1111
|
||||
// 127 = 0000 0111 1111 (subtract this)
|
||||
// 896 = 0011 1000 0000
|
||||
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
|
||||
// dexp = 0bdd dbbb bbbb
|
||||
// also need to take into account possible zero/denorm/inf/NaN values
|
||||
|
||||
// convert the single precision exponent into quad precision
|
||||
XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]&~XExpZero|XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]};
|
||||
YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]&~YExpZero|YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]};
|
||||
ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]};
|
||||
|
||||
// extract the fraction and add the necessary trailing zeros
|
||||
XFracE = {XLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
|
||||
YFracE = {YLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
|
||||
ZFracE = {ZLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
|
||||
end
|
||||
2'b10: begin // if input is half percision
|
||||
// extract sign bit
|
||||
XSgnE = XLen3[`H_LEN-1];
|
||||
YSgnE = YLen3[`H_LEN-1];
|
||||
ZSgnE = ZLen3[`H_LEN-1];
|
||||
|
||||
// example double to single conversion:
|
||||
// 1023 = 0011 1111 1111
|
||||
// 127 = 0000 0111 1111 (subtract this)
|
||||
// 896 = 0011 1000 0000
|
||||
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
|
||||
// dexp = 0bdd dbbb bbbb
|
||||
// also need to take into account possible zero/denorm/inf/NaN values
|
||||
|
||||
// convert the half precision exponent into quad precision
|
||||
XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]&~XExpZero|XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]};
|
||||
YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]&~YExpZero|YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]};
|
||||
ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]};
|
||||
|
||||
// extract the fraction and add the necessary trailing zeros
|
||||
XFracE = {XLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
|
||||
YFracE = {YLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
|
||||
ZFracE = {ZLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
// is the exponent all 0's
|
||||
assign XExpZero = ~XExpNonzero;
|
||||
assign YExpZero = ~YExpNonzero;
|
||||
assign ZExpZero = ~ZExpNonzero;
|
||||
|
||||
// is the fraction zero
|
||||
assign XFracZero = ~|XFracE;
|
||||
assign YFracZero = ~|YFracE;
|
||||
assign ZFracZero = ~|ZFracE;
|
||||
|
||||
// add the assumed one (or zero if denormal or zero) to create the mantissa
|
||||
assign XManE = {XExpNonzero, XFracE};
|
||||
assign YManE = {YExpNonzero, YFracE};
|
||||
assign ZManE = {ZExpNonzero, ZFracE};
|
||||
|
||||
// is X normalized
|
||||
assign XNormE = ~(XExpMaxE|XExpZero);
|
||||
|
||||
// is the input a NaN
|
||||
// - force to be a NaN if it isn't properly Nan Boxed
|
||||
assign XNaNE = XExpMaxE & ~XFracZero;
|
||||
assign YNaNE = YExpMaxE & ~YFracZero;
|
||||
assign ZNaNE = ZExpMaxE & ~ZFracZero;
|
||||
|
||||
// is the input a signaling NaN
|
||||
assign XSNaNE = XNaNE&~XFracE[`NF-1];
|
||||
assign YSNaNE = YNaNE&~YFracE[`NF-1];
|
||||
assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];
|
||||
unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE),
|
||||
.NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero),
|
||||
.Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero));
|
||||
|
||||
unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE),
|
||||
.NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero),
|
||||
.Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero));
|
||||
// is the input denormalized
|
||||
assign XDenormE = XExpZero & ~XFracZero;
|
||||
assign YDenormE = YExpZero & ~YFracZero;
|
||||
assign ZDenormE = ZExpZero & ~ZFracZero;
|
||||
|
||||
// is the input infinity
|
||||
assign XInfE = XExpMaxE & XFracZero;
|
||||
assign YInfE = YExpMaxE & YFracZero;
|
||||
assign ZInfE = ZExpMaxE & ZFracZero;
|
||||
|
||||
// is the input zero
|
||||
assign XZeroE = XExpZero & XFracZero;
|
||||
assign YZeroE = YExpZero & YFracZero;
|
||||
assign ZZeroE = ZExpZero & ZFracZero;
|
||||
|
||||
assign XDenormE = ~XExpNonZero & ~XFracZero;
|
||||
assign ZDenormE = ~ZExpNonZero & ~ZFracZero;
|
||||
endmodule
|
241
pipelined/src/fpu/unpackinput.sv
Normal file
241
pipelined/src/fpu/unpackinput.sv
Normal file
@ -0,0 +1,241 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
// unpackinput: split one floating-point operand into sign / exponent / significand
// and classify it (NaN, signaling NaN, infinity, zero).
//
// The operand may be held in any of the formats enabled by `FPSIZES. Narrower formats
// are widened to the largest supported format: the fraction is padded with trailing
// zeros and the exponent is re-biased. A narrower operand whose upper bits are not all
// ones (i.e. it is not NaN-boxed) is reported as a NaN, but never as a signaling NaN.
//
// Exponent re-biasing, illustrated with a single-precision exponent widened to double:
//      1023 = 0011 1111 1111   (double bias)
//       127 = 0000 0111 1111   (single bias, subtracted)
//       896 = 0011 1000 0000   (difference of the biases)
//      sexp = 0000 bbbb bbbb   (added to the difference)   b = exponent bit, d = ~b
//      dexp = 0bdd dbbb bbbb
// In every format the least significant exponent bit is forced to 1 when the stored
// exponent is all zeros, so denormalized inputs get an effective biased exponent of 1.
module unpackinput (
    input  logic [`FLEN-1:0]    In,         // operand from the register file
    input  logic [`FMTBITS-1:0] FmtE,       // format select: 00 - single, 01 - double, 11 - quad, 10 - half
    output logic                Sgn,        // sign bit
    output logic [`NE-1:0]      Exp,        // exponent, expressed in the largest supported precision
    output logic [`NF:0]        Man,        // significand {leading bit, fraction} in the largest supported precision
    output logic                NaN,        // operand is a NaN (or is improperly NaN-boxed)
    output logic                SNaN,       // operand is a signaling NaN
    output logic                Zero,       // operand is zero
    output logic                Inf,        // operand is infinity
    output logic                ExpNonZero, // stored exponent has at least one bit set
    output logic                FracZero,   // fraction is all zeros
    output logic                ExpMax      // stored exponent is all ones (NaN or infinity)
);

    logic [`NF-1:0] Frac;       // fraction, left-aligned in the widest format
    logic           ExpZero;    // declared by the original design; neither driven nor read in this module
    logic           BadNaNBox;  // narrower operand whose upper bits are not all ones

    if (`FPSIZES == 1) begin : onefmt
        // ---- a single supported format: plain field extraction, nothing to widen ----
        assign BadNaNBox  = 0;
        assign Sgn        = In[`FLEN-1];
        assign Frac       = In[`NF-1:0];
        assign ExpNonZero = |In[`FLEN-2:`NF];
        assign Exp        = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
        assign ExpMax     = &In[`FLEN-2:`NF];

    end else if (`FPSIZES == 2) begin : twofmt
        // ---- two supported formats ----
        //      largest format | smaller format
        //      `FLEN          | `LEN1          length of floating point number
        //      `NE            | `NE1           length of exponent
        //      `NF            | `NF1           length of fraction
        // FmtE selects: 1 = larger format, 0 = smaller format.
        // Combinations named by the spec: double+single, single+half.
        // Also workable: quad+double, quad+single, quad+half, double+half.

        // only the smaller format needs a NaN-box check (same function as ~(FmtE | &upper))
        assign BadNaNBox  = ~FmtE & ~(&In[`FLEN-1:`LEN1]);

        assign Sgn        = FmtE ? In[`FLEN-1] : In[`LEN1-1];

        // smaller fraction is padded with trailing zeros
        assign Frac       = FmtE ? In[`NF-1:0]
                                 : {In[`NF1-1:0], (`NF-`NF1)'(0)};

        assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];

        // smaller exponent is re-biased (see the worked example in the module header)
        assign Exp        = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}
                                 : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};

        assign ExpMax     = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];

    end else if (`FPSIZES == 3) begin : threefmt
        // ---- three supported formats ----
        //      largest format | larger format | smallest format
        //      `FLEN          | `LEN1         | `LEN2      length of floating point number
        //      `NE            | `NE1          | `NE2       length of exponent
        //      `NF            | `NF1          | `NF2       length of fraction
        //      `FMT           | `FMT1         | `FMT2      format encoding
        // Combinations named by the spec: quad+double+single, double+single+half.
        // Also workable: quad+double+half, quad+single+half.
        //
        // All per-format fields are produced by one case statement. ExpNonZero is
        // assigned before Exp in every arm because Exp reads it.
        always_comb
            case (FmtE)
                `FMT: begin     // largest precision: fields used as stored
                    BadNaNBox  = 0;
                    Sgn        = In[`FLEN-1];
                    Frac       = In[`NF-1:0];
                    ExpNonZero = |In[`FLEN-2:`NF];
                    Exp        = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
                    ExpMax     = &In[`FLEN-2:`NF];
                end
                `FMT1: begin    // larger precision
                    BadNaNBox  = ~&In[`FLEN-1:`LEN1];
                    Sgn        = In[`LEN1-1];
                    Frac       = {In[`NF1-1:0], (`NF-`NF1)'(0)};
                    ExpNonZero = |In[`LEN1-2:`NF1];
                    Exp        = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
                    ExpMax     = &In[`LEN1-2:`NF1];
                end
                `FMT2: begin    // smallest precision
                    BadNaNBox  = ~&In[`FLEN-1:`LEN2];
                    Sgn        = In[`LEN2-1];
                    Frac       = {In[`NF2-1:0], (`NF-`NF2)'(0)};
                    ExpNonZero = |In[`LEN2-2:`NF2];
                    Exp        = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero};
                    ExpMax     = &In[`LEN2-2:`NF2];
                end
                default: begin  // unused format encoding: drive everything to zero
                    BadNaNBox  = 0;
                    Sgn        = 0;
                    Frac       = 0;
                    ExpNonZero = 0;
                    Exp        = 0;
                    ExpMax     = 0;
                end
            endcase

    end else if (`FPSIZES == 4) begin : fourfmt
        // ---- all four formats: quad, double, single, half ----
        //      quad   | double | single | half
        //      `Q_LEN | `D_LEN | `S_LEN | `H_LEN   length of floating point number
        //      `Q_NE  | `D_NE  | `S_NE  | `H_NE    length of exponent
        //      `Q_NF  | `D_NF  | `S_NF  | `H_NF    length of fraction
        //      11     | 01     | 00     | 10       FmtE encoding
        //
        // As above, ExpNonZero is assigned before Exp in every arm because Exp reads it.
        always_comb
            case (FmtE)
                2'b11: begin    // quad: fields used as stored
                    BadNaNBox  = 0;
                    Sgn        = In[`Q_LEN-1];
                    Frac       = In[`Q_NF-1:0];
                    ExpNonZero = |In[`Q_LEN-2:`Q_NF];
                    Exp        = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
                    ExpMax     = &In[`Q_LEN-2:`Q_NF];
                end
                2'b01: begin    // double
                    BadNaNBox  = ~&In[`Q_LEN-1:`D_LEN];
                    Sgn        = In[`D_LEN-1];
                    Frac       = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
                    ExpNonZero = |In[`D_LEN-2:`D_NF];
                    Exp        = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
                    ExpMax     = &In[`D_LEN-2:`D_NF];
                end
                2'b00: begin    // single
                    BadNaNBox  = ~&In[`Q_LEN-1:`S_LEN];
                    Sgn        = In[`S_LEN-1];
                    Frac       = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
                    ExpNonZero = |In[`S_LEN-2:`S_NF];
                    Exp        = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
                    ExpMax     = &In[`S_LEN-2:`S_NF];
                end
                2'b10: begin    // half
                    BadNaNBox  = ~&In[`Q_LEN-1:`H_LEN];
                    Sgn        = In[`H_LEN-1];
                    Frac       = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
                    ExpNonZero = |In[`H_LEN-2:`H_NF];
                    Exp        = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero};
                    ExpMax     = &In[`H_LEN-2:`H_NF];
                end
            endcase
    end

    // ---- format-independent classification ----
    assign FracZero = ~|Frac;                               // no fraction bit set
    assign Man      = {ExpNonZero, Frac};                   // leading bit is 1 unless the stored exponent is zero
    assign NaN      = (ExpMax & ~FracZero) | BadNaNBox;     // genuine NaN, or forced NaN for a bad NaN box
    assign SNaN     = NaN & ~Frac[`NF-1] & ~BadNaNBox;      // top fraction bit clear; a bad NaN box never signals
    assign Inf      = ExpMax & FracZero;
    assign Zero     = ~ExpNonZero & FracZero;

endmodule
|
@ -44,8 +44,8 @@ module bram1p1rw
|
||||
//----------------------------------------------------------------------
|
||||
) (
|
||||
input logic clk,
|
||||
input logic ena,
|
||||
input logic [NUM_COL-1:0] we,
|
||||
input logic we,
|
||||
input logic [NUM_COL-1:0] bwe,
|
||||
input logic [ADDR_WIDTH-1:0] addr,
|
||||
output logic [DATA_WIDTH-1:0] dout,
|
||||
input logic [DATA_WIDTH-1:0] din
|
||||
@ -60,9 +60,9 @@ module bram1p1rw
|
||||
|
||||
always @ (posedge clk) begin
|
||||
dout <= RAM[addr];
|
||||
if(ena) begin
|
||||
if(we) begin
|
||||
for(i=0;i<NUM_COL;i=i+1) begin
|
||||
if(we[i]) begin
|
||||
if(bwe[i]) begin
|
||||
RAM[addr][i*COL_WIDTH +: COL_WIDTH] <= din[i*COL_WIDTH +:COL_WIDTH];
|
||||
end
|
||||
end
|
||||
|
@ -46,11 +46,11 @@ module bram2p1r1w
|
||||
//----------------------------------------------------------------------
|
||||
) (
|
||||
input logic clk,
|
||||
input logic enaA,
|
||||
input logic reA,
|
||||
input logic [ADDR_WIDTH-1:0] addrA,
|
||||
output logic [DATA_WIDTH-1:0] doutA,
|
||||
input logic enaB,
|
||||
input logic [NUM_COL-1:0] weB,
|
||||
input logic weB,
|
||||
input logic [NUM_COL-1:0] bweB,
|
||||
input logic [ADDR_WIDTH-1:0] addrB,
|
||||
input logic [DATA_WIDTH-1:0] dinB
|
||||
);
|
||||
@ -128,15 +128,15 @@ module bram2p1r1w
|
||||
|
||||
// Port-A Operation
|
||||
always @ (posedge clk) begin
|
||||
if(enaA) begin
|
||||
if(reA) begin
|
||||
doutA <= RAM[addrA];
|
||||
end
|
||||
end
|
||||
// Port-B Operation:
|
||||
always @ (posedge clk) begin
|
||||
if(enaB) begin
|
||||
if(weB) begin
|
||||
for(i=0;i<NUM_COL;i=i+1) begin
|
||||
if(weB[i]) begin
|
||||
if(bweB[i]) begin
|
||||
RAM[addrB][i*COL_WIDTH +: COL_WIDTH] <= dinB[i*COL_WIDTH +:COL_WIDTH];
|
||||
end
|
||||
end
|
||||
|
@ -42,27 +42,7 @@ module simpleram #(parameter BASE=0, RANGE = 65535) (
|
||||
localparam ADDR_WDITH = $clog2(RANGE/8);
|
||||
localparam OFFSET = $clog2(`XLEN/8);
|
||||
|
||||
|
||||
bram1p1rw #(`XLEN/8, 8, ADDR_WDITH)
|
||||
memory(.clk, .ena(we), .we(ByteMask), .addr(a[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(rd), .din(wd));
|
||||
|
||||
/* -----\/----- EXCLUDED -----\/-----
|
||||
logic [`XLEN-1:0] RAM[BASE>>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)];
|
||||
|
||||
// discard bottom 2 or 3 bits of address offset within word or doubleword
|
||||
localparam adrlsb = (`XLEN==64) ? 3 : 2;
|
||||
logic [31:adrlsb] adrmsbs;
|
||||
assign adrmsbs = a[31:adrlsb];
|
||||
|
||||
always_ff @(posedge clk)
|
||||
rd <= RAM[adrmsbs];
|
||||
|
||||
genvar index;
|
||||
for(index = 0; index < `XLEN/8; index++) begin
|
||||
always_ff @(posedge clk) begin
|
||||
if (we & ByteMask[index]) RAM[adrmsbs][8*(index+1)-1:8*index] <= #1 wd[8*(index+1)-1:8*index];
|
||||
end
|
||||
end
|
||||
-----/\----- EXCLUDED -----/\----- */
|
||||
memory(.clk, .we, .bwe(ByteMask), .addr(a[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(rd), .din(wd));
|
||||
endmodule
|
||||
|
||||
|
15
pipelined/src/generic/lzc.sv
Normal file
15
pipelined/src/generic/lzc.sv
Normal file
@ -0,0 +1,15 @@
|
||||
//leading zero counter i.e. priority encoder
|
||||
module lzc #(parameter WIDTH = 1) (
    input  logic [WIDTH-1:0]            num,      // vector scanned from its most significant bit downward
    output logic [$clog2(WIDTH+1)-1:0]  ZeroCnt   // count of zero bits above the first set bit
);
/* verilator lint_off CMPCONST */

    // running count of leading zeros; also the distance of the probed bit from the MSB
    logic [$clog2(WIDTH+1)-1:0] cnt;

    always_comb begin
        cnt = 0;
        // advance while the probe position is still inside num and the probed bit is 0;
        // the loop therefore ends on the first set bit or once cnt has passed WIDTH-1
        while (($unsigned(cnt) <= $unsigned(($clog2(WIDTH+1))'(WIDTH-1))) & ~num[WIDTH-1-(32)'(cnt)])
            cnt = cnt + 1;
        ZeroCnt = cnt;
    end
/* verilator lint_on CMPCONST */
endmodule
|
@ -32,13 +32,13 @@
|
||||
|
||||
module hazard(
|
||||
// Detect hazards
|
||||
(* mark_debug = "true" *) input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
|
||||
(* mark_debug = "true" *) input logic BPPredWrongE, CSRWriteFencePendingDEM, RetM, TrapM,
|
||||
(* mark_debug = "true" *) input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD,
|
||||
(* mark_debug = "true" *) input logic LSUStallM, IFUStallF,
|
||||
(* mark_debug = "true" *) input logic FPUStallD, FStallD,
|
||||
(* mark_debug = "true" *) input logic DivBusyE,FDivBusyE,
|
||||
(* mark_debug = "true" *) input logic EcallFaultM, BreakpointFaultM,
|
||||
(* mark_debug = "true" *) input logic InvalidateICacheM, wfiM, IntPendingM,
|
||||
(* mark_debug = "true" *) input logic wfiM, IntPendingM,
|
||||
// Stall & flush outputs
|
||||
(* mark_debug = "true" *) output logic StallF, StallD, StallE, StallM, StallW,
|
||||
(* mark_debug = "true" *) output logic FlushF, FlushD, FlushE, FlushM, FlushW
|
||||
@ -47,7 +47,6 @@ module hazard(
|
||||
logic StallFCause, StallDCause, StallECause, StallMCause, StallWCause;
|
||||
logic FirstUnstalledD, FirstUnstalledE, FirstUnstalledM, FirstUnstalledW;
|
||||
|
||||
|
||||
// stalls and flushes
|
||||
// loads: stall for one cycle if the subsequent instruction depends on the load
|
||||
// branches and jumps: flush the next two instructions if the branch is taken in EXE
|
||||
@ -62,10 +61,10 @@ module hazard(
|
||||
|
||||
// *** can stalls be pushed into earlier stages (e.g. no stall after Decode?)
|
||||
|
||||
assign StallFCause = CSRWritePendingDEM & ~(TrapM | RetM | BPPredWrongE);
|
||||
assign StallFCause = CSRWriteFencePendingDEM & ~(TrapM | RetM | BPPredWrongE);
|
||||
// stall in decode if instruction is a load/mul/csr dependent on previous
|
||||
assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE);
|
||||
assign StallECause = (DivBusyE | FDivBusyE) & ~(TrapM);
|
||||
assign StallECause = (DivBusyE | FDivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?)
|
||||
// WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. It could also terminate with TW trap
|
||||
assign StallMCause = wfiM & (~TrapM & ~IntPendingM);
|
||||
assign StallWCause = LSUStallM | IFUStallF;
|
||||
@ -82,10 +81,10 @@ module hazard(
|
||||
assign FirstUnstalledW = ~StallW & StallM;
|
||||
|
||||
// Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush
|
||||
assign FlushF = BPPredWrongE | InvalidateICacheM;
|
||||
assign FlushD = FirstUnstalledD | TrapM | RetM | BPPredWrongE | InvalidateICacheM; // *** does RetM only need to flush if the privilege changes?
|
||||
assign FlushE = FirstUnstalledE | TrapM | RetM | BPPredWrongE | InvalidateICacheM; // *** why is BPPredWrongE here, but not needed in simple processor
|
||||
assign FlushM = FirstUnstalledM | TrapM | RetM | InvalidateICacheM;
|
||||
assign FlushF = BPPredWrongE;
|
||||
assign FlushD = FirstUnstalledD | TrapM | RetM | BPPredWrongE;
|
||||
assign FlushE = FirstUnstalledE | TrapM | RetM | BPPredWrongE; // *** why is BPPredWrongE here, but not needed in simple processor
|
||||
assign FlushM = FirstUnstalledM | TrapM | RetM;
|
||||
// on Trap the memory stage should be flushed going into the W stage,
|
||||
// except if the instruction causing the Trap is an ecall or ebreak.
|
||||
assign FlushW = FirstUnstalledW | (TrapM & ~(BreakpointFaultM | EcallFaultM));
|
||||
|
@ -30,6 +30,15 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// donedet: asserts eq when a + b == 0 (modulo 2**WIDTH) without building a full adder.
//
// For the sum to be zero, every sum bit a[i]^b[i]^c[i] must be 0, so the carry into bit i
// must equal a[i]^b[i]. Given a zero sum bit at position i-1, the carry out of that
// position is a[i-1]|b[i-1], and the carry into bit 0 is 0. Hence
//      a + b == 0   <=>   (a ^ b) == ((a | b) << 1)
// (see CMOS VLSI Design 4th Ed. p. 463, A+B = K detection with K = 0).
//
//   a, b : WIDTH-bit operands
//   eq   : 1 when a + b is zero modulo 2**WIDTH
module donedet #(parameter WIDTH=64) (
  input  logic [WIDTH-1:0] a, b,
  output logic             eq);

  //assign eq = (a+b == 0); // gives good speed but 3x necessary area

  // The OR of the shifted operands must be parenthesized: == binds tighter than |, so
  // without the parentheses the expression evaluates as ((a^b) == (a<<1)) | (b<<1) and
  // eq would only compare against a<<1.
  assign eq = ((a ^ b) == ({a[WIDTH-2:0], 1'b0} | {b[WIDTH-2:0], 1'b0}));
endmodule
|
||||
|
||||
module comparator_sub #(parameter WIDTH=64) (
|
||||
input logic [WIDTH-1:0] a, b,
|
||||
output logic [2:0] flags);
|
||||
|
@ -67,7 +67,7 @@ module controller(
|
||||
output logic RegWriteW, // for datapath and Hazard Unit
|
||||
output logic [2:0] ResultSrcW,
|
||||
// Stall during CSRs
|
||||
output logic CSRWritePendingDEM,
|
||||
output logic CSRWriteFencePendingDEM,
|
||||
output logic StoreStallD
|
||||
);
|
||||
|
||||
@ -107,6 +107,8 @@ module controller(
|
||||
logic IEURegWriteE;
|
||||
logic IllegalERegAdrD;
|
||||
logic [1:0] AtomicE;
|
||||
logic FencePendingD, FencePendingE, FencePendingM;
|
||||
|
||||
|
||||
// Extract fields
|
||||
assign OpD = InstrD[6:0];
|
||||
@ -174,10 +176,10 @@ module controller(
|
||||
assign {RegWriteD, ImmSrcD, ALUSrcAD, ALUSrcBD, MemRWD,
|
||||
ResultSrcD, BranchD, ALUOpD, JumpD, ALUResultSrcD, W64D, CSRReadD,
|
||||
PrivilegedD, FenceD, MDUD, AtomicD, unused} = IllegalIEUInstrFaultD ? `CTRLW'b0 : ControlsD;
|
||||
// *** move Privileged, CSRwrite?? Or move controller out of IEU into datapath and handle all instructions
|
||||
|
||||
assign CSRZeroSrcD = InstrD[14] ? (InstrD[19:15] == 0) : (Rs1D == 0); // Is a CSR instruction using zero as the source?
|
||||
assign CSRWriteD = CSRReadD & !(CSRZeroSrcD & InstrD[13]); // Don't write if setting or clearing zeros
|
||||
assign FencePendingD = PrivilegedD & (InstrD[31:25] == 7'b0001001) | FenceD; // possible sfence.vma or fence.i
|
||||
|
||||
// ALU Decoding is lazy, only using func7[5] to distinguish add/sub and srl/sra
|
||||
assign sltD = (Funct3D == 3'b010);
|
||||
@ -204,9 +206,9 @@ module controller(
|
||||
flopenrc #(1) controlregD(clk, reset, FlushD, ~StallD, 1'b1, InstrValidD);
|
||||
|
||||
// Execute stage pipeline control register and logic
|
||||
flopenrc #(27) controlregE(clk, reset, FlushE, ~StallE,
|
||||
{RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUControlD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, MDUD, AtomicD, InvalidateICacheD, FlushDCacheD, InstrValidD},
|
||||
{IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, InstrValidE});
|
||||
flopenrc #(28) controlregE(clk, reset, FlushE, ~StallE,
|
||||
{RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUControlD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, MDUD, AtomicD, InvalidateICacheD, FlushDCacheD, FencePendingD, InstrValidD},
|
||||
{IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE});
|
||||
|
||||
// Branch Logic
|
||||
assign {eqE, ltE, ltuE} = FlagsE;
|
||||
@ -220,16 +222,17 @@ module controller(
|
||||
assign RegWriteE = IEURegWriteE | FWriteIntE; // IRF register writes could come from IEU or FPU controllers
|
||||
|
||||
// Memory stage pipeline control register
|
||||
flopenrc #(18) controlregM(clk, reset, FlushM, ~StallM,
|
||||
{RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, InstrValidE},
|
||||
{RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, InstrValidM});
|
||||
flopenrc #(19) controlregM(clk, reset, FlushM, ~StallM,
|
||||
{RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE},
|
||||
{RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM});
|
||||
|
||||
// Writeback stage pipeline control register
|
||||
flopenrc #(4) controlregW(clk, reset, FlushW, ~StallW,
|
||||
{RegWriteM, ResultSrcM},
|
||||
{RegWriteW, ResultSrcW});
|
||||
|
||||
assign CSRWritePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM;
|
||||
// Stall pipeline at Fetch if a CSR Write or Fence is pending in the subsequent stages
|
||||
assign CSRWriteFencePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM | FencePendingD | FencePendingE | FencePendingM;
|
||||
|
||||
assign StoreStallD = MemRWE[0] & ((|MemRWD) | (|AtomicD));
|
||||
endmodule
|
||||
|
@ -71,7 +71,7 @@ module ieu (
|
||||
output logic FPUStallD, LoadStallD, MDUStallD, CSRRdStallD,
|
||||
output logic PCSrcE,
|
||||
output logic CSRReadM, CSRWriteM, PrivilegedM,
|
||||
output logic CSRWritePendingDEM,
|
||||
output logic CSRWriteFencePendingDEM,
|
||||
output logic StoreStallD
|
||||
);
|
||||
|
||||
@ -99,7 +99,7 @@ module ieu (
|
||||
.Funct3E, .MDUE, .W64E, .JumpE, .StallM, .FlushM, .MemRWM,
|
||||
.CSRReadM, .CSRWriteM, .PrivilegedM, .SCE, .AtomicM, .Funct3M,
|
||||
.RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .FWriteIntM,
|
||||
.StallW, .FlushW, .RegWriteW, .ResultSrcW, .CSRWritePendingDEM, .StoreStallD);
|
||||
.StallW, .FlushW, .RegWriteW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD);
|
||||
|
||||
datapath dp(
|
||||
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
|
||||
|
@ -76,7 +76,7 @@ module ifu (
|
||||
input logic [`XLEN-1:0] SATP_REGW,
|
||||
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
|
||||
input logic [1:0] STATUS_MPP,
|
||||
input logic ITLBWriteF, ITLBFlushF,
|
||||
input logic ITLBWriteF, sfencevmaM,
|
||||
output logic ITLBMissF, InstrDAPageFaultF,
|
||||
input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0],
|
||||
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0],
|
||||
@ -141,6 +141,18 @@ module ifu (
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
if(`ZICSR_SUPPORTED == 1) begin : immu
|
||||
///////////////////////////////////////////
|
||||
// sfence.vma causes TLB flushes
|
||||
///////////////////////////////////////////
|
||||
// sets ITLBFlush to pulse for one cycle of the sfence.vma instruction
|
||||
// In this instr we want to flush the tlb and then do a pagetable walk to update the itlb and continue the program.
|
||||
// But we're still in the stalled sfence instruction, so if itlbflushf == sfencevmaM, tlbflush would never drop and
|
||||
// the tlbwrite would never take place after the pagetable walk. by adding in ~StallMQ, we are able to drop itlbflush
|
||||
// after a cycle AND pulse it for another cycle on any further back-to-back sfences.
|
||||
logic StallMQ, TLBFlush;
|
||||
flopr #(1) StallMReg(.clk, .reset, .d(StallM), .q(StallMQ));
|
||||
assign TLBFlush = sfencevmaM & ~StallMQ;
|
||||
|
||||
mmu #(.TLB_ENTRIES(`ITLB_ENTRIES), .IMMU(1))
|
||||
immu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
|
||||
.PrivilegeModeW, .DisableTranslation(1'b0),
|
||||
@ -149,7 +161,7 @@ module ifu (
|
||||
.PTE(PTE),
|
||||
.PageTypeWriteVal(PageType),
|
||||
.TLBWrite(ITLBWriteF),
|
||||
.TLBFlush(ITLBFlushF),
|
||||
.TLBFlush,
|
||||
.PhysicalAddress(PCPF),
|
||||
.TLBMiss(ITLBMissF),
|
||||
.Cacheable(CacheableF), .Idempotent(), .AtomicAllowed(),
|
||||
|
@ -55,7 +55,7 @@ module lsu (
|
||||
// cpu privilege
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic BigEndianM,
|
||||
input logic DTLBFlushM,
|
||||
input logic sfencevmaM,
|
||||
// faults
|
||||
output logic LoadPageFaultM, StoreAmoPageFaultM,
|
||||
output logic LoadMisalignedFaultM, LoadAccessFaultM,
|
||||
@ -161,7 +161,7 @@ module lsu (
|
||||
.PTE,
|
||||
.PageTypeWriteVal(PageType),
|
||||
.TLBWrite(DTLBWriteM),
|
||||
.TLBFlush(DTLBFlushM),
|
||||
.TLBFlush(sfencevmaM),
|
||||
.PhysicalAddress(LSUPAdrM),
|
||||
.TLBMiss(DTLBMissM),
|
||||
.Cacheable(CacheableM), .Idempotent(), .AtomicAllowed(),
|
||||
|
@ -478,10 +478,12 @@ module ppa_priorityencoder #(parameter WIDTH = 8) (
|
||||
output logic [$clog2(WIDTH)-1:0] y);
|
||||
|
||||
int i;
|
||||
always_comb
|
||||
always_comb begin
|
||||
y = 0;
|
||||
for (i=0; i<WIDTH; i++) begin:pri
|
||||
if (a[i]) y= i;
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
||||
module ppa_decoder_8 #(parameter WIDTH = 8) (
|
||||
@ -531,15 +533,7 @@ module ppa_mux2_8 #(parameter WIDTH = 8) (
|
||||
assign y = s ? d1 : d0;
|
||||
endmodule
|
||||
|
||||
module ppa_mux3 #(parameter WIDTH = 8) (
|
||||
input logic [WIDTH-1:0] d0, d1, d2,
|
||||
input logic [1:0] s,
|
||||
output logic [WIDTH-1:0] y);
|
||||
|
||||
assign y = s[1] ? d2 : (s[0] ? d1 : d0);
|
||||
endmodule
|
||||
|
||||
module ppa_mux4 #(parameter WIDTH = 8) (
|
||||
module ppa_mux4_8 #(parameter WIDTH = 8) (
|
||||
input logic [WIDTH-1:0] d0, d1, d2, d3,
|
||||
input logic [1:0] s,
|
||||
output logic [WIDTH-1:0] y);
|
||||
@ -547,15 +541,103 @@ module ppa_mux4 #(parameter WIDTH = 8) (
|
||||
assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0);
|
||||
endmodule
|
||||
|
||||
module ppa_mux6 #(parameter WIDTH = 8) (
|
||||
input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5,
|
||||
module ppa_mux8_8 #(parameter WIDTH = 8) (
|
||||
input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5, d6, d7,
|
||||
input logic [2:0] s,
|
||||
output logic [WIDTH-1:0] y);
|
||||
|
||||
assign y = s[2] ? (s[0] ? d5 : d4) : (s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0));
|
||||
assign y = s[2] ? (s[1] ? (s[0] ? d5 : d4) : (s[0] ? d6 : d7)) : (s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0));
|
||||
endmodule
|
||||
|
||||
module ppa_mux8 #(parameter WIDTH = 8) (
|
||||
// ppa_mux2_16: 2:1 multiplexer, WIDTH bits wide (default 16).
//   d0, d1 : data inputs
//   s      : select; 0 passes d0, 1 passes d1
//   y      : selected data
module ppa_mux2_16 #(parameter WIDTH = 16) (
  input  logic [WIDTH-1:0] d0,
  input  logic [WIDTH-1:0] d1,
  input  logic             s,
  output logic [WIDTH-1:0] y);

  assign y = s ? d1 : d0;
endmodule
|
||||
|
||||
// ppa_mux4_16: 4:1 multiplexer, WIDTH bits wide (default 16).
//   d0..d3 : data inputs
//   s      : 2-bit select; y = d<s>
//   y      : selected data
module ppa_mux4_16 #(parameter WIDTH = 16) (
  input  logic [WIDTH-1:0] d0, d1, d2, d3,
  input  logic [1:0]       s,
  output logic [WIDTH-1:0] y);

  logic [WIDTH-1:0] lowpair, highpair;

  // first level picks within each pair using s[0]; second level picks the pair using s[1]
  assign lowpair  = s[0] ? d1 : d0;
  assign highpair = s[0] ? d3 : d2;
  assign y        = s[1] ? highpair : lowpair;
endmodule
|
||||
|
||||
// ppa_mux8_16: 8:1 multiplexer, WIDTH bits wide (default 16).
//   d0..d7 : data inputs
//   s      : 3-bit select; y = d<s>
//   y      : selected data
module ppa_mux8_16 #(parameter WIDTH = 16) (
  input  logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5, d6, d7,
  input  logic [2:0]       s,
  output logic [WIDTH-1:0] y);

  // Binary select tree: s[2] picks the half, s[1] the pair, s[0] the element.
  // The upper half previously routed s=4..7 to d7, d6, d4, d5; it now maps to d4..d7 in order.
  assign y = s[2] ? (s[1] ? (s[0] ? d7 : d6) : (s[0] ? d5 : d4))
                  : (s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0));
endmodule
|
||||
|
||||
// ppa_mux2_32: 2:1 multiplexer, WIDTH bits wide (default 32).
//   d0, d1 : data inputs
//   s      : select; 0 passes d0, 1 passes d1
//   y      : selected data
module ppa_mux2_32 #(parameter WIDTH = 32) (
  input  logic [WIDTH-1:0] d0,
  input  logic [WIDTH-1:0] d1,
  input  logic             s,
  output logic [WIDTH-1:0] y);

  assign y = s ? d1 : d0;
endmodule
|
||||
|
||||
// ppa_mux4_32: 4:1 multiplexer, WIDTH bits wide (default 32).
//   d0..d3 : data inputs
//   s      : 2-bit select; y = d<s>
//   y      : selected data
module ppa_mux4_32 #(parameter WIDTH = 32) (
  input  logic [WIDTH-1:0] d0, d1, d2, d3,
  input  logic [1:0]       s,
  output logic [WIDTH-1:0] y);

  logic [WIDTH-1:0] lowpair, highpair;

  // first level picks within each pair using s[0]; second level picks the pair using s[1]
  assign lowpair  = s[0] ? d1 : d0;
  assign highpair = s[0] ? d3 : d2;
  assign y        = s[1] ? highpair : lowpair;
endmodule
|
||||
|
||||
// ppa_mux8_32: 8:1 multiplexer, WIDTH bits wide (default 32).
//   d0..d7 : data inputs
//   s      : 3-bit select; y = d<s>
//   y      : selected data
module ppa_mux8_32 #(parameter WIDTH = 32) (
  input  logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5, d6, d7,
  input  logic [2:0]       s,
  output logic [WIDTH-1:0] y);

  // Binary select tree: s[2] picks the half, s[1] the pair, s[0] the element.
  // The upper half previously routed s=4..7 to d7, d6, d4, d5; it now maps to d4..d7 in order.
  assign y = s[2] ? (s[1] ? (s[0] ? d7 : d6) : (s[0] ? d5 : d4))
                  : (s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0));
endmodule
|
||||
|
||||
// ppa_mux2_64: 2:1 multiplexer, WIDTH bits wide (default 64).
//   d0, d1 : data inputs
//   s      : select; 0 passes d0, 1 passes d1
//   y      : selected data
module ppa_mux2_64 #(parameter WIDTH = 64) (
  input  logic [WIDTH-1:0] d0,
  input  logic [WIDTH-1:0] d1,
  input  logic             s,
  output logic [WIDTH-1:0] y);

  assign y = s ? d1 : d0;
endmodule
|
||||
|
||||
// ppa_mux4_64: 4:1 multiplexer, WIDTH bits wide (default 64).
//   d0..d3 : data inputs
//   s      : 2-bit select; y = d<s>
//   y      : selected data
module ppa_mux4_64 #(parameter WIDTH = 64) (
  input  logic [WIDTH-1:0] d0, d1, d2, d3,
  input  logic [1:0]       s,
  output logic [WIDTH-1:0] y);

  logic [WIDTH-1:0] lowpair, highpair;

  // first level picks within each pair using s[0]; second level picks the pair using s[1]
  assign lowpair  = s[0] ? d1 : d0;
  assign highpair = s[0] ? d3 : d2;
  assign y        = s[1] ? highpair : lowpair;
endmodule
|
||||
|
||||
// ppa_mux8_64: 8:1 multiplexer, WIDTH bits wide (default 64).
//   d0..d7 : data inputs
//   s      : 3-bit select; y = d<s>
//   y      : selected data
module ppa_mux8_64 #(parameter WIDTH = 64) (
  input  logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5, d6, d7,
  input  logic [2:0]       s,
  output logic [WIDTH-1:0] y);

  // Binary select tree: s[2] picks the half, s[1] the pair, s[0] the element.
  // The upper half previously routed s=4..7 to d7, d6, d4, d5; it now maps to d4..d7 in order.
  assign y = s[2] ? (s[1] ? (s[0] ? d7 : d6) : (s[0] ? d5 : d4))
                  : (s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0));
endmodule
|
||||
|
||||
// ppa_mux2_128: 2:1 multiplexer, WIDTH bits wide (default 128).
//   d0, d1 : data inputs
//   s      : select; 0 passes d0, 1 passes d1
//   y      : selected data
module ppa_mux2_128 #(parameter WIDTH = 128) (
  input  logic [WIDTH-1:0] d0,
  input  logic [WIDTH-1:0] d1,
  input  logic             s,
  output logic [WIDTH-1:0] y);

  assign y = s ? d1 : d0;
endmodule
|
||||
|
||||
// ppa_mux4_128: 4:1 multiplexer, WIDTH bits wide (default 128).
//   d0..d3 : data inputs
//   s      : 2-bit select; y = d<s>
//   y      : selected data
module ppa_mux4_128 #(parameter WIDTH = 128) (
  input  logic [WIDTH-1:0] d0, d1, d2, d3,
  input  logic [1:0]       s,
  output logic [WIDTH-1:0] y);

  logic [WIDTH-1:0] lowpair, highpair;

  // first level picks within each pair using s[0]; second level picks the pair using s[1]
  assign lowpair  = s[0] ? d1 : d0;
  assign highpair = s[0] ? d3 : d2;
  assign y        = s[1] ? highpair : lowpair;
endmodule
|
||||
|
||||
module ppa_mux8_128 #(parameter WIDTH = 128) (
|
||||
input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5, d6, d7,
|
||||
input logic [2:0] s,
|
||||
output logic [WIDTH-1:0] y);
|
||||
@ -867,4 +949,11 @@ module ppa_csa_128 #(parameter WIDTH = 128) (
|
||||
assign sum = a ^ b ^ c;
|
||||
assign carry = (a & (b | c)) | (b & c);
|
||||
|
||||
endmodule
|
||||
|
||||
// ppa_inv_1: bitwise inverter, WIDTH bits wide (default 1).
//   a : input vector
//   y : bitwise complement of a
module ppa_inv_1 #(parameter WIDTH = 1) (
  input  logic [WIDTH-1:0] a,
  output logic [WIDTH-1:0] y
);

  assign y = ~a;
endmodule
|
@ -39,7 +39,7 @@ module privdec (
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic STATUS_TSR, STATUS_TVM, STATUS_TW,
|
||||
input logic [1:0] STATUS_FS,
|
||||
output logic IllegalInstrFaultM, ITLBFlushF, DTLBFlushM,
|
||||
output logic IllegalInstrFaultM,
|
||||
output logic EcallFaultM, BreakpointFaultM,
|
||||
output logic sretM, mretM, wfiM, sfencevmaM);
|
||||
|
||||
@ -84,9 +84,9 @@ module privdec (
|
||||
// But we're still in the stalled sfence instruction, so if itlbflushf == sfencevmaM, tlbflush would never drop and
|
||||
// the tlbwrite would never take place after the pagetable walk. by adding in ~StallMQ, we are able to drop itlbflush
|
||||
// after a cycle AND pulse it for another cycle on any further back-to-back sfences.
|
||||
flopr #(1) StallMReg(.clk, .reset, .d(StallM), .q(StallMQ));
|
||||
assign ITLBFlushF = sfencevmaM & ~StallMQ;
|
||||
assign DTLBFlushM = sfencevmaM;
|
||||
// flopr #(1) StallMReg(.clk, .reset, .d(StallM), .q(StallMQ));
|
||||
// assign ITLBFlushF = sfencevmaM & ~StallMQ;
|
||||
// assign DTLBFlushM = sfencevmaM;
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Fault on illegal instructions
|
||||
|
@ -38,7 +38,7 @@ module privileged (
|
||||
output logic [`XLEN-1:0] CSRReadValW,
|
||||
output logic [`XLEN-1:0] PrivilegedNextPCM,
|
||||
output logic RetM, TrapM,
|
||||
output logic ITLBFlushF, DTLBFlushM,
|
||||
output logic sfencevmaM,
|
||||
input logic InstrValidM, CommittedM,
|
||||
input logic FRegWriteM, LoadStallD,
|
||||
input logic BPPredDirWrongM,
|
||||
@ -85,7 +85,7 @@ module privileged (
|
||||
logic [`XLEN-1:0] MEDELEG_REGW;
|
||||
logic [11:0] MIDELEG_REGW;
|
||||
|
||||
logic sretM, mretM, sfencevmaM;
|
||||
logic sretM, mretM;
|
||||
logic IllegalCSRAccessM;
|
||||
logic IllegalIEUInstrFaultM;
|
||||
logic IllegalFPUInstrM;
|
||||
@ -99,13 +99,14 @@ module privileged (
|
||||
logic STATUS_MIE, STATUS_SIE;
|
||||
logic [11:0] MIP_REGW, MIE_REGW;
|
||||
logic [1:0] NextPrivilegeModeM;
|
||||
logic DelegateM;
|
||||
|
||||
///////////////////////////////////////////
|
||||
// track the current privilege level
|
||||
///////////////////////////////////////////
|
||||
|
||||
privmode privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .InterruptM, .CauseM,
|
||||
.MEDELEG_REGW, .MIDELEG_REGW, .STATUS_MPP, .STATUS_SPP, .NextPrivilegeModeM, .PrivilegeModeW);
|
||||
privmode privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .DelegateM,
|
||||
.STATUS_MPP, .STATUS_SPP, .NextPrivilegeModeM, .PrivilegeModeW);
|
||||
|
||||
///////////////////////////////////////////
|
||||
// decode privileged instructions
|
||||
@ -114,7 +115,7 @@ module privileged (
|
||||
privdec pmd(.clk, .reset, .StallM, .InstrM(InstrM[31:20]),
|
||||
.PrivilegedM, .IllegalIEUInstrFaultM, .IllegalCSRAccessM, .IllegalFPUInstrM,
|
||||
.PrivilegeModeW, .STATUS_TSR, .STATUS_TVM, .STATUS_TW, .STATUS_FS, .IllegalInstrFaultM,
|
||||
.ITLBFlushF, .DTLBFlushM, .EcallFaultM, .BreakpointFaultM,
|
||||
.EcallFaultM, .BreakpointFaultM,
|
||||
.sretM, .mretM, .wfiM, .sfencevmaM);
|
||||
|
||||
///////////////////////////////////////////
|
||||
@ -158,11 +159,11 @@ module privileged (
|
||||
.LoadPageFaultM, .StoreAmoPageFaultM,
|
||||
.mretM, .sretM,
|
||||
.PrivilegeModeW,
|
||||
.MIP_REGW, .MIE_REGW, .MIDELEG_REGW,
|
||||
.MIP_REGW, .MIE_REGW, .MIDELEG_REGW, .MEDELEG_REGW,
|
||||
.STATUS_MIE, .STATUS_SIE,
|
||||
.InstrValidM, .CommittedM,
|
||||
.TrapM, .RetM,
|
||||
.InterruptM, .IntPendingM,
|
||||
.InterruptM, .IntPendingM, .DelegateM,
|
||||
.CauseM);
|
||||
endmodule
|
||||
|
||||
|
@ -33,30 +33,22 @@
|
||||
|
||||
module privmode (
|
||||
input logic clk, reset,
|
||||
input logic StallW, TrapM, mretM, sretM, InterruptM,
|
||||
input logic [`LOG_XLEN-1:0] CauseM,
|
||||
input logic [`XLEN-1:0] MEDELEG_REGW,
|
||||
input logic [11:0] MIDELEG_REGW,
|
||||
input logic StallW, TrapM, mretM, sretM,
|
||||
input logic DelegateM,
|
||||
input logic [1:0] STATUS_MPP,
|
||||
input logic STATUS_SPP,
|
||||
output logic [1:0] NextPrivilegeModeM, PrivilegeModeW
|
||||
);
|
||||
|
||||
if (`U_SUPPORTED) begin:privmode
|
||||
logic md;
|
||||
|
||||
// get bits of DELEG registers based on CAUSE
|
||||
assign md = InterruptM ? MIDELEG_REGW[CauseM[3:0]] : MEDELEG_REGW[CauseM];
|
||||
|
||||
// PrivilegeMode FSM
|
||||
always_comb begin
|
||||
if (TrapM) begin // Change privilege based on DELEG registers (see 3.1.8)
|
||||
if (`S_SUPPORTED & md & (PrivilegeModeW == `U_MODE | PrivilegeModeW == `S_MODE))
|
||||
NextPrivilegeModeM = `S_MODE;
|
||||
else NextPrivilegeModeM = `M_MODE;
|
||||
end else if (mretM) NextPrivilegeModeM = STATUS_MPP;
|
||||
else if (sretM) NextPrivilegeModeM = {1'b0, STATUS_SPP};
|
||||
else NextPrivilegeModeM = PrivilegeModeW;
|
||||
if (`S_SUPPORTED & DelegateM) NextPrivilegeModeM = `S_MODE;
|
||||
else NextPrivilegeModeM = `M_MODE;
|
||||
end else if (mretM) NextPrivilegeModeM = STATUS_MPP;
|
||||
else if (sretM) NextPrivilegeModeM = {1'b0, STATUS_SPP};
|
||||
else NextPrivilegeModeM = PrivilegeModeW;
|
||||
end
|
||||
|
||||
flopenl #(2) privmodereg(clk, reset, ~StallW, NextPrivilegeModeM, `M_MODE, PrivilegeModeW);
|
||||
|
@ -39,11 +39,12 @@ module trap (
|
||||
(* mark_debug = "true" *) input logic LoadPageFaultM, StoreAmoPageFaultM,
|
||||
(* mark_debug = "true" *) input logic mretM, sretM,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
(* mark_debug = "true" *) input logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW,
|
||||
(* mark_debug = "true" *) input logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW,
|
||||
input logic [`XLEN-1:0] MEDELEG_REGW,
|
||||
input logic STATUS_MIE, STATUS_SIE,
|
||||
input logic InstrValidM, CommittedM,
|
||||
output logic TrapM, RetM,
|
||||
output logic InterruptM, IntPendingM,
|
||||
output logic InterruptM, IntPendingM, DelegateM,
|
||||
output logic [`LOG_XLEN-1:0] CauseM
|
||||
);
|
||||
|
||||
@ -63,6 +64,8 @@ module trap (
|
||||
assign IntPendingM = |PendingIntsM;
|
||||
assign ValidIntsM = {12{MIntGlobalEnM}} & PendingIntsM & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & PendingIntsM & MIDELEG_REGW;
|
||||
assign InterruptM = (|ValidIntsM) && InstrValidM && ~(CommittedM); // *** RT. CommittedM is a temporary hack to prevent integer division from having an interrupt during divide.
|
||||
assign DelegateM = `S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM[3:0]] : MEDELEG_REGW[CauseM]) &
|
||||
(PrivilegeModeW == `U_MODE | PrivilegeModeW == `S_MODE);
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Trigger Traps and RET
|
||||
|
98
pipelined/src/uncore/ahbapbbridge.sv
Normal file
98
pipelined/src/uncore/ahbapbbridge.sv
Normal file
@ -0,0 +1,98 @@
|
||||
///////////////////////////////////////////
|
||||
// ahbapbbridge.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu & Nic Lucio 7 June 2022
|
||||
//
|
||||
// Purpose: AHB to APB bridge
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// ahbapbbridge: AHB to APB bridge for PERIPHS APB peripherals.
//
//   AHB side: HSEL has one select bit per peripheral. The address-phase signals
//             (HADDR, HWRITE, HSEL) are registered so they line up with HWDATA.
//   APB side: PSEL/PADDR/PWRITE are the registered address-phase values, PWDATA
//             is HWDATA passed straight through, and PENABLE marks the access phase.
//   PREADY  : one ready bit per peripheral.
//   PRDATA  : one XLEN-bit read word per peripheral; PRDATA[i] belongs to peripheral i.
//             The packed dimensions were previously declared in the opposite order,
//             which made PRDATA[i] a PERIPHS-bit slice rather than a data word.
//             The total port width is unchanged.
module ahbapbbridge #(PERIPHS = 2) (
  input  logic                          HCLK, HRESETn,
  input  logic [PERIPHS-1:0]            HSEL,
  input  logic [31:0]                   HADDR,
  input  logic [`XLEN-1:0]              HWDATA,
  input  logic                          HWRITE,
  input  logic [1:0]                    HTRANS,
  input  logic                          HREADY,
  output logic [`XLEN-1:0]              HRDATA,
  output logic                          HRESP, HREADYOUT,
  output logic                          PCLK, PRESETn,
  output logic [PERIPHS-1:0]            PSEL,
  output logic                          PWRITE,
  output logic                          PENABLE,
  output logic [31:0]                   PADDR,
  output logic [`XLEN-1:0]              PWDATA,
  input  logic [PERIPHS-1:0]            PREADY,
  input  var   [PERIPHS-1:0][`XLEN-1:0] PRDATA
);

  logic activeTrans;
  logic initTrans, initTransSel, initTransSelD;
  logic nextPENABLE;

  // convert AHB to APB signals
  assign PCLK = HCLK;
  assign PRESETn = HRESETn;

  // identify start of a transaction
  assign activeTrans = (HTRANS == 2'b10); // only accept nonsequential transactions
  assign initTrans = activeTrans & HREADY; // start a transaction when the bus is ready and an active transaction is requested
  assign initTransSel = initTrans & |HSEL; // capture data and address if any of the peripherals are selected

  // delay AHB Address phase signals to align with AHB Data phase because APB expects them at the same time
  flopenr #(32) addrreg(HCLK, ~HRESETn, initTransSel, HADDR, PADDR);
  flopenr #(1) writereg(HCLK, ~HRESETn, initTransSel, HWRITE, PWRITE);
  // enable selreg with initTrans rather than initTransSel so PSEL can turn off
  flopenr #(PERIPHS) selreg(HCLK, ~HRESETn, initTrans, HSEL & {PERIPHS{activeTrans}}, PSEL);
  // AHB Data phase signal doesn't need delay. Note that HWDATA is guaranteed to remain stable until READY is asserted
  assign PWDATA = HWDATA;

  // enable logic: goes high a cycle after initTrans, then back low on cycle after desired PREADY is asserted
  // cycle1: AHB puts HADDR, HWRITE, HSEL on bus. initTrans is 1, and these are captured
  // cycle2: AHB puts HWDATA on the bus. This effectively extends the setup phase
  // cycle3: bridge raises PENABLE. Peripheral typically responds with PREADY.
  //         Read occurs by end of cycle. Write occurs at end of cycle.
  flopr #(1) inittransreg(HCLK, ~HRESETn, initTransSel, initTransSelD);
  assign nextPENABLE = PENABLE ? ~HREADY : initTransSelD;
  flopr #(1) enablereg(HCLK, ~HRESETn, nextPENABLE, PENABLE);

  // result and ready multiplexer
  always_comb begin
    // no peripheral selected: read 0, indicate ready.
    // These defaults must be assigned once, before the loop. Assigning them inside the
    // loop body overwrote the choice made for every peripheral except the last one.
    HRDATA = 0;
    HREADYOUT = 1;
    for (int i = 0; i < PERIPHS; i++) begin
      if (PSEL[i]) begin // highest numbered peripheral has priority, but multiple PSEL should never be asserted
        HRDATA = PRDATA[i];
        HREADYOUT = PREADY[i];
      end
    end
  end

  // resp logic
  assign HRESP = 0; // bridge never indicates errors
endmodule
|
||||
|
146
pipelined/src/uncore/gpio_apb.sv
Normal file
146
pipelined/src/uncore/gpio_apb.sv
Normal file
@ -0,0 +1,146 @@
|
||||
///////////////////////////////////////////
|
||||
// gpio_apb.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 14 January 2021
|
||||
// Modified: bbracker@hmc.edu 15 Apr. 2021
|
||||
//
|
||||
// Purpose: General Purpose I/O peripheral
|
||||
// See FE310-G002-Manual-v19p05 for specifications
|
||||
// No interrupts, drive strength, or pull-ups supported
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// gpio_apb: 32-pin general purpose I/O peripheral with an APB slave interface.
//
//   APB      : PSEL, PADDR, PWDATA, PWRITE, PENABLE in; PRDATA, PREADY out.
//              Registers are 32 bits wide and decoded on word-aligned offsets (PADDR[7:2]).
//   Pins     : GPIOPinsIn is sampled through three flops; GPIOPinsOut/GPIOPinsEn drive the pads.
//   GPIOIntr : OR of every pending interrupt bit that is also enabled.
//
//   Register offsets decoded below:
//     0x00 input_val (read only)   0x04 input_en   0x08 output_en   0x0C output_val
//     0x18 rise_ie   0x1C rise_ip  0x20 fall_ie    0x24 fall_ip
//     0x28 high_ie   0x2C high_ip  0x30 low_ie     0x34 low_ip
//     0x40 out_xor (a write XORs Din into output_val; reads as 0)
module gpio_apb (
  input  logic             PCLK, PRESETn,
  input  logic             PSEL,
  input  logic [7:0]       PADDR,
  input  logic [`XLEN-1:0] PWDATA,
  input  logic             PWRITE,
  input  logic             PENABLE,
  output logic [`XLEN-1:0] PRDATA,
  output logic             PREADY,
  input  logic [31:0]      GPIOPinsIn,
  output logic [31:0]      GPIOPinsOut, GPIOPinsEn,
  output logic             GPIOIntr);

  logic [31:0] input0d, input1d, input2d, input3d;
  logic [31:0] input_val, input_en, output_en, output_val;
  logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip;

  logic [7:0]  entry;
  logic [31:0] Din, Dout;
  logic        memwrite;

  // APB I/O
  assign entry = {PADDR[7:2],2'b00};  // 32-bit word-aligned accesses
  // Only write in the access phase of a transfer addressed to this peripheral.
  // PSEL must qualify the strobe: PWRITE and PENABLE alone do not identify the target,
  // so without it a write to another APB peripheral at the same offset would also land here.
  assign memwrite = PSEL & PWRITE & PENABLE;
  assign PREADY = PENABLE; // GPIO never takes >1 cycle to respond

  // account for subword read/write circuitry
  // -- Note GPIO registers are 32 bits no matter what; access them with LW SW.
  //    (At least that's what I think when FE310 spec says "only naturally aligned 32-bit accesses are supported")
  if (`XLEN == 64) begin
    // entry[2] selects which 32-bit half of the 64-bit data bus carries the register
    assign Din = entry[2] ? PWDATA[63:32] : PWDATA[31:0];
    assign PRDATA = entry[2] ? {Dout,32'b0} : {32'b0,Dout};
  end else begin // 32-bit
    assign Din = PWDATA[31:0];
    assign PRDATA = Dout;
  end

  // register access
  always_ff @(posedge PCLK, negedge PRESETn)
    if (~PRESETn) begin // asynch reset
      input_en <= 0;
      output_en <= 0;
      // *** synch reset not yet implemented [DH: can we delete this comment? Check if a sync reset is required]
      output_val <= #1 0;
      rise_ie <= #1 0;
      rise_ip <= #1 0;
      fall_ie <= #1 0;
      fall_ip <= #1 0;
      high_ie <= #1 0;
      high_ip <= #1 0;
      low_ie <= #1 0;
      low_ip <= #1 0;
    end else begin // writes
      // According to FE310 spec: Once the interrupt is pending, it will remain set until a 1 is written to the *_ip register at that bit.
      /* verilator lint_off CASEINCOMPLETE */
      if (memwrite)
        case(entry)
          8'h04: input_en <= #1 Din;
          8'h08: output_en <= #1 Din;
          8'h0C: output_val <= #1 Din;
          8'h18: rise_ie <= #1 Din;
          8'h20: fall_ie <= #1 Din;
          8'h28: high_ie <= #1 Din;
          8'h30: low_ie <= #1 Din;
          8'h40: output_val <= #1 output_val ^ Din; // OUT_XOR
        endcase
      /* verilator lint_on CASEINCOMPLETE */

      // interrupts can be cleared by writing corresponding bits to a register;
      // otherwise each pending register accumulates new events from the synchronized inputs
      if (memwrite & (entry == 8'h1C)) rise_ip <= rise_ip & ~Din;
      else                             rise_ip <= rise_ip | (input2d & ~input3d);
      if (memwrite & (entry == 8'h24)) fall_ip <= fall_ip & ~Din;
      else                             fall_ip <= fall_ip | (~input2d & input3d);
      if (memwrite & (entry == 8'h2C)) high_ip <= high_ip & ~Din;
      else                             high_ip <= high_ip | input3d;
      if (memwrite & (entry == 8'h34)) low_ip <= low_ip & ~Din;
      else                             low_ip <= low_ip | ~input3d;

      // read data is registered every cycle from the currently addressed entry
      case(entry) // flop to sample inputs
        8'h00: Dout <= #1 input_val;
        8'h04: Dout <= #1 input_en;
        8'h08: Dout <= #1 output_en;
        8'h0C: Dout <= #1 output_val;
        8'h18: Dout <= #1 rise_ie;
        8'h1C: Dout <= #1 rise_ip;
        8'h20: Dout <= #1 fall_ie;
        8'h24: Dout <= #1 fall_ip;
        8'h28: Dout <= #1 high_ie;
        8'h2C: Dout <= #1 high_ip;
        8'h30: Dout <= #1 low_ie;
        8'h34: Dout <= #1 low_ip;
        8'h40: Dout <= #1 0; // OUT_XOR reads as 0
        default: Dout <= #1 0;
      endcase
    end

  // chip i/o
  // connect OUT to IN for loopback testing
  if (`GPIO_LOOPBACK_TEST) assign input0d = ((output_en & GPIOPinsOut) | (~output_en & GPIOPinsIn)) & input_en;
  else                     assign input0d = GPIOPinsIn & input_en;

  // synchronizer for inputs
  flop #(32) sync1(PCLK,input0d,input1d);
  flop #(32) sync2(PCLK,input1d,input2d);
  flop #(32) sync3(PCLK,input2d,input3d);
  assign input_val = input3d;
  assign GPIOPinsOut = output_val;
  assign GPIOPinsEn = output_en;

  // interrupt is raised when any pending bit is also enabled
  assign GPIOIntr = |{(rise_ip & rise_ie),(fall_ip & fall_ie),(high_ip & high_ie),(low_ip & low_ie)};
endmodule
|
||||
|
@ -49,21 +49,24 @@ module ram #(parameter BASE=0, RANGE = 65535) (
|
||||
// 3. implement burst.
|
||||
// 4. remove the configurable latency.
|
||||
|
||||
logic [`XLEN/8-1:0] ByteMaskM;
|
||||
logic [31:0] HWADDR, A;
|
||||
logic prevHREADYRam, risingHREADYRam;
|
||||
logic [`XLEN/8-1:0] ByteMask;
|
||||
logic [31:0] HADDRD, RamAddr;
|
||||
//logic prevHREADYRam, risingHREADYRam;
|
||||
logic initTrans;
|
||||
logic memwrite;
|
||||
logic [3:0] busycount;
|
||||
logic memwrite, memwriteD;
|
||||
logic nextHREADYRam;
|
||||
//logic [3:0] busycount;
|
||||
|
||||
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HWADDR[2:0]), .ByteMask(ByteMaskM));
|
||||
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));
|
||||
|
||||
assign initTrans = HREADY & HSELRam & (HTRANS != 2'b00);
|
||||
assign initTrans = HREADY & HSELRam & (HTRANS != 2'b00); // *** add burst support, or disable on busy
|
||||
assign memwrite = initTrans & HWRITE;
|
||||
|
||||
// *** this seems like a weird way to use reset
|
||||
flopenr #(1) memwritereg(HCLK, 1'b0, initTrans | ~HRESETn, HSELRam & HWRITE, memwrite);
|
||||
flopenr #(32) haddrreg(HCLK, 1'b0, initTrans | ~HRESETn, HADDR, A);
|
||||
// busy FSM to extend READY signal
|
||||
flopen #(1) memwritereg(HCLK, initTrans | ~HRESETn, memwrite, memwriteD); // probably drop ~HRESETn in all this
|
||||
flopen #(32) haddrreg(HCLK, initTrans | ~HRESETn, HADDR, HADDRD);
|
||||
|
||||
/* // busy FSM to extend READY signal
|
||||
always @(posedge HCLK, negedge HRESETn)
|
||||
if (~HRESETn) begin
|
||||
busycount <= 0;
|
||||
@ -79,28 +82,38 @@ module ram #(parameter BASE=0, RANGE = 65535) (
|
||||
busycount <= busycount + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end */
|
||||
|
||||
|
||||
assign nextHREADYRam = ~(memwriteD & ~memwrite);
|
||||
flopr #(1) readyreg(HCLK, ~HRESETn, nextHREADYRam, HREADYRam);
|
||||
// assign HREADYRam = ~(memwriteD & ~memwrite);
|
||||
assign HRESPRam = 0; // OK
|
||||
|
||||
localparam ADDR_WDITH = $clog2(RANGE/8);
|
||||
localparam ADDR_WIDTH = $clog2(RANGE/8);
|
||||
localparam OFFSET = $clog2(`XLEN/8);
|
||||
|
||||
// Rising HREADY edge detector
|
||||
/* // Rising HREADY edge detector
|
||||
// Indicates when ram is finishing up
|
||||
// Needed because HREADY may go high for other reasons,
|
||||
// and we only want to write data when finishing up.
|
||||
flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam);
|
||||
assign risingHREADYRam = HREADYRam & ~prevHREADYRam;
|
||||
assign risingHREADYRam = HREADYRam & ~prevHREADYRam;*/
|
||||
|
||||
always @(posedge HCLK)
|
||||
HWADDR <= #1 A;
|
||||
|
||||
bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA)
|
||||
memory(.clk(HCLK), .enaA(1'b1),
|
||||
/*
|
||||
bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA)
|
||||
memory(.clk(HCLK), .reA(1'b1),
|
||||
.addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam),
|
||||
.enaB(memwrite & risingHREADYRam), .weB(ByteMaskM),
|
||||
.addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA));
|
||||
|
||||
|
||||
.weB(memwrite & risingHREADYRam), .bweB(ByteMaskM),
|
||||
.addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); */
|
||||
|
||||
|
||||
|
||||
// On writes, use address delayed by one cycle to sync with HWDATA
|
||||
mux2 #(32) adrmux(HADDR, HADDRD, memwriteD, RamAddr);
|
||||
|
||||
// single-ported RAM
|
||||
bram1p1rw #(`XLEN/8, 8, ADDR_WIDTH)
|
||||
memory(.clk(HCLK), .we(memwriteD), .bwe(ByteMask), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA));
|
||||
endmodule
|
||||
|
||||
|
107
pipelined/src/uncore/ram_orig.sv
Normal file
107
pipelined/src/uncore/ram_orig.sv
Normal file
@ -0,0 +1,107 @@
|
||||
///////////////////////////////////////////
|
||||
// ram_orig.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 9 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: On-chip RAM, external to core
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// ram_orig: on-chip RAM behind H-prefixed bus signals (they look like AHB slave
// signals -- TODO confirm), built around a bram2p1r1w memory whose port A is used
// for reads and whose port B is used for writes.
// Parameters: BASE is not referenced anywhere inside this module; RANGE sizes the
//             word address of the memory (see ADDR_WDITH below).
// Outputs:    HREADRam  - read data taken from memory port A
//             HRESPRam  - tied to constant 0
//             HREADYRam - ready flag driven by the busy FSM below
module ram_orig #(parameter BASE=0, RANGE = 65535) (
|
||||
input logic HCLK, HRESETn,
|
||||
input logic HSELRam,
|
||||
input logic [31:0] HADDR,
|
||||
input logic HWRITE,
|
||||
input logic HREADY,
|
||||
input logic [1:0] HTRANS,
|
||||
input logic [`XLEN-1:0] HWDATA,
|
||||
input logic [3:0] HSIZED,
|
||||
output logic [`XLEN-1:0] HREADRam,
|
||||
output logic HRESPRam, HREADYRam
|
||||
);
|
||||
|
||||
// Desired changes.
|
||||
// 1. find a way to merge read and write address into 1 port.
|
||||
// 2. remove all unnecessary latencies. (HREADY needs to be able to stay constantly high.)
|
||||
// 3. implement burst.
|
||||
// 4. remove the configurable latency.
|
||||
|
||||
logic [`XLEN/8-1:0] ByteMaskM;
|
||||
logic [31:0] HWADDR, A;
|
||||
logic prevHREADYRam, risingHREADYRam;
|
||||
logic initTrans;
|
||||
logic memwrite;
|
||||
logic [3:0] busycount;
|
||||
|
||||
// swbytemask (defined elsewhere) produces the write byte mask ByteMaskM from the
// low two bits of HSIZED and the low three bits of the delayed address HWADDR.
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HWADDR[2:0]), .ByteMask(ByteMaskM));
|
||||
|
||||
// initTrans is high when HREADY is high, this RAM is selected, and HTRANS is
// nonzero (2'b00 is presumably the idle encoding -- TODO confirm).
assign initTrans = HREADY & HSELRam & (HTRANS != 2'b00);
|
||||
|
||||
// *** this seems like a weird way to use reset
|
||||
// Both registers pass 1'b0 as the second positional argument and OR ~HRESETn into
// the third. Assuming flopenr's positional order is (clk, reset, en, d, q), they
// are never cleared and instead keep loading their inputs while HRESETn is low.
// Otherwise memwrite captures HSELRam & HWRITE, and A captures HADDR, whenever
// initTrans is asserted.
flopenr #(1) memwritereg(HCLK, 1'b0, initTrans | ~HRESETn, HSELRam & HWRITE, memwrite);
|
||||
flopenr #(32) haddrreg(HCLK, 1'b0, initTrans | ~HRESETn, HADDR, A);
|
||||
|
||||
// busy FSM to extend READY signal
|
||||
// Reset and every initTrans clear busycount and drive HREADYRam low. On later
// cycles, while HREADYRam is still low, HREADYRam is raised once busycount equals
// the constant in the comparison below; otherwise busycount increments.
always @(posedge HCLK, negedge HRESETn)
|
||||
if (~HRESETn) begin
|
||||
busycount <= 0;
|
||||
HREADYRam <= #1 0;
|
||||
end else begin
|
||||
if (initTrans) begin
|
||||
busycount <= 0;
|
||||
HREADYRam <= #1 0;
|
||||
end else if (~HREADYRam) begin
|
||||
// NOTE(review): busycount is only ever loaded with 0 before this comparison, so
// with the constant 0 the increment branch looks unreachable after reset -- confirm.
if (busycount == 0) begin // Ram latency, for testing purposes. *** test with different values such as 2
|
||||
HREADYRam <= #1 1;
|
||||
end else begin
|
||||
busycount <= busycount + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
assign HRESPRam = 0; // OK
|
||||
|
||||
// ADDR_WDITH (spelled this way throughout the module) is the word-address width,
// $clog2(RANGE/8); OFFSET is the number of byte-offset bits within an XLEN-bit word.
// NOTE(review): RANGE/8 divides by 8 regardless of XLEN, while OFFSET uses XLEN/8 --
// confirm this is intended when XLEN is 32.
localparam ADDR_WDITH = $clog2(RANGE/8);
|
||||
localparam OFFSET = $clog2(`XLEN/8);
|
||||
|
||||
// Rising HREADY edge detector
|
||||
// Indicates when ram is finishing up
|
||||
// Needed because HREADY may go high for other reasons,
|
||||
// and we only want to write data when finishing up.
|
||||
// prevHREADYRam holds the previous cycle's HREADYRam (enable tied to 1'b1), so
// risingHREADYRam is high only in the first cycle in which HREADYRam is high.
flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam);
|
||||
assign risingHREADYRam = HREADYRam & ~prevHREADYRam;
|
||||
|
||||
// HWADDR is A delayed by one HCLK edge; it addresses the write port and feeds
// swbytemask above.
always @(posedge HCLK)
|
||||
HWADDR <= #1 A;
|
||||
|
||||
// Memory instance. Going by the port names (bram2p1r1w is defined elsewhere):
//   port A - read, always enabled (reA = 1'b1), indexed by the word bits of A,
//            data returned on HREADRam;
//   port B - write of HWDATA, indexed by the word bits of HWADDR, enabled by
//            memwrite & risingHREADYRam with per-byte enables ByteMaskM.
bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA)
|
||||
memory(.clk(HCLK), .reA(1'b1),
|
||||
.addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam),
|
||||
.weB(memwrite & risingHREADYRam), .bweB(ByteMaskM),
|
||||
.addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA));
|
||||
|
||||
|
||||
endmodule
|
||||
|
@ -92,7 +92,7 @@ module uncore (
|
||||
// generate
|
||||
// on-chip RAM
|
||||
if (`RAM_SUPPORTED) begin : ram
|
||||
ram #(
|
||||
ram_orig #(
|
||||
.BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram (
|
||||
.HCLK, .HRESETn,
|
||||
.HSELRam, .HADDR,
|
||||
@ -102,7 +102,7 @@ module uncore (
|
||||
end
|
||||
|
||||
if (`BOOTROM_SUPPORTED) begin : bootrom
|
||||
ram #(.BASE(`BOOTROM_BASE), .RANGE(`BOOTROM_RANGE))
|
||||
ram_orig #(.BASE(`BOOTROM_BASE), .RANGE(`BOOTROM_RANGE))
|
||||
bootrom(
|
||||
.HCLK, .HRESETn,
|
||||
.HSELRam(HSELBootRom), .HADDR,
|
||||
@ -194,7 +194,7 @@ module uncore (
|
||||
({`XLEN{HSELSDCD}} & HREADSDC);
|
||||
|
||||
assign HRESP = HSELRamD & HRESPRam |
|
||||
HSELEXTD & HRESPEXT |
|
||||
HSELEXTD & HRESPEXT |
|
||||
HSELCLINTD & HRESPCLINT |
|
||||
HSELPLICD & HRESPPLIC |
|
||||
HSELGPIOD & HRESPGPIO |
|
||||
@ -203,7 +203,7 @@ module uncore (
|
||||
HSELSDC & HRESPSDC;
|
||||
|
||||
assign HREADY = HSELRamD & HREADYRam |
|
||||
HSELEXTD & HREADYEXT |
|
||||
HSELEXTD & HREADYEXT |
|
||||
HSELCLINTD & HREADYCLINT |
|
||||
HSELPLICD & HREADYPLIC |
|
||||
HSELGPIOD & HREADYGPIO |
|
||||
|
@ -82,7 +82,7 @@ module wallypipelinedcore (
|
||||
logic StoreAmoMisalignedFaultM, StoreAmoAccessFaultM;
|
||||
logic InvalidateICacheM, FlushDCacheM;
|
||||
logic PCSrcE;
|
||||
logic CSRWritePendingDEM;
|
||||
logic CSRWriteFencePendingDEM;
|
||||
logic DivBusyE;
|
||||
logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD;
|
||||
logic SquashSCW;
|
||||
@ -101,7 +101,6 @@ module wallypipelinedcore (
|
||||
|
||||
// memory management unit signals
|
||||
logic ITLBWriteF;
|
||||
logic ITLBFlushF, DTLBFlushM;
|
||||
logic ITLBMissF;
|
||||
logic [`XLEN-1:0] SATP_REGW;
|
||||
logic STATUS_MXR, STATUS_SUM, STATUS_MPRV;
|
||||
@ -109,7 +108,7 @@ module wallypipelinedcore (
|
||||
logic [1:0] PrivilegeModeW;
|
||||
logic [`XLEN-1:0] PTE;
|
||||
logic [1:0] PageType;
|
||||
logic wfiM, IntPendingM;
|
||||
logic sfencevmaM, wfiM, IntPendingM;
|
||||
logic SelHPTW;
|
||||
|
||||
// PMA checker signals
|
||||
@ -196,7 +195,7 @@ module wallypipelinedcore (
|
||||
// mmu management
|
||||
.PrivilegeModeW, .PTE, .PageType, .SATP_REGW,
|
||||
.STATUS_MXR, .STATUS_SUM, .STATUS_MPRV,
|
||||
.STATUS_MPP, .ITLBWriteF, .ITLBFlushF,
|
||||
.STATUS_MPP, .ITLBWriteF, .sfencevmaM,
|
||||
.ITLBMissF,
|
||||
|
||||
// pmp/pma (inside mmu) signals. *** temporarily from AHB bus but eventually replace with internal versions pre H
|
||||
@ -238,7 +237,7 @@ module wallypipelinedcore (
|
||||
.FPUStallD, .LoadStallD, .MDUStallD, .CSRRdStallD,
|
||||
.PCSrcE,
|
||||
.CSRReadM, .CSRWriteM, .PrivilegedM,
|
||||
.CSRWritePendingDEM, .StoreStallD
|
||||
.CSRWriteFencePendingDEM, .StoreStallD
|
||||
|
||||
); // integer execution unit: integer register file, datapath and controller
|
||||
|
||||
@ -268,7 +267,7 @@ module wallypipelinedcore (
|
||||
.STATUS_MPRV, // from csr
|
||||
.STATUS_MPP, // from csr
|
||||
|
||||
.DTLBFlushM, // connects to privilege
|
||||
.sfencevmaM, // connects to privilege
|
||||
.LoadPageFaultM, // connects to privilege
|
||||
.StoreAmoPageFaultM, // connects to privilege
|
||||
.LoadMisalignedFaultM, // connects to privilege
|
||||
@ -310,13 +309,13 @@ module wallypipelinedcore (
|
||||
|
||||
|
||||
hazard hzu(
|
||||
.BPPredWrongE, .CSRWritePendingDEM, .RetM, .TrapM,
|
||||
.BPPredWrongE, .CSRWriteFencePendingDEM, .RetM, .TrapM,
|
||||
.LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD,
|
||||
.LSUStallM, .IFUStallF,
|
||||
.FPUStallD, .FStallD,
|
||||
.DivBusyE, .FDivBusyE,
|
||||
.EcallFaultM, .BreakpointFaultM,
|
||||
.InvalidateICacheM, .wfiM, .IntPendingM,
|
||||
.wfiM, .IntPendingM,
|
||||
// Stall & flush outputs
|
||||
.StallF, .StallD, .StallE, .StallM, .StallW,
|
||||
.FlushF, .FlushD, .FlushE, .FlushM, .FlushW
|
||||
@ -330,7 +329,7 @@ module wallypipelinedcore (
|
||||
.CSRReadM, .CSRWriteM, .SrcAM, .PCM,
|
||||
.InstrM, .CSRReadValW, .PrivilegedNextPCM,
|
||||
.RetM, .TrapM,
|
||||
.ITLBFlushF, .DTLBFlushM,
|
||||
.sfencevmaM,
|
||||
.InstrValidM, .CommittedM,
|
||||
.FRegWriteM, .LoadStallD,
|
||||
.BPPredDirWrongM, .BTBPredPCWrongM,
|
||||
@ -359,8 +358,7 @@ module wallypipelinedcore (
|
||||
assign RetM = 0;
|
||||
assign TrapM = 0;
|
||||
assign wfiM = 0;
|
||||
assign ITLBFlushF = 0;
|
||||
assign DTLBFlushM = 0;
|
||||
assign sfencevmaM = 0;
|
||||
assign BigEndianM = 0;
|
||||
end
|
||||
if (`M_SUPPORTED) begin:mdu
|
||||
|
@ -1,13 +1,16 @@
|
||||
all: sqrttestgen testgen
|
||||
all: sqrttestgen testgen qst2
|
||||
|
||||
sqrttestgen: sqrttestgen.c
|
||||
gcc sqrttestgen.c -lm -o sqrttestgen
|
||||
|
||||
|
||||
testgen: testgen.c
|
||||
gcc testgen.c -lm -o testgen
|
||||
|
||||
qst2: qst2.c
|
||||
gcc qst2.c -lm -o qst2
|
||||
gcc -lm -o testgen testgen.c
|
||||
./testgen
|
||||
|
||||
|
||||
exptestgen: exptestgen.c
|
||||
gcc -lm -o exptestgen exptestgen.c
|
||||
./exptestgen
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user