mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-03 18:25:27 +00:00
fixed merge conflicts
This commit is contained in:
commit
80315fedff
Binary file not shown.
@ -1,15 +1,39 @@
|
||||
# Makefile added 1/20/22 David_Harris@hmc.edu
|
||||
# Compile Embench for Wally
|
||||
|
||||
all: Makefile
|
||||
../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board ri5cyverilator --cflags "-O2 -march=rv32i -mabi=ilp32 -mcmodel=medany" --cc riscv64-unknown-elf-gcc
|
||||
./benchmark_size.py
|
||||
./benchmark_speed.py
|
||||
all: build sim
|
||||
|
||||
# view with
|
||||
# more `ls -t | head -1`
|
||||
allClean: clean all
|
||||
|
||||
build:
|
||||
../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles"
|
||||
find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
|
||||
../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0"
|
||||
|
||||
sim: modelSimBuild size speed
|
||||
|
||||
modelSimBuild: objdump
|
||||
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
|
||||
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
|
||||
|
||||
size:
|
||||
../../addins/embench-iot/benchmark_size.py --builddir=bd_size
|
||||
|
||||
speed:
|
||||
../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=50
|
||||
|
||||
objdump:
|
||||
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S "$$f" > "$$f.objdump"; done
|
||||
|
||||
clean:
|
||||
rm -rf ../../addins/embench-iot/bd_speed/
|
||||
rm -rf ../../addins/embench-iot/bd_size/
|
||||
|
||||
# std:
|
||||
# ../../addins/embench-iot/build_all.py --builddir=bd_std --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-v -c -O2 -ffunction-sections -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -v -march=rv32imac -mabi=ilp32 ../../../../../benchmarks/embench/tohost.S -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --user-libs="-lm"
|
||||
# riscv64-unknown-elf-objdump -D ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64.objdump
|
||||
# --dummy-libs="libgcc libm libc"
|
||||
# --cflags "-O2 -g -nostartfiles"
|
||||
|
||||
|
||||
#riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
|
||||
# ../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-c -Os -ffunction-sections -nostdlib -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -nostdlib -march=rv32imac -mabi=ilp32 -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --dummy-libs="libgcc libm libc"
|
||||
# --user-libs="-lm"
|
||||
# riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
|
||||
|
@ -1,7 +0,0 @@
|
||||
# Makefile added 1/20/22 David_Harris@hmc.edu
|
||||
# Compile Embench for Wally
|
||||
|
||||
all: Makefile
|
||||
./build_all.py --arch riscv32 --chip generic --board ri5cyverilator --cc riscv64-unknown-elf-gcc
|
||||
./benchmark_size.py
|
||||
./benchmark_speed.py
|
@ -32,7 +32,7 @@ vlib work
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
# $num = the added words after the call
|
||||
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv -suppress 2583,7063,8607,2697
|
||||
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697
|
||||
|
||||
vsim -voptargs=+acc work.testbenchfp -G TEST=$2
|
||||
|
||||
|
@ -46,7 +46,7 @@ configs = [
|
||||
]
|
||||
def getBuildrootTC(short):
|
||||
INSTR_LIMIT = 4000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
|
||||
MAX_EXPECTED = 246000000
|
||||
MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt.
|
||||
if short:
|
||||
BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0\n!"
|
||||
BRgrepstr=str(INSTR_LIMIT)+" instructions"
|
||||
|
@ -1,2 +1,2 @@
|
||||
vsim -do "do wally-pipelined.do rv64gc imperas64f"
|
||||
vsim -do "do wally-pipelined.do rv64gc imperas64d"
|
||||
|
||||
|
@ -51,7 +51,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
|
||||
#vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf
|
||||
#vsim -coverage -lib work_$2 workopt_$2
|
||||
|
||||
# power add generates the logging necessary for saif generation.
|
||||
# power add generates the logging necessary for said generation.
|
||||
# power add -r /dut/core/*
|
||||
run -all
|
||||
# power off -r /dut/core/*
|
||||
|
@ -11,97 +11,97 @@ module fctrl (
|
||||
output logic FDivStartD, // Start division or squareroot
|
||||
output logic [1:0] FResultSelD, // select result to be written to fp register
|
||||
output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit
|
||||
output logic [2:0] FResSelD, // select one of the results done in the memory stage
|
||||
output logic [1:0] FResSelD, // select one of the results done in the memory stage
|
||||
output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
|
||||
output logic FmtD, // precision - single-0 double-1
|
||||
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
output logic FWriteIntD // is the result written to the integer register
|
||||
);
|
||||
|
||||
`define FCTRLW 14
|
||||
`define FCTRLW 13
|
||||
logic [`FCTRLW-1:0] ControlsD;
|
||||
// FPU Instruction Decoder
|
||||
always_comb
|
||||
if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
|
||||
ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1;
|
||||
ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1;
|
||||
else case(OpD)
|
||||
// FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
|
||||
7'b0000111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_00_000_000_00_0_0; // flw
|
||||
3'b011: ControlsD = `FCTRLW'b1_0_00_001_000_00_0_0; // fld
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_00_000_00_00_0_0; // flw
|
||||
3'b011: ControlsD = `FCTRLW'b1_0_00_001_00_00_0_0; // fld
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b0100111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_00_010_000_00_0_0; // fsw
|
||||
3'b011: ControlsD = `FCTRLW'b0_0_00_011_000_00_0_0; // fsd
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_00_010_00_00_0_0; // fsw
|
||||
3'b011: ControlsD = `FCTRLW'b0_0_00_011_00_00_0_0; // fsd
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_01_000_000_00_0_0; // fmadd
|
||||
7'b1000111: ControlsD = `FCTRLW'b1_0_01_001_000_00_0_0; // fmsub
|
||||
7'b1001011: ControlsD = `FCTRLW'b1_0_01_010_000_00_0_0; // fnmsub
|
||||
7'b1001111: ControlsD = `FCTRLW'b1_0_01_011_000_00_0_0; // fnmadd
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_01_000_00_00_0_0; // fmadd
|
||||
7'b1000111: ControlsD = `FCTRLW'b1_0_01_001_00_00_0_0; // fmsub
|
||||
7'b1001011: ControlsD = `FCTRLW'b1_0_01_010_00_00_0_0; // fnmsub
|
||||
7'b1001111: ControlsD = `FCTRLW'b1_0_01_011_00_00_0_0; // fnmadd
|
||||
7'b1010011: casez(Funct7D)
|
||||
7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_000_00_0_0; // fadd
|
||||
7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_000_00_0_0; // fsub
|
||||
7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_000_00_0_0; // fmul
|
||||
7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_000_00_1_0; // fdiv
|
||||
7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_000_00_1_0; // fsqrt
|
||||
7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_00_00_0_0; // fadd
|
||||
7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_00_00_0_0; // fsub
|
||||
7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_00_00_0_0; // fmul
|
||||
7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_00_00_1_0; // fdiv
|
||||
7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_00_00_1_0; // fsqrt
|
||||
7'b00100??: case(Funct3D)
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_11_000_001_00_0_0; // fsgnj
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_11_001_001_00_0_0; // fsgnjn
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_11_010_001_00_0_0; // fsgnjx
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_11_000_01_00_0_0; // fsgnj
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_11_001_01_00_0_0; // fsgnjn
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_11_010_01_00_0_0; // fsgnjx
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b00101??: case(Funct3D)
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_11_111_010_00_0_0; // fmin
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_11_101_010_00_0_0; // fmax
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_11_111_10_00_0_0; // fmin
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_11_101_10_00_0_0; // fmax
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b10100??: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_1_11_010_010_00_0_0; // feq
|
||||
3'b001: ControlsD = `FCTRLW'b0_1_11_001_010_00_0_0; // flt
|
||||
3'b000: ControlsD = `FCTRLW'b0_1_11_011_010_00_0_0; // fle
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
|
||||
3'b010: ControlsD = `FCTRLW'b0_1_11_010_10_00_0_0; // feq
|
||||
3'b001: ControlsD = `FCTRLW'b0_1_11_001_10_00_0_0; // flt
|
||||
3'b000: ControlsD = `FCTRLW'b0_1_11_011_10_00_0_0; // fle
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_000_10_0_0; // fclass
|
||||
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_000_01_0_0; // fmv.x.w
|
||||
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_000_01_0_0; // fmv.x.d
|
||||
else ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
|
||||
7'b1101000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_11_000_011_00_0_0; // fcvt.s.w
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_11_010_011_00_0_0; // fcvt.s.wu
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_11_100_011_00_0_0; // fcvt.s.l
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_11_110_011_00_0_0; // fcvt.s.lu
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
|
||||
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_00_10_0_0; // fclass
|
||||
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_00_01_0_0; // fmv.x.w
|
||||
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_00_01_0_0; // fmv.x.d
|
||||
else ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
7'b1101000: case(Rs2D[1:0])//***reduce resSel
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.s.w w->s
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.s.wu wu->s
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.s.l l->s
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.s.lu lu->s
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1100000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_11_001_011_11_0_0; // fcvt.w.s
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_11_011_011_11_0_0; // fcvt.wu.s
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_11_101_011_11_0_0; // fcvt.l.s
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_11_111_011_11_0_0; // fcvt.lu.s
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.s s->w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.s s->wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.s s->l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.s s->lu
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_000_00_0_0; // fmv.w.x
|
||||
7'b010000?: ControlsD = `FCTRLW'b1_0_11_000_100_00_0_0; // fcvt.s.d
|
||||
7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_00_00_0_0; // fmv.w.x
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_11_000_11_00_0_0; // fcvt.s.d
|
||||
7'b1101001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_11_000_011_00_0_0; // fcvt.d.w
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_11_010_011_00_0_0; // fcvt.d.wu
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_11_100_011_00_0_0; // fcvt.d.l
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_11_110_011_00_0_0; // fcvt.d.lu
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.d.w w->d
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.d.wu wu->d
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.d.l l->d
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.d.lu lu->d
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1100001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_11_001_011_11_0_0; // fcvt.w.d
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_11_011_011_11_0_0; // fcvt.wu.d
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_11_101_011_11_0_0; // fcvt.l.d
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_11_111_011_11_0_0; // fcvt.lu.d
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.d d->w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.d d->wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.d d->l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.d d->lu
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_000_00_0_0; // fmv.d.x
|
||||
//7'b0100001: ControlsD = `FCTRLW'b1_0_11_000_100_00_0_0; // fcvt.d.s
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_100_00_0_1; // non-implemented instruction
|
||||
7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_00_00_0_0; // fmv.d.x
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_11_001_11_00_0_0; // fcvt.d.s
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
|
||||
// unswizzle control bits
|
||||
@ -119,7 +119,7 @@ module fctrl (
|
||||
// Precision
|
||||
// 0-single
|
||||
// 1-double
|
||||
assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : FResSelD == 3'b100 | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
|
||||
assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
|
||||
|
||||
// FResultSel:
|
||||
// 000 - ReadRes - load
|
||||
|
803
pipelined/src/fpu/fcvt.sv
Normal file
803
pipelined/src/fpu/fcvt.sv
Normal file
@ -0,0 +1,803 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
// largest length in IEU/FPU
|
||||
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
|
||||
|
||||
module fcvt (
|
||||
input logic XSgnE, // input's sign
|
||||
input logic [`NE-1:0] XExpE, // input's exponent
|
||||
input logic [`NF:0] XManE, // input's fraction
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, // integer input - from IEU
|
||||
input logic [2:0] FOpCtrlE, // choose which opperation (look below for values)
|
||||
input logic FWriteIntE, // is fp->int (since it's writting to the integer register)
|
||||
input logic XZeroE, // is the input zero
|
||||
input logic XDenormE, // is the input denormalized
|
||||
input logic XInfE, // is the input infinity
|
||||
input logic XNaNE, // is the input a NaN
|
||||
input logic XSNaNE, // is the input a signaling NaN
|
||||
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FPSIZES/3:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half)
|
||||
output logic [`FLEN-1:0] CvtResE, // the fp conversion result
|
||||
output logic [`XLEN-1:0] CvtIntResE, // the int conversion result
|
||||
output logic [4:0] CvtFlgE // the conversion's flags
|
||||
);
|
||||
|
||||
// OpCtrls:
|
||||
// fp->fp conversions: {0, output precision} - only one of the operations writes to the int register
|
||||
// half - 10
|
||||
// single - 00
|
||||
// double - 01
|
||||
// quad - 11
|
||||
// int<->fp conversions: {is int->fp?, is the integer 64-bit?, is the integer signed?}
|
||||
// bit 2 bit 1 bit 0
|
||||
// for example: signed long -> single floating point has the OpCode 101
|
||||
|
||||
// (FF) fp -> fp coversion signals
|
||||
// (IF) int -> fp coversion signals
|
||||
// (FI) fp -> int coversion signals
|
||||
|
||||
|
||||
logic [`FPSIZES/3:0] OutFmt; // format of the output
|
||||
logic [`XLEN-1:0] PosInt; // the positive integer input
|
||||
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
|
||||
logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`NE:0] CalcExp; // the calculated expoent
|
||||
logic [$clog2(`LGLEN)-1:0] ShiftAmt; // how much to shift by
|
||||
logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted
|
||||
logic ResDenormUf;// does the result underflow or is denormalized
|
||||
logic ResUf; // does the result underflow
|
||||
logic [`LGLEN+`NF:0] Shifted; // the shifted result
|
||||
logic [`NE-2:0] NewBias; // the bias of the final result
|
||||
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
|
||||
logic [`NE-1:0] OldExp; // the old exponent
|
||||
logic ResSgn; // the result's sign
|
||||
logic Sticky; // sticky bit - for rounding
|
||||
logic Round; // round bit - for rounding
|
||||
logic LSBFrac; // the least significant bit of the fraction - for rounding
|
||||
logic CalcPlus1; // the calculated plus 1
|
||||
logic Plus1; // add one to the final result?
|
||||
logic [`FLEN-1:0] ShiftedPlus1; // plus one shifted to the proper position
|
||||
logic [`NE:0] FullResExp; // the full result exponent (with the overflow bit)
|
||||
logic [`NE-1:0] ResExp; // the result's exponent (trimmed to the correct size)
|
||||
logic [`NF-1:0] ResFrac; // the result's fraction
|
||||
logic [`XLEN+1:0] NegRes; // the negation of the result
|
||||
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
|
||||
logic Overflow, Underflow, Inexact, Invalid; // flags
|
||||
logic IntInexact, FpInexact, IntInvalid, FpInvalid; // flags for FP and int outputs
|
||||
logic [`NE-1:0] MaxExp; // the maximum exponent before overflow
|
||||
logic [1:0] NegResMSBS; // the negitive integer result's most significant bits
|
||||
logic [`FLEN-1:0] NaNRes, InfRes, Res, UfRes; //various special results
|
||||
logic KillRes; // kill the result?
|
||||
logic Signed; // is the opperation with a signed integer?
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
logic ToInt; // is the opperation an fp->int conversion?
|
||||
logic [$clog2(`LGLEN)-1:0] ZeroCnt; // output from the LZC
|
||||
|
||||
|
||||
// seperate OpCtrl for code readability
|
||||
assign Signed = FOpCtrlE[0];
|
||||
assign Int64 = FOpCtrlE[1];
|
||||
assign IntToFp = FOpCtrlE[2];
|
||||
assign ToInt = FWriteIntE;
|
||||
|
||||
// choose the ouptut format depending on the opperation
|
||||
// - fp -> fp: OpCtrl contains the percision of the output
|
||||
// - int -> fp: FmtE contains the percision of the output
|
||||
if (`FPSIZES == 2)
|
||||
assign OutFmt = IntToFp ? FmtE : (FOpCtrlE[1:0] == `FMT);
|
||||
else if (`FPSIZES == 3 | `FPSIZES == 4)
|
||||
assign OutFmt = IntToFp ? FmtE : FOpCtrlE[1:0];
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// negation
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// 1) negate the input if the input is a negitive singed integer
|
||||
// 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
|
||||
|
||||
assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE;
|
||||
assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// lzc
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// choose the input to the leading zero counter i.e. priority encoder
|
||||
// int -> fp : | positive integer | 00000... (if needed) |
|
||||
// fp -> fp : | fraction | 00000... (if needed) |
|
||||
assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
|
||||
{XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
|
||||
|
||||
lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// shifter
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// seclect the input to the shifter
|
||||
// fp -> int:
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary |
|
||||
// Other problems:
|
||||
// - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
|
||||
// - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
|
||||
// - ex: for the case 0010000.... (double)
|
||||
// ??? -> fp:
|
||||
// - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
|
||||
// | `NF-1 zeros | Mantissa | 0's if nessisary |
|
||||
// - otherwise:
|
||||
// | lzcIn | 0's if nessisary |
|
||||
assign ShiftIn = ToInt ? {{`XLEN{1'b0}}, XManE[`NF]&~CalcExp[`NE], XManE[`NF-1]|(CalcExp[`NE]&XManE[`NF]), XManE[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
|
||||
ResDenormUf ? {{`NF-1{1'b0}}, XManE, {`LGLEN-`NF+1{1'b0}}} :
|
||||
{LzcIn, {`NF+1{1'b0}}};
|
||||
// kill the shift if it's negitive
|
||||
// select the amount to shift by
|
||||
// fp -> int:
|
||||
// - shift left by CalcExp - essentially shifting until the unbiased exponent = 0
|
||||
// - don't shift if supposed to shift right (underflowed or denorm input)
|
||||
// denormalized/undeflowed result fp -> fp:
|
||||
// - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0
|
||||
// ??? -> fp:
|
||||
// - shift left by ZeroCnt+1 - to shift till the result is normalized
|
||||
// - only shift fp -> fp if the intital value is denormalized
|
||||
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
|
||||
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
|
||||
assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN)-1:0]&{$clog2(`LGLEN){~CalcExp[`NE]}} :
|
||||
ResDenormUf&~IntToFp ? ($clog2(`LGLEN))'(`NF-1)+CalcExp[$clog2(`LGLEN)-1:0] :
|
||||
(ZeroCnt+1)&{$clog2(`LGLEN){XDenormE|IntToFp}};
|
||||
|
||||
// shift
|
||||
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
|
||||
// process:
|
||||
// - start - CalcExp = 1 + XExp - Largest Bias
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary |
|
||||
//
|
||||
// - shift left 1 (1)
|
||||
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
|
||||
// . <- binary point
|
||||
//
|
||||
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
|
||||
// | 0's | Mantissa | 0's if nessisary |
|
||||
// | keep |
|
||||
//
|
||||
// fp -> fp:
|
||||
// - if result is denormalized or underflowed:
|
||||
// | `NF-1 zeros | Mantissa | 0's if nessisary | << NF+CalcExp-1
|
||||
// process:
|
||||
// - start
|
||||
// | mantissa | 0's |
|
||||
//
|
||||
// - shift right by NF-1 (NF-1)
|
||||
// | `NF-1 zeros | mantissa | 0's |
|
||||
//
|
||||
// - shift left by CalcExp = XExp - Largest bias + new bias
|
||||
// | 0's | mantissa | 0's |
|
||||
// | keep |
|
||||
//
|
||||
// - if the input is denormalized:
|
||||
// | lzcIn | 0's if nessisary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
//
|
||||
// int -> fp: | lzcIn | 0's if nessisary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
|
||||
assign Shifted = ShiftIn << ShiftAmt;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// exp calculations
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// *** possible optimizaations:
|
||||
// - if subtracting exp by bias only the msb needs a full adder, the rest can be HA - dunno how to implement this for synth
|
||||
// - Smaller exp -> Larger Exp can be calculated with: *** can use in Other units??? FMA??? insert this thing in later
|
||||
// Exp if in range: {~Exp[SNE-1], Exp[SNE-2:0]}
|
||||
// Exp in range if: Exp[SNE-1] = 1 & Exp[LNE-2:SNE] = 1111... & Exp[LNE-1] = 0 | Exp[SNE-1] = 0 & Exp[LNE-2:SNE] = 000... & Exp[LNE-1] = 1
|
||||
// i.e.: &Exp[LNE-2:SNE-1] xor Exp[LNE-1]
|
||||
// Too big if: Exp[LNE-1] = 1
|
||||
// Too small if: none of the above
|
||||
|
||||
// Select the bias of the output
|
||||
// fp -> int : select 1
|
||||
// ??? -> fp : pick the new bias depending on the output format
|
||||
if (`FPSIZES == 1) begin
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic [`NE-2:0] NewBiasToFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: NewBiasToFp = (`NE-1)'(`BIAS);
|
||||
`FMT1: NewBiasToFp = (`NE-1)'(`BIAS1);
|
||||
`FMT2: NewBiasToFp = (`NE-1)'(`BIAS2);
|
||||
default: NewBiasToFp = 1'bx;
|
||||
endcase
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic [`NE-2:0] NewBiasToFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: NewBiasToFp = (`NE-1)'(`Q_BIAS);
|
||||
2'h1: NewBiasToFp = (`NE-1)'(`D_BIAS);
|
||||
2'h0: NewBiasToFp = (`NE-1)'(`S_BIAS);
|
||||
2'h2: NewBiasToFp = (`NE-1)'(`H_BIAS);
|
||||
endcase
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
|
||||
end
|
||||
// select the old exponent
|
||||
// int -> fp : largest bias + XLEN
|
||||
// fp -> ??? : XExp
|
||||
assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN) : XExpE;
|
||||
|
||||
// calculate CalcExp
|
||||
// fp -> fp :
|
||||
// - XExp - Largest bias + new bias - (ZeroCnt+1)
|
||||
// only do ^ if the input was denormalized
|
||||
// - convert the expoenent to the final preciaion (Exp - oldBias + newBias)
|
||||
// - correct the expoent when there is a normalization shift ( + ZeroCnt+1)
|
||||
// fp -> int : XExp - Largest Bias + 1 - (ZeroCnt+1)
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
|
||||
// process:
|
||||
// - start
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary |
|
||||
//
|
||||
// - shift left 1 (1)
|
||||
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
|
||||
// . <- binary point
|
||||
//
|
||||
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
|
||||
// | 0's | Mantissa | 0's if nessisary |
|
||||
// | keep |
|
||||
//
|
||||
// - if the input is denormalized then we dont shift... so the "- (ZeroCnt+1)" is just leftovers from other options
|
||||
// int -> fp : largest bias XLEN - Largest bias + new bias - 1 - ZeroCnt = XLEN + NewBias - 1 - ZeroCnt
|
||||
// Process:
|
||||
// - shifted right by XLEN (XLEN)
|
||||
// - shift left to normilize (-1-ZeroCnt)
|
||||
// - newBias to make the biased exponent
|
||||
//
|
||||
assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-$clog2(`LGLEN)+1{1'b0}}, (ZeroCnt&{$clog2(`LGLEN){XDenormE|IntToFp}})};
|
||||
// find if the result is dnormal or underflows
|
||||
// - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
|
||||
// - can't underflow an integer to Fp conversion
|
||||
assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp;
|
||||
// choose the negative of the fraction size
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ResNegNF = -`NF;
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ResNegNF = OutFmt ? -`NF : -`NF1;
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ResNegNF = -`NF;
|
||||
`FMT1: ResNegNF = -`NF1;
|
||||
`FMT2: ResNegNF = -`NF2;
|
||||
default: ResNegNF = 1'bx;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: ResNegNF = -`Q_NF;
|
||||
2'h1: ResNegNF = -`D_NF;
|
||||
2'h0: ResNegNF = -`S_NF;
|
||||
2'h2: ResNegNF = -`H_NF;
|
||||
endcase
|
||||
end
|
||||
// determine if the result underflows ??? -> fp
|
||||
// - if the first 1 is shifted out of the result then the result underflows
|
||||
// - can't underflow an integer to fp conversions
|
||||
assign ResUf = ($signed(CalcExp) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroE&~IntToFp;
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// sign
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// determine the sign of the result
|
||||
// - if int -> fp
|
||||
// - if 64-bit : check the msb of the 64-bit integer input and if it's signed
|
||||
// - if 32-bit : check the msb of the 32-bit integer input and if it's signed
|
||||
// - otherwise: the floating point input's sign
|
||||
assign ResSgn = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// rounding
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// {Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
|
||||
// 11 - Plus1
|
||||
|
||||
// round to zero - do nothing
|
||||
|
||||
// round to -infinity - Plus1 if negative
|
||||
|
||||
// round to infinity - Plus1 if positive
|
||||
|
||||
// round to nearest max magnitude
|
||||
// {Guard, Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 1x - Plus1
|
||||
// ResUf is used when a fp->fp result underflows but all the bits get shifted out, which leaves nothing for the sticky bit
|
||||
if (`FPSIZES == 1) begin
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : |Shifted[`LGLEN+`NF-`NF-1:0]|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : Shifted[`LGLEN+`NF-`NF];
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : Shifted[`LGLEN+`NF-`NF+1];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] :
|
||||
(OutFmt ? |Shifted[`LGLEN+`NF-`NF-1:0] : |Shifted[`LGLEN+`NF-`NF1-1:0])|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] :
|
||||
OutFmt ? Shifted[`LGLEN+`NF-`NF] : Shifted[`LGLEN+`NF-`NF1];
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] :
|
||||
OutFmt ? Shifted[`LGLEN+`NF-`NF+1] : Shifted[`LGLEN+`NF-`NF1+1];
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`NF-`NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`NF-`NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF+1];
|
||||
end
|
||||
`FMT1: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`NF-`NF1-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`NF-`NF1];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF1+1];
|
||||
end
|
||||
`FMT2: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`NF-`NF2-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`NF-`NF2];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF2+1];
|
||||
end
|
||||
default: begin
|
||||
ToFpSticky = 1'bx;
|
||||
ToFpRound = 1'bx;
|
||||
ToFpLSBFrac = 1'bx;
|
||||
end
|
||||
endcase
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`Q_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`Q_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`Q_NF+1];
|
||||
end
|
||||
2'h1: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`D_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`D_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`D_NF+1];
|
||||
end
|
||||
2'h0: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`S_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`S_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`S_NF+1];
|
||||
end
|
||||
2'h2: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`H_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`H_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`H_NF+1];
|
||||
end
|
||||
endcase
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
|
||||
end
|
||||
|
||||
always_comb
|
||||
// Determine if you add 1
|
||||
case (FrmE)
|
||||
3'b000: CalcPlus1 = Round & (Sticky | LSBFrac);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = ResSgn;//round down
|
||||
3'b011: CalcPlus1 = ~ResSgn;//round up
|
||||
3'b100: CalcPlus1 = Round;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
// dont round if exact
|
||||
assign Plus1 = CalcPlus1&(Round|Sticky);
|
||||
|
||||
// shift the 1 to the propper position for rounding
|
||||
// - dont round it converting to integer
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ShiftedPlus1 = OutFmt ? {{`FLEN-1{1'b0}},Plus1&~ToInt} : {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
|
||||
`FMT1: ShiftedPlus1 = {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
|
||||
`FMT2: ShiftedPlus1 = {{`NE+`NF2{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF2-1{1'b0}}};
|
||||
default: ShiftedPlus1 = 0;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: ShiftedPlus1 = {{`Q_LEN-1{1'b0}},Plus1&~ToInt};
|
||||
2'h1: ShiftedPlus1 = {{`Q_NE+`D_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`D_NF-1{1'b0}}};
|
||||
2'h0: ShiftedPlus1 = {{`Q_NE+`S_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`S_NF-1{1'b0}}};
|
||||
2'h2: ShiftedPlus1 = {{`Q_NE+`H_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`H_NF-1{1'b0}}};
|
||||
endcase
|
||||
end
|
||||
// kill calcExp if the result is denormalized
|
||||
assign {FullResExp, ResFrac} = {CalcExp&{`NE+1{~ResDenormUf}}, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`NF]} + ShiftedPlus1;
|
||||
// trim the result's expoent to size
|
||||
assign ResExp = FullResExp[`NE-1:0];
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// flags
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate the flags
|
||||
|
||||
// find the maximum exponent (the exponent and larger overflows)
|
||||
if (`FPSIZES == 1) begin
|
||||
assign MaxExp = ToInt ? Int64 ? 65 : 33 : {`NE{1'b1}};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign MaxExp = ToInt ? Int64 ? 65 : 33 :
|
||||
OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic [`NE-1:0] MaxExpFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
MaxExpFp = {`NE{1'b1}};
|
||||
end
|
||||
`FMT1: begin
|
||||
MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
|
||||
end
|
||||
`FMT2: begin
|
||||
MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
|
||||
end
|
||||
default: begin
|
||||
MaxExpFp = 1'bx;
|
||||
end
|
||||
endcase
|
||||
assign MaxExp = ToInt ? Int64 ? 65 : 33 : MaxExpFp;
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic [`NE-1:0] MaxExpFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
MaxExpFp = {`Q_NE{1'b1}};
|
||||
end
|
||||
2'h1: begin
|
||||
MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
|
||||
end
|
||||
2'h0: begin
|
||||
MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
|
||||
end
|
||||
2'h2: begin
|
||||
MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
|
||||
end
|
||||
endcase
|
||||
assign MaxExp = ToInt ? Int64 ? 65 : 33 : MaxExpFp;
|
||||
end
|
||||
|
||||
// if the result exponent is larger then the maximum possible exponent
|
||||
// | and the exponent is positive
|
||||
// | | and the input is not NaN or Infinity
|
||||
// | | |
|
||||
assign Overflow = ((ResExp >= MaxExp)&~CalcExp[`NE]&(~(XNaNE|XInfE)|IntToFp));
|
||||
|
||||
// if the result is denormalized or underflowed
|
||||
// | and the result did not round into normal values
|
||||
// | | and the result is not exact
|
||||
// | | | and the result isn't NaN
|
||||
// | | | |
|
||||
assign Underflow = ResDenormUf & ~(ResExp==1 & CalcExp == 0) & (Sticky|Round)&~(XNaNE);
|
||||
|
||||
// we are using the IEEE convertToIntegerExact opperations (rather then the exact ones) which do singal the inexact flag
|
||||
// if there were bits thrown away
|
||||
// | if overflowed or underflowed
|
||||
// | | and if not a NaN
|
||||
// | | |
|
||||
assign FpInexact = (Sticky|Round|Underflow|Overflow)&(~XNaNE|IntToFp);
|
||||
|
||||
// if the result is too small to be represented and not 0
|
||||
// | and if the result is not invalid (outside the integer bounds)
|
||||
// | |
|
||||
assign IntInexact = ((CalcExp[`NE]&~XZeroE)|Sticky|Round)&~Invalid;
|
||||
|
||||
// select the inexact flag to output
|
||||
assign Inexact = ToInt ? IntInexact : FpInexact;
|
||||
|
||||
// if an input was a singaling NaN(and we're using a FP input)
|
||||
// |
|
||||
assign FpInvalid = (XSNaNE&~IntToFp);
|
||||
|
||||
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
|
||||
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
|
||||
// if the input is NaN or infinity
|
||||
// | if the integer result overflows (out of range)
|
||||
// | | if the input was negitive but ouputing to a unsigned number
|
||||
// | | | the result doesn't round to zero
|
||||
// | | | | or the result rounds up out of bounds
|
||||
// | | | | and the result didn't underflow
|
||||
// | | | | |
|
||||
assign IntInvalid = XNaNE|XInfE|Overflow|((XSgnE&~Signed)&(~((CalcExp[`NE]|(~|CalcExp))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
|
||||
// |
|
||||
// or when the positive result rounds up out of range
|
||||
// select the inexact flag to output
|
||||
assign Invalid = ToInt ? IntInvalid : FpInvalid;
|
||||
// pack the flags together
|
||||
// - fp -> int does not set the overflow or underflow flags
|
||||
assign CvtFlgE = {Invalid, 1'b0, Overflow&~ToInt, Underflow&~ToInt, Inexact};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// result selection
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// determine if you shoould kill the result
|
||||
// - do so if the result underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
|
||||
// - dont set to zero if fp input is zero but not using the fp input
|
||||
// - dont set to zero if int input is zero but not using the int input
|
||||
assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp));
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
assign NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
|
||||
end else begin
|
||||
assign NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
assign InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
assign UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
assign Res = {ResSgn, ResExp, ResFrac};
|
||||
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
|
||||
end else begin
|
||||
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
assign InfRes = OutFmt ? (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
|
||||
(~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
assign Res = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
|
||||
end else begin
|
||||
NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
`FMT1: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
|
||||
end else begin
|
||||
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
end
|
||||
`FMT2: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, XManE[`NF-2:`NF-`NF2]};
|
||||
end else begin
|
||||
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, {`NF2-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
|
||||
end
|
||||
default: begin
|
||||
NaNRes = 1'bx;
|
||||
InfRes = 1'bx;
|
||||
UfRes = 1'bx;
|
||||
Res = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, XManE[`Q_NF-2:0]};
|
||||
end else begin
|
||||
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`Q_NE-1{1'b1}}, 1'b0, {`Q_NF{1'b1}}} : {ResSgn, {`Q_NE{1'b1}}, {`Q_NF{1'b0}}};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {ResSgn, (`Q_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
2'h1: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`D_NF]};
|
||||
end else begin
|
||||
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`D_NF]};
|
||||
end
|
||||
2'h0: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`S_NF]};
|
||||
end else begin
|
||||
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`S_NF]};
|
||||
end
|
||||
2'h2: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`H_NF]};
|
||||
end else begin
|
||||
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`H_NF]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
// choose the floating point result
|
||||
// - if the input is NaN (and using the NaN input) output the NaN result
|
||||
// - if the input is infinity or the output overflows
|
||||
// - kill the InfE signal if the input isn't a floating point value
|
||||
// - if killing the result output the underflow result
|
||||
// - otherwise output the normal result
|
||||
assign CvtResE = XNaNE&~IntToFp ? NaNRes :
|
||||
(XInfE&~IntToFp)|Overflow ? InfRes :
|
||||
KillRes ? UfRes :
|
||||
Res;
|
||||
// *** probably can optimize the negation
|
||||
// select the overflow integer result
|
||||
// - negitive infinity and out of range negitive input
|
||||
// | int | long |
|
||||
// signed | -2^31 | -2^63 |
|
||||
// unsigned | 0 | 0 |
|
||||
//
|
||||
// - positive infinity and out of range negitive input and NaNs
|
||||
// | int | long |
|
||||
// signed | 2^31-1 | 2^63-1 |
|
||||
// unsigned | 2^32-1 | 2^64-1 |
|
||||
//
|
||||
// other: 32 bit unsinged result should be sign extended as if it were a signed number
|
||||
assign OfIntRes = Signed ? XSgnE&~XNaNE ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
|
||||
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
|
||||
XSgnE&~XNaNE ? {`XLEN{1'b0}} : // unsigned negitive
|
||||
{`XLEN{1'b1}};// unsigned positive
|
||||
|
||||
// round and negate the positive result if needed
|
||||
assign NegRes = XSgnE ? -({2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
|
||||
// select the integer output
|
||||
// - if the input is invalid (out of bounds NaN or Inf) then output overflow result
|
||||
// - if the input underflows
|
||||
// - if rounding and signed opperation and negitive input, output -1
|
||||
// - otherwise output a rounded 0
|
||||
// - otherwise output the normal result (trmined and sign extended if nessisary)
|
||||
assign CvtIntResE = Invalid ? OfIntRes :
|
||||
CalcExp[`NE] ? XSgnE&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
|
||||
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
|
||||
|
||||
endmodule
|
@ -43,8 +43,7 @@ module fma(
|
||||
input logic XSgnM, YSgnM, // input signs - memory stage
|
||||
input logic [`NE-1:0] ZExpM, // input exponents - memory stage
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage
|
||||
input logic ZOrigDenormE, // is the original precision denormalized
|
||||
input logic XDenormE, YDenormE, ZDenormE, // is denorm
|
||||
input logic ZDenormE, // is denorm
|
||||
input logic XZeroE, YZeroE, ZZeroE, // is zero - execute stage
|
||||
input logic XNaNM, YNaNM, ZNaNM, // is NaN
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // is signaling NaN
|
||||
@ -73,10 +72,10 @@ module fma(
|
||||
logic PSgnE, PSgnM;
|
||||
logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM;
|
||||
logic Mult;
|
||||
logic ZOrigDenormM;
|
||||
logic ZDenormM;
|
||||
|
||||
fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
|
||||
.XZeroE, .YZeroE, .ZZeroE,
|
||||
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE);
|
||||
|
||||
@ -84,10 +83,10 @@ module fma(
|
||||
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
|
||||
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZOrigDenormE},
|
||||
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZOrigDenormM});
|
||||
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE},
|
||||
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM});
|
||||
|
||||
fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZOrigDenormM,
|
||||
fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
|
||||
.FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
|
||||
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult,
|
||||
.FMAResM, .FMAFlgM);
|
||||
@ -101,7 +100,6 @@ module fma1(
|
||||
input logic XSgnE, YSgnE, ZSgnE, // input's signs
|
||||
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
|
||||
input logic [`NF:0] XManE, YManE, ZManE, // fractions in U(0.NF) format
|
||||
input logic XDenormE, YDenormE, ZDenormE, // is the input denormal
|
||||
input logic XZeroE, YZeroE, ZZeroE, // is the input zero
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic [`FPSIZES/3:0] FmtE, // precision 1 = double 0 = single
|
||||
@ -116,13 +114,11 @@ module fma1(
|
||||
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift cnt
|
||||
);
|
||||
|
||||
logic [`NE-1:0] Denorm; // value of a denormaized number based on precision
|
||||
logic [2*`NF+1:0] ProdManE; // 1.X frac * 1.Y frac in U(2.2Nf) format
|
||||
logic [3*`NF+5:0] AlignedAddendE; // Z aligned for addition in U(NF+5.2NF+1)
|
||||
logic [3*`NF+6:0] AlignedAddendInv; // aligned addend possibly inverted
|
||||
logic [2*`NF+1:0] ProdManKilled; // the product's mantissa possibly killed
|
||||
logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum
|
||||
logic [`NE-1:0] XExpVal, YExpVal; // exponent value after taking into accound denormals
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Calculate the product
|
||||
// - When multipliying two fp numbers, add the exponents
|
||||
@ -133,8 +129,8 @@ module fma1(
|
||||
|
||||
|
||||
// calculate the product's exponent
|
||||
expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE, .XDenormE, .YDenormE, .XExpVal, .YExpVal,
|
||||
.Denorm, .ProdExpE);
|
||||
expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE,
|
||||
.ProdExpE);
|
||||
|
||||
// multiplication of the mantissa's
|
||||
mult mult(.XManE, .YManE, .ProdManE);
|
||||
@ -143,7 +139,7 @@ module fma1(
|
||||
// Alignment shifter
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
align align(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm, .XExpVal, .YExpVal,
|
||||
align align(.ZExpE, .ZManE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .XExpE, .YExpE,
|
||||
.AlignedAddendE, .AddendStickyE, .KillProdE);
|
||||
|
||||
// calculate the signs and take the opperation into account
|
||||
@ -167,51 +163,12 @@ endmodule
|
||||
module expadd(
|
||||
input logic [`FPSIZES/3:0] FmtE, // precision
|
||||
input logic [`NE-1:0] XExpE, YExpE, // input exponents
|
||||
input logic XDenormE, YDenormE, // are the inputs denormalized
|
||||
input logic XZeroE, YZeroE, // are the inputs zero
|
||||
output logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals
|
||||
output logic [`NE-1:0] Denorm, // value of denormalized exponent
|
||||
output logic [`NE+1:0] ProdExpE // product's exponent B^(1023)NE+2
|
||||
);
|
||||
|
||||
|
||||
// denormalized numbers have diffrent values depending on which precison it is.
|
||||
// FLEN - 1
|
||||
// Other - BIAS - other bias + 1
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
assign Denorm = 1;
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign Denorm = FmtE ? (`NE)'(1) : (`NE)'(`BIAS)-(`NE)'(`BIAS1)+(`NE)'(1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtE)
|
||||
`FMT: Denorm = 1;
|
||||
`FMT1: Denorm = `BIAS-`BIAS1+1;
|
||||
`FMT2: Denorm = `BIAS-`BIAS2+1;
|
||||
default: Denorm = 1'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtE)
|
||||
2'h3: Denorm = 1;
|
||||
2'h1: Denorm = `BIAS-`D_BIAS+1;
|
||||
2'h0: Denorm = `BIAS-`S_BIAS+1;
|
||||
2'h2: Denorm = `BIAS-`H_BIAS+1;
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
// pick denormalized value or exponent
|
||||
assign XExpVal = XDenormE ? Denorm : XExpE;
|
||||
assign YExpVal = YDenormE ? Denorm : YExpE;
|
||||
// kill the exponent if the product is zero - either X or Y is 0
|
||||
assign ProdExpE = ({2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};
|
||||
assign ProdExpE = ({2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};
|
||||
|
||||
endmodule
|
||||
|
||||
@ -258,13 +215,10 @@ endmodule
|
||||
|
||||
|
||||
module align(
|
||||
input logic [`NE-1:0] ZExpE, // biased exponents in B(NE.0) format
|
||||
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
|
||||
input logic [`NF:0] ZManE, // fractions in U(0.NF) format]
|
||||
input logic ZDenormE, // is the input denormal
|
||||
input logic XZeroE, YZeroE, ZZeroE, // is the input zero
|
||||
input logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals
|
||||
input logic [`NE+1:0] ProdExpE, // the product's exponent
|
||||
input logic [`NE-1:0] Denorm, // the biased value of a denormalized number
|
||||
output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
|
||||
output logic AddendStickyE, // Sticky bit calculated from the aliged addend
|
||||
output logic KillProdE // should the product be set to zero
|
||||
@ -273,7 +227,6 @@ module align(
|
||||
logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
|
||||
logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
|
||||
logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter U(NF+5.3NF+1)
|
||||
logic [`NE-1:0] ZExpVal; // Exponent value after taking into account denormals
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Alignment shifter
|
||||
@ -282,11 +235,9 @@ module align(
|
||||
// determine the shift count for alignment
|
||||
// - negitive means Z is larger, so shift Z left
|
||||
// - positive means the product is larger, so shift Z right
|
||||
// - Denormal numbers have a diffrent exponent value depending on the precision
|
||||
assign ZExpVal = ZDenormE ? Denorm : ZExpE;
|
||||
// assign AlignCnt = ProdExpE - {2'b0, ZExpVal} + (`NF+3);
|
||||
// *** can we use ProdExpE instead of XExp/YExp to save an adder? DH 5/12/22
|
||||
assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpVal};
|
||||
// KP- yes we used ProdExpE originally but we did this for timing
|
||||
assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpE};
|
||||
|
||||
// Defualt Addition without shifting
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
@ -409,22 +360,10 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
|
||||
|
||||
|
||||
|
||||
lzc lzc(.f, .NormCntE);
|
||||
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE));
|
||||
|
||||
endmodule
|
||||
|
||||
module lzc(
|
||||
input logic [3*`NF+6:0] f,
|
||||
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift
|
||||
);
|
||||
|
||||
logic [$clog2(3*`NF+7)-1:0] i;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned($clog2(3*`NF+7)'(3)*($clog2(3*`NF+7))'(`NF)+($clog2(3*`NF+7))'(6))) i = i+1; // search for leading one
|
||||
NormCntE = i;
|
||||
end
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
@ -450,7 +389,7 @@ module fma2(
|
||||
input logic [3*`NF+5:0] SumM, // the positive sum
|
||||
input logic NegSumM, // was the sum negitive
|
||||
input logic InvZM, // do you invert Z
|
||||
input logic ZOrigDenormM, // is the original precision denormalized
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZSgnEffM, // the modified Z sign - depends on instruction
|
||||
input logic PSgnM, // the product's sign
|
||||
input logic Mult, // multiply opperation
|
||||
@ -465,7 +404,7 @@ module fma2(
|
||||
logic ResultSgn, ResultSgnTmp; // Result sign
|
||||
logic [`NE+1:0] SumExp; // exponent of the normalized sum
|
||||
logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow
|
||||
logic [`NF+2:0] NormSum; // normalized sum
|
||||
logic [`NF+1:0] NormSum; // normalized sum
|
||||
logic NormSumSticky; // sticky bit calulated from the normalized sum
|
||||
logic SumZero; // is the sum zero
|
||||
logic ResultDenorm; // is the result denormalized
|
||||
@ -486,7 +425,7 @@ module fma2(
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
|
||||
.ZOrigDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
|
||||
.ZDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
|
||||
|
||||
|
||||
|
||||
@ -533,7 +472,7 @@ module fma2(
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZOrigDenormM,
|
||||
resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
|
||||
.FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
|
||||
.ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow,
|
||||
.ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
|
||||
@ -580,9 +519,9 @@ module normalize(
|
||||
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // normalization shift count
|
||||
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic KillProdM, // is the product set to zero
|
||||
input logic ZOrigDenormM,
|
||||
input logic ZDenormM,
|
||||
input logic AddendStickyM, // the sticky bit caclulated from the aligned addend
|
||||
output logic [`NF+2:0] NormSum, // normalized sum
|
||||
output logic [`NF+1:0] NormSum, // normalized sum
|
||||
output logic SumZero, // is the sum zero
|
||||
output logic NormSumSticky, UfSticky, // sticky bits
|
||||
output logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
@ -599,12 +538,12 @@ module normalize(
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//*** insert bias-bias simplification in fcvt.sv/phone pictures
|
||||
// Determine if the sum is zero
|
||||
assign SumZero = ~(|SumM);
|
||||
|
||||
// calculate the sum's exponent
|
||||
assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZOrigDenormM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
|
||||
assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
|
||||
|
||||
//convert the sum's exponent into the propper percision
|
||||
if (`FPSIZES == 1) begin
|
||||
@ -707,27 +646,27 @@ module normalize(
|
||||
assign LZAPlus2 = SumShifted[3*`NF+8];
|
||||
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
|
||||
assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
|
||||
assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3];
|
||||
assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4];
|
||||
|
||||
// Calculate the sticky bit
|
||||
if (`FPSIZES == 1) begin
|
||||
assign NormSumSticky = |CorrSumShifted[2*`NF+2:0];
|
||||
assign NormSumSticky = |CorrSumShifted[2*`NF+3:0];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// 3*NF+5 - NF1 - 3
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) |
|
||||
(|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&~FmtM);
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
|
||||
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) |
|
||||
(|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&((FmtM==`FMT1)|(FmtM==`FMT2))) |
|
||||
(|CorrSumShifted[3*`NF+2-`NF2:3*`NF+3-`NF1]&(FmtM==`FMT2));
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
|
||||
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) |
|
||||
(|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2));
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) |
|
||||
(|CorrSumShifted[3*`NF+2-`D_NF:2*`NF+3]&((FmtM==1)|(FmtM==0)|(FmtM==2))) |
|
||||
(|CorrSumShifted[3*`NF+2-`S_NF:3*`NF+3-`D_NF]&((FmtM==0)|(FmtM==2))) |
|
||||
(|CorrSumShifted[3*`NF+2-`H_NF:3*`NF+3-`S_NF]&(FmtM==2));
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
|
||||
(|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) |
|
||||
(|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) |
|
||||
(|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2));
|
||||
|
||||
end
|
||||
|
||||
@ -745,7 +684,7 @@ module fmaround(
|
||||
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [2:0] FrmM, // rounding mode
|
||||
input logic UfSticky, // sticky bit for underlow calculation
|
||||
input logic [`NF+2:0] NormSum, // normalized sum
|
||||
input logic [`NF+1:0] NormSum, // normalized sum
|
||||
input logic AddendStickyM, // addend's sticky bit
|
||||
input logic NormSumSticky, // normalized sum's sticky bit
|
||||
input logic ZZeroM, // is Z zero
|
||||
@ -799,83 +738,53 @@ module fmaround(
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
assign Guard = NormSum[2];
|
||||
assign Round = NormSum[1];
|
||||
assign LSBNormSum = NormSum[3];
|
||||
assign LSBNormSum = NormSum[2];
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfGuard = NormSum[1];
|
||||
assign UfRound = NormSum[0];
|
||||
assign UfLSBNormSum = NormSum[2];
|
||||
|
||||
// determine sticky
|
||||
assign Sticky = UfSticky | NormSum[0];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// \/-------------NF---------------,
|
||||
// | NF1 | 3 | |
|
||||
// | NF1 | 2 | |
|
||||
// '-------NF1------^
|
||||
|
||||
// determine guard, round, and least significant bit of the result
|
||||
assign Guard = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
|
||||
assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
|
||||
assign LSBNormSum = FmtM ? NormSum[3] : NormSum[`NF-`NF1+3];
|
||||
assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfGuard = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
|
||||
assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
|
||||
assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
|
||||
|
||||
// determine sticky
|
||||
assign Sticky = UfSticky | (FmtM ? NormSum[0] : NormSum[`NF-`NF1]);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[2];
|
||||
Round = NormSum[1];
|
||||
LSBNormSum = NormSum[3];
|
||||
LSBNormSum = NormSum[2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[1];
|
||||
UfRound = NormSum[0];
|
||||
UfLSBNormSum = NormSum[2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[0];
|
||||
end
|
||||
`FMT1: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[`NF-`NF1+2];
|
||||
Round = NormSum[`NF-`NF1+1];
|
||||
LSBNormSum = NormSum[`NF-`NF1+3];
|
||||
LSBNormSum = NormSum[`NF-`NF1+2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[`NF-`NF1+1];
|
||||
UfRound = NormSum[`NF-`NF1];
|
||||
UfLSBNormSum = NormSum[`NF-`NF1+2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[`NF-`NF1];
|
||||
end
|
||||
`FMT2: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[`NF-`NF2+2];
|
||||
Round = NormSum[`NF-`NF2+1];
|
||||
LSBNormSum = NormSum[`NF-`NF2+3];
|
||||
LSBNormSum = NormSum[`NF-`NF2+2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[`NF-`NF2+1];
|
||||
UfRound = NormSum[`NF-`NF2];
|
||||
UfLSBNormSum = NormSum[`NF-`NF2+2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[`NF-`NF2];
|
||||
end
|
||||
default: begin
|
||||
Guard = 1'bx;
|
||||
Round = 1'bx;
|
||||
LSBNormSum = 1'bx;
|
||||
UfGuard = 1'bx;
|
||||
UfRound = 1'bx;
|
||||
UfLSBNormSum = 1'bx;
|
||||
Sticky = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
@ -885,56 +794,40 @@ module fmaround(
|
||||
case (FmtM)
|
||||
2'h3: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[2];
|
||||
Round = NormSum[1];
|
||||
LSBNormSum = NormSum[3];
|
||||
LSBNormSum = NormSum[2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[1];
|
||||
UfRound = NormSum[0];
|
||||
UfLSBNormSum = NormSum[2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[0];
|
||||
end
|
||||
2'h1: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[`NF-`D_NF+2];
|
||||
Round = NormSum[`NF-`D_NF+1];
|
||||
LSBNormSum = NormSum[`NF-`D_NF+3];
|
||||
LSBNormSum = NormSum[`NF-`D_NF+2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[`NF-`D_NF+1];
|
||||
UfRound = NormSum[`NF-`D_NF];
|
||||
UfLSBNormSum = NormSum[`NF-`D_NF+2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[`NF-`D_NF];
|
||||
end
|
||||
2'h0: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[`NF-`S_NF+2];
|
||||
Round = NormSum[`NF-`S_NF+1];
|
||||
LSBNormSum = NormSum[`NF-`S_NF+3];
|
||||
LSBNormSum = NormSum[`NF-`S_NF+2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[`NF-`S_NF+1];
|
||||
UfRound = NormSum[`NF-`S_NF];
|
||||
UfLSBNormSum = NormSum[`NF-`S_NF+2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[`NF-`S_NF];
|
||||
end
|
||||
2'h2: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Guard = NormSum[`NF-`H_NF+2];
|
||||
Round = NormSum[`NF-`H_NF+1];
|
||||
LSBNormSum = NormSum[`NF-`H_NF+3];
|
||||
LSBNormSum = NormSum[`NF-`H_NF+2];
|
||||
// used to determine underflow flag
|
||||
UfGuard = NormSum[`NF-`H_NF+1];
|
||||
UfRound = NormSum[`NF-`H_NF];
|
||||
UfLSBNormSum = NormSum[`NF-`H_NF+2];
|
||||
// determine sticky
|
||||
Sticky = UfSticky | NormSum[`NF-`H_NF];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
// used to determine underflow flag
|
||||
assign UfLSBNormSum = Round;
|
||||
// determine sticky
|
||||
assign Sticky = UfSticky | UfRound;
|
||||
|
||||
|
||||
// Deterimine if a small number was supposed to be subtrated
|
||||
@ -944,28 +837,28 @@ module fmaround(
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (FrmM)
|
||||
3'b000: CalcPlus1 = Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&LSBNormSum&~SubBySmallNum));//round to nearest even
|
||||
3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round down
|
||||
3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round up
|
||||
3'b100: CalcPlus1 = (Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&~SubBySmallNum)));//round to nearest max magnitude
|
||||
3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down
|
||||
3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up
|
||||
3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you add 1 (for underflow flag)
|
||||
case (FrmM)
|
||||
3'b000: UfCalcPlus1 = UfGuard & (UfRound | (UfSticky&UfRound|~UfSubBySmallNum) | (~Sticky&UfLSBNormSum&~UfSubBySmallNum));//round to nearest even
|
||||
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even
|
||||
3'b001: UfCalcPlus1 = 0;//round to zero
|
||||
3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round down
|
||||
3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round up
|
||||
3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (UfSticky&~(~UfRound&UfSubBySmallNum)) | (~Sticky&~UfSubBySmallNum)));//round to nearest max magnitude
|
||||
3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down
|
||||
3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up
|
||||
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
|
||||
default: UfCalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you subtract 1
|
||||
case (FrmM)
|
||||
3'b000: CalcMinus1 = 0;//round to nearest even
|
||||
3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero
|
||||
3'b010: CalcMinus1 = ~ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round down
|
||||
3'b011: CalcMinus1 = ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round up
|
||||
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
|
||||
3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down
|
||||
3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up
|
||||
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
|
||||
default: CalcMinus1 = 1'bx;
|
||||
endcase
|
||||
@ -973,9 +866,9 @@ module fmaround(
|
||||
end
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (Sticky | Guard | Round);
|
||||
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard);//UfRound is part of sticky
|
||||
assign Minus1 = CalcMinus1 & (Sticky | Guard | Round);
|
||||
assign Plus1 = CalcPlus1 & (Sticky | Round);
|
||||
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky
|
||||
assign Minus1 = CalcMinus1 & (Sticky | Round);
|
||||
|
||||
// Compute rounded result
|
||||
if (`FPSIZES == 1) begin
|
||||
@ -1011,7 +904,7 @@ module fmaround(
|
||||
|
||||
end
|
||||
|
||||
assign NormSumTruncated = NormSum[`NF+2:3];
|
||||
assign NormSumTruncated = NormSum[`NF+1:2];
|
||||
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
|
||||
assign ResultExp = FullResultExp[`NE-1:0];
|
||||
|
||||
@ -1083,12 +976,12 @@ module fmaflags(
|
||||
// Set Underflow flag if the number is too small to be represented in normal numbers
|
||||
// - Don't set the underflow flag if the result is exact
|
||||
|
||||
assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
// exp is negitive result is denorm exp was denorm but rounded to norm and if given an unbounded exponent it would stay denormal
|
||||
assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
// Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed result isn't outputed
|
||||
assign Inexact = (Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
assign Inexact = (Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
|
||||
// Combine flags
|
||||
// - FMA can't set the Divide by zero flag
|
||||
@ -1108,7 +1001,7 @@ module resultselect(
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic ZOrigDenormM, // is the original precision denormalized
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZSgnEffM, // the modified Z sign - depends on instruction
|
||||
input logic PSgnM, // the product's sign
|
||||
input logic ResultSgn, // the result's sign
|
||||
@ -1134,7 +1027,7 @@ module resultselect(
|
||||
end
|
||||
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
|
||||
assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
|
||||
assign NormResult = {ResultSgn, ResultExp, ResultFrac};
|
||||
@ -1153,7 +1046,7 @@ module resultselect(
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
|
||||
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
|
||||
@ -1173,7 +1066,7 @@ module resultselect(
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
|
||||
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
|
||||
NormResult = {ResultSgn, ResultExp, ResultFrac};
|
||||
@ -1189,7 +1082,7 @@ module resultselect(
|
||||
end
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
|
||||
@ -1206,7 +1099,7 @@ module resultselect(
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
|
||||
{{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
|
||||
KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
|
||||
@ -1244,7 +1137,7 @@ module resultselect(
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
|
||||
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
|
||||
NormResult = {ResultSgn, ResultExp, ResultFrac};
|
||||
@ -1260,7 +1153,7 @@ module resultselect(
|
||||
end
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
|
||||
{{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
|
||||
KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
|
||||
@ -1277,7 +1170,7 @@ module resultselect(
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
|
||||
{{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
|
||||
KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
|
||||
@ -1295,7 +1188,7 @@ module resultselect(
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
|
||||
{{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
|
||||
KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
|
||||
KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
|
||||
|
@ -72,7 +72,7 @@ module fpu (
|
||||
logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register
|
||||
logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register
|
||||
logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component
|
||||
logic [2:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
|
||||
|
||||
@ -104,7 +104,6 @@ module fpu (
|
||||
logic XInfQ, YInfQ; // is the input infinity - divide
|
||||
logic XExpMaxE; // is the exponent all ones (max value)
|
||||
logic XNormE; // is normal
|
||||
logic ZOrigDenormE;
|
||||
logic FmtQ;
|
||||
logic FOpCtrlQ;
|
||||
|
||||
@ -114,9 +113,8 @@ module fpu (
|
||||
logic [63:0] FMAResM, FMAResW; // FMA/multiply result
|
||||
logic [4:0] FMAFlgM; // FMA/multiply result
|
||||
logic [63:0] ReadResW; // read result (load instruction)
|
||||
logic [63:0] CvtFpResE; // add/FP -> FP convert result
|
||||
logic [4:0] CvtFpFlgE; // add/FP -> FP convert flags
|
||||
logic [63:0] CvtResE; // FP <-> int convert result
|
||||
logic [`XLEN-1:0] CvtIntResE; // FP <-> int convert result
|
||||
logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this
|
||||
logic [63:0] ClassResE; // classify result
|
||||
logic [63:0] CmpResE; // compare result
|
||||
@ -152,7 +150,7 @@ module fpu (
|
||||
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
{Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
flopenrc #(16) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
|
||||
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
|
||||
|
||||
@ -177,7 +175,7 @@ module fpu (
|
||||
// unpack unit
|
||||
// - splits FP inputs into their various parts
|
||||
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
|
||||
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, .ZOrigDenormE,
|
||||
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
|
||||
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
|
||||
@ -189,11 +187,11 @@ module fpu (
|
||||
// - handles FMA and multiply instructions
|
||||
fma fma (.clk, .reset, .FlushM, .StallM,
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
|
||||
.ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
|
||||
.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM,
|
||||
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
|
||||
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
|
||||
.FOpCtrlE, .ZOrigDenormE,
|
||||
.FOpCtrlE,
|
||||
.FmtE, .FmtM, .FrmM,
|
||||
.FMAFlgM, .FMAResM);
|
||||
|
||||
@ -214,13 +212,12 @@ module fpu (
|
||||
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
|
||||
|
||||
// other FP execution units
|
||||
fcvtfp fcvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
|
||||
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
|
||||
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE);
|
||||
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
|
||||
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
|
||||
fcvtint fcvtint (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE,
|
||||
.CvtResE, .CvtFlgE);
|
||||
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE,
|
||||
.XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE);
|
||||
|
||||
// data to be stored in memory - to IEU
|
||||
// - FP uses NaN-blocking format
|
||||
@ -231,13 +228,15 @@ module fpu (
|
||||
mux2 #(64) SrcAMux({{32{1'b1}}, ForwardedSrcAE[31:0]}, {{64-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
|
||||
|
||||
// select a result that may be written to the FP register
|
||||
mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE);
|
||||
mux5 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE);
|
||||
mux4 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
|
||||
mux4 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
|
||||
|
||||
// select the result that may be written to the integer register - to IEU
|
||||
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0],
|
||||
CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
|
||||
CvtIntResE, FIntResSelE, FIntResE);
|
||||
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
|
||||
// *** make sure the fpu matches the chapter diagram
|
||||
|
||||
// E/M pipe registers
|
||||
|
||||
|
@ -12,7 +12,6 @@ module unpack (
|
||||
output logic XDenormE, YDenormE, ZDenormE, // is XYZ denormalized
|
||||
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
|
||||
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
|
||||
output logic ZOrigDenormE, // is the original precision denormalized
|
||||
output logic XExpMaxE // does X have the maximum exponent (NaN or Inf)
|
||||
);
|
||||
|
||||
@ -30,9 +29,9 @@ module unpack (
|
||||
assign ZSgnE = Z[`FLEN-1];
|
||||
|
||||
// exponent
|
||||
assign XExpE = X[`FLEN-2:`NF];
|
||||
assign YExpE = Y[`FLEN-2:`NF];
|
||||
assign ZExpE = Z[`FLEN-2:`NF];
|
||||
assign XExpE = {X[`FLEN-2:`NF+1], X[`NF]|XDenormE};
|
||||
assign YExpE = {Y[`FLEN-2:`NF+1], Y[`NF]|YDenormE};
|
||||
assign ZExpE = {Z[`FLEN-2:`NF+1], Z[`NF]|ZDenormE};
|
||||
|
||||
// fraction (no assumed 1)
|
||||
assign XFracE = X[`NF-1:0];
|
||||
@ -49,7 +48,11 @@ module unpack (
|
||||
assign YExpMaxE = &YExpE;
|
||||
assign ZExpMaxE = &ZExpE;
|
||||
|
||||
assign ZOrigDenormE = 1'b0;
|
||||
|
||||
// is the input (in it's original format) denormalized
|
||||
assign XDenormE = ~|X[`FLEN-2:`NF] & ~XFracZero;
|
||||
assign YDenormE = ~|Y[`FLEN-2:`NF] & ~YFracZero;
|
||||
assign ZDenormE = ~|Z[`FLEN-2:`NF] & ~ZFracZero;
|
||||
|
||||
|
||||
end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported
|
||||
@ -73,7 +76,6 @@ module unpack (
|
||||
// double and half
|
||||
|
||||
logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed
|
||||
logic XOrigDenormE, YOrigDenormE; // the original value of XYZ is denormalized
|
||||
|
||||
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN
|
||||
assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
|
||||
@ -95,14 +97,16 @@ module unpack (
|
||||
|
||||
// extract the exponent, converting the smaller exponent into the larger precision if nessisary
|
||||
// - if the original precision had a denormal number convert the exponent value 1
|
||||
assign XExpE = FmtE ? X[`FLEN-2:`NF] : XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]};
|
||||
assign YExpE = FmtE ? Y[`FLEN-2:`NF] : YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]};
|
||||
assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]};
|
||||
assign XExpE = FmtE ? {X[`FLEN-2:`NF+1], X[`NF]|XDenormE} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]}}, XLen1[`LEN1-3:`NF1+1], XLen1[`NF1]|XDenormE};
|
||||
assign YExpE = FmtE ? {Y[`FLEN-2:`NF+1], Y[`NF]|YDenormE} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]}}, YLen1[`LEN1-3:`NF1+1], YLen1[`NF1]|YDenormE};
|
||||
assign ZExpE = FmtE ? {Z[`FLEN-2:`NF+1], Z[`NF]|ZDenormE} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]}}, ZLen1[`LEN1-3:`NF1+1], ZLen1[`NF1]|ZDenormE};
|
||||
|
||||
// is the input (in it's original format) denormalized
|
||||
assign XOrigDenormE = FmtE ? 0 : ~|XLen1[`LEN1-2:`NF1] & ~XFracZero;
|
||||
assign YOrigDenormE = FmtE ? 0 : ~|YLen1[`LEN1-2:`NF1] & ~YFracZero;
|
||||
assign ZOrigDenormE = FmtE ? 0 : ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero;
|
||||
|
||||
// is the input (in it's original format) denormalized
|
||||
assign XDenormE = (FmtE ? ~|X[`FLEN-2:`NF] : ~|XLen1[`LEN1-2:`NF1]) & ~XFracZero;
|
||||
assign YDenormE = (FmtE ? ~|Y[`FLEN-2:`NF] : ~|YLen1[`LEN1-2:`NF1]) & ~YFracZero;
|
||||
assign ZDenormE = (FmtE ? ~|Z[`FLEN-2:`NF] : ~|ZLen1[`LEN1-2:`NF1]) & ~ZFracZero;
|
||||
|
||||
// extract the fraction, add trailing zeroes to the mantissa if nessisary
|
||||
assign XFracE = FmtE ? X[`NF-1:0] : {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
@ -141,7 +145,6 @@ module unpack (
|
||||
|
||||
logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision
|
||||
logic [`LEN2-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision
|
||||
logic XOrigDenormE, YOrigDenormE; // the original value of XYZ is denormalized
|
||||
|
||||
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision
|
||||
assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
|
||||
@ -156,14 +159,15 @@ module unpack (
|
||||
// There are 2 case statements
|
||||
// - one for other singals and one for sgn/exp/frac
|
||||
// - need two for the dependencies in the expoenent calculation
|
||||
//*** pull out the ~FracZero and and it at the end
|
||||
always_comb begin
|
||||
case (FmtE)
|
||||
`FMT: begin // if input is largest precision (`FLEN - ie quad or double)
|
||||
|
||||
// This is the original format so set OrigDenorm to 0
|
||||
XOrigDenormE = 1'b0;
|
||||
YOrigDenormE = 1'b0;
|
||||
ZOrigDenormE = 1'b0;
|
||||
XDenormE = ~|X[`FLEN-2:`NF] & ~XFracZero;
|
||||
YDenormE = ~|Y[`FLEN-2:`NF] & ~YFracZero;
|
||||
ZDenormE = ~|Z[`FLEN-2:`NF] & ~ZFracZero;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |X[`FLEN-2:`NF];
|
||||
@ -178,9 +182,9 @@ module unpack (
|
||||
`FMT1: begin // if input is larger precsion (`LEN1 - double or single)
|
||||
|
||||
// is the input (in it's original format) denormalized
|
||||
XOrigDenormE = ~|XLen1[`LEN1-2:`NF1] & ~XFracZero;
|
||||
YOrigDenormE = ~|YLen1[`LEN1-2:`NF1] & ~YFracZero;
|
||||
ZOrigDenormE = ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero;
|
||||
XDenormE = ~|XLen1[`LEN1-2:`NF1] & ~XFracZero;
|
||||
YDenormE = ~|YLen1[`LEN1-2:`NF1] & ~YFracZero;
|
||||
ZDenormE = ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |XLen1[`LEN1-2:`NF1];
|
||||
@ -195,9 +199,9 @@ module unpack (
|
||||
`FMT2: begin // if input is smallest precsion (`LEN2 - single or half)
|
||||
|
||||
// is the input (in it's original format) denormalized
|
||||
XOrigDenormE = ~|XLen2[`LEN2-2:`NF2] & ~XFracZero;
|
||||
YOrigDenormE = ~|YLen2[`LEN2-2:`NF2] & ~YFracZero;
|
||||
ZOrigDenormE = ~|ZLen2[`LEN2-2:`NF2] & ~ZFracZero;
|
||||
XDenormE = ~|XLen2[`LEN2-2:`NF2] & ~XFracZero;
|
||||
YDenormE = ~|YLen2[`LEN2-2:`NF2] & ~YFracZero;
|
||||
ZDenormE = ~|ZLen2[`LEN2-2:`NF2] & ~ZFracZero;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |XLen2[`LEN2-2:`NF2];
|
||||
@ -210,9 +214,9 @@ module unpack (
|
||||
ZExpMaxE = &ZLen2[`LEN2-2:`NF2];
|
||||
end
|
||||
default: begin
|
||||
XOrigDenormE = 0;
|
||||
YOrigDenormE = 0;
|
||||
ZOrigDenormE = 0;
|
||||
XDenormE = 0;
|
||||
YDenormE = 0;
|
||||
ZDenormE = 0;
|
||||
XExpNonzero = 0;
|
||||
YExpNonzero = 0;
|
||||
ZExpNonzero = 0;
|
||||
@ -231,9 +235,9 @@ module unpack (
|
||||
ZSgnE = Z[`FLEN-1];
|
||||
|
||||
// extract the exponent
|
||||
XExpE = X[`FLEN-2:`NF];
|
||||
YExpE = Y[`FLEN-2:`NF];
|
||||
ZExpE = Z[`FLEN-2:`NF];
|
||||
XExpE = {X[`FLEN-2:`NF+1], X[`NF]|XDenormE};
|
||||
YExpE = {Y[`FLEN-2:`NF+1], Y[`NF]|YDenormE};
|
||||
ZExpE = {Z[`FLEN-2:`NF+1], Z[`NF]|ZDenormE};
|
||||
|
||||
// extract the fraction
|
||||
XFracE = X[`NF-1:0];
|
||||
@ -256,9 +260,9 @@ module unpack (
|
||||
// also need to take into account possible zero/denorm/inf/NaN values
|
||||
|
||||
// convert the larger precision's exponent to use the largest precision's bias
|
||||
XExpE = XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]};
|
||||
YExpE = YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]};
|
||||
ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]};
|
||||
XExpE = {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]}}, XLen1[`LEN1-3:`NF1+1], XLen1[`NF1]|XDenormE};
|
||||
YExpE = {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]}}, YLen1[`LEN1-3:`NF1+1], YLen1[`NF1]|YDenormE};
|
||||
ZExpE = {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]}}, ZLen1[`LEN1-3:`NF1+1], ZLen1[`NF1]|ZDenormE};
|
||||
|
||||
// extract the fraction and add the nessesary trailing zeros
|
||||
XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
@ -281,9 +285,9 @@ module unpack (
|
||||
// also need to take into account possible zero/denorm/inf/NaN values
|
||||
|
||||
// convert the smallest precision's exponent to use the largest precision's bias
|
||||
XExpE = XOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]&~XExpZero|XExpMaxE}}, XLen2[`LEN2-3:`NF2]};
|
||||
YExpE = YOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]&~YExpZero|YExpMaxE}}, YLen2[`LEN2-3:`NF2]};
|
||||
ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]};
|
||||
XExpE = XDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]}}, XLen2[`LEN2-3:`NF2]};
|
||||
YExpE = YDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]}}, YLen2[`LEN2-3:`NF2]};
|
||||
ZExpE = ZDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]}}, ZLen2[`LEN2-3:`NF2]};
|
||||
|
||||
// extract the fraction and add the nessesary trailing zeros
|
||||
XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)};
|
||||
@ -318,7 +322,6 @@ module unpack (
|
||||
logic [`D_LEN-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision
|
||||
logic [`S_LEN-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision
|
||||
logic [`H_LEN-1:0] XLen3, YLen3, ZLen3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision
|
||||
logic XOrigDenormE, YOrigDenormE; // the original value of XYZ is denormalized
|
||||
|
||||
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision
|
||||
assign XLen1 = &X[`Q_LEN-1:`D_LEN] ? X[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
|
||||
@ -343,10 +346,10 @@ module unpack (
|
||||
case (FmtE)
|
||||
2'b11: begin // if input is quad percision
|
||||
|
||||
// This is the original format so set OrigDenorm to 0
|
||||
XOrigDenormE = 1'b0;
|
||||
YOrigDenormE = 1'b0;
|
||||
ZOrigDenormE = 1'b0;
|
||||
// is the input (in it's original format) denormalized
|
||||
XDenormE = ~|X[`Q_LEN-2:`Q_NF] & ~XFracZero;
|
||||
YDenormE = ~|Y[`Q_LEN-2:`Q_NF] & ~YFracZero;
|
||||
ZDenormE = ~|Z[`Q_LEN-2:`Q_NF] & ~ZFracZero;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |X[`Q_LEN-2:`Q_NF];
|
||||
@ -366,9 +369,9 @@ module unpack (
|
||||
ZExpMaxE = &ZLen1[`D_LEN-2:`D_NF];
|
||||
|
||||
// is the input (in it's original format) denormalized
|
||||
XOrigDenormE = ~|XLen1[`D_LEN-2:`D_NF] & ~XFracZero;
|
||||
YOrigDenormE = ~|YLen1[`D_LEN-2:`D_NF] & ~YFracZero;
|
||||
ZOrigDenormE = ~|ZLen1[`D_LEN-2:`D_NF] & ~ZFracZero;
|
||||
XDenormE = ~|XLen1[`D_LEN-2:`D_NF] & ~XFracZero;
|
||||
YDenormE = ~|YLen1[`D_LEN-2:`D_NF] & ~YFracZero;
|
||||
ZDenormE = ~|ZLen1[`D_LEN-2:`D_NF] & ~ZFracZero;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |XLen1[`D_LEN-2:`D_NF];
|
||||
@ -383,9 +386,9 @@ module unpack (
|
||||
ZExpMaxE = &ZLen2[`S_LEN-2:`S_NF];
|
||||
|
||||
// is the input (in it's original format) denormalized
|
||||
XOrigDenormE = ~|XLen2[`S_LEN-2:`S_NF] & ~XFracZero;
|
||||
YOrigDenormE = ~|YLen2[`S_LEN-2:`S_NF] & ~YFracZero;
|
||||
ZOrigDenormE = ~|ZLen2[`S_LEN-2:`S_NF] & ~ZFracZero;
|
||||
XDenormE = ~|XLen2[`S_LEN-2:`S_NF] & ~XFracZero;
|
||||
YDenormE = ~|YLen2[`S_LEN-2:`S_NF] & ~YFracZero;
|
||||
ZDenormE = ~|ZLen2[`S_LEN-2:`S_NF] & ~ZFracZero;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |XLen2[`S_LEN-2:`S_NF];
|
||||
@ -400,9 +403,9 @@ module unpack (
|
||||
ZExpMaxE = &ZLen3[`H_LEN-2:`H_NF];
|
||||
|
||||
// is the input (in it's original format) denormalized
|
||||
XOrigDenormE = ~|XLen3[`H_LEN-2:`H_NF] & ~XFracZero;
|
||||
YOrigDenormE = ~|YLen3[`H_LEN-2:`H_NF] & ~YFracZero;
|
||||
ZOrigDenormE = ~|ZLen3[`H_LEN-2:`H_NF] & ~ZFracZero;
|
||||
XDenormE = ~|XLen3[`H_LEN-2:`H_NF] & ~XFracZero;
|
||||
YDenormE = ~|YLen3[`H_LEN-2:`H_NF] & ~YFracZero;
|
||||
ZDenormE = ~|ZLen3[`H_LEN-2:`H_NF] & ~ZFracZero;
|
||||
|
||||
// is the exponent non-zero
|
||||
XExpNonzero = |XLen3[`H_LEN-2:`H_NF];
|
||||
@ -421,9 +424,9 @@ module unpack (
|
||||
ZSgnE = Z[`Q_LEN-1];
|
||||
|
||||
// extract the exponent
|
||||
XExpE = X[`Q_LEN-2:`Q_NF];
|
||||
YExpE = Y[`Q_LEN-2:`Q_NF];
|
||||
ZExpE = Z[`Q_LEN-2:`Q_NF];
|
||||
XExpE = {X[`Q_LEN-2:`Q_NF+1], X[`Q_NF]|XDenormE};
|
||||
YExpE = {Y[`Q_LEN-2:`Q_NF+1], Y[`Q_NF]|YDenormE};
|
||||
ZExpE = {Z[`Q_LEN-2:`Q_NF+1], Z[`Q_NF]|ZDenormE};
|
||||
|
||||
// extract the fraction
|
||||
XFracE = X[`Q_NF-1:0];
|
||||
@ -446,9 +449,9 @@ module unpack (
|
||||
|
||||
// convert the double precsion exponent into quad precsion
|
||||
|
||||
XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]&~XExpZero|XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]};
|
||||
YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]&~YExpZero|YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]};
|
||||
ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]};
|
||||
XExpE = {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]}}, XLen1[`D_LEN-3:`D_NF+1], XLen1[`D_NF]|XDenormE};
|
||||
YExpE = {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]}}, YLen1[`D_LEN-3:`D_NF+1], YLen1[`D_NF]|YDenormE};
|
||||
ZExpE = {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]}}, ZLen1[`D_LEN-3:`D_NF+1], ZLen1[`D_NF]|ZDenormE};
|
||||
|
||||
// extract the fraction and add the nessesary trailing zeros
|
||||
XFracE = {XLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
|
||||
@ -470,9 +473,9 @@ module unpack (
|
||||
// also need to take into account possible zero/denorm/inf/NaN values
|
||||
|
||||
// convert the single precsion exponent into quad precsion
|
||||
XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]&~XExpZero|XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]};
|
||||
YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]&~YExpZero|YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]};
|
||||
ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]};
|
||||
XExpE = {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]}}, XLen2[`S_LEN-3:`S_NF+1], XLen2[`S_NF]|XDenormE};
|
||||
YExpE = {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]}}, YLen2[`S_LEN-3:`S_NF+1], YLen2[`S_NF]|YDenormE};
|
||||
ZExpE = {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]}}, ZLen2[`S_LEN-3:`S_NF+1], ZLen2[`S_NF]|ZDenormE};
|
||||
|
||||
// extract the fraction and add the nessesary trailing zeros
|
||||
XFracE = {XLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
|
||||
@ -494,9 +497,9 @@ module unpack (
|
||||
// also need to take into account possible zero/denorm/inf/NaN values
|
||||
|
||||
// convert the half precsion exponent into quad precsion
|
||||
XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]&~XExpZero|XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]};
|
||||
YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]&~YExpZero|YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]};
|
||||
ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]};
|
||||
XExpE = {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]}}, XLen3[`H_LEN-3:`H_NF+1], XLen3[`H_NF]|XDenormE};
|
||||
YExpE = {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]}}, YLen3[`H_LEN-3:`H_NF+1], YLen3[`H_NF]|YDenormE};
|
||||
ZExpE = {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]}}, ZLen3[`H_LEN-3:`H_NF+1], ZLen3[`H_NF]|ZDenormE};
|
||||
|
||||
// extract the fraction and add the nessesary trailing zeros
|
||||
XFracE = {XLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
|
||||
@ -537,10 +540,10 @@ module unpack (
|
||||
assign YSNaNE = YNaNE&~YFracE[`NF-1];
|
||||
assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];
|
||||
|
||||
// is the input denormalized
|
||||
assign XDenormE = XExpZero & ~XFracZero;
|
||||
assign YDenormE = YExpZero & ~YFracZero;
|
||||
assign ZDenormE = ZExpZero & ~ZFracZero;
|
||||
// // is the input denormalized
|
||||
// assign XDenormE = XExpZero & ~XFracZero;
|
||||
// assign YDenormE = YExpZero & ~YFracZero;
|
||||
// assign ZDenormE = ZExpZero & ~ZFracZero;
|
||||
|
||||
// is the input infinity
|
||||
assign XInfE = XExpMaxE & XFracZero;
|
||||
|
15
pipelined/src/generic/lzc.sv
Normal file
15
pipelined/src/generic/lzc.sv
Normal file
@ -0,0 +1,15 @@
|
||||
//leading zero counter i.e. priority encoder
|
||||
module lzc #(parameter WIDTH=1) (
|
||||
input logic [WIDTH-1:0] num,
|
||||
output logic [$clog2(WIDTH)-1:0] ZeroCnt
|
||||
);
|
||||
/* verilator lint_off CMPCONST */
|
||||
|
||||
logic [$clog2(WIDTH)-1:0] i;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~num[WIDTH-1-(32)'(i)] & $unsigned(i) <= $unsigned(($clog2(WIDTH))'(WIDTH-1))) i = i+1; // search for leading one
|
||||
ZeroCnt = i;
|
||||
end
|
||||
/* verilator lint_on CMPCONST */
|
||||
endmodule
|
@ -126,6 +126,16 @@ module ppa_mult_128 #(parameter WIDTH=128) (
|
||||
assign y = a * b;
|
||||
endmodule
|
||||
|
||||
module ppa_alu_8 #(parameter WIDTH=8) (
|
||||
input logic [WIDTH-1:0] A, B,
|
||||
input logic [2:0] ALUControl,
|
||||
input logic [2:0] Funct3,
|
||||
output logic [WIDTH-1:0] Result,
|
||||
output logic [WIDTH-1:0] Sum);
|
||||
|
||||
ppa_alu #(WIDTH) alu (.*);
|
||||
endmodule
|
||||
|
||||
module ppa_alu_16 #(parameter WIDTH=16) (
|
||||
input logic [WIDTH-1:0] A, B,
|
||||
input logic [2:0] ALUControl,
|
||||
@ -133,7 +143,7 @@ module ppa_alu_16 #(parameter WIDTH=16) (
|
||||
output logic [WIDTH-1:0] Result,
|
||||
output logic [WIDTH-1:0] Sum);
|
||||
|
||||
ppa_alu #(WIDTH) alu_16 (.*);
|
||||
ppa_alu #(WIDTH) alu (.*);
|
||||
endmodule
|
||||
|
||||
module ppa_alu_32 #(parameter WIDTH=32) (
|
||||
@ -143,7 +153,7 @@ module ppa_alu_32 #(parameter WIDTH=32) (
|
||||
output logic [WIDTH-1:0] Result,
|
||||
output logic [WIDTH-1:0] Sum);
|
||||
|
||||
ppa_alu #(WIDTH) alu_32 (.*);
|
||||
ppa_alu #(WIDTH) alu (.*);
|
||||
endmodule
|
||||
|
||||
module ppa_alu_64 #(parameter WIDTH=64) (
|
||||
@ -153,7 +163,17 @@ module ppa_alu_64 #(parameter WIDTH=64) (
|
||||
output logic [WIDTH-1:0] Result,
|
||||
output logic [WIDTH-1:0] Sum);
|
||||
|
||||
ppa_alu #(WIDTH) alu_64 (.*);
|
||||
ppa_alu #(WIDTH) alu (.*);
|
||||
endmodule
|
||||
|
||||
module ppa_alu_128 #(parameter WIDTH=128) (
|
||||
input logic [WIDTH-1:0] A, B,
|
||||
input logic [2:0] ALUControl,
|
||||
input logic [2:0] Funct3,
|
||||
output logic [WIDTH-1:0] Result,
|
||||
output logic [WIDTH-1:0] Sum);
|
||||
|
||||
ppa_alu #(WIDTH) alu (.*);
|
||||
endmodule
|
||||
|
||||
module ppa_alu #(parameter WIDTH=32) (
|
||||
@ -209,9 +229,11 @@ module ppa_alu #(parameter WIDTH=32) (
|
||||
3'b111: FullResult = A & B; // and
|
||||
endcase
|
||||
|
||||
// support W-type RV64I ADDW/SUBW/ADDIW/Shifts that sign-extend 32-bit result to 64 bits
|
||||
if (WIDTH==64) assign Result = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
|
||||
else assign Result = FullResult;
|
||||
assign Result = FullResult;
|
||||
// not using W64 so it has the same architecture regardless of width
|
||||
// // support W-type RV64I ADDW/SUBW/ADDIW/Shifts that sign-extend 32-bit result to 64 bits
|
||||
// if (WIDTH==64) assign Result = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
|
||||
// else assign Result = FullResult;
|
||||
endmodule
|
||||
|
||||
module ppa_shiftleft_8 #(parameter WIDTH=8) (
|
||||
@ -254,51 +276,6 @@ module ppa_shiftleft_128 #(parameter WIDTH=128) (
|
||||
assign y = a << amt;
|
||||
endmodule
|
||||
|
||||
module ppa_shifter_8 #(parameter WIDTH=8) (
|
||||
input logic [WIDTH-1:0] A,
|
||||
input logic [$clog2(WIDTH)-1:0] Amt,
|
||||
input logic Right, Arith, W64,
|
||||
output logic [WIDTH-1:0] Y);
|
||||
|
||||
ppa_shifter #(WIDTH) sh (.*);
|
||||
endmodule
|
||||
|
||||
module ppa_shifter_16 #(parameter WIDTH=16) (
|
||||
input logic [WIDTH-1:0] A,
|
||||
input logic [$clog2(WIDTH)-1:0] Amt,
|
||||
input logic Right, Arith, W64,
|
||||
output logic [WIDTH-1:0] Y);
|
||||
|
||||
ppa_shifter #(WIDTH) sh (.*);
|
||||
endmodule
|
||||
|
||||
module ppa_shifter_32 #(parameter WIDTH=32) (
|
||||
input logic [WIDTH-1:0] A,
|
||||
input logic [$clog2(WIDTH)-1:0] Amt,
|
||||
input logic Right, Arith, W64,
|
||||
output logic [WIDTH-1:0] Y);
|
||||
|
||||
ppa_shifter #(WIDTH) sh (.*);
|
||||
endmodule
|
||||
|
||||
module ppa_shifter_64 #(parameter WIDTH=64) (
|
||||
input logic [WIDTH-1:0] A,
|
||||
input logic [$clog2(WIDTH)-1:0] Amt,
|
||||
input logic Right, Arith, W64,
|
||||
output logic [WIDTH-1:0] Y);
|
||||
|
||||
ppa_shifter #(WIDTH) sh (.*);
|
||||
endmodule
|
||||
|
||||
module ppa_shifter_128 #(parameter WIDTH=128) (
|
||||
input logic [WIDTH-1:0] A,
|
||||
input logic [$clog2(WIDTH)-1:0] Amt,
|
||||
input logic Right, Arith, W64,
|
||||
output logic [WIDTH-1:0] Y);
|
||||
|
||||
ppa_shifter #(WIDTH) sh (.*);
|
||||
endmodule
|
||||
|
||||
module ppa_shifter #(parameter WIDTH=32) (
|
||||
input logic [WIDTH-1:0] A,
|
||||
input logic [$clog2(WIDTH)-1:0] Amt,
|
||||
@ -313,29 +290,35 @@ module ppa_shifter #(parameter WIDTH=32) (
|
||||
// For RV64, 32 and 64-bit shifts are needed, with sign extension.
|
||||
|
||||
// funnel shifter input (see CMOS VLSI Design 4e Section 11.8.1, note Table 11.11 shift types wrong)
|
||||
if (WIDTH == 64 | WIDTH ==128) begin:shifter // RV64 or 128
|
||||
always_comb // funnel mux
|
||||
if (W64) begin // 32-bit shifts
|
||||
if (Right)
|
||||
if (Arith) z = {{WIDTH{1'b0}}, {WIDTH/2 -1{A[WIDTH/2 -1]}}, A[WIDTH/2 -1:0]};
|
||||
else z = {{WIDTH*3/2-1{1'b0}}, A[WIDTH/2 -1:0]};
|
||||
else z = {{WIDTH/2{1'b0}}, A[WIDTH/2 -1:0], {WIDTH-1{1'b0}}};
|
||||
end else begin
|
||||
if (Right)
|
||||
if (Arith) z = {{WIDTH-1{A[WIDTH-1]}}, A};
|
||||
else z = {{WIDTH-1{1'b0}}, A};
|
||||
else z = {A, {WIDTH-1{1'b0}}};
|
||||
end
|
||||
assign amttrunc = W64 ? {1'b0, Amt[$clog2(WIDTH)-2:0]} : Amt; // 32 or 64-bit shift
|
||||
end else begin:shifter // RV32 or less
|
||||
always_comb // funnel mux
|
||||
// if (WIDTH == 64 | WIDTH ==128) begin:shifter // RV64 or 128
|
||||
// always_comb // funnel mux
|
||||
// if (W64) begin // 32-bit shifts
|
||||
// if (Right)
|
||||
// if (Arith) z = {{WIDTH{1'b0}}, {WIDTH/2 -1{A[WIDTH/2 -1]}}, A[WIDTH/2 -1:0]};
|
||||
// else z = {{WIDTH*3/2-1{1'b0}}, A[WIDTH/2 -1:0]};
|
||||
// else z = {{WIDTH/2{1'b0}}, A[WIDTH/2 -1:0], {WIDTH-1{1'b0}}};
|
||||
// end else begin
|
||||
// if (Right)
|
||||
// if (Arith) z = {{WIDTH-1{A[WIDTH-1]}}, A};
|
||||
// else z = {{WIDTH-1{1'b0}}, A};
|
||||
// else z = {A, {WIDTH-1{1'b0}}};
|
||||
// end
|
||||
// assign amttrunc = W64 ? {1'b0, Amt[$clog2(WIDTH)-2:0]} : Amt; // 32 or 64-bit shift
|
||||
// end else begin:shifter // RV32 or less
|
||||
// always_comb // funnel mux
|
||||
// if (Right)
|
||||
// if (Arith) z = {{WIDTH-1{A[WIDTH-1]}}, A};
|
||||
// else z = {{WIDTH-1{1'b0}}, A};
|
||||
// else z = {A, {WIDTH-1{1'b0}}};
|
||||
// assign amttrunc = Amt; // shift amount
|
||||
// end
|
||||
|
||||
always_comb // funnel mux
|
||||
if (Right)
|
||||
if (Arith) z = {{WIDTH-1{A[WIDTH-1]}}, A};
|
||||
else z = {{WIDTH-1{1'b0}}, A};
|
||||
else z = {A, {WIDTH-1{1'b0}}};
|
||||
assign amttrunc = Amt; // shift amount
|
||||
end
|
||||
|
||||
|
||||
// opposite offset for right shfits
|
||||
assign offset = Right ? amttrunc : ~amttrunc;
|
||||
@ -345,7 +328,51 @@ module ppa_shifter #(parameter WIDTH=32) (
|
||||
assign Y = zshift[WIDTH-1:0];
|
||||
endmodule
|
||||
|
||||
// just report one hot
|
||||
// module ppa_shifter_8 #(parameter WIDTH=8) (
|
||||
// input logic [WIDTH-1:0] A,
|
||||
// input logic [$clog2(WIDTH)-1:0] Amt,
|
||||
// input logic Right, Arith, W64,
|
||||
// output logic [WIDTH-1:0] Y);
|
||||
|
||||
// ppa_shifter #(WIDTH) sh (.*);
|
||||
// endmodule
|
||||
|
||||
// module ppa_shifter_16 #(parameter WIDTH=16) (
|
||||
// input logic [WIDTH-1:0] A,
|
||||
// input logic [$clog2(WIDTH)-1:0] Amt,
|
||||
// input logic Right, Arith, W64,
|
||||
// output logic [WIDTH-1:0] Y);
|
||||
|
||||
// ppa_shifter #(WIDTH) sh (.*);
|
||||
// endmodule
|
||||
|
||||
// module ppa_shifter_32 #(parameter WIDTH=32) (
|
||||
// input logic [WIDTH-1:0] A,
|
||||
// input logic [$clog2(WIDTH)-1:0] Amt,
|
||||
// input logic Right, Arith, W64,
|
||||
// output logic [WIDTH-1:0] Y);
|
||||
|
||||
// ppa_shifter #(WIDTH) sh (.*);
|
||||
// endmodule
|
||||
|
||||
// module ppa_shifter_64 #(parameter WIDTH=64) (
|
||||
// input logic [WIDTH-1:0] A,
|
||||
// input logic [$clog2(WIDTH)-1:0] Amt,
|
||||
// input logic Right, Arith, W64,
|
||||
// output logic [WIDTH-1:0] Y);
|
||||
|
||||
// ppa_shifter #(WIDTH) sh (.*);
|
||||
// endmodule
|
||||
|
||||
// module ppa_shifter_128 #(parameter WIDTH=128) (
|
||||
// input logic [WIDTH-1:0] A,
|
||||
// input logic [$clog2(WIDTH)-1:0] Amt,
|
||||
// input logic Right, Arith, W64,
|
||||
// output logic [WIDTH-1:0] Y);
|
||||
|
||||
// ppa_shifter #(WIDTH) sh (.*);
|
||||
// endmodule
|
||||
|
||||
module ppa_prioritythermometer #(parameter N = 8) (
|
||||
input logic [N-1:0] a,
|
||||
output logic [N-1:0] y);
|
||||
@ -538,7 +565,7 @@ endmodule
|
||||
|
||||
// *** some way to express data-critical inputs
|
||||
|
||||
module ppa_flop_8 #(parameter WIDTH = 8) (
|
||||
module ppa_flop #(parameter WIDTH = 8) (
|
||||
input logic clk,
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
@ -547,13 +574,26 @@ module ppa_flop_8 #(parameter WIDTH = 8) (
|
||||
q <= #1 d;
|
||||
endmodule
|
||||
|
||||
module ppa_flop_8 #(parameter WIDTH = 8) (
|
||||
input logic clk,
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flop #(WIDTH) f1(clk, d, q1);
|
||||
ppa_flop #(WIDTH) f2(clk, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flop_16 #(parameter WIDTH = 16) (
|
||||
input logic clk,
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flop #(WIDTH) f1(clk, d, q1);
|
||||
ppa_flop #(WIDTH) f2(clk, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flop_32 #(parameter WIDTH = 32) (
|
||||
@ -561,8 +601,10 @@ module ppa_flop_32 #(parameter WIDTH = 32) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flop #(WIDTH) f1(clk, d, q1);
|
||||
ppa_flop #(WIDTH) f2(clk, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flop_64 #(parameter WIDTH = 64) (
|
||||
@ -570,8 +612,10 @@ module ppa_flop_64 #(parameter WIDTH = 64) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flop #(WIDTH) f1(clk, d, q1);
|
||||
ppa_flop #(WIDTH) f2(clk, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flop_128 #(parameter WIDTH = 128) (
|
||||
@ -579,8 +623,20 @@ module ppa_flop_128 #(parameter WIDTH = 128) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flop #(WIDTH) f1(clk, d, q1);
|
||||
ppa_flop #(WIDTH) f2(clk, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flopr #(parameter WIDTH = 8) (
|
||||
input logic clk, reset,
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
q <= #1 d;
|
||||
if (reset) q <= #1 0;
|
||||
else q <= #1 d;
|
||||
endmodule
|
||||
|
||||
module ppa_flopr_8 #(parameter WIDTH = 8) (
|
||||
@ -588,9 +644,10 @@ module ppa_flopr_8 #(parameter WIDTH = 8) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
if (reset) q <= #1 0;
|
||||
else q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
|
||||
ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flopr_16 #(parameter WIDTH = 16) (
|
||||
@ -598,9 +655,10 @@ module ppa_flopr_16 #(parameter WIDTH = 16) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
if (reset) q <= #1 0;
|
||||
else q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
|
||||
ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flopr_32 #(parameter WIDTH = 32) (
|
||||
@ -608,9 +666,10 @@ module ppa_flopr_32 #(parameter WIDTH = 32) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
if (reset) q <= #1 0;
|
||||
else q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
|
||||
ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flopr_64 #(parameter WIDTH = 64) (
|
||||
@ -618,9 +677,10 @@ module ppa_flopr_64 #(parameter WIDTH = 64) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
if (reset) q <= #1 0;
|
||||
else q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
|
||||
ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flopr_128 #(parameter WIDTH = 128) (
|
||||
@ -628,7 +688,18 @@ module ppa_flopr_128 #(parameter WIDTH = 128) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
|
||||
ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_floprasync #(parameter WIDTH = 8) (
|
||||
input logic clk, reset,
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk or posedge reset)
|
||||
if (reset) q <= #1 0;
|
||||
else q <= #1 d;
|
||||
endmodule
|
||||
@ -638,9 +709,10 @@ module ppa_floprasync_8 #(parameter WIDTH = 8) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk or posedge reset)
|
||||
if (reset) q <= #1 0;
|
||||
else q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
|
||||
ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_floprasync_16 #(parameter WIDTH = 16) (
|
||||
@ -648,9 +720,10 @@ module ppa_floprasync_16 #(parameter WIDTH = 16) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk or posedge reset)
|
||||
if (reset) q <= #1 0;
|
||||
else q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
|
||||
ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_floprasync_32 #(parameter WIDTH = 32) (
|
||||
@ -658,9 +731,10 @@ module ppa_floprasync_32 #(parameter WIDTH = 32) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk or posedge reset)
|
||||
if (reset) q <= #1 0;
|
||||
else q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
|
||||
ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_floprasync_64 #(parameter WIDTH = 64) (
|
||||
@ -668,9 +742,10 @@ module ppa_floprasync_64 #(parameter WIDTH = 64) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk or posedge reset)
|
||||
if (reset) q <= #1 0;
|
||||
else q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
|
||||
ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_floprasync_128 #(parameter WIDTH = 128) (
|
||||
@ -678,9 +753,20 @@ module ppa_floprasync_128 #(parameter WIDTH = 128) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk or posedge reset)
|
||||
if (reset) q <= #1 0;
|
||||
else q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
|
||||
ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flopenr #(parameter WIDTH = 8) (
|
||||
input logic clk, reset, en,
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
if (reset) q <= #1 0;
|
||||
else if (en) q <= #1 d;
|
||||
endmodule
|
||||
|
||||
module ppa_flopenr_8 #(parameter WIDTH = 8) (
|
||||
@ -688,9 +774,10 @@ module ppa_flopenr_8 #(parameter WIDTH = 8) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
if (reset) q <= #1 0;
|
||||
else if (en) q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
|
||||
ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flopenr_16 #(parameter WIDTH = 16) (
|
||||
@ -698,9 +785,10 @@ module ppa_flopenr_16 #(parameter WIDTH = 16) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
if (reset) q <= #1 0;
|
||||
else if (en) q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
|
||||
ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flopenr_32 #(parameter WIDTH = 32) (
|
||||
@ -708,9 +796,10 @@ module ppa_flopenr_32 #(parameter WIDTH = 32) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
if (reset) q <= #1 0;
|
||||
else if (en) q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
|
||||
ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flopenr_64 #(parameter WIDTH = 64) (
|
||||
@ -718,9 +807,10 @@ module ppa_flopenr_64 #(parameter WIDTH = 64) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
if (reset) q <= #1 0;
|
||||
else if (en) q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
|
||||
ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_flopenr_128 #(parameter WIDTH = 128) (
|
||||
@ -728,9 +818,10 @@ module ppa_flopenr_128 #(parameter WIDTH = 128) (
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
if (reset) q <= #1 0;
|
||||
else if (en) q <= #1 d;
|
||||
logic [WIDTH-1:0] q1;
|
||||
|
||||
ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
|
||||
ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
|
||||
endmodule
|
||||
|
||||
module ppa_csa_8 #(parameter WIDTH = 8) (
|
||||
|
@ -8,3 +8,9 @@ testgen: testgen.c
|
||||
|
||||
qst2: qst2.c
|
||||
gcc qst2.c -lm -o qst2
|
||||
gcc -lm -o testgen testgen.c
|
||||
./testgen
|
||||
|
||||
exptestgen: exptestgen.c
|
||||
gcc -lm -o exptestgen exptestgen.c
|
||||
./exptestgen
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
/* Prototypes */
|
||||
|
||||
void output(FILE *fptr, int e1, double a, int e2, double b, int r_exp, double r_mantissa);
|
||||
void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac);
|
||||
void printhex(FILE *fptr, double x);
|
||||
double random_input(void);
|
||||
double random_input_e(void);
|
||||
@ -31,12 +31,13 @@ double random_input_e(void);
|
||||
void main(void)
|
||||
{
|
||||
FILE *fptr;
|
||||
// e1 & e2 are exponents
|
||||
// a & b are mantissas
|
||||
// r_mantissa is result of mantissa divsion
|
||||
// r_exp is result of exponent division
|
||||
double a, b, r_mantissa, r_exp;
|
||||
int e1, e2;
|
||||
// aExp & bExp are exponents
|
||||
// aFrac & bFrac are mantissas
|
||||
// rFrac is result of fractional divsion
|
||||
// rExp is result of exponent division
|
||||
double aFrac, bFrac, rFrac;
|
||||
int aExp, bExp, rExp;
|
||||
int aSign, bSign, rSign;
|
||||
double mantissa[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
|
||||
1.75, 1.875, 1.99999,
|
||||
1.1, 1.2, 1.01, 1.001, 1.0001,
|
||||
@ -51,25 +52,28 @@ void main(void)
|
||||
}
|
||||
|
||||
for (i=0; i<ENTRIES; i++) {
|
||||
b = mantissa[i];
|
||||
e2 = exponent[i] + bias;
|
||||
bFrac = mantissa[i];
|
||||
bExp = exponent[i] + bias;
|
||||
bSign = i%2;
|
||||
for (j=0; j<ENTRIES; j++) {
|
||||
a = mantissa[j];
|
||||
e1 = exponent[j] + bias;
|
||||
r_mantissa = a/b;
|
||||
r_exp = e1 - e2 + bias;
|
||||
output(fptr, e1, a, e2, b, r_exp, r_mantissa);
|
||||
aFrac = mantissa[j];
|
||||
aExp = exponent[j] + bias;
|
||||
aSign = j%2;
|
||||
rFrac = aFrac/bFrac;
|
||||
rExp = aExp - bExp + bias;
|
||||
rSign = (i+j)%2;
|
||||
output(fptr, aSign, aExp, aFrac, bSign, bExp, bFrac, rSign, rExp, rFrac);
|
||||
}
|
||||
}
|
||||
|
||||
// for (i = 0; i< RANDOM_VECS; i++) {
|
||||
// a = random_input();
|
||||
// b = random_input();
|
||||
// e1 = random_input_e() + BIAS; // make new random input function for exponents
|
||||
// e2 = random_input_e() + BIAS;
|
||||
// r_mantissa = a/b;
|
||||
// r_exp = e1 - e2 + BIAS;
|
||||
// output(fptr, e1, a, e2, b, r_exp, r_mantissa);
|
||||
// aFrac = random_input();
|
||||
// bFrac = random_input();
|
||||
// aExp = random_input_e() + BIAS; // make new random input function for exponents
|
||||
// bExp = random_input_e() + BIAS;
|
||||
// rFrac = a/b;
|
||||
// rEx[] = e1 - e2 + BIAS;
|
||||
// output(fptr, aExp, aFrac, bExp, bFrac, rExp, rFrac);
|
||||
// }
|
||||
|
||||
fclose(fptr);
|
||||
@ -77,19 +81,21 @@ void main(void)
|
||||
|
||||
/* Functions */
|
||||
|
||||
void output(FILE *fptr, int e1, double a, int e2, double b, int r_exp, double r_mantissa)
|
||||
void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac)
|
||||
{
|
||||
fprintf(fptr, "%03x", e1);
|
||||
//printhex(fptr, e1, exp);
|
||||
printhex(fptr, a);
|
||||
// Print a in standard double format
|
||||
fprintf(fptr, "%03x", aExp|(aSign<<11));
|
||||
printhex(fptr, aFrac);
|
||||
fprintf(fptr, "_");
|
||||
fprintf(fptr, "%03x", e2);
|
||||
//printhex(fptr, e2, exp);
|
||||
printhex(fptr, b);
|
||||
|
||||
// Print b in standard double format
|
||||
fprintf(fptr, "%03x", bExp|(bSign<<11));
|
||||
printhex(fptr, bFrac);
|
||||
fprintf(fptr, "_");
|
||||
fprintf(fptr, "%03x", r_exp);
|
||||
//printhex(fptr, r_exp, exp);
|
||||
printhex(fptr, r_mantissa);
|
||||
|
||||
// Print r in standard double format
|
||||
fprintf(fptr, "%03x", rExp|(rSign<<11));
|
||||
printhex(fptr, rFrac);
|
||||
fprintf(fptr, "\n");
|
||||
}
|
||||
|
||||
|
@ -37,6 +37,8 @@ module srt #(parameter Nf=52) (
|
||||
input logic Flush, // *** multiple pipe stages
|
||||
// Floating Point Inputs
|
||||
// later add exponents, signs, special cases
|
||||
input logic XSign, YSign,
|
||||
input logic [`NE-1:0] XExp, YExp,
|
||||
input logic [Nf-1:0] SrcXFrac, SrcYFrac,
|
||||
input logic [`XLEN-1:0] SrcA, SrcB,
|
||||
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
|
||||
@ -44,14 +46,18 @@ module srt #(parameter Nf=52) (
|
||||
input logic Signed, // Interpret integers as signed 2's complement
|
||||
input logic Int, // Choose integer inputss
|
||||
input logic Sqrt, // perform square root, not divide
|
||||
output logic [Nf-1:0] Quot, Rem, // *** later handle integers
|
||||
output logic rsign,
|
||||
output logic [Nf-1:0] Quot, Rem, QuotOTFC, // *** later handle integers
|
||||
output logic [`NE-1:0] rExp,
|
||||
output logic [3:0] Flags
|
||||
);
|
||||
|
||||
logic qp, qz, qm; // quotient is +1, 0, or -1
|
||||
logic [Nf-1:0] X, Dpreproc;
|
||||
logic [Nf+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
|
||||
logic [Nf+2:0] rp, rm;
|
||||
logic [`NE-1:0] calcExp;
|
||||
logic calcSign;
|
||||
logic [Nf-1:0] X, Dpreproc;
|
||||
logic [Nf+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
|
||||
logic [Nf+2:0] rp, rm;
|
||||
|
||||
srtpreproc #(Nf) preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc);
|
||||
|
||||
@ -70,6 +76,8 @@ module srt #(parameter Nf=52) (
|
||||
// Accumulate quotient digits in a shift register
|
||||
qsel #(Nf) qsel(WS[55:52], WC[55:52], qp, qz, qm);
|
||||
qacc #(Nf+3) qacc(clk, Start, qp, qz, qm, rp, rm);
|
||||
flopen #(`NE) expflop(clk, Start, calcExp, rExp);
|
||||
flopen #(1) signflop(clk, Start, calcSign, rsign);
|
||||
|
||||
// Divisor Selection logic
|
||||
inv dinv(D, Db);
|
||||
@ -77,6 +85,12 @@ module srt #(parameter Nf=52) (
|
||||
|
||||
// Partial Product Generation
|
||||
csa csa(WS, WC, Dsel, qp, WSA, WCA);
|
||||
|
||||
otfc2 otfc2(clk, Start, qp, qz, qm, QuotOTFC);
|
||||
|
||||
expcalc expcalc(.XExp, .YExp, .calcExp);
|
||||
|
||||
signcalc signcalc(.XSign, .YSign, .calcSign);
|
||||
|
||||
srtpostproc postproc(rp, rm, Quot);
|
||||
endmodule
|
||||
@ -198,9 +212,57 @@ module qacc #(parameter N=55) (
|
||||
end */
|
||||
endmodule
|
||||
|
||||
//////////
|
||||
// otfc //
|
||||
//////////
|
||||
|
||||
module otfc2 #(parameter N=52) (
|
||||
input logic clk,
|
||||
input logic Start,
|
||||
input logic qp, qz, qm,
|
||||
output logic [N-1:0] r
|
||||
);
|
||||
|
||||
// The on-the-fly converter transfers the quotient
|
||||
// bits to the quotient as they come.
|
||||
//
|
||||
// This code follows the psuedocode presented in the
|
||||
// floating point chapter of the book. Right now,
|
||||
// it is written for Radix-2 division.
|
||||
//
|
||||
// QM is Q-1. It allows us to write negative bits
|
||||
// without using a costly CPA.
|
||||
logic [N+2:0] Q, QM, QNext, QMNext;
|
||||
// QR and QMR are the shifted versions of Q and QM.
|
||||
// They are treated as [N-1:r] size signals, and
|
||||
// discard the r most significant bits of Q and QM.
|
||||
logic [N+1:0] QR, QMR;
|
||||
|
||||
flopr #(N+3) Qreg(clk, Start, QNext, Q);
|
||||
flopr #(N+3) QMreg(clk, Start, QMNext, QM);
|
||||
|
||||
always_comb begin
|
||||
QR = Q[N+1:0];
|
||||
QMR = QM[N+1:0]; // Shift Q and QM
|
||||
if (qp) begin
|
||||
QNext = {QR, 1'b1};
|
||||
QMNext = {QR, 1'b0};
|
||||
end else if (qz) begin
|
||||
QNext = {QR, 1'b0};
|
||||
QMNext = {QMR, 1'b1};
|
||||
end else begin // If qp and qz are not true, then qm is
|
||||
QNext = {QMR, 1'b1};
|
||||
QMNext = {QMR, 1'b0};
|
||||
end
|
||||
end
|
||||
assign r = Q[54] ? Q[53:2] : Q[52:1];
|
||||
|
||||
endmodule
|
||||
|
||||
/////////
|
||||
// inv //
|
||||
/////////
|
||||
|
||||
module inv(input logic [55:0] in,
|
||||
output logic [55:0] out);
|
||||
|
||||
@ -247,6 +309,33 @@ module csa #(parameter N=56) (
|
||||
(in2[54:0] & in3[54:0]), cin};
|
||||
endmodule
|
||||
|
||||
|
||||
//////////////
|
||||
// expcalc //
|
||||
//////////////
|
||||
|
||||
module expcalc(
|
||||
input logic [`NE-1:0] XExp, YExp,
|
||||
output logic [`NE-1:0] calcExp
|
||||
);
|
||||
|
||||
assign calcExp = XExp - YExp + 11'b01111111111;
|
||||
|
||||
endmodule
|
||||
|
||||
//////////////
|
||||
// signcalc //
|
||||
//////////////
|
||||
|
||||
module signcalc(
|
||||
input logic XSign, YSign,
|
||||
output logic calcSign
|
||||
);
|
||||
|
||||
assign calcSign = XSign ^ YSign;
|
||||
|
||||
endmodule
|
||||
|
||||
//////////////
|
||||
// finaladd //
|
||||
//////////////
|
||||
|
@ -38,35 +38,41 @@ endmodule
|
||||
//////////
|
||||
module testbench;
|
||||
logic clk;
|
||||
logic req;
|
||||
logic req;
|
||||
logic done;
|
||||
logic [51:0] a;
|
||||
logic [51:0] b;
|
||||
logic [51:0] r;
|
||||
logic [54:0] rp, rm; // positive quotient digits
|
||||
logic [63:0] a, b;
|
||||
logic [51:0] afrac, bfrac;
|
||||
logic [10:0] aExp, bExp;
|
||||
logic asign, bsign;
|
||||
logic [51:0] r, rOTFC;
|
||||
logic [54:0] rp, rm; // positive quotient digits
|
||||
|
||||
// Test parameters
|
||||
parameter MEM_SIZE = 40000;
|
||||
parameter MEM_WIDTH = 52+52+52;
|
||||
parameter MEM_WIDTH = 64+64+64;
|
||||
|
||||
`define memr 51:0
|
||||
`define memb 103:52
|
||||
`define mema 155:104
|
||||
`define memr 63:0
|
||||
`define memb 127:64
|
||||
`define mema 191:128
|
||||
|
||||
// Test logicisters
|
||||
logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file
|
||||
logic [MEM_WIDTH-1:0] Vec; // Verilog doesn't allow direct access to a
|
||||
// bit field of an array
|
||||
logic [51:0] correctr, nextr, diffn, diffp;
|
||||
logic [63:0] correctr, nextr, diffn, diffp;
|
||||
logic [10:0] rExp;
|
||||
logic rsign;
|
||||
integer testnum, errors;
|
||||
|
||||
// Divider
|
||||
srt #(52) srt(.clk, .Start(req),
|
||||
.Stall(1'b0), .Flush(1'b0),
|
||||
.SrcXFrac(a), .SrcYFrac(b),
|
||||
.XExp(aExp), .YExp(bExp), .rExp,
|
||||
.XSign(asign), .YSign(bsign), .rsign,
|
||||
.SrcXFrac(afrac), .SrcYFrac(bfrac),
|
||||
.SrcA('0), .SrcB('0), .Fmt(2'b00),
|
||||
.W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0),
|
||||
.Quot(r), .Rem(), .Flags());
|
||||
.Quot(r), .QuotOTFC(rOTFC), .Rem(), .Flags());
|
||||
|
||||
// Counter
|
||||
counter counter(clk, req, done);
|
||||
@ -88,7 +94,9 @@ module testbench;
|
||||
$readmemh ("testvectors", Tests);
|
||||
Vec = Tests[testnum];
|
||||
a = Vec[`mema];
|
||||
{asign, aExp, afrac} = a;
|
||||
b = Vec[`memb];
|
||||
{bsign, bExp, bfrac} = b;
|
||||
nextr = Vec[`memr];
|
||||
req <= #5 1;
|
||||
end
|
||||
@ -100,16 +108,23 @@ module testbench;
|
||||
if (done)
|
||||
begin
|
||||
req <= #5 1;
|
||||
diffp = correctr - r;
|
||||
diffn = r - correctr;
|
||||
if (($signed(diffn) > 1) | ($signed(diffp) > 1)) // check if accurate to 1 ulp
|
||||
diffp = correctr[51:0] - r;
|
||||
diffn = r - correctr[51:0];
|
||||
if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1)) // check if accurate to 1 ulp
|
||||
begin
|
||||
errors = errors+1;
|
||||
$display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp);
|
||||
$display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);
|
||||
$display("failed\n");
|
||||
$stop;
|
||||
end
|
||||
if (a === 52'hxxxxxxxxxxxxx)
|
||||
if (r !== rOTFC) // Check if OTFC works
|
||||
begin
|
||||
errors = errors+1;
|
||||
$display("OTFC is %h, should be %h\n", rOTFC, r);
|
||||
$display("failed\n");
|
||||
// $stop;
|
||||
end
|
||||
if (afrac === 52'hxxxxxxxxxxxxx)
|
||||
begin
|
||||
$display("%d Tests completed successfully", testnum);
|
||||
$stop;
|
||||
@ -122,9 +137,11 @@ module testbench;
|
||||
testnum = testnum+1;
|
||||
Vec = Tests[testnum];
|
||||
$display("a = %h b = %h",a,b);
|
||||
a = Vec[`mema];
|
||||
b = Vec[`memb];
|
||||
nextr = Vec[`memr];
|
||||
a = Vec[`mema];
|
||||
{asign, aExp, afrac} = a;
|
||||
b = Vec[`memb];
|
||||
{bsign, bExp, bfrac} = b;
|
||||
nextr = Vec[`memr];
|
||||
end
|
||||
end
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,123 +0,0 @@
|
||||
// testbench
|
||||
module testbench ();
|
||||
|
||||
logic [63:0] op1;
|
||||
logic [63:0] op2;
|
||||
logic [2:0] FOpCtrlE;
|
||||
logic [2:0] FrmE;
|
||||
logic op_type;
|
||||
logic FmtE;
|
||||
logic OvEn;
|
||||
logic UnEn;
|
||||
|
||||
logic XSgnE, YSgnE, ZSgnE;
|
||||
logic XSgnM, YSgnM;
|
||||
logic [10:0] XExpE, YExpE, ZExpE;
|
||||
logic [10:0] XExpM, YExpM, ZExpM;
|
||||
logic [52:0] XManE, YManE, ZManE;
|
||||
logic [52:0] XManM, YManM, ZManM;
|
||||
|
||||
logic [10:0] BiasE;
|
||||
logic XNaNE, YNaNE, ZNaNE;
|
||||
logic XNaNM, YNaNM, ZNaNM;
|
||||
logic XSNaNE, YSNaNE, ZSNaNE;
|
||||
logic XSNaNM, YSNaNM, ZSNaNM;
|
||||
logic XDenormE, YDenormE, ZDenormE;
|
||||
logic XZeroE, YZeroE, ZZeroE;
|
||||
logic XZeroM, YZeroM, ZZeroM;
|
||||
logic XInfE, YInfE, ZInfE;
|
||||
logic XInfM, YInfM, ZInfM;
|
||||
logic XExpMaxE;
|
||||
logic XNormE;
|
||||
logic FDivBusyE;
|
||||
|
||||
logic start;
|
||||
logic reset;
|
||||
|
||||
logic XDenorm;
|
||||
logic YDenorm;
|
||||
logic [63:0] AS_Result;
|
||||
logic [4:0] Flags;
|
||||
logic Denorm;
|
||||
logic done;
|
||||
|
||||
logic clk;
|
||||
logic [63:0] yexpected;
|
||||
logic [63:0] vectornum, errors; // bookkeeping variables
|
||||
logic [199:0] testvectors[50000:0]; // array of testvectors
|
||||
logic [7:0] flags_expected;
|
||||
|
||||
integer handle3;
|
||||
integer desc3;
|
||||
|
||||
// instantiate device under test
|
||||
unpack unpack(.X(op1), .Y(op2), .Z(64'h0), .FOpCtrlE, .FmtE,
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
|
||||
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
|
||||
fpdiv fdivsqrt (.op1, .op2, .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
|
||||
.reset, .clk, .start, .P(~FmtE), .OvEn(1'b0), .UnEn(1'b0),
|
||||
.XNaNQ(XNaNE), .YNaNQ(YNaNE), .XInfQ(XInfE), .YInfQ(YInfE), .XZeroQ(XZeroE), .YZeroQ(YZeroE),
|
||||
.FDivBusyE, .done(done), .AS_Result(AS_Result), .Flags(Flags));
|
||||
|
||||
|
||||
// current fpdivsqrt does not operation on denorms yet
|
||||
assign Denorm = XDenormE | YDenormE | Flags[3];
|
||||
|
||||
// generate clock to sequence tests
|
||||
always
|
||||
begin
|
||||
clk = 1; # 5; clk = 0; # 5;
|
||||
end
|
||||
|
||||
initial
|
||||
begin
|
||||
handle3 = $fopen("f64_div_rne.out");
|
||||
$readmemh("../testbench/fp/vectors/f64_div_rne.tv", testvectors);
|
||||
vectornum = 0; errors = 0;
|
||||
start = 1'b0;
|
||||
// reset
|
||||
reset = 1; #27; reset = 0;
|
||||
end
|
||||
|
||||
initial
|
||||
begin
|
||||
desc3 = handle3;
|
||||
// Operation (if applicable)
|
||||
#0 op_type = 1'b0;
|
||||
// Precision (32-bit or 64-bit)
|
||||
#0 FmtE = 1'b1;
|
||||
// From fctrl logic to dictate operation
|
||||
#0 FOpCtrlE = 3'b000;
|
||||
// Rounding Mode
|
||||
#0 FrmE = 3'b000;
|
||||
// Trap masking (n/a for RISC-V)
|
||||
#0 OvEn = 1'b0;
|
||||
#0 UnEn = 1'b0;
|
||||
end
|
||||
|
||||
always @(posedge clk)
|
||||
begin
|
||||
if (~reset)
|
||||
begin
|
||||
#0; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
|
||||
#50 start = 1'b1;
|
||||
repeat (2)
|
||||
@(posedge clk);
|
||||
// deassert start after 2 cycles
|
||||
start = 1'b0;
|
||||
repeat (10)
|
||||
@(posedge clk);
|
||||
$fdisplay(desc3, "%h_%h_%h_%b_%b | %h_%b", op1, op2, AS_Result, Flags, Denorm, yexpected, (AS_Result==yexpected));
|
||||
vectornum = vectornum + 1;
|
||||
if (testvectors[vectornum] === 200'bx) begin
|
||||
$display("%d tests completed", vectornum);
|
||||
$finish;
|
||||
end
|
||||
end // if (~reset)
|
||||
$display("%d vectors processed", vectornum);
|
||||
end // always @ (posedge clk)
|
||||
|
||||
endmodule // tb
|
||||
|
||||
|
@ -17,6 +17,7 @@ module testbenchfp;
|
||||
string FmaRnmTests[]; // list of FMA round to nearest max magnitude
|
||||
logic [2:0] OpCtrl[]; // list of op controls
|
||||
logic [2:0] Unit[]; // list of units being tested
|
||||
logic WriteInt[]; // Is being written to integer resgiter
|
||||
logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
|
||||
logic [1:0] Fmt[]; // list of formats for the other units
|
||||
logic [1:0] FmaFmt[]; // list of formats for the FMA
|
||||
@ -37,6 +38,7 @@ module testbenchfp;
|
||||
|
||||
logic [1:0] FmaFmtVal, FmtVal; // value of the current Fmt
|
||||
logic [2:0] UnitVal, OpCtrlVal, FrmVal; // vlaue of the currnet Unit/OpCtrl/FrmVal
|
||||
logic WriteIntVal; // value of the current WriteInt
|
||||
logic [`FLEN-1:0] X, Y, Z; // inputs read from TestFloat
|
||||
logic [`FLEN-1:0] FmaRneX, FmaRneY, FmaRneZ; // inputs read from TestFloat
|
||||
logic [`FLEN-1:0] FmaRzX, FmaRzY, FmaRzZ; // inputs read from TestFloat
|
||||
@ -53,8 +55,9 @@ module testbenchfp;
|
||||
logic [4:0] ResFlg; // Result flags
|
||||
logic [4:0] FmaRneResFlg, FmaRzResFlg, FmaRuResFlg, FmaRdResFlg, FmaRnmResFlg; // flags read form testfloat
|
||||
logic [`FPSIZES/3:0] ModFmt, FmaModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad
|
||||
logic [`FLEN-1:0] FmaRes, DivRes, CmpRes, CvtRes, CvtFpRes; // Results from each unit
|
||||
logic [4:0] FmaFlg, CvtFpFlg, DivFlg, CvtIntFlg, CmpFlg; // Outputed flags
|
||||
logic [`FLEN-1:0] FmaRes, DivRes, CmpRes, CvtRes; // Results from each unit
|
||||
logic [`XLEN-1:0] CvtIntRes; // Results from each unit
|
||||
logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags
|
||||
logic ResNaN, FmaRneResNaN, FmaRzResNaN, FmaRuResNaN, FmaRdResNaN, FmaRnmResNaN; // is the outputed result NaN
|
||||
logic AnsNaN, FmaRneAnsNaN, FmaRzAnsNaN, FmaRuAnsNaN, FmaRdAnsNaN, FmaRnmAnsNaN; // is the correct answer NaN
|
||||
logic NaNGood, FmaRneNaNGood, FmaRzNaNGood, FmaRuNaNGood, FmaRdNaNGood, FmaRnmNaNGood; // is the NaN answer correct
|
||||
@ -108,7 +111,6 @@ module testbenchfp;
|
||||
logic FmaRdXZero, FmaRdYZero, FmaRdZZero;
|
||||
logic FmaRnmXZero, FmaRnmYZero, FmaRnmZZero;
|
||||
logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones
|
||||
logic ZOrigDenorm, FmaRneZOrigDenorm, FmaRzZOrigDenorm, FmaRuZOrigDenorm, FmaRdZOrigDenorm, FmaRnmZOrigDenorm; // is the original precision dnormalized
|
||||
|
||||
// in-between FMA signals
|
||||
logic Mult;
|
||||
@ -150,6 +152,7 @@ module testbenchfp;
|
||||
Tests = {Tests, f128rv32cvtint};
|
||||
// add the op-codes for these tests to the op-code list
|
||||
OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
|
||||
// add what unit is used and the fmt to their lists (one for each test)
|
||||
for(int i = 0; i<20; i++) begin
|
||||
Unit = {Unit, `CVTINTUNIT};
|
||||
@ -159,6 +162,7 @@ module testbenchfp;
|
||||
Tests = {Tests, f128rv64cvtint};
|
||||
// add the op-codes for these tests to the op-code list
|
||||
OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
|
||||
// add what unit is used and the fmt to their lists (one for each test)
|
||||
for(int i = 0; i<20; i++) begin
|
||||
Unit = {Unit, `CVTINTUNIT};
|
||||
@ -172,39 +176,55 @@ module testbenchfp;
|
||||
Tests = {Tests, f128f64cvt};
|
||||
// add the op-ctrls (i.e. the format of the result)
|
||||
OpCtrl = {OpCtrl, 3'b01, 3'b11};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0};
|
||||
// add the unit being tested and fmt (input format)
|
||||
for(int i = 0; i<10; i++) begin
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `CVTFPUNIT};
|
||||
Fmt = {Fmt, 2'b11};
|
||||
end
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `CVTFPUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
end
|
||||
end
|
||||
if(`F_SUPPORTED) begin // if single precision is supported
|
||||
// add the 128 <-> 32 bit conversions to the to-be-tested list
|
||||
Tests = {Tests, f128f32cvt};
|
||||
// add the op-ctrls (i.e. the format of the result)
|
||||
OpCtrl = {OpCtrl, 3'b00, 3'b11};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0};
|
||||
// add the unit being tested and fmt (input format)
|
||||
for(int i = 0; i<10; i++) begin
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `CVTFPUNIT};
|
||||
Fmt = {Fmt, 2'b11};
|
||||
end
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `CVTFPUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
end
|
||||
end
|
||||
if(`ZFH_SUPPORTED) begin // if half precision is supported
|
||||
// add the 128 <-> 16 bit conversions to the to-be-tested list
|
||||
Tests = {Tests, f128f16cvt};
|
||||
// add the op-ctrls (i.e. the format of the result)
|
||||
OpCtrl = {OpCtrl, 3'b10, 3'b11};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0};
|
||||
// add the unit being tested and fmt (input format)
|
||||
for(int i = 0; i<10; i++) begin
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `CVTFPUNIT};
|
||||
Fmt = {Fmt, 2'b11};
|
||||
end
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `CVTFPUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
end
|
||||
end
|
||||
end
|
||||
if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested
|
||||
// add the compare tests/op-ctrls/unit/fmt
|
||||
Tests = {Tests, f128cmp};
|
||||
OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
|
||||
for(int i = 0; i<15; i++) begin
|
||||
Unit = {Unit, `CMPUNIT};
|
||||
Fmt = {Fmt, 2'b11};
|
||||
@ -214,6 +234,7 @@ module testbenchfp;
|
||||
// add the addition tests/op-ctrls/unit/fmt
|
||||
Tests = {Tests, f128add};
|
||||
OpCtrl = {OpCtrl, `ADD_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b11};
|
||||
@ -223,6 +244,7 @@ module testbenchfp;
|
||||
// add the subtraction tests/op-ctrls/unit/fmt
|
||||
Tests = {Tests, f128sub};
|
||||
OpCtrl = {OpCtrl, `SUB_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b11};
|
||||
@ -232,6 +254,7 @@ module testbenchfp;
|
||||
// add the multiply tests/op-ctrls/unit/fmt
|
||||
Tests = {Tests, f128mul};
|
||||
OpCtrl = {OpCtrl, `MUL_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b11};
|
||||
@ -241,6 +264,7 @@ module testbenchfp;
|
||||
// add the divide tests/op-ctrls/unit/fmt
|
||||
Tests = {Tests, f128div};
|
||||
OpCtrl = {OpCtrl, `DIV_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `DIVUNIT};
|
||||
Fmt = {Fmt, 2'b11};
|
||||
@ -250,6 +274,7 @@ module testbenchfp;
|
||||
// add the square-root tests/op-ctrls/unit/fmt
|
||||
Tests = {Tests, f128sqrt};
|
||||
OpCtrl = {OpCtrl, `SQRT_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `DIVUNIT};
|
||||
Fmt = {Fmt, 2'b11};
|
||||
@ -264,9 +289,7 @@ module testbenchfp;
|
||||
FmaRdTests = {FmaRdTests, "f128_mulAdd_rd.tv"};
|
||||
FmaRnmTests = {FmaRnmTests, "f128_mulAdd_rnm.tv"};
|
||||
// add the format for the Fma
|
||||
for(int i = 0; i<5; i++) begin
|
||||
FmaFmt = {FmaFmt, 2'b11};
|
||||
end
|
||||
FmaFmt = {FmaFmt, 2'b11};
|
||||
end
|
||||
end
|
||||
if (`D_SUPPORTED) begin // if double precision is supported
|
||||
@ -274,6 +297,7 @@ module testbenchfp;
|
||||
Tests = {Tests, f64rv32cvtint};
|
||||
// add the op-codes for these tests to the op-code list
|
||||
OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
|
||||
// add what unit is used and the fmt to their lists (one for each test)
|
||||
for(int i = 0; i<20; i++) begin
|
||||
Unit = {Unit, `CVTINTUNIT};
|
||||
@ -281,9 +305,10 @@ module testbenchfp;
|
||||
end
|
||||
if (`XLEN == 64) begin // if 64-bit integers are being supported
|
||||
Tests = {Tests, f64rv64cvtint};
|
||||
// add the op-codes for these tests to the op-code list
|
||||
// add the op-codes for these tests to the op-code list
|
||||
OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
|
||||
// add what unit is used and the fmt to their lists (one for each test)
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
|
||||
// add what unit is used and the fmt to their lists (one for each test)
|
||||
for(int i = 0; i<20; i++) begin
|
||||
Unit = {Unit, `CVTINTUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
@ -296,28 +321,39 @@ module testbenchfp;
|
||||
Tests = {Tests, f64f32cvt};
|
||||
// add the op-ctrls (i.e. the format of the result)
|
||||
OpCtrl = {OpCtrl, 3'b00, 3'b01};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0};
|
||||
// add the unit being tested and fmt (input format)
|
||||
for(int i = 0; i<10; i++) begin
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `CVTFPUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
end
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `CVTFPUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
end
|
||||
end
|
||||
if(`ZFH_SUPPORTED) begin // if half precision is supported
|
||||
// add the 64 <-> 16 bit conversions to the to-be-tested list
|
||||
Tests = {Tests, f64f16cvt};
|
||||
// add the op-ctrls (i.e. the format of the result)
|
||||
OpCtrl = {OpCtrl, 3'b10, 3'b01};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0};
|
||||
// add the unit being tested and fmt (input format)
|
||||
for(int i = 0; i<10; i++) begin
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `CVTFPUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
end
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `CVTFPUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
end
|
||||
end
|
||||
end
|
||||
if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f64cmp};
|
||||
OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
|
||||
for(int i = 0; i<15; i++) begin
|
||||
Unit = {Unit, `CMPUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
@ -327,6 +363,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f64add};
|
||||
OpCtrl = {OpCtrl, `ADD_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
@ -336,6 +373,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f64sub};
|
||||
OpCtrl = {OpCtrl, `SUB_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
@ -345,6 +383,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f64mul};
|
||||
OpCtrl = {OpCtrl, `MUL_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
@ -354,6 +393,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f64div};
|
||||
OpCtrl = {OpCtrl, `DIV_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `DIVUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
@ -363,6 +403,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f64sqrt};
|
||||
OpCtrl = {OpCtrl, `SQRT_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `DIVUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
@ -376,9 +417,7 @@ module testbenchfp;
|
||||
FmaRuTests = {FmaRuTests, "f64_mulAdd_ru.tv"};
|
||||
FmaRdTests = {FmaRdTests, "f64_mulAdd_rd.tv"};
|
||||
FmaRnmTests = {FmaRnmTests, "f64_mulAdd_rnm.tv"};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
FmaFmt = {FmaFmt, 2'b01};
|
||||
end
|
||||
FmaFmt = {FmaFmt, 2'b01};
|
||||
end
|
||||
end
|
||||
if (`F_SUPPORTED) begin // if single precision being supported
|
||||
@ -386,6 +425,7 @@ module testbenchfp;
|
||||
Tests = {Tests, f32rv32cvtint};
|
||||
// add the op-codes for these tests to the op-code list
|
||||
OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
|
||||
// add what unit is used and the fmt to their lists (one for each test)
|
||||
for(int i = 0; i<20; i++) begin
|
||||
Unit = {Unit, `CVTINTUNIT};
|
||||
@ -393,9 +433,10 @@ module testbenchfp;
|
||||
end
|
||||
if (`XLEN == 64) begin // if 64-bit integers are supported
|
||||
Tests = {Tests, f32rv64cvtint};
|
||||
// add the op-codes for these tests to the op-code list
|
||||
// add the op-codes for these tests to the op-code list
|
||||
OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
|
||||
// add what unit is used and the fmt to their lists (one for each test)
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
|
||||
// add what unit is used and the fmt to their lists (one for each test)
|
||||
for(int i = 0; i<20; i++) begin
|
||||
Unit = {Unit, `CVTINTUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
@ -408,17 +449,23 @@ module testbenchfp;
|
||||
Tests = {Tests, f32f16cvt};
|
||||
// add the op-ctrls (i.e. the format of the result)
|
||||
OpCtrl = {OpCtrl, 3'b10, 3'b00};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0};
|
||||
// add the unit being tested and fmt (input format)
|
||||
for(int i = 0; i<10; i++) begin
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `CVTFPUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
end
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `CVTFPUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
end
|
||||
end
|
||||
end
|
||||
if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f32cmp};
|
||||
OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
|
||||
for(int i = 0; i<15; i++) begin
|
||||
Unit = {Unit, `CMPUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
@ -428,6 +475,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f32add};
|
||||
OpCtrl = {OpCtrl, `ADD_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
@ -437,6 +485,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f32sub};
|
||||
OpCtrl = {OpCtrl, `SUB_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
@ -446,6 +495,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f32mul};
|
||||
OpCtrl = {OpCtrl, `MUL_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
@ -455,6 +505,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f32div};
|
||||
OpCtrl = {OpCtrl, `DIV_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `DIVUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
@ -464,6 +515,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f32sqrt};
|
||||
OpCtrl = {OpCtrl, `SQRT_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `DIVUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
@ -473,13 +525,11 @@ module testbenchfp;
|
||||
// add each rounding mode to it's own list of tests
|
||||
// - fma tests are very long, so run all rounding modes in parallel
|
||||
FmaRneTests = {FmaRneTests, "f32_mulAdd_rne.tv"};
|
||||
// FmaRzTests = {FmaRzTests, "f32_mulAdd_rz.tv"};
|
||||
// FmaRuTests = {FmaRuTests, "f32_mulAdd_ru.tv"};
|
||||
// FmaRdTests = {FmaRdTests, "f32_mulAdd_rd.tv"};
|
||||
// FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
|
||||
// for(int i = 0; i<5; i++) begin
|
||||
FmaFmt = {FmaFmt, 2'b00};
|
||||
// end
|
||||
FmaRzTests = {FmaRzTests, "f32_mulAdd_rz.tv"};
|
||||
FmaRuTests = {FmaRuTests, "f32_mulAdd_ru.tv"};
|
||||
FmaRdTests = {FmaRdTests, "f32_mulAdd_rd.tv"};
|
||||
FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
|
||||
FmaFmt = {FmaFmt, 2'b00};
|
||||
end
|
||||
end
|
||||
if (`ZFH_SUPPORTED) begin // if half precision supported
|
||||
@ -487,26 +537,29 @@ module testbenchfp;
|
||||
Tests = {Tests, f16rv32cvtint};
|
||||
// add the op-codes for these tests to the op-code list
|
||||
OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
|
||||
// add what unit is used and the fmt to their lists (one for each test)
|
||||
for(int i = 0; i<20; i++) begin
|
||||
Unit = {Unit, `CVTINTUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
end
|
||||
if (`XLEN == 64) begin // if 64-bit integers are supported
|
||||
Tests = {Tests, f16rv64cvtint, f16rv32cvtint};
|
||||
// add the op-codes for these tests to the op-code list
|
||||
OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
|
||||
// add what unit is used and the fmt to their lists (one for each test)
|
||||
for(int i = 0; i<20; i++) begin
|
||||
Unit = {Unit, `CVTINTUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
end
|
||||
Tests = {Tests, f16rv64cvtint};
|
||||
// add the op-codes for these tests to the op-code list
|
||||
OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
|
||||
// add what unit is used and the fmt to their lists (one for each test)
|
||||
for(int i = 0; i<20; i++) begin
|
||||
Unit = {Unit, `CVTINTUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
end
|
||||
end
|
||||
end
|
||||
if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f16cmp};
|
||||
OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
|
||||
for(int i = 0; i<15; i++) begin
|
||||
Unit = {Unit, `CMPUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
@ -516,6 +569,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f16add};
|
||||
OpCtrl = {OpCtrl, `ADD_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
@ -525,6 +579,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f16sub};
|
||||
OpCtrl = {OpCtrl, `SUB_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
@ -534,6 +589,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f16mul};
|
||||
OpCtrl = {OpCtrl, `MUL_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
@ -543,6 +599,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f16div};
|
||||
OpCtrl = {OpCtrl, `DIV_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `DIVUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
@ -552,6 +609,7 @@ module testbenchfp;
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f16sqrt};
|
||||
OpCtrl = {OpCtrl, `SQRT_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `DIVUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
@ -561,13 +619,11 @@ module testbenchfp;
|
||||
// add each rounding mode to it's own list of tests
|
||||
// - fma tests are very long, so run all rounding modes in parallel
|
||||
FmaRneTests = {FmaRneTests, "f16_mulAdd_rne.tv"};
|
||||
// FmaRzTests = {FmaRzTests, "f16_mulAdd_rz.tv"};
|
||||
// FmaRuTests = {FmaRuTests, "f16_mulAdd_ru.tv"};
|
||||
// FmaRdTests = {FmaRdTests, "f16_mulAdd_rd.tv"};
|
||||
// FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
|
||||
// for(int i = 0; i<5; i++) begin
|
||||
FmaFmt = {FmaFmt, 2'b10};
|
||||
// end
|
||||
FmaRzTests = {FmaRzTests, "f16_mulAdd_rz.tv"};
|
||||
FmaRuTests = {FmaRuTests, "f16_mulAdd_ru.tv"};
|
||||
FmaRdTests = {FmaRdTests, "f16_mulAdd_rd.tv"};
|
||||
FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
|
||||
FmaFmt = {FmaFmt, 2'b10};
|
||||
end
|
||||
end
|
||||
|
||||
@ -606,6 +662,7 @@ module testbenchfp;
|
||||
always_comb UnitVal = Unit[TestNum];
|
||||
always_comb FmtVal = Fmt[TestNum];
|
||||
always_comb OpCtrlVal = OpCtrl[OpCtrlNum];
|
||||
always_comb WriteIntVal = WriteInt[OpCtrlNum];
|
||||
always_comb FrmVal = Frm[FrmNum];
|
||||
assign Mult = OpCtrlVal === 3'b100;
|
||||
|
||||
@ -624,7 +681,7 @@ module testbenchfp;
|
||||
.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
|
||||
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
|
||||
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
|
||||
.XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN), .ZOrigDenormE(FmaRneZOrigDenorm),
|
||||
.XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
|
||||
.XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN),
|
||||
.XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm),
|
||||
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
|
||||
@ -634,7 +691,7 @@ module testbenchfp;
|
||||
.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .FmaModFmt,
|
||||
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
|
||||
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
|
||||
.XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN), .ZOrigDenormE(FmaRzZOrigDenorm),
|
||||
.XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
|
||||
.XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN),
|
||||
.XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm),
|
||||
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
|
||||
@ -644,7 +701,7 @@ module testbenchfp;
|
||||
.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .FmaModFmt,
|
||||
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
|
||||
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
|
||||
.XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN), .ZOrigDenormE(FmaRuZOrigDenorm),
|
||||
.XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
|
||||
.XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN),
|
||||
.XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm),
|
||||
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
|
||||
@ -654,7 +711,7 @@ module testbenchfp;
|
||||
.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .FmaModFmt,
|
||||
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
|
||||
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
|
||||
.XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN), .ZOrigDenormE(FmaRdZOrigDenorm),
|
||||
.XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
|
||||
.XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN),
|
||||
.XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm),
|
||||
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
|
||||
@ -663,7 +720,7 @@ module testbenchfp;
|
||||
readfmavectors readfmarnmvectors (.clk, .TestVector(FmaRnmVectors[VectorNum]), .Ans(FmaRnmAns), .AnsFlg(FmaRnmAnsFlg),
|
||||
.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .FmaModFmt,
|
||||
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
|
||||
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan), .ZOrigDenormE(FmaRnmZOrigDenorm),
|
||||
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
|
||||
.XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
|
||||
.XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN),
|
||||
.XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm),
|
||||
@ -673,7 +730,7 @@ module testbenchfp;
|
||||
readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA,
|
||||
.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
|
||||
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
|
||||
.XManE(XMan), .YManE(YMan), .ZManE(ZMan), .ZOrigDenormE(ZOrigDenorm),
|
||||
.XManE(XMan), .YManE(YMan), .ZManE(ZMan),
|
||||
.XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
|
||||
.XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN),
|
||||
.XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm),
|
||||
@ -699,13 +756,12 @@ module testbenchfp;
|
||||
fma1 fma1rne(.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
|
||||
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
|
||||
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
|
||||
.XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm),
|
||||
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRneSum), .NegSumE(FmaRneNegSum), .InvZE(FmaRneInvZ),
|
||||
.NormCntE(FmaRneNormCnt), .ZSgnEffE(FmaRneZSgnEff), .PSgnE(FmaRnePSgn),
|
||||
.ProdExpE(FmaRneProdExp), .AddendStickyE(FmaRneAddendSticky), .KillProdE(FmaRneSumKillProd));
|
||||
fma2 fma2rne(.XSgnM(FmaRneXSgn), .YSgnM(FmaRneYSgn),
|
||||
.ZExpM(FmaRneZExp), .ZOrigDenormM(FmaRneZOrigDenorm),
|
||||
.ZExpM(FmaRneZExp), .ZDenormM(FmaRneZDenorm),
|
||||
.XManM(FmaRneXMan), .YManM(FmaRneYMan), .ZManM(FmaRneZMan),
|
||||
.XNaNM(FmaRneXNaN), .YNaNM(FmaRneYNaN), .ZNaNM(FmaRneZNaN),
|
||||
.XZeroM(FmaRneXZero), .YZeroM(FmaRneYZero), .ZZeroM(FmaRneZZero),
|
||||
@ -718,13 +774,12 @@ module testbenchfp;
|
||||
fma1 fma1rz(.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn),
|
||||
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
|
||||
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
|
||||
.XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm),
|
||||
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRzSum), .NegSumE(FmaRzNegSum), .InvZE(FmaRzInvZ),
|
||||
.NormCntE(FmaRzNormCnt), .ZSgnEffE(FmaRzZSgnEff), .PSgnE(FmaRzPSgn),
|
||||
.ProdExpE(FmaRzProdExp), .AddendStickyE(FmaRzAddendSticky), .KillProdE(FmaRzSumKillProd));
|
||||
fma2 fma2rz(.XSgnM(FmaRzXSgn), .YSgnM(FmaRzYSgn),
|
||||
.ZExpM(FmaRzZExp), .ZOrigDenormM(FmaRzZOrigDenorm),
|
||||
.ZExpM(FmaRzZExp), .ZDenormM(FmaRzZDenorm),
|
||||
.XManM(FmaRzXMan), .YManM(FmaRzYMan), .ZManM(FmaRzZMan),
|
||||
.XNaNM(FmaRzXNaN), .YNaNM(FmaRzYNaN), .ZNaNM(FmaRzZNaN),
|
||||
.XZeroM(FmaRzXZero), .YZeroM(FmaRzYZero), .ZZeroM(FmaRzZZero),
|
||||
@ -737,13 +792,12 @@ module testbenchfp;
|
||||
fma1 fma1ru(.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn),
|
||||
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
|
||||
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
|
||||
.XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm),
|
||||
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRuSum), .NegSumE(FmaRuNegSum), .InvZE(FmaRuInvZ),
|
||||
.NormCntE(FmaRuNormCnt), .ZSgnEffE(FmaRuZSgnEff), .PSgnE(FmaRuPSgn),
|
||||
.ProdExpE(FmaRuProdExp), .AddendStickyE(FmaRuAddendSticky), .KillProdE(FmaRuSumKillProd));
|
||||
fma2 fma2ru(.XSgnM(FmaRuXSgn), .YSgnM(FmaRuYSgn),
|
||||
.ZExpM(FmaRuZExp), .ZOrigDenormM(FmaRuZOrigDenorm),
|
||||
.ZExpM(FmaRuZExp), .ZDenormM(FmaRuZDenorm),
|
||||
.XManM(FmaRuXMan), .YManM(FmaRuYMan), .ZManM(FmaRuZMan),
|
||||
.XNaNM(FmaRuXNaN), .YNaNM(FmaRuYNaN), .ZNaNM(FmaRuZNaN),
|
||||
.XZeroM(FmaRuXZero), .YZeroM(FmaRuYZero), .ZZeroM(FmaRuZZero),
|
||||
@ -755,14 +809,13 @@ module testbenchfp;
|
||||
.FMAFlgM(FmaRuResFlg), .FMAResM(FmaRuRes), .Mult(1'b0));
|
||||
fma1 fma1rd(.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn),
|
||||
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
|
||||
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
|
||||
.XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm),
|
||||
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
|
||||
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRdSum), .NegSumE(FmaRdNegSum), .InvZE(FmaRdInvZ),
|
||||
.NormCntE(FmaRdNormCnt), .ZSgnEffE(FmaRdZSgnEff), .PSgnE(FmaRdPSgn),
|
||||
.ProdExpE(FmaRdProdExp), .AddendStickyE(FmaRdAddendSticky), .KillProdE(FmaRdSumKillProd));
|
||||
fma2 fma2rd(.XSgnM(FmaRdXSgn), .YSgnM(FmaRdYSgn),
|
||||
.ZExpM(FmaRdZExp), .ZOrigDenormM(FmaRdZOrigDenorm),
|
||||
.ZExpM(FmaRdZExp), .ZDenormM(FmaRdZDenorm),
|
||||
.XManM(FmaRdXMan), .YManM(FmaRdYMan), .ZManM(FmaRdZMan),
|
||||
.XNaNM(FmaRdXNaN), .YNaNM(FmaRdYNaN), .ZNaNM(FmaRdZNaN),
|
||||
.XZeroM(FmaRdXZero), .YZeroM(FmaRdYZero), .ZZeroM(FmaRdZZero),
|
||||
@ -775,13 +828,12 @@ module testbenchfp;
|
||||
fma1 fma1rnm(.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn),
|
||||
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
|
||||
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
|
||||
.XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm),
|
||||
.XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRnmSum), .NegSumE(FmaRnmNegSum), .InvZE(FmaRnmInvZ),
|
||||
.NormCntE(FmaRnmNormCnt), .ZSgnEffE(FmaRnmZSgnEff), .PSgnE(FmaRnmPSgn),
|
||||
.ProdExpE(FmaRnmProdExp), .AddendStickyE(FmaRnmAddendSticky), .KillProdE(FmaRnmSumKillProd));
|
||||
fma2 fma2rnm(.XSgnM(FmaRnmXSgn), .YSgnM(FmaRnmYSgn),
|
||||
.ZExpM(FmaRnmZExp), .ZOrigDenormM(FmaRnmZOrigDenorm),
|
||||
.ZExpM(FmaRnmZExp), .ZDenormM(FmaRnmZDenorm),
|
||||
.XManM(FmaRnmXMan), .YManM(FmaRnmYMan), .ZManM(FmaRnmZMan),
|
||||
.XNaNM(FmaRnmXNaN), .YNaNM(FmaRnmYNaN), .ZNaNM(FmaRnmZNaN),
|
||||
.XZeroM(FmaRnmXZero), .YZeroM(FmaRnmYZero), .ZZeroM(FmaRnmZZero),
|
||||
@ -794,12 +846,11 @@ module testbenchfp;
|
||||
fma1 fma1(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn),
|
||||
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp),
|
||||
.XManE(XMan), .YManE(YMan), .ZManE(ZMan),
|
||||
.XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm),
|
||||
.XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
|
||||
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE);
|
||||
fma2 fma2(.XSgnM(XSgn), .YSgnM(YSgn),
|
||||
.ZExpM(ZExp), .ZOrigDenormM(ZOrigDenorm),
|
||||
.ZExpM(ZExp), .ZDenormM(ZDenorm),
|
||||
.XManM(XMan), .YManM(YMan), .ZManM(ZMan),
|
||||
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN),
|
||||
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero),
|
||||
@ -809,7 +860,12 @@ module testbenchfp;
|
||||
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
|
||||
.FMAFlgM(FmaFlg), .FMAResM(FmaRes), .Mult);
|
||||
// fcvtfp fcvtfp (.XExpE(XExp), .XManE(XMan), .XSgnE(XSgn), .XZeroE(XZero), .XDenormE(XDenorm), .XInfE(XInf),
|
||||
// .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(Frmal), .FmtE(ModFmt), .CvtFpRes, .CvtFpFlgE);
|
||||
// .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), .CvtFpResE(CvtFpRes), .CvtFpFlgE(CvtFpFlg));
|
||||
|
||||
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
|
||||
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal),
|
||||
.XInfE(XInf), .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt),
|
||||
.CvtResE(CvtRes), .CvtIntResE(CvtIntRes), .CvtFlgE(CvtFlg));
|
||||
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
|
||||
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero),
|
||||
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpResE(CmpRes));
|
||||
@ -901,38 +957,55 @@ module testbenchfp;
|
||||
AnsNaN = 1'b0;
|
||||
ResNaN = 1'b0;
|
||||
end
|
||||
else begin
|
||||
case (FmtVal)
|
||||
else if (UnitVal === `CVTFPUNIT) begin
|
||||
case (OpCtrlVal[1:0])
|
||||
4'b11: begin // quad
|
||||
AnsNaN = &Ans[`FLEN-2:`NF]&(|Ans[`NF-1:0]);
|
||||
ResNaN = &FmaRes[`FLEN-2:`NF]&(|FmaRes[`NF-1:0]);
|
||||
AnsNaN = &Ans[`Q_LEN-2:`NF]&(|Ans[`Q_NF-1:0]);
|
||||
ResNaN = &Res[`Q_LEN-2:`NF]&(|Res[`Q_NF-1:0]);
|
||||
end
|
||||
4'b01: begin // double
|
||||
AnsNaN = &Ans[`LEN1-2:`NF1]&(|Ans[`NF1-1:0]);
|
||||
ResNaN = &FmaRes[`LEN1-2:`NF1]&(|FmaRes[`NF1-1:0]);
|
||||
AnsNaN = &Ans[`D_LEN-2:`D_NF]&(|Ans[`D_NF-1:0]);
|
||||
ResNaN = &Res[`D_LEN-2:`D_NF]&(|Res[`D_NF-1:0]);
|
||||
end
|
||||
4'b00: begin // single
|
||||
AnsNaN = &Ans[`LEN2-2:`NF2]&(|Ans[`NF2-1:0]);
|
||||
ResNaN = &FmaRes[`LEN2-2:`NF2]&(|FmaRes[`NF2-1:0]);
|
||||
AnsNaN = &Ans[`S_LEN-2:`S_NF]&(|Ans[`S_NF-1:0]);
|
||||
ResNaN = &Res[`S_LEN-2:`S_NF]&(|Res[`S_NF-1:0]);
|
||||
end
|
||||
4'b10: begin // half
|
||||
AnsNaN = &Ans[`H_LEN-2:`H_NF]&(|Ans[`H_NF-1:0]);
|
||||
ResNaN = &FmaRes[`H_LEN-2:`H_NF]&(|FmaRes[`H_NF-1:0]);
|
||||
ResNaN = &Res[`H_LEN-2:`H_NF]&(|Res[`H_NF-1:0]);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
else begin
|
||||
case (FmtVal)
|
||||
4'b11: begin // quad
|
||||
AnsNaN = &Ans[`Q_LEN-2:`Q_NF]&(|Ans[`Q_NF-1:0]);
|
||||
ResNaN = &Res[`Q_LEN-2:`Q_NF]&(|Res[`Q_NF-1:0]);
|
||||
end
|
||||
4'b01: begin // double
|
||||
AnsNaN = &Ans[`D_LEN-2:`D_NF]&(|Ans[`D_NF-1:0]);
|
||||
ResNaN = &Res[`D_LEN-2:`D_NF]&(|Res[`D_NF-1:0]);
|
||||
end
|
||||
4'b00: begin // single
|
||||
AnsNaN = &Ans[`S_LEN-2:`S_NF]&(|Ans[`S_NF-1:0]);
|
||||
ResNaN = &Res[`S_LEN-2:`S_NF]&(|Res[`S_NF-1:0]);
|
||||
end
|
||||
4'b10: begin // half
|
||||
AnsNaN = &Ans[`H_LEN-2:`H_NF]&(|Ans[`H_NF-1:0]);
|
||||
ResNaN = &Res[`H_LEN-2:`H_NF]&(|Res[`H_NF-1:0]);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// check results on falling edge of clk
|
||||
always @(negedge clk) begin
|
||||
|
||||
always_comb begin
|
||||
// select the result to check
|
||||
case (UnitVal)
|
||||
`FMAUNIT: Res = FmaRes;
|
||||
`DIVUNIT: Res = DivRes;
|
||||
`CMPUNIT: Res = CmpRes;
|
||||
`CVTINTUNIT: Res = CvtRes;
|
||||
`CVTFPUNIT: Res = CvtFpRes;
|
||||
`CVTINTUNIT: if(WriteIntVal) Res = CvtIntRes; else Res = CvtRes;
|
||||
`CVTFPUNIT: Res = CvtRes;
|
||||
endcase
|
||||
|
||||
// select the flag to check
|
||||
@ -940,9 +1013,13 @@ module testbenchfp;
|
||||
`FMAUNIT: ResFlg = FmaFlg;
|
||||
`DIVUNIT: ResFlg = DivFlg;
|
||||
`CMPUNIT: ResFlg = CmpFlg;
|
||||
`CVTINTUNIT: ResFlg = CvtIntFlg;
|
||||
`CVTFPUNIT: ResFlg = CvtFpFlg;
|
||||
`CVTINTUNIT: ResFlg = CvtFlg;
|
||||
`CVTFPUNIT: ResFlg = CvtFlg;
|
||||
endcase
|
||||
end
|
||||
// check results on falling edge of clk
|
||||
always @(negedge clk) begin
|
||||
|
||||
|
||||
// check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify:
|
||||
// - the sign of the NaN does not matter for the opperations being tested
|
||||
@ -1060,15 +1137,19 @@ module testbenchfp;
|
||||
else if (UnitVal === `CVTFPUNIT) // if converting from floating point to floating point OpCtrl contains the final FP format
|
||||
case (OpCtrlVal[1:0])
|
||||
2'b11: NaNGood = ((AnsFlg[4]&(Res[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(AnsNaN&(Res[`Q_LEN-2:0] === Ans[`Q_LEN-2:0])) |
|
||||
(XNaN&(Res[`Q_LEN-2:0] === {X[`Q_LEN-2:`Q_NF],1'b1,X[`Q_NF-2:0]})) |
|
||||
(YNaN&(Res[`Q_LEN-2:0] === {Y[`Q_LEN-2:`Q_NF],1'b1,Y[`Q_NF-2:0]})));
|
||||
2'b01: NaNGood = ((AnsFlg[4]&(Res[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(AnsNaN&(Res[`D_LEN-2:0] === Ans[`D_LEN-2:0])) |
|
||||
(XNaN&(Res[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) |
|
||||
(YNaN&(Res[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]})));
|
||||
2'b00: NaNGood = ((AnsFlg[4]&(Res[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(AnsNaN&(Res[`S_LEN-2:0] === Ans[`S_LEN-2:0])) |
|
||||
(XNaN&(Res[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) |
|
||||
(YNaN&(Res[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]})));
|
||||
2'b10: NaNGood = ((AnsFlg[4]&(Res[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(AnsNaN&(Res[`H_LEN-2:0] === Ans[`H_LEN-2:0])) |
|
||||
(XNaN&(Res[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) |
|
||||
(YNaN&(Res[`H_LEN-2:0] === {Y[`H_LEN-2:`H_NF],1'b1,Y[`H_NF-2:0]})));
|
||||
endcase
|
||||
@ -1086,15 +1167,23 @@ module testbenchfp;
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// check if the non-fma test is correct
|
||||
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CMPUNIT)) begin
|
||||
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
|
||||
errors += 1;
|
||||
$display("There is an error in %s", Tests[TestNum]);
|
||||
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
|
||||
$stop;
|
||||
end
|
||||
// in The RISC-V Instruction Set Manual (2019) section 11.8 specifies that
|
||||
// if a any of the inputs to the EQ LT LE opperations then the opperation should return a 0
|
||||
else if ((UnitVal === `CMPUNIT)&(XNaN|YNaN)&(Res !== (`FLEN)'(0))) begin
|
||||
|
||||
// TestFloat sets the result to all 1's when there is an invalid result, however in
|
||||
// http://www.jhauser.us/arithmetic/TestFloat-3/doc/TestFloat-general.html it says
|
||||
// for an unsigned integer result 0 is also okay
|
||||
|
||||
// Testfloat outputs 800... for both the largest integer values for both positive and negitive numbers but
|
||||
// the riscv spec specifies 2^31-1 for positive values out of range and NaNs ie 7fff...
|
||||
else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&(Res[`XLEN-1:0] === (`XLEN)'(0))) |
|
||||
(WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) |
|
||||
(WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) |
|
||||
(Res === Ans | NaNGood | NaNGood === 1'bx)) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin
|
||||
errors += 1;
|
||||
$display("There is an error in %s", Tests[TestNum]);
|
||||
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
|
||||
@ -1147,6 +1236,8 @@ module testbenchfp;
|
||||
// increment the test
|
||||
TestNum += 1;
|
||||
|
||||
// clear the vectors
|
||||
for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
|
||||
// read next files
|
||||
$readmemh({`PATH, Tests[TestNum]}, TestVectors);
|
||||
$readmemh({`PATH, FmaRneTests[TestNum]}, FmaRneVectors);
|
||||
@ -1197,7 +1288,6 @@ module readfmavectors (
|
||||
input logic [1:0] FmaFmt, // the format of the FMA inputs
|
||||
input logic [`FLEN*4+7:0] TestVector, // the test vector
|
||||
output logic [`FLEN-1:0] Ans, // the correct answer
|
||||
output logic ZOrigDenormE, // is z denormalized in it's original precision
|
||||
output logic [4:0] AnsFlg, // the correct flag
|
||||
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
|
||||
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
|
||||
@ -1244,10 +1334,10 @@ module readfmavectors (
|
||||
endcase
|
||||
end
|
||||
|
||||
unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
|
||||
unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
|
||||
.XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
|
||||
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
|
||||
.XExpMaxE, .ZOrigDenormE);
|
||||
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
|
||||
.XExpMaxE, .ZDenormE);
|
||||
endmodule
|
||||
|
||||
|
||||
@ -1287,7 +1377,6 @@ module readvectors (
|
||||
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
|
||||
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
|
||||
output logic XNormE, XExpMaxE,
|
||||
output logic ZOrigDenormE,
|
||||
output logic [`FLEN-1:0] X, Y, Z
|
||||
);
|
||||
|
||||
@ -1371,7 +1460,7 @@ module readvectors (
|
||||
Ans = TestVector[8];
|
||||
end
|
||||
2'b10: begin // half
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+3*(`H_LEN)-1:12+(`H_LEN)]};
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+2*(`H_LEN)-1:12+(`H_LEN)]};
|
||||
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+(`H_LEN)-1:12]};
|
||||
Ans = TestVector[8];
|
||||
end
|
||||
@ -1464,89 +1553,105 @@ module readvectors (
|
||||
case (Fmt)
|
||||
2'b11: begin // quad
|
||||
// {is the integer a long, is the opperation to an integer}
|
||||
casex ({OpCtrl[2], OpCtrl[0]})
|
||||
casex ({OpCtrl[2:1]})
|
||||
2'b11: begin // long -> quad
|
||||
X = {`FLEN{1'bx}};
|
||||
SrcA = TestVector[8+`Q_LEN+`XLEN-1:8+(`Q_LEN)];
|
||||
Ans = TestVector[8+(`Q_LEN-1):8];
|
||||
end
|
||||
2'b01: begin // int -> quad
|
||||
2'b10: begin // int -> quad
|
||||
// correctly sign extend the integer depending on if it's a signed/unsigned test
|
||||
SrcA = {{`XLEN-32{TestVector[8+`Q_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`Q_LEN+`XLEN-1:8+(`Q_LEN)]};
|
||||
X = {`FLEN{1'bx}};
|
||||
SrcA = {{`XLEN-32{TestVector[8+`Q_LEN+32-1]}}, TestVector[8+`Q_LEN+32-1:8+(`Q_LEN)]};
|
||||
Ans = TestVector[8+(`Q_LEN-1):8];
|
||||
end
|
||||
2'b10: begin // quad -> long
|
||||
2'b01: begin // quad -> long
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
|
||||
SrcA = {`XLEN{1'bx}};
|
||||
Ans = {TestVector[8+(`XLEN-1):8]};
|
||||
end
|
||||
2'b00: begin // double -> long
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
|
||||
Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}},TestVector[8+(`XLEN-1):8]};
|
||||
2'b00: begin // quad -> int
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+32+`Q_LEN-1:8+(32)]};
|
||||
SrcA = {`XLEN{1'bx}};
|
||||
Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
2'b01: begin // double
|
||||
// {is the integer a long, is the opperation to an integer}
|
||||
casex ({OpCtrl[2], OpCtrl[0]})
|
||||
// {Int->Fp?, is the integer a long}
|
||||
casex ({OpCtrl[2:1]})
|
||||
2'b11: begin // long -> double
|
||||
X = {`FLEN{1'bx}};
|
||||
SrcA = TestVector[8+`D_LEN+`XLEN-1:8+(`D_LEN)];
|
||||
Ans = TestVector[8+(`D_LEN-1):8];
|
||||
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
|
||||
end
|
||||
2'b01: begin // int -> double
|
||||
2'b10: begin // int -> double
|
||||
// correctly sign extend the integer depending on if it's a signed/unsigned test
|
||||
SrcA = {{`XLEN-32{TestVector[8+`D_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`D_LEN+`XLEN-1:8+(`D_LEN)]};
|
||||
Ans = TestVector[8+(`D_LEN-1):8];
|
||||
X = {`FLEN{1'bx}};
|
||||
SrcA = {{`XLEN-32{TestVector[8+`D_LEN+32-1]}}, TestVector[8+`D_LEN+32-1:8+(`D_LEN)]};
|
||||
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
|
||||
end
|
||||
2'b10: begin // double -> long
|
||||
2'b01: begin // double -> long
|
||||
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`XLEN+`D_LEN-1:8+(`XLEN)]};
|
||||
SrcA = {`XLEN{1'bx}};
|
||||
Ans = {TestVector[8+(`XLEN-1):8]};
|
||||
end
|
||||
2'b00: begin // double -> int
|
||||
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`XLEN+`D_LEN-1:8+(`XLEN)]};
|
||||
Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}},TestVector[8+(`XLEN-1):8]};
|
||||
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+32+`D_LEN-1:8+(32)]};
|
||||
SrcA = {`XLEN{1'bx}};
|
||||
Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
2'b00: begin // single
|
||||
// {is the integer a long, is the opperation to an integer}
|
||||
casex ({OpCtrl[2], OpCtrl[0]})
|
||||
casex ({OpCtrl[2:1]})
|
||||
2'b11: begin // long -> single
|
||||
X = {`FLEN{1'bx}};
|
||||
SrcA = TestVector[8+`S_LEN+`XLEN-1:8+(`S_LEN)];
|
||||
Ans = TestVector[8+(`S_LEN-1):8];
|
||||
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
|
||||
end
|
||||
2'b01: begin // int -> single
|
||||
2'b10: begin // int -> single
|
||||
// correctly sign extend the integer depending on if it's a signed/unsigned test
|
||||
SrcA = {{`XLEN-32{TestVector[8+`S_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`S_LEN+`XLEN-1:8+(`S_LEN)]};
|
||||
Ans = TestVector[8+(`S_LEN-1):8];
|
||||
X = {`FLEN{1'bx}};
|
||||
SrcA = {{`XLEN-32{TestVector[8+`S_LEN+32-1]}}, TestVector[8+`S_LEN+32-1:8+(`S_LEN)]};
|
||||
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
|
||||
end
|
||||
2'b10: begin // single -> long
|
||||
2'b01: begin // single -> long
|
||||
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`XLEN+`S_LEN-1:8+(`XLEN)]};
|
||||
SrcA = {`XLEN{1'bx}};
|
||||
Ans = {TestVector[8+(`XLEN-1):8]};
|
||||
end
|
||||
2'b00: begin // single -> int
|
||||
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`XLEN+`S_LEN-1:8+(`XLEN)]};
|
||||
Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}},TestVector[8+(`XLEN-1):8]};
|
||||
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+32+`S_LEN-1:8+(32)]};
|
||||
SrcA = {`XLEN{1'bx}};
|
||||
Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
2'b10: begin // half
|
||||
// {is the integer a long, is the opperation to an integer}
|
||||
casex ({OpCtrl[2], OpCtrl[0]})
|
||||
casex ({OpCtrl[2:1]})
|
||||
2'b11: begin // long -> half
|
||||
X = {`FLEN{1'bx}};
|
||||
SrcA = TestVector[8+`H_LEN+`XLEN-1:8+(`H_LEN)];
|
||||
Ans = TestVector[8+(`H_LEN-1):8];
|
||||
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
|
||||
end
|
||||
2'b01: begin // int -> half
|
||||
2'b10: begin // int -> half
|
||||
// correctly sign extend the integer depending on if it's a signed/unsigned test
|
||||
SrcA = {{`XLEN-32{TestVector[8+`H_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`H_LEN+`XLEN-1:8+(`H_LEN)]};
|
||||
Ans = TestVector[8+(`H_LEN-1):8];
|
||||
X = {`FLEN{1'bx}};
|
||||
SrcA = {{`XLEN-32{TestVector[8+`H_LEN+32-1]}}, TestVector[8+`H_LEN+32-1:8+(`H_LEN)]};
|
||||
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
|
||||
end
|
||||
2'b10: begin // half -> long
|
||||
2'b01: begin // half -> long
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`XLEN+`H_LEN-1:8+(`XLEN)]};
|
||||
SrcA = {`XLEN{1'bx}};
|
||||
Ans = {TestVector[8+(`XLEN-1):8]};
|
||||
end
|
||||
2'b00: begin // half -> int
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`XLEN+`H_LEN-1:8+(`XLEN)]};
|
||||
Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}}, TestVector[8+(`XLEN-1):8]};
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+32+`H_LEN-1:8+(32)]};
|
||||
SrcA = {`XLEN{1'bx}};
|
||||
Ans = {{`XLEN-32{TestVector[8+32-1]}}, TestVector[8+(32-1):8]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
@ -1557,5 +1662,5 @@ module readvectors (
|
||||
unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
|
||||
.XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
|
||||
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
|
||||
.XExpMaxE, .ZOrigDenormE);
|
||||
.XExpMaxE);
|
||||
endmodule
|
@ -87,7 +87,7 @@ logic [3:0] dummy;
|
||||
"arch64m": if (`M_SUPPORTED) tests = arch64m;
|
||||
"arch64d": if (`D_SUPPORTED) tests = arch64d;
|
||||
"imperas64i": tests = imperas64i;
|
||||
// "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
|
||||
//"imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
|
||||
"imperas64f": if (`F_SUPPORTED) tests = imperas64f;
|
||||
"imperas64d": if (`D_SUPPORTED) tests = imperas64d;
|
||||
"imperas64m": if (`M_SUPPORTED) tests = imperas64m;
|
||||
@ -110,7 +110,7 @@ logic [3:0] dummy;
|
||||
"arch32m": if (`M_SUPPORTED) tests = arch32m;
|
||||
"arch32f": if (`F_SUPPORTED) tests = arch32f;
|
||||
"imperas32i": tests = imperas32i;
|
||||
// "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
|
||||
//"imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
|
||||
"imperas32f": if (`F_SUPPORTED) tests = imperas32f;
|
||||
"imperas32m": if (`M_SUPPORTED) tests = imperas32m;
|
||||
"wally32a": if (`A_SUPPORTED) tests = wally32a;
|
||||
@ -183,7 +183,7 @@ logic [3:0] dummy;
|
||||
|
||||
// read test vectors into memory
|
||||
pathname = tvpaths[tests[0].atoi()];
|
||||
/* if (tests[0] == `IMPERASTEST)
|
||||
/* if (tests[0] == `IMPERASTEST)
|
||||
pathname = tvpaths[0];
|
||||
else pathname = tvpaths[1]; */
|
||||
memfilename = {pathname, tests[test], ".elf.memfile"};
|
||||
@ -255,7 +255,7 @@ logic [3:0] dummy;
|
||||
//if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] &
|
||||
(signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin // ***i+1?
|
||||
if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
|
||||
// if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
|
||||
// if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
|
||||
// report errors unless they are garbage at the end of the sim
|
||||
// kind of hacky test for garbage right now
|
||||
$display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
|
||||
@ -368,11 +368,12 @@ module riscvassertions;
|
||||
assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported.");
|
||||
assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported");
|
||||
assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported");
|
||||
// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
|
||||
// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
|
||||
assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache");
|
||||
assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache");
|
||||
//assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS.");
|
||||
//assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS.");
|
||||
assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
|
||||
end
|
||||
endmodule
|
||||
|
||||
@ -408,47 +409,45 @@ module DCacheFlushFSM
|
||||
logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0];
|
||||
logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0];
|
||||
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0];
|
||||
for(index = 0; index < numlines; index++) begin
|
||||
for(way = 0; way < numways; way++) begin
|
||||
for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
|
||||
copyShadow #(.tagstart(tagstart),
|
||||
.loglinebytelen(loglinebytelen))
|
||||
copyShadow(.clk,
|
||||
.start,
|
||||
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
|
||||
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
|
||||
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
|
||||
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
|
||||
.index(index),
|
||||
.cacheWord(cacheWord),
|
||||
.CacheData(CacheData[way][index][cacheWord]),
|
||||
.CacheAdr(CacheAdr[way][index][cacheWord]),
|
||||
.CacheTag(CacheTag[way][index][cacheWord]),
|
||||
.CacheValid(CacheValid[way][index][cacheWord]),
|
||||
.CacheDirty(CacheDirty[way][index][cacheWord]));
|
||||
end
|
||||
end
|
||||
for(index = 0; index < numlines; index++) begin
|
||||
for(way = 0; way < numways; way++) begin
|
||||
for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
|
||||
copyShadow #(.tagstart(tagstart),
|
||||
.loglinebytelen(loglinebytelen))
|
||||
copyShadow(.clk,
|
||||
.start,
|
||||
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
|
||||
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
|
||||
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
|
||||
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
|
||||
.index(index),
|
||||
.cacheWord(cacheWord),
|
||||
.CacheData(CacheData[way][index][cacheWord]),
|
||||
.CacheAdr(CacheAdr[way][index][cacheWord]),
|
||||
.CacheTag(CacheTag[way][index][cacheWord]),
|
||||
.CacheValid(CacheValid[way][index][cacheWord]),
|
||||
.CacheDirty(CacheDirty[way][index][cacheWord]));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
integer i, j, k;
|
||||
integer i, j, k;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (start) begin #1
|
||||
#1
|
||||
for(i = 0; i < numlines; i++) begin
|
||||
for(j = 0; j < numways; j++) begin
|
||||
for(k = 0; k < numwords; k++) begin
|
||||
if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
|
||||
ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (start) begin #1
|
||||
#1
|
||||
for(i = 0; i < numlines; i++) begin
|
||||
for(j = 0; j < numways; j++) begin
|
||||
for(k = 0; k < numwords; k++) begin
|
||||
if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
|
||||
ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
flop #(1) doneReg(.clk, .d(start), .q(done));
|
||||
endmodule
|
||||
|
||||
|
@ -34,14 +34,14 @@
|
||||
`define LE_OPCTRL 3'b011
|
||||
`define LT_OPCTRL 3'b001
|
||||
`define EQ_OPCTRL 3'b010
|
||||
`define TO_UI_OPCTRL 3'b011
|
||||
`define TO_UI_OPCTRL 3'b000
|
||||
`define TO_I_OPCTRL 3'b001
|
||||
`define TO_UL_OPCTRL 3'b111
|
||||
`define TO_L_OPCTRL 3'b101
|
||||
`define FROM_UI_OPCTRL 3'b010
|
||||
`define FROM_I_OPCTRL 3'b000
|
||||
`define TO_UL_OPCTRL 3'b010
|
||||
`define TO_L_OPCTRL 3'b011
|
||||
`define FROM_UI_OPCTRL 3'b100
|
||||
`define FROM_I_OPCTRL 3'b101
|
||||
`define FROM_UL_OPCTRL 3'b110
|
||||
`define FROM_L_OPCTRL 3'b100
|
||||
`define FROM_L_OPCTRL 3'b111
|
||||
`define RNE 3'b000
|
||||
`define RZ 3'b001
|
||||
`define RU 3'b011
|
||||
@ -54,16 +54,6 @@
|
||||
`define CMPUNIT 4
|
||||
|
||||
string f16rv32cvtint[] = '{
|
||||
"f16_to_i32_rne.tv",
|
||||
"f16_to_i32_rz.tv",
|
||||
"f16_to_i32_ru.tv",
|
||||
"f16_to_i32_rd.tv",
|
||||
"f16_to_i32_rnm.tv",
|
||||
"f16_to_ui32_rne.tv",
|
||||
"f16_to_ui32_rz.tv",
|
||||
"f16_to_ui32_ru.tv",
|
||||
"f16_to_ui32_rd.tv",
|
||||
"f16_to_ui32_rnm.tv",
|
||||
"ui32_to_f16_rne.tv",
|
||||
"ui32_to_f16_rz.tv",
|
||||
"ui32_to_f16_ru.tv",
|
||||
@ -73,20 +63,20 @@ string f16rv32cvtint[] = '{
|
||||
"i32_to_f16_rz.tv",
|
||||
"i32_to_f16_ru.tv",
|
||||
"i32_to_f16_rd.tv",
|
||||
"i32_to_f16_rnm.tv"
|
||||
"i32_to_f16_rnm.tv",
|
||||
"f16_to_ui32_rne.tv",
|
||||
"f16_to_ui32_rz.tv",
|
||||
"f16_to_ui32_ru.tv",
|
||||
"f16_to_ui32_rd.tv",
|
||||
"f16_to_ui32_rnm.tv",
|
||||
"f16_to_i32_rne.tv",
|
||||
"f16_to_i32_rz.tv",
|
||||
"f16_to_i32_ru.tv",
|
||||
"f16_to_i32_rd.tv",
|
||||
"f16_to_i32_rnm.tv"
|
||||
};
|
||||
|
||||
string f16rv64cvtint[] = '{
|
||||
"f16_to_ui64_rne.tv",
|
||||
"f16_to_ui64_rz.tv",
|
||||
"f16_to_ui64_ru.tv",
|
||||
"f16_to_ui64_rd.tv",
|
||||
"f16_to_ui64_rnm.tv",
|
||||
"f16_to_i64_rne.tv",
|
||||
"f16_to_i64_rz.tv",
|
||||
"f16_to_i64_ru.tv",
|
||||
"f16_to_i64_rd.tv",
|
||||
"f16_to_i64_rnm.tv",
|
||||
"ui64_to_f16_rne.tv",
|
||||
"ui64_to_f16_rz.tv",
|
||||
"ui64_to_f16_ru.tv",
|
||||
@ -96,7 +86,17 @@ string f16rv64cvtint[] = '{
|
||||
"i64_to_f16_rz.tv",
|
||||
"i64_to_f16_ru.tv",
|
||||
"i64_to_f16_rd.tv",
|
||||
"i64_to_f16_rnm.tv"
|
||||
"i64_to_f16_rnm.tv",
|
||||
"f16_to_ui64_rne.tv",
|
||||
"f16_to_ui64_rz.tv",
|
||||
"f16_to_ui64_ru.tv",
|
||||
"f16_to_ui64_rd.tv",
|
||||
"f16_to_ui64_rnm.tv",
|
||||
"f16_to_i64_rne.tv",
|
||||
"f16_to_i64_rz.tv",
|
||||
"f16_to_i64_ru.tv",
|
||||
"f16_to_i64_rd.tv",
|
||||
"f16_to_i64_rnm.tv"
|
||||
};
|
||||
|
||||
string f32rv32cvtint[] = '{
|
||||
@ -307,16 +307,16 @@ string f128f32cvt[] = '{
|
||||
|
||||
|
||||
string f128f64cvt[] = '{
|
||||
"f64_to_f128_rne.tv",
|
||||
"f64_to_f128_rz.tv",
|
||||
"f64_to_f128_ru.tv",
|
||||
"f64_to_f128_rd.tv",
|
||||
"f64_to_f128_rnm.tv",
|
||||
"f128_to_f64_rne.tv",
|
||||
"f128_to_f64_rz.tv",
|
||||
"f128_to_f64_ru.tv",
|
||||
"f128_to_f64_rd.tv",
|
||||
"f128_to_f64_rnm.tv"
|
||||
"f128_to_f64_rnm.tv",
|
||||
"f64_to_f128_rne.tv",
|
||||
"f64_to_f128_rz.tv",
|
||||
"f64_to_f128_ru.tv",
|
||||
"f64_to_f128_rd.tv",
|
||||
"f64_to_f128_rnm.tv"
|
||||
};
|
||||
|
||||
string f16add[] = '{
|
||||
|
@ -17,6 +17,9 @@ if {$tech == "sky130"} {
|
||||
} elseif {$tech == "sky90"} {
|
||||
set s9lib $timing_lib/sky90/sky90_sc/V1.7.4/lib
|
||||
lappend search_path $s9lib
|
||||
} elseif {$tech == "tsmc28"} {
|
||||
set s10lib /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a
|
||||
lappend search_path $s10lib
|
||||
}
|
||||
|
||||
# Synthetic libraries
|
||||
@ -30,6 +33,8 @@ if {$tech == "sky130"} {
|
||||
lappend target_library $s8lib/sky130_osu_sc_12T_ms_TT_1P8_25C.ccs.db
|
||||
} elseif {$tech == "sky90"} {
|
||||
lappend target_library $s9lib/scc9gena_tt_1.2v_25C.db
|
||||
} elseif {$tech == "tsmc28"} {
|
||||
lappend target_library $s10lib/tcbn28hpcplusbwp30p140tt0p9v25c.db
|
||||
}
|
||||
|
||||
# Set Link Library
|
||||
|
@ -1,111 +1,135 @@
|
||||
#!/usr/bin/python3
|
||||
# Madeleine Masser-Frye mmasserfrye@hmc.edu 5/22
|
||||
|
||||
from distutils.log import error
|
||||
from statistics import median
|
||||
from operator import index
|
||||
import subprocess
|
||||
import statistics
|
||||
import csv
|
||||
import re
|
||||
from matplotlib.cbook import flatten
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.lines as lines
|
||||
import matplotlib.axes as axes
|
||||
import numpy as np
|
||||
from collections import namedtuple
|
||||
|
||||
|
||||
def getData(mod=None, width=None):
|
||||
specStr = ''
|
||||
if mod != None:
|
||||
specStr = mod
|
||||
if width != None:
|
||||
specStr += ('_'+str(width))
|
||||
specStr += '*'
|
||||
def synthsfromcsv(filename):
|
||||
Synth = namedtuple("Synth", "module tech width freq delay area lpower denergy")
|
||||
with open(filename, newline='') as csvfile:
|
||||
csvreader = csv.reader(csvfile)
|
||||
global allSynths
|
||||
allSynths = list(csvreader)
|
||||
for i in range(len(allSynths)):
|
||||
for j in range(len(allSynths[0])):
|
||||
try: allSynths[i][j] = int(allSynths[i][j])
|
||||
except:
|
||||
try: allSynths[i][j] = float(allSynths[i][j])
|
||||
except: pass
|
||||
allSynths[i] = Synth(*allSynths[i])
|
||||
|
||||
def synthsintocsv():
|
||||
''' writes a CSV with one line for every available synthesis
|
||||
each line contains the module, tech, width, target freq, and resulting metrics
|
||||
'''
|
||||
print("This takes a moment...")
|
||||
bashCommand = "find . -path '*runs/ppa*rv32e*' -prune"
|
||||
output = subprocess.check_output(['bash','-c', bashCommand])
|
||||
allSynths = output.decode("utf-8").split('\n')[:-1]
|
||||
|
||||
bashCommand = "grep 'Critical Path Length' runs/ppa_{}/reports/*qor*".format(specStr)
|
||||
outputCPL = subprocess.check_output(['bash','-c', bashCommand])
|
||||
linesCPL = outputCPL.decode("utf-8").split('\n')[:-1]
|
||||
specReg = re.compile('[a-zA-Z0-9]+')
|
||||
metricReg = re.compile('\d+\.\d+[e]?[-+]?\d*')
|
||||
|
||||
bashCommand = "grep 'Design Area' runs/ppa_{}/reports/*qor*".format(specStr)
|
||||
outputDA = subprocess.check_output(['bash','-c', bashCommand])
|
||||
linesDA = outputDA.decode("utf-8").split('\n')[:-1]
|
||||
|
||||
bashCommand = "grep '100' runs/ppa_{}/reports/*power*".format(specStr)
|
||||
outputP = subprocess.check_output(['bash','-c', bashCommand])
|
||||
linesP = outputP.decode("utf-8").split('\n')[:-1]
|
||||
|
||||
cpl = re.compile('\d{1}\.\d{6}')
|
||||
f = re.compile('_\d*_MHz')
|
||||
wm = re.compile('ppa_\w*_\d*_qor')
|
||||
da = re.compile('\d*\.\d{6}')
|
||||
p = re.compile('\d+\.\d+[e-]*\d+')
|
||||
|
||||
allSynths = []
|
||||
for i in range(len(linesCPL)):
|
||||
line = linesCPL[i]
|
||||
mwm = wm.findall(line)[0][4:-4].split('_')
|
||||
freq = int(f.findall(line)[0][1:-4])
|
||||
delay = float(cpl.findall(line)[0])
|
||||
area = float(da.findall(linesDA[i])[0])
|
||||
mod = mwm[0]
|
||||
width = int(mwm[1])
|
||||
|
||||
power = p.findall(linesP[i])
|
||||
lpower = float(power[2])
|
||||
denergy = float(power[1])*delay
|
||||
|
||||
oneSynth = [mod, width, freq, delay, area, lpower, denergy]
|
||||
allSynths += [oneSynth]
|
||||
|
||||
return allSynths
|
||||
|
||||
def getVals(module, var, freq=None):
|
||||
allSynths = getData(mod=module)
|
||||
|
||||
if (var == 'delay'):
|
||||
ind = 3
|
||||
units = " (ns)"
|
||||
elif (var == 'area'):
|
||||
ind = 4
|
||||
units = " (sq microns)"
|
||||
elif (var == 'lpower'):
|
||||
ind = 5
|
||||
units = " (nW)"
|
||||
elif (var == 'denergy'):
|
||||
ind = 6
|
||||
units = " (pJ)"
|
||||
else:
|
||||
error
|
||||
|
||||
widths = []
|
||||
metric = []
|
||||
if (freq != None):
|
||||
for oneSynth in allSynths:
|
||||
if (oneSynth[2] == freq):
|
||||
widths += [oneSynth[1]]
|
||||
metric += [oneSynth[ind]]
|
||||
else:
|
||||
widths = [8, 16, 32, 64, 128]
|
||||
for w in widths:
|
||||
m = 10000 # large number to start
|
||||
for oneSynth in allSynths:
|
||||
if (oneSynth[1] == w):
|
||||
if (oneSynth[3] < m):
|
||||
m = oneSynth[3]
|
||||
met = oneSynth[ind]
|
||||
metric += [met]
|
||||
return widths, metric, units
|
||||
|
||||
def writeCSV():
|
||||
allSynths = getData()
|
||||
file = open("ppaData.csv", "w")
|
||||
writer = csv.writer(file)
|
||||
writer.writerow(['Module', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (mJ)'])
|
||||
writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (mJ)'])
|
||||
|
||||
for one in allSynths:
|
||||
writer.writerow(one)
|
||||
for oneSynth in allSynths:
|
||||
module, width, risc, tech, freq = specReg.findall(oneSynth)[2:7]
|
||||
tech = tech[:-2]
|
||||
metrics = []
|
||||
for phrase in [['Path Length', 'qor'], ['Design Area', 'qor'], ['100', 'power']]:
|
||||
bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*'
|
||||
bashCommand = bashCommand.format(*phrase)
|
||||
try: output = subprocess.check_output(['bash','-c', bashCommand])
|
||||
except: print("At least one synth run doesn't have reports, try cleanup() first")
|
||||
nums = metricReg.findall(str(output))
|
||||
nums = [float(m) for m in nums]
|
||||
metrics += nums
|
||||
delay = metrics[0]
|
||||
area = metrics[1]
|
||||
lpower = metrics[4]
|
||||
denergy = (metrics[2] + metrics[3])*delay # (switching + internal powers)*delay
|
||||
|
||||
writer.writerow([module, tech, width, freq, delay, area, lpower, denergy])
|
||||
file.close()
|
||||
|
||||
def genLegend(fits, coefs, module, r2):
|
||||
def cleanup():
|
||||
''' removes runs that didn't work
|
||||
'''
|
||||
bashCommand = 'grep -r "Error" runs/ppa*/reports/*qor*'
|
||||
try:
|
||||
output = subprocess.check_output(['bash','-c', bashCommand])
|
||||
allSynths = output.decode("utf-8").split('\n')[:-1]
|
||||
for run in allSynths:
|
||||
run = run.split('MHz')[0]
|
||||
bc = 'rm -r '+ run + '*'
|
||||
output = subprocess.check_output(['bash','-c', bc])
|
||||
except: pass
|
||||
|
||||
bashCommand = "find . -path '*runs/ppa*rv32e*' -prune"
|
||||
output = subprocess.check_output(['bash','-c', bashCommand])
|
||||
allSynths = output.decode("utf-8").split('\n')[:-1]
|
||||
for oneSynth in allSynths:
|
||||
for phrase in [['Path Length', 'qor'], ['Design Area', 'qor'], ['100', 'power']]:
|
||||
bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*'
|
||||
bashCommand = bashCommand.format(*phrase)
|
||||
try: output = subprocess.check_output(['bash','-c', bashCommand])
|
||||
except:
|
||||
bc = 'rm -r '+ oneSynth[2:]
|
||||
try: output = subprocess.check_output(['bash','-c', bc])
|
||||
except: pass
|
||||
print("All cleaned up!")
|
||||
|
||||
def getVals(tech, module, var, freq=None):
|
||||
''' for a specified tech, module, and variable/metric
|
||||
returns a list of values for that metric in ascending width order
|
||||
works at a specified target frequency or if none is given, uses the synthesis with the best achievable delay for each width
|
||||
'''
|
||||
|
||||
global widths
|
||||
metric = []
|
||||
widthL = []
|
||||
|
||||
if (freq != None):
|
||||
for oneSynth in allSynths:
|
||||
if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module):
|
||||
widthL += [oneSynth.width]
|
||||
osdict = oneSynth._asdict()
|
||||
metric += [osdict[var]]
|
||||
metric = [x for _, x in sorted(zip(widthL, metric))] # ordering
|
||||
else:
|
||||
for w in widths:
|
||||
m = 100000 # large number to start
|
||||
for oneSynth in allSynths:
|
||||
if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == module):
|
||||
if (oneSynth.delay < m) & (1000/oneSynth.delay > oneSynth.freq):
|
||||
m = oneSynth.delay
|
||||
osdict = oneSynth._asdict()
|
||||
met = osdict[var]
|
||||
try: metric += [met]
|
||||
except: pass
|
||||
|
||||
if ('flop' in module) & (var == 'area'):
|
||||
metric = [m/2 for m in metric] # since two flops in each module
|
||||
if (var == 'denergy'):
|
||||
metric = [m*1000 for m in metric] # more practical units for regression coefs
|
||||
|
||||
return metric
|
||||
|
||||
def genLegend(fits, coefs, r2, spec):
|
||||
''' generates a list of two legend elements
|
||||
labels line with fit equation and dots with tech and r squared of the fit
|
||||
'''
|
||||
|
||||
coefsr = [str(round(c, 3)) for c in coefs]
|
||||
|
||||
@ -127,26 +151,17 @@ def genLegend(fits, coefs, module, r2):
|
||||
eq += " + " + coefsr[ind] + "*Nlog2(N)"
|
||||
ind += 1
|
||||
|
||||
legend_elements = [lines.Line2D([0], [0], color='orange', label=eq),
|
||||
lines.Line2D([0], [0], color='steelblue', ls='', marker='o', label=' R^2='+ str(round(r2, 4)))]
|
||||
legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq),
|
||||
lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label=spec.tech +' $R^2$='+ str(round(r2, 4)))]
|
||||
return legend_elements
|
||||
|
||||
def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn'):
|
||||
def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True):
|
||||
''' module: string module name
|
||||
freq: int freq (MHz)
|
||||
var: string delay, area, lpower, or denergy
|
||||
fits: constant, linear, square, log2, Nlog2
|
||||
plots given variable vs width for all matching syntheses with regression
|
||||
'''
|
||||
module: string module name
|
||||
freq: int freq (MHz)
|
||||
var: string delay, area, lpower, or denergy
|
||||
fits: constant, linear, square, log2, Nlog2
|
||||
plots chosen variable vs width for all matching syntheses with regression
|
||||
'''
|
||||
widths, metric, units = getVals(module, var, freq=freq)
|
||||
coefs, r2, funcArr = regress(widths, metric, fits)
|
||||
|
||||
xp = np.linspace(8, 140, 200)
|
||||
pred = []
|
||||
for x in xp:
|
||||
y = [func(x) for func in funcArr]
|
||||
pred += [sum(np.multiply(coefs, y))]
|
||||
|
||||
if ax is None:
|
||||
singlePlot = True
|
||||
@ -154,21 +169,48 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn'):
|
||||
else:
|
||||
singlePlot = False
|
||||
|
||||
ax.scatter(widths, metric)
|
||||
ax.plot(xp, pred, color='orange')
|
||||
fullLeg = []
|
||||
global techSpecs
|
||||
global widths
|
||||
|
||||
legend_elements = genLegend(fits, coefs, module, r2)
|
||||
ax.legend(handles=legend_elements)
|
||||
global norms
|
||||
|
||||
for spec in techSpecs:
|
||||
metric = getVals(spec.tech, module, var, freq=freq)
|
||||
|
||||
if norm:
|
||||
techdict = spec._asdict()
|
||||
norm = techdict[var]
|
||||
metric = [m/norm for m in metric] # comment out to not normalize
|
||||
|
||||
if len(metric) == 5:
|
||||
xp, pred, leg = regress(widths, metric, spec, fits)
|
||||
fullLeg += leg
|
||||
|
||||
ax.scatter(widths, metric, color=spec.color, marker=spec.shape)
|
||||
ax.plot(xp, pred, color=spec.color)
|
||||
|
||||
ax.legend(handles=fullLeg)
|
||||
|
||||
ax.set_xticks(widths)
|
||||
ax.set_xlabel("Width (bits)")
|
||||
ax.set_ylabel(str.title(var) + units)
|
||||
|
||||
if norm:
|
||||
ylabeldic = {"lpower": "Normalized Leakage Power", "denergy": "Normalized Dynamic Energy", "area": "INVx1 Areas", "delay": "FO4 Delays"}
|
||||
else:
|
||||
ylabeldic = {"lpower": "Leakage Power (nW)", "denergy": "Dynamic Energy (nJ-CHECK)", "area": "Area (sq microns)", "delay": "Delay (ns)"}
|
||||
|
||||
ax.set_ylabel(ylabeldic[var])
|
||||
|
||||
if singlePlot:
|
||||
ax.set_title(module + " (target " + str(freq) + "MHz)")
|
||||
titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)"
|
||||
ax.set_title(module + titleStr)
|
||||
plt.show()
|
||||
|
||||
def regress(widths, var, fits='clsgn'):
|
||||
def regress(widths, var, spec, fits='clsgn'):
|
||||
''' fits a curve to the given points
|
||||
returns lists of x and y values to plot that curve and legend elements with the equation
|
||||
'''
|
||||
|
||||
funcArr = genFuncs(fits)
|
||||
|
||||
@ -187,9 +229,22 @@ def regress(widths, var, fits='clsgn'):
|
||||
except:
|
||||
resid = 0
|
||||
r2 = 1 - resid / (y.size * y.var())
|
||||
return coefs, r2, funcArr
|
||||
|
||||
def makeCoefTable():
|
||||
xp = np.linspace(8, 140, 200)
|
||||
pred = []
|
||||
for x in xp:
|
||||
n = [func(x) for func in funcArr]
|
||||
pred += [sum(np.multiply(coefs, n))]
|
||||
|
||||
leg = genLegend(fits, coefs, r2, spec)
|
||||
|
||||
return xp, pred, leg
|
||||
|
||||
def makeCoefTable(tech):
|
||||
''' not currently in use, may salvage later
|
||||
writes CSV with each line containing the coefficients for a regression fit
|
||||
to a particular combination of module, metric, and target frequency
|
||||
'''
|
||||
file = open("ppaFitting.csv", "w")
|
||||
writer = csv.writer(file)
|
||||
writer.writerow(['Module', 'Metric', 'Freq', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2'])
|
||||
@ -198,7 +253,8 @@ def makeCoefTable():
|
||||
for comb in [['delay', 5000], ['area', 5000], ['area', 10]]:
|
||||
var = comb[0]
|
||||
freq = comb[1]
|
||||
widths, metric, units = getVals(mod, freq, var)
|
||||
metric = getVals(tech, mod, freq, var)
|
||||
global widths
|
||||
coefs, r2, funcArr = regress(widths, metric)
|
||||
row = [mod] + comb + np.ndarray.tolist(coefs) + [r2]
|
||||
writer.writerow(row)
|
||||
@ -206,6 +262,9 @@ def makeCoefTable():
|
||||
file.close()
|
||||
|
||||
def genFuncs(fits='clsgn'):
|
||||
''' helper function for regress()
|
||||
returns array of functions with one for each term desired in the regression fit
|
||||
'''
|
||||
funcArr = []
|
||||
if 'c' in fits:
|
||||
funcArr += [lambda x: 1]
|
||||
@ -220,78 +279,188 @@ def genFuncs(fits='clsgn'):
|
||||
return funcArr
|
||||
|
||||
def noOutliers(freqs, delays, areas):
|
||||
''' returns a pared down list of freqs, delays, and areas
|
||||
cuts out any syntheses in which target freq isn't within 75% of the min delay target to focus on interesting area
|
||||
helper function to freqPlot()
|
||||
'''
|
||||
f=[]
|
||||
d=[]
|
||||
a=[]
|
||||
try:
|
||||
med = statistics.median(freqs)
|
||||
for i in range(len(freqs)):
|
||||
norm = freqs[i]/med
|
||||
if (norm > 0.25) & (norm<1.75):
|
||||
f += [freqs[i]]
|
||||
d += [delays[i]]
|
||||
a += [areas[i]]
|
||||
except: pass
|
||||
ind = delays.index(min(delays))
|
||||
med = freqs[ind]
|
||||
for i in range(len(freqs)):
|
||||
norm = freqs[i]/med
|
||||
if (norm > 0.25) & (norm<1.75):
|
||||
f += [freqs[i]]
|
||||
d += [delays[i]]
|
||||
a += [areas[i]]
|
||||
|
||||
return f, d, a
|
||||
|
||||
def freqPlot(mod, width):
|
||||
allSynths = getData(mod=mod, width=width)
|
||||
|
||||
freqsV, delaysV, areasV, freqsA, delaysA, areasA = ([] for i in range(6))
|
||||
def freqPlot(tech, mod, width):
|
||||
''' plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width
|
||||
'''
|
||||
global allSynths
|
||||
freqsL, delaysL, areasL = ([[], []] for i in range(3))
|
||||
for oneSynth in allSynths:
|
||||
if (mod == oneSynth[0]) & (width == oneSynth[1]):
|
||||
if (1000/oneSynth[3] < oneSynth[2]):
|
||||
freqsV += [oneSynth[2]]
|
||||
delaysV += [oneSynth[3]]
|
||||
areasV += [oneSynth[4]]
|
||||
else:
|
||||
freqsA += [oneSynth[2]]
|
||||
delaysA += [oneSynth[3]]
|
||||
areasA += [oneSynth[4]]
|
||||
if (mod == oneSynth.module) & (width == oneSynth.width) & (tech == oneSynth.tech):
|
||||
ind = (1000/oneSynth.delay < oneSynth.freq) # when delay is within target clock period
|
||||
freqsL[ind] += [oneSynth.freq]
|
||||
delaysL[ind] += [oneSynth.delay]
|
||||
areasL[ind] += [oneSynth.area]
|
||||
|
||||
freqsV, delaysV, areasV = noOutliers(freqsV, delaysV, areasV)
|
||||
freqsA, delaysA, areasA = noOutliers(freqsA, delaysA, areasA)
|
||||
f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True)
|
||||
|
||||
adprodA = np.multiply(areasA, delaysA)
|
||||
adsqA = np.multiply(adprodA, delaysA)
|
||||
adprodV = np.multiply(areasV, delaysV)
|
||||
adsqV = np.multiply(adprodV, delaysV)
|
||||
for ind in [0,1]:
|
||||
areas = areasL[ind]
|
||||
delays = delaysL[ind]
|
||||
freqs = freqsL[ind]
|
||||
|
||||
if ('flop' in mod): areas = [m/2 for m in areas] # since two flops in each module
|
||||
freqs, delays, areas = noOutliers(freqs, delays, areas) # comment out to see all syntheses
|
||||
|
||||
c = 'blue' if ind else 'green'
|
||||
adprod = adprodpow(areas, delays, 1)
|
||||
adpow = adprodpow(areas, delays, 2)
|
||||
ax1.scatter(freqs, delays, color=c)
|
||||
ax2.scatter(freqs, areas, color=c)
|
||||
ax3.scatter(freqs, adprod, color=c)
|
||||
ax4.scatter(freqs, adpow, color=c)
|
||||
|
||||
legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='o', label='timing achieved'),
|
||||
lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')]
|
||||
|
||||
f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True)
|
||||
ax1.scatter(freqsA, delaysA, color='green')
|
||||
ax1.scatter(freqsV, delaysV, color='blue')
|
||||
ax2.scatter(freqsA, areasA, color='green')
|
||||
ax2.scatter(freqsV, areasV, color='blue')
|
||||
ax3.scatter(freqsA, adprodA, color='green')
|
||||
ax3.scatter(freqsV, adprodV, color='blue')
|
||||
ax4.scatter(freqsA, adsqA, color='green')
|
||||
ax4.scatter(freqsV, adsqV, color='blue')
|
||||
ax1.legend(handles=legend_elements)
|
||||
|
||||
ax4.set_xlabel("Target Freq (MHz)")
|
||||
ax1.set_ylabel('Delay (ns)')
|
||||
ax2.set_ylabel('Area (sq microns)')
|
||||
ax3.set_ylabel('Area * Delay')
|
||||
ax4.set_ylabel('Area * Delay^2')
|
||||
ax4.set_ylabel('Area * $Delay^2$')
|
||||
ax1.set_title(mod + '_' + str(width))
|
||||
plt.show()
|
||||
|
||||
def plotPPA(mod, freq=None):
|
||||
fig, axs = plt.subplots(2, 2)
|
||||
oneMetricPlot(mod, 'delay', ax=axs[0,0], fits='clg', freq=freq)
|
||||
oneMetricPlot(mod, 'area', ax=axs[0,1], fits='s', freq=freq)
|
||||
oneMetricPlot(mod, 'lpower', ax=axs[1,0], fits='c', freq=freq)
|
||||
oneMetricPlot(mod, 'denergy', ax=axs[1,1], fits='s', freq=freq)
|
||||
titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " min delay"
|
||||
plt.suptitle(mod + titleStr)
|
||||
def squareAreaDelay(tech, mod, width):
|
||||
''' plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width
|
||||
'''
|
||||
global allSynths
|
||||
freqsL, delaysL, areasL = ([[], []] for i in range(3))
|
||||
for oneSynth in allSynths:
|
||||
if (mod == oneSynth.module) & (width == oneSynth.width) & (tech == oneSynth.tech):
|
||||
ind = (1000/oneSynth.delay < oneSynth.freq) # when delay is within target clock period
|
||||
freqsL[ind] += [oneSynth.freq]
|
||||
delaysL[ind] += [oneSynth.delay]
|
||||
areasL[ind] += [oneSynth.area]
|
||||
|
||||
f, (ax1) = plt.subplots(1, 1)
|
||||
ax2 = ax1.twinx()
|
||||
|
||||
for ind in [0,1]:
|
||||
areas = areasL[ind]
|
||||
delays = delaysL[ind]
|
||||
targets = freqsL[ind]
|
||||
targets = [1000/f for f in targets]
|
||||
|
||||
if ('flop' in mod): areas = [m/2 for m in areas] # since two flops in each module
|
||||
targets, delays, areas = noOutliers(targets, delays, areas) # comment out to see all
|
||||
|
||||
if not ind:
|
||||
achievedDelays = delays
|
||||
|
||||
c = 'blue' if ind else 'green'
|
||||
ax1.scatter(targets, delays, marker='^', color=c)
|
||||
ax2.scatter(targets, areas, marker='s', color=c)
|
||||
|
||||
bestAchieved = min(achievedDelays)
|
||||
|
||||
legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='^', label='delay (timing achieved)'),
|
||||
lines.Line2D([0], [0], color='green', ls='', marker='s', label='area (timing achieved)'),
|
||||
lines.Line2D([0], [0], color='blue', ls='', marker='^', label='delay (timing violated)'),
|
||||
lines.Line2D([0], [0], color='blue', ls='', marker='s', label='area (timing violated)')]
|
||||
|
||||
ax2.legend(handles=legend_elements, loc='upper left')
|
||||
|
||||
ax1.set_xlabel("Delay Targeted (ns)")
|
||||
ax1.set_ylabel("Delay Achieved (ns)")
|
||||
ax2.set_ylabel('Area (sq microns)')
|
||||
ax1.set_title(mod + '_' + str(width))
|
||||
|
||||
squarify(f)
|
||||
|
||||
xvals = np.array(ax1.get_xlim())
|
||||
frac = (min(flatten(delaysL))-xvals[0])/(xvals[1]-xvals[0])
|
||||
areaLowerLim = min(flatten(areasL))-100
|
||||
areaUpperLim = max(flatten(areasL))/frac + areaLowerLim
|
||||
ax2.set_ylim([areaLowerLim, areaUpperLim])
|
||||
ax1.plot(xvals, xvals, ls="--", c=".3")
|
||||
ax1.hlines(y=bestAchieved, xmin=xvals[0], xmax=xvals[1], color="black", ls='--')
|
||||
|
||||
plt.show()
|
||||
|
||||
# writeCSV()
|
||||
# makeCoefTable()
|
||||
def squarify(fig):
|
||||
''' helper function for squareAreaDelay()
|
||||
forces matplotlib figure to be a square
|
||||
'''
|
||||
w, h = fig.get_size_inches()
|
||||
if w > h:
|
||||
t = fig.subplotpars.top
|
||||
b = fig.subplotpars.bottom
|
||||
axs = h*(t-b)
|
||||
l = (1.-axs/w)/2
|
||||
fig.subplots_adjust(left=l, right=1-l)
|
||||
else:
|
||||
t = fig.subplotpars.right
|
||||
b = fig.subplotpars.left
|
||||
axs = w*(t-b)
|
||||
l = (1.-axs/h)/2
|
||||
fig.subplots_adjust(bottom=l, top=1-l)
|
||||
|
||||
freqPlot('flopr', 128)
|
||||
def adprodpow(areas, delays, pow):
|
||||
''' for each value in [areas] returns area*delay^pow
|
||||
helper function for freqPlot'''
|
||||
result = []
|
||||
|
||||
# plotPPA('add')
|
||||
for i in range(len(areas)):
|
||||
result += [(areas[i])*(delays[i])**pow]
|
||||
|
||||
return result
|
||||
|
||||
def plotPPA(mod, freq=None, norm=True):
|
||||
''' for the module specified, plots width vs delay, area, leakage power, and dynamic energy with fits
|
||||
if no freq specified, uses the synthesis with best achievable delay for each width
|
||||
overlays data from both techs
|
||||
'''
|
||||
fig, axs = plt.subplots(2, 2)
|
||||
global fitDict
|
||||
modFit = fitDict[mod]
|
||||
oneMetricPlot(mod, 'delay', ax=axs[0,0], fits=modFit[0], freq=freq, norm=norm)
|
||||
oneMetricPlot(mod, 'area', ax=axs[0,1], fits=modFit[1], freq=freq, norm=norm)
|
||||
oneMetricPlot(mod, 'lpower', ax=axs[1,0], fits=modFit[1], freq=freq, norm=norm)
|
||||
oneMetricPlot(mod, 'denergy', ax=axs[1,1], fits=modFit[1], freq=freq, norm=norm)
|
||||
titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)"
|
||||
plt.suptitle(mod + titleStr)
|
||||
plt.show()
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
# set up stuff, global variables
|
||||
widths = [8, 16, 32, 64, 128]
|
||||
# fitDict in progress
|
||||
fitDict = {'add': ['cg', 'cl'], 'mult': ['clg', 's'], 'comparator': ['clsgn', 'clsgn'], 'csa': ['clsgn', 'clsgn'], 'shiftleft': ['clsgn', 'clsgn'], 'flop': ['cl', 'cl'], 'priorityencoder': ['clsgn', 'clsgn']}
|
||||
TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy")
|
||||
techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1.96, 1.98, 1], ['gf32', 'purple', 's', 15e-3, .351, .3116, 1], ['tsmc28', 'blue', '^', 12.2e-3, .252, 1.09, 1]]
|
||||
techSpecs = [TechSpec(*t) for t in techSpecs]
|
||||
|
||||
# cleanup()
|
||||
# synthsintocsv() # slow, run only when new synth runs to add to csv
|
||||
|
||||
synthsfromcsv('ppaData.csv') # your csv here!
|
||||
|
||||
### examples
|
||||
# for mod in ['comparator', 'priorityencoder', 'shiftleft']:
|
||||
# for w in [16, 32]:
|
||||
# freqPlot('sky90', mod, w) # the weird ones
|
||||
# squareAreaDelay('sky90', 'add', 32)
|
||||
# oneMetricPlot('add', 'delay')
|
||||
for mod in ['add', 'csa', 'mult', 'comparator', 'priorityencoder', 'shiftleft', 'flop']:
|
||||
plotPPA(mod, norm=False) # no norm input now defaults to normalized
|
2038
synthDC/ppaData.csv
2038
synthDC/ppaData.csv
File diff suppressed because it is too large
Load Diff
@ -42,21 +42,22 @@ def getData():
|
||||
return allSynths
|
||||
|
||||
allSynths = getData()
|
||||
arr = [-40, -20, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 14, 20, 40]
|
||||
arr = [-40, -20, -8, -6, -4, -2, 0, 2, 4, 6, 8, 12, 20, 40]
|
||||
|
||||
widths = [32, 64, 128]
|
||||
modules = ['flopr']
|
||||
tech = 'sky90'
|
||||
widths = [16, 8, 32, 64, 128]
|
||||
modules = ['add']
|
||||
tech = 'tsmc28'
|
||||
LoT = []
|
||||
|
||||
# # # initial sweep to get estimate of min delay
|
||||
# freqs = ['7500']
|
||||
# freqs = [25000, 35000]
|
||||
# for module in modules:
|
||||
# for width in widths:
|
||||
# for freq in freqs:
|
||||
# LoT += [[module, width, tech, freq]]
|
||||
|
||||
# thorough sweep based on estimate of min delay
|
||||
|
||||
# # thorough sweep based on estimate of min delay
|
||||
for m in modules:
|
||||
for w in widths:
|
||||
delays = []
|
||||
@ -69,7 +70,6 @@ for m in modules:
|
||||
LoT += [[m, w, tech, freq]]
|
||||
|
||||
deleteRedundant(LoT)
|
||||
|
||||
pool = Pool()
|
||||
pool.starmap(runCommand, LoT)
|
||||
pool.close()
|
@ -117,6 +117,10 @@ if {$tech == "sky130"} {
|
||||
} else {
|
||||
set_driving_cell -lib_cell scc9gena_dfxbp_1 -pin Q $all_in_ex_clk
|
||||
}
|
||||
} elseif {$tech == "tsmc28"} {
|
||||
if ($drive == "INV") {
|
||||
set_driving_cell -lib_cell INVD1BWP30P140 -pin ZN $all_in_ex_clk
|
||||
}
|
||||
}
|
||||
|
||||
# Set input/output delay
|
||||
@ -132,6 +136,10 @@ if {$tech == "sky130"} {
|
||||
} else {
|
||||
set_load [expr [load_of scc9gena_tt_1.2v_25C/scc9gena_dfxbp_1/D] * 1] [all_outputs]
|
||||
}
|
||||
} elseif {$tech == "tsmc28"} {
|
||||
if ($drive == "INV") {
|
||||
set_load [expr [load_of tcbn28hpcplusbwp30p140tt0p9v25c/INVD4BWP30P140/I] * 1] [all_outputs]
|
||||
}
|
||||
}
|
||||
|
||||
# Set the wire load model
|
||||
|
@ -98,101 +98,101 @@ $BUILD/testfloat_gen -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
|
||||
echo "Creating f16_to_ui32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
|
||||
echo "Creating f32_to_ui32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
|
||||
echo "Creating f64_to_ui32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
|
||||
echo "Creating f128_to_ui32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
|
||||
echo "Creating f16_to_ui64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
|
||||
echo "Creating f32_to_ui64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
|
||||
echo "Creating f64_to_ui64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
|
||||
echo "Creating f128_to_ui64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
|
||||
echo "Creating f16_to_i32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
|
||||
echo "Creating f32_to_i32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
|
||||
echo "Creating f64_to_i32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
|
||||
echo "Creating f128_to_i32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
|
||||
echo "Creating f16_to_i64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
|
||||
echo "Creating f32_to_i64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
|
||||
echo "Creating f64_to_i64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
|
||||
echo "Creating f128_to_i64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
|
||||
$BUILD/testfloat_gen -rnear_even -exact f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
|
||||
echo "Creating f16_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
|
||||
|
Loading…
Reference in New Issue
Block a user