fixed merge conflicts

This commit is contained in:
David Harris 2022-05-28 09:44:55 +00:00
commit 80315fedff
29 changed files with 3717 additions and 2683 deletions

View File

@ -1,15 +1,39 @@
# Makefile added 1/20/22 David_Harris@hmc.edu
# Compile Embench for Wally
all: Makefile
../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board ri5cyverilator --cflags "-O2 -march=rv32i -mabi=ilp32 -mcmodel=medany" --cc riscv64-unknown-elf-gcc
./benchmark_size.py
./benchmark_speed.py
all: build sim
# view with
# more `ls -t | head -1`
allClean: clean all
build:
../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles"
find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0"
sim: modelSimBuild size speed
modelSimBuild: objdump
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
size:
../../addins/embench-iot/benchmark_size.py --builddir=bd_size
speed:
../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=50
objdump:
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S "$$f" > "$$f.objdump"; done
clean:
rm -rf ../../addins/embench-iot/bd_speed/
rm -rf ../../addins/embench-iot/bd_size/
# std:
# ../../addins/embench-iot/build_all.py --builddir=bd_std --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-v -c -O2 -ffunction-sections -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -v -march=rv32imac -mabi=ilp32 ../../../../../benchmarks/embench/tohost.S -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --user-libs="-lm"
# riscv64-unknown-elf-objdump -D ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64.objdump
# --dummy-libs="libgcc libm libc"
# --cflags "-O2 -g -nostartfiles"
#riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
# ../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-c -Os -ffunction-sections -nostdlib -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -nostdlib -march=rv32imac -mabi=ilp32 -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --dummy-libs="libgcc libm libc"
# --user-libs="-lm"
# riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c

View File

@ -1,7 +0,0 @@
# Makefile added 1/20/22 David_Harris@hmc.edu
# Compile Embench for Wally
all: Makefile
./build_all.py --arch riscv32 --chip generic --board ri5cyverilator --cc riscv64-unknown-elf-gcc
./benchmark_size.py
./benchmark_speed.py

View File

@ -32,7 +32,7 @@ vlib work
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
# $num = the added words after the call
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv -suppress 2583,7063,8607,2697
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697
vsim -voptargs=+acc work.testbenchfp -G TEST=$2

View File

@ -46,7 +46,7 @@ configs = [
]
def getBuildrootTC(short):
INSTR_LIMIT = 4000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
MAX_EXPECTED = 246000000
MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt.
if short:
BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0\n!"
BRgrepstr=str(INSTR_LIMIT)+" instructions"

View File

@ -1,2 +1,2 @@
vsim -do "do wally-pipelined.do rv64gc imperas64f"
vsim -do "do wally-pipelined.do rv64gc imperas64d"

View File

@ -51,7 +51,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
#vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf
#vsim -coverage -lib work_$2 workopt_$2
# power add generates the logging necessary for saif generation.
# power add generates the logging necessary for said generation.
# power add -r /dut/core/*
run -all
# power off -r /dut/core/*

View File

@ -11,97 +11,97 @@ module fctrl (
output logic FDivStartD, // Start division or squareroot
output logic [1:0] FResultSelD, // select result to be written to fp register
output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit
output logic [2:0] FResSelD, // select one of the results done in the memory stage
output logic [1:0] FResSelD, // select one of the results done in the memory stage
output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
output logic FmtD, // precision - single-0 double-1
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
output logic FWriteIntD // is the result written to the integer register
);
`define FCTRLW 14
`define FCTRLW 13
logic [`FCTRLW-1:0] ControlsD;
// FPU Instruction Decoder
always_comb
if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1;
ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1;
else case(OpD)
// FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
7'b0000111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b1_0_00_000_000_00_0_0; // flw
3'b011: ControlsD = `FCTRLW'b1_0_00_001_000_00_0_0; // fld
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
3'b010: ControlsD = `FCTRLW'b1_0_00_000_00_00_0_0; // flw
3'b011: ControlsD = `FCTRLW'b1_0_00_001_00_00_0_0; // fld
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
endcase
7'b0100111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_0_00_010_000_00_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_00_011_000_00_0_0; // fsd
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
3'b010: ControlsD = `FCTRLW'b0_0_00_010_00_00_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_00_011_00_00_0_0; // fsd
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
endcase
7'b1000011: ControlsD = `FCTRLW'b1_0_01_000_000_00_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_01_001_000_00_0_0; // fmsub
7'b1001011: ControlsD = `FCTRLW'b1_0_01_010_000_00_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_01_011_000_00_0_0; // fnmadd
7'b1000011: ControlsD = `FCTRLW'b1_0_01_000_00_00_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_01_001_00_00_0_0; // fmsub
7'b1001011: ControlsD = `FCTRLW'b1_0_01_010_00_00_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_01_011_00_00_0_0; // fnmadd
7'b1010011: casez(Funct7D)
7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_000_00_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_000_00_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_000_00_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_000_00_1_0; // fdiv
7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_000_00_1_0; // fsqrt
7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_00_00_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_00_00_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_00_00_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_00_00_1_0; // fdiv
7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_00_00_1_0; // fsqrt
7'b00100??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_11_000_001_00_0_0; // fsgnj
3'b001: ControlsD = `FCTRLW'b1_0_11_001_001_00_0_0; // fsgnjn
3'b010: ControlsD = `FCTRLW'b1_0_11_010_001_00_0_0; // fsgnjx
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
3'b000: ControlsD = `FCTRLW'b1_0_11_000_01_00_0_0; // fsgnj
3'b001: ControlsD = `FCTRLW'b1_0_11_001_01_00_0_0; // fsgnjn
3'b010: ControlsD = `FCTRLW'b1_0_11_010_01_00_0_0; // fsgnjx
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
endcase
7'b00101??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_11_111_010_00_0_0; // fmin
3'b001: ControlsD = `FCTRLW'b1_0_11_101_010_00_0_0; // fmax
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
3'b000: ControlsD = `FCTRLW'b1_0_11_111_10_00_0_0; // fmin
3'b001: ControlsD = `FCTRLW'b1_0_11_101_10_00_0_0; // fmax
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
endcase
7'b10100??: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_1_11_010_010_00_0_0; // feq
3'b001: ControlsD = `FCTRLW'b0_1_11_001_010_00_0_0; // flt
3'b000: ControlsD = `FCTRLW'b0_1_11_011_010_00_0_0; // fle
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
3'b010: ControlsD = `FCTRLW'b0_1_11_010_10_00_0_0; // feq
3'b001: ControlsD = `FCTRLW'b0_1_11_001_10_00_0_0; // flt
3'b000: ControlsD = `FCTRLW'b0_1_11_011_10_00_0_0; // fle
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
endcase
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_000_10_0_0; // fclass
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_000_01_0_0; // fmv.x.w
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_000_01_0_0; // fmv.x.d
else ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
7'b1101000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_11_000_011_00_0_0; // fcvt.s.w
2'b01: ControlsD = `FCTRLW'b1_0_11_010_011_00_0_0; // fcvt.s.wu
2'b10: ControlsD = `FCTRLW'b1_0_11_100_011_00_0_0; // fcvt.s.l
2'b11: ControlsD = `FCTRLW'b1_0_11_110_011_00_0_0; // fcvt.s.lu
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_00_10_0_0; // fclass
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_00_01_0_0; // fmv.x.w
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_00_01_0_0; // fmv.x.d
else ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
7'b1101000: case(Rs2D[1:0])//***reduce resSel
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.s.w w->s
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.s.wu wu->s
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.s.l l->s
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.s.lu lu->s
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
endcase
7'b1100000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_11_001_011_11_0_0; // fcvt.w.s
2'b01: ControlsD = `FCTRLW'b0_1_11_011_011_11_0_0; // fcvt.wu.s
2'b10: ControlsD = `FCTRLW'b0_1_11_101_011_11_0_0; // fcvt.l.s
2'b11: ControlsD = `FCTRLW'b0_1_11_111_011_11_0_0; // fcvt.lu.s
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.s s->w
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.s s->wu
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.s s->l
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.s s->lu
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
endcase
7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_000_00_0_0; // fmv.w.x
7'b010000?: ControlsD = `FCTRLW'b1_0_11_000_100_00_0_0; // fcvt.s.d
7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_00_00_0_0; // fmv.w.x
7'b0100000: ControlsD = `FCTRLW'b1_0_11_000_11_00_0_0; // fcvt.s.d
7'b1101001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_11_000_011_00_0_0; // fcvt.d.w
2'b01: ControlsD = `FCTRLW'b1_0_11_010_011_00_0_0; // fcvt.d.wu
2'b10: ControlsD = `FCTRLW'b1_0_11_100_011_00_0_0; // fcvt.d.l
2'b11: ControlsD = `FCTRLW'b1_0_11_110_011_00_0_0; // fcvt.d.lu
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.d.w w->d
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.d.wu wu->d
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.d.l l->d
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.d.lu lu->d
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
endcase
7'b1100001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_11_001_011_11_0_0; // fcvt.w.d
2'b01: ControlsD = `FCTRLW'b0_1_11_011_011_11_0_0; // fcvt.wu.d
2'b10: ControlsD = `FCTRLW'b0_1_11_101_011_11_0_0; // fcvt.l.d
2'b11: ControlsD = `FCTRLW'b0_1_11_111_011_11_0_0; // fcvt.lu.d
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.d d->w
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.d d->wu
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.d d->l
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.d d->lu
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
endcase
7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_000_00_0_0; // fmv.d.x
//7'b0100001: ControlsD = `FCTRLW'b1_0_11_000_100_00_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_00_000_100_00_0_1; // non-implemented instruction
7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_00_00_0_0; // fmv.d.x
7'b0100001: ControlsD = `FCTRLW'b1_0_11_001_11_00_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
endcase
default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
endcase
// unswizzle control bits
@ -119,7 +119,7 @@ module fctrl (
// Precision
// 0-single
// 1-double
assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : FResSelD == 3'b100 | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
// FResultSel:
// 000 - ReadRes - load

803
pipelined/src/fpu/fcvt.sv Normal file
View File

@ -0,0 +1,803 @@
`include "wally-config.vh"
// largest length in IEU/FPU
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
module fcvt (
input logic XSgnE, // input's sign
input logic [`NE-1:0] XExpE, // input's exponent
input logic [`NF:0] XManE, // input's fraction
input logic [`XLEN-1:0] ForwardedSrcAE, // integer input - from IEU
input logic [2:0] FOpCtrlE, // choose which opperation (look below for values)
input logic FWriteIntE, // is fp->int (since it's writting to the integer register)
input logic XZeroE, // is the input zero
input logic XDenormE, // is the input denormalized
input logic XInfE, // is the input infinity
input logic XNaNE, // is the input a NaN
input logic XSNaNE, // is the input a signaling NaN
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FPSIZES/3:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half)
output logic [`FLEN-1:0] CvtResE, // the fp conversion result
output logic [`XLEN-1:0] CvtIntResE, // the int conversion result
output logic [4:0] CvtFlgE // the conversion's flags
);
// OpCtrls:
// fp->fp conversions: {0, output precision} - only one of the operations writes to the int register
// half - 10
// single - 00
// double - 01
// quad - 11
// int<->fp conversions: {is int->fp?, is the integer 64-bit?, is the integer signed?}
// bit 2 bit 1 bit 0
// for example: signed long -> single floating point has the OpCode 101
// (FF) fp -> fp coversion signals
// (IF) int -> fp coversion signals
// (FI) fp -> int coversion signals
logic [`FPSIZES/3:0] OutFmt; // format of the output
logic [`XLEN-1:0] PosInt; // the positive integer input
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder)
logic [`NE:0] CalcExp; // the calculated expoent
logic [$clog2(`LGLEN)-1:0] ShiftAmt; // how much to shift by
logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted
logic ResDenormUf;// does the result underflow or is denormalized
logic ResUf; // does the result underflow
logic [`LGLEN+`NF:0] Shifted; // the shifted result
logic [`NE-2:0] NewBias; // the bias of the final result
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
logic [`NE-1:0] OldExp; // the old exponent
logic ResSgn; // the result's sign
logic Sticky; // sticky bit - for rounding
logic Round; // round bit - for rounding
logic LSBFrac; // the least significant bit of the fraction - for rounding
logic CalcPlus1; // the calculated plus 1
logic Plus1; // add one to the final result?
logic [`FLEN-1:0] ShiftedPlus1; // plus one shifted to the proper position
logic [`NE:0] FullResExp; // the full result exponent (with the overflow bit)
logic [`NE-1:0] ResExp; // the result's exponent (trimmed to the correct size)
logic [`NF-1:0] ResFrac; // the result's fraction
logic [`XLEN+1:0] NegRes; // the negation of the result
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
logic Overflow, Underflow, Inexact, Invalid; // flags
logic IntInexact, FpInexact, IntInvalid, FpInvalid; // flags for FP and int outputs
logic [`NE-1:0] MaxExp; // the maximum exponent before overflow
logic [1:0] NegResMSBS; // the negitive integer result's most significant bits
logic [`FLEN-1:0] NaNRes, InfRes, Res, UfRes; //various special results
logic KillRes; // kill the result?
logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
logic ToInt; // is the opperation an fp->int conversion?
logic [$clog2(`LGLEN)-1:0] ZeroCnt; // output from the LZC
// seperate OpCtrl for code readability
assign Signed = FOpCtrlE[0];
assign Int64 = FOpCtrlE[1];
assign IntToFp = FOpCtrlE[2];
assign ToInt = FWriteIntE;
// choose the ouptut format depending on the opperation
// - fp -> fp: OpCtrl contains the percision of the output
// - int -> fp: FmtE contains the percision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp ? FmtE : (FOpCtrlE[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp ? FmtE : FOpCtrlE[1:0];
///////////////////////////////////////////////////////////////////////////
// negation
///////////////////////////////////////////////////////////////////////////
// 1) negate the input if the input is a negitive singed integer
// 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE;
assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
///////////////////////////////////////////////////////////////////////////
// lzc
///////////////////////////////////////////////////////////////////////////
// choose the input to the leading zero counter i.e. priority encoder
// int -> fp : | positive integer | 00000... (if needed) |
// fp -> fp : | fraction | 00000... (if needed) |
assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
{XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt);
///////////////////////////////////////////////////////////////////////////
// shifter
///////////////////////////////////////////////////////////////////////////
// seclect the input to the shifter
// fp -> int:
// | `XLEN zeros | Mantissa | 0's if nessisary |
// Other problems:
// - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
// - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
// - ex: for the case 0010000.... (double)
// ??? -> fp:
// - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
// | `NF-1 zeros | Mantissa | 0's if nessisary |
// - otherwise:
// | lzcIn | 0's if nessisary |
assign ShiftIn = ToInt ? {{`XLEN{1'b0}}, XManE[`NF]&~CalcExp[`NE], XManE[`NF-1]|(CalcExp[`NE]&XManE[`NF]), XManE[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
ResDenormUf ? {{`NF-1{1'b0}}, XManE, {`LGLEN-`NF+1{1'b0}}} :
{LzcIn, {`NF+1{1'b0}}};
// kill the shift if it's negitive
// select the amount to shift by
// fp -> int:
// - shift left by CalcExp - essentially shifting until the unbiased exponent = 0
// - don't shift if supposed to shift right (underflowed or denorm input)
// denormalized/undeflowed result fp -> fp:
// - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0
// ??? -> fp:
// - shift left by ZeroCnt+1 - to shift till the result is normalized
// - only shift fp -> fp if the intital value is denormalized
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN)-1:0]&{$clog2(`LGLEN){~CalcExp[`NE]}} :
ResDenormUf&~IntToFp ? ($clog2(`LGLEN))'(`NF-1)+CalcExp[$clog2(`LGLEN)-1:0] :
(ZeroCnt+1)&{$clog2(`LGLEN){XDenormE|IntToFp}};
// shift
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
// process:
// - start - CalcExp = 1 + XExp - Largest Bias
// | `XLEN zeros | Mantissa | 0's if nessisary |
//
// - shift left 1 (1)
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
// . <- binary point
//
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
// | 0's | Mantissa | 0's if nessisary |
// | keep |
//
// fp -> fp:
// - if result is denormalized or underflowed:
// | `NF-1 zeros | Mantissa | 0's if nessisary | << NF+CalcExp-1
// process:
// - start
// | mantissa | 0's |
//
// - shift right by NF-1 (NF-1)
// | `NF-1 zeros | mantissa | 0's |
//
// - shift left by CalcExp = XExp - Largest bias + new bias
// | 0's | mantissa | 0's |
// | keep |
//
// - if the input is denormalized:
// | lzcIn | 0's if nessisary | << ZeroCnt+1
// - plus 1 to shift out the first 1
//
// int -> fp: | lzcIn | 0's if nessisary | << ZeroCnt+1
// - plus 1 to shift out the first 1
assign Shifted = ShiftIn << ShiftAmt;
///////////////////////////////////////////////////////////////////////////
// exp calculations
///////////////////////////////////////////////////////////////////////////
// *** possible optimizaations:
// - if subtracting exp by bias only the msb needs a full adder, the rest can be HA - dunno how to implement this for synth
// - Smaller exp -> Larger Exp can be calculated with: *** can use in Other units??? FMA??? insert this thing in later
// Exp if in range: {~Exp[SNE-1], Exp[SNE-2:0]}
// Exp in range if: Exp[SNE-1] = 1 & Exp[LNE-2:SNE] = 1111... & Exp[LNE-1] = 0 | Exp[SNE-1] = 0 & Exp[LNE-2:SNE] = 000... & Exp[LNE-1] = 1
// i.e.: &Exp[LNE-2:SNE-1] xor Exp[LNE-1]
// Too big if: Exp[LNE-1] = 1
// Too small if: none of the above
// Select the bias of the output
// fp -> int : select 1
// ??? -> fp : pick the new bias depending on the output format
if (`FPSIZES == 1) begin
assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS);
end else if (`FPSIZES == 2) begin
assign NewBias = ToInt ? (`NE-1)'(1) : OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
end else if (`FPSIZES == 3) begin
logic [`NE-2:0] NewBiasToFp;
always_comb
case (OutFmt)
`FMT: NewBiasToFp = (`NE-1)'(`BIAS);
`FMT1: NewBiasToFp = (`NE-1)'(`BIAS1);
`FMT2: NewBiasToFp = (`NE-1)'(`BIAS2);
default: NewBiasToFp = 1'bx;
endcase
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
end else if (`FPSIZES == 4) begin
logic [`NE-2:0] NewBiasToFp;
always_comb
case (OutFmt)
2'h3: NewBiasToFp = (`NE-1)'(`Q_BIAS);
2'h1: NewBiasToFp = (`NE-1)'(`D_BIAS);
2'h0: NewBiasToFp = (`NE-1)'(`S_BIAS);
2'h2: NewBiasToFp = (`NE-1)'(`H_BIAS);
endcase
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
end
// select the old exponent
// int -> fp : largest bias + XLEN
// fp -> ??? : XExp
assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN) : XExpE;
// calculate CalcExp
// fp -> fp :
// - XExp - Largest bias + new bias - (ZeroCnt+1)
// only do ^ if the input was denormalized
// - convert the expoenent to the final preciaion (Exp - oldBias + newBias)
// - correct the expoent when there is a normalization shift ( + ZeroCnt+1)
// fp -> int : XExp - Largest Bias + 1 - (ZeroCnt+1)
// | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
// process:
// - start
// | `XLEN zeros | Mantissa | 0's if nessisary |
//
// - shift left 1 (1)
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
// . <- binary point
//
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
// | 0's | Mantissa | 0's if nessisary |
// | keep |
//
// - if the input is denormalized then we dont shift... so the "- (ZeroCnt+1)" is just leftovers from other options
// int -> fp : largest bias XLEN - Largest bias + new bias - 1 - ZeroCnt = XLEN + NewBias - 1 - ZeroCnt
// Process:
// - shifted right by XLEN (XLEN)
// - shift left to normilize (-1-ZeroCnt)
// - newBias to make the biased exponent
//
assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-$clog2(`LGLEN)+1{1'b0}}, (ZeroCnt&{$clog2(`LGLEN){XDenormE|IntToFp}})};
// find if the result is dnormal or underflows
// - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
// - can't underflow an integer to Fp conversion
assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp;
// choose the negative of the fraction size
if (`FPSIZES == 1) begin
assign ResNegNF = -`NF;
end else if (`FPSIZES == 2) begin
assign ResNegNF = OutFmt ? -`NF : -`NF1;
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResNegNF = -`NF;
`FMT1: ResNegNF = -`NF1;
`FMT2: ResNegNF = -`NF2;
default: ResNegNF = 1'bx;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ResNegNF = -`Q_NF;
2'h1: ResNegNF = -`D_NF;
2'h0: ResNegNF = -`S_NF;
2'h2: ResNegNF = -`H_NF;
endcase
end
// determine if the result underflows ??? -> fp
// - if the first 1 is shifted out of the result then the result underflows
// - can't underflow an integer to fp conversions
assign ResUf = ($signed(CalcExp) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroE&~IntToFp;
///////////////////////////////////////////////////////////////////////////
// sign
///////////////////////////////////////////////////////////////////////////
// determine the sign of the result
// - if int -> fp
// - if 64-bit : check the msb of the 64-bit integer input and if it's signed
// - if 32-bit : check the msb of the 32-bit integer input and if it's signed
// - otherwise: the floating point input's sign
assign ResSgn = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
///////////////////////////////////////////////////////////////////////////
// rounding
///////////////////////////////////////////////////////////////////////////
// round to nearest even
// {Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
// 11 - Plus1
// round to zero - do nothing
// round to -infinity - Plus1 if negative
// round to infinity - Plus1 if positive
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0x - do nothing
// 1x - Plus1
// ResUf is used when a fp->fp result underflows but all the bits get shifted out, which leaves nothing for the sticky bit
if (`FPSIZES == 1) begin
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : |Shifted[`LGLEN+`NF-`NF-1:0]|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : Shifted[`LGLEN+`NF-`NF];
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : Shifted[`LGLEN+`NF-`NF+1];
end else if (`FPSIZES == 2) begin
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] :
(OutFmt ? |Shifted[`LGLEN+`NF-`NF-1:0] : |Shifted[`LGLEN+`NF-`NF1-1:0])|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] :
OutFmt ? Shifted[`LGLEN+`NF-`NF] : Shifted[`LGLEN+`NF-`NF1];
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] :
OutFmt ? Shifted[`LGLEN+`NF-`NF+1] : Shifted[`LGLEN+`NF-`NF1+1];
end else if (`FPSIZES == 3) begin
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
always_comb
case (OutFmt)
`FMT: begin
ToFpSticky = |Shifted[`LGLEN+`NF-`NF-1:0];
ToFpRound = Shifted[`LGLEN+`NF-`NF];
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF+1];
end
`FMT1: begin
ToFpSticky = |Shifted[`LGLEN+`NF-`NF1-1:0];
ToFpRound = Shifted[`LGLEN+`NF-`NF1];
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF1+1];
end
`FMT2: begin
ToFpSticky = |Shifted[`LGLEN+`NF-`NF2-1:0];
ToFpRound = Shifted[`LGLEN+`NF-`NF2];
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF2+1];
end
default: begin
ToFpSticky = 1'bx;
ToFpRound = 1'bx;
ToFpLSBFrac = 1'bx;
end
endcase
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
end else if (`FPSIZES == 4) begin
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
always_comb
case (OutFmt)
2'h3: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`Q_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`Q_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`Q_NF+1];
end
2'h1: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`D_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`D_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`D_NF+1];
end
2'h0: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`S_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`S_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`S_NF+1];
end
2'h2: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`H_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`H_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`H_NF+1];
end
endcase
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
end
always_comb
// Determine if you add 1
case (FrmE)
3'b000: CalcPlus1 = Round & (Sticky | LSBFrac);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResSgn;//round down
3'b011: CalcPlus1 = ~ResSgn;//round up
3'b100: CalcPlus1 = Round;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// dont round if exact
assign Plus1 = CalcPlus1&(Round|Sticky);
// shift the 1 to the propper position for rounding
// - dont round it converting to integer
if (`FPSIZES == 1) begin
assign ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
end else if (`FPSIZES == 2) begin
assign ShiftedPlus1 = OutFmt ? {{`FLEN-1{1'b0}},Plus1&~ToInt} : {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
`FMT1: ShiftedPlus1 = {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
`FMT2: ShiftedPlus1 = {{`NE+`NF2{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF2-1{1'b0}}};
default: ShiftedPlus1 = 0;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ShiftedPlus1 = {{`Q_LEN-1{1'b0}},Plus1&~ToInt};
2'h1: ShiftedPlus1 = {{`Q_NE+`D_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`D_NF-1{1'b0}}};
2'h0: ShiftedPlus1 = {{`Q_NE+`S_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`S_NF-1{1'b0}}};
2'h2: ShiftedPlus1 = {{`Q_NE+`H_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`H_NF-1{1'b0}}};
endcase
end
// kill calcExp if the result is denormalized
assign {FullResExp, ResFrac} = {CalcExp&{`NE+1{~ResDenormUf}}, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`NF]} + ShiftedPlus1;
// trim the result's expoent to size
assign ResExp = FullResExp[`NE-1:0];
///////////////////////////////////////////////////////////////////////////
// flags
///////////////////////////////////////////////////////////////////////////
// calculate the flags
// find the maximum exponent (the exponent and larger overflows)
if (`FPSIZES == 1) begin
assign MaxExp = ToInt ? Int64 ? 65 : 33 : {`NE{1'b1}};
end else if (`FPSIZES == 2) begin
assign MaxExp = ToInt ? Int64 ? 65 : 33 :
OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
end else if (`FPSIZES == 3) begin
logic [`NE-1:0] MaxExpFp;
always_comb
case (OutFmt)
`FMT: begin
MaxExpFp = {`NE{1'b1}};
end
`FMT1: begin
MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
end
`FMT2: begin
MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
end
default: begin
MaxExpFp = 1'bx;
end
endcase
assign MaxExp = ToInt ? Int64 ? 65 : 33 : MaxExpFp;
end else if (`FPSIZES == 4) begin
logic [`NE-1:0] MaxExpFp;
always_comb
case (OutFmt)
2'h3: begin
MaxExpFp = {`Q_NE{1'b1}};
end
2'h1: begin
MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
end
2'h0: begin
MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
end
2'h2: begin
MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
end
endcase
assign MaxExp = ToInt ? Int64 ? 65 : 33 : MaxExpFp;
end
// if the result exponent is larger then the maximum possible exponent
// | and the exponent is positive
// | | and the input is not NaN or Infinity
// | | |
assign Overflow = ((ResExp >= MaxExp)&~CalcExp[`NE]&(~(XNaNE|XInfE)|IntToFp));
// if the result is denormalized or underflowed
// | and the result did not round into normal values
// | | and the result is not exact
// | | | and the result isn't NaN
// | | | |
assign Underflow = ResDenormUf & ~(ResExp==1 & CalcExp == 0) & (Sticky|Round)&~(XNaNE);
// we are using the IEEE convertToIntegerExact opperations (rather then the exact ones) which do singal the inexact flag
// if there were bits thrown away
// | if overflowed or underflowed
// | | and if not a NaN
// | | |
assign FpInexact = (Sticky|Round|Underflow|Overflow)&(~XNaNE|IntToFp);
// if the result is too small to be represented and not 0
// | and if the result is not invalid (outside the integer bounds)
// | |
assign IntInexact = ((CalcExp[`NE]&~XZeroE)|Sticky|Round)&~Invalid;
// select the inexact flag to output
assign Inexact = ToInt ? IntInexact : FpInexact;
// if an input was a singaling NaN(and we're using a FP input)
// |
assign FpInvalid = (XSNaNE&~IntToFp);
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
// if the input is NaN or infinity
// | if the integer result overflows (out of range)
// | | if the input was negitive but ouputing to a unsigned number
// | | | the result doesn't round to zero
// | | | | or the result rounds up out of bounds
// | | | | and the result didn't underflow
// | | | | |
assign IntInvalid = XNaNE|XInfE|Overflow|((XSgnE&~Signed)&(~((CalcExp[`NE]|(~|CalcExp))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
// |
// or when the positive result rounds up out of range
// select the inexact flag to output
assign Invalid = ToInt ? IntInvalid : FpInvalid;
// pack the flags together
// - fp -> int does not set the overflow or underflow flags
assign CvtFlgE = {Invalid, 1'b0, Overflow&~ToInt, Underflow&~ToInt, Inexact};
///////////////////////////////////////////////////////////////////////////
// result selection
///////////////////////////////////////////////////////////////////////////
// determine if you shoould kill the result
// - do so if the result underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp));
if (`FPSIZES == 1) begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
assign NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
end else begin
assign NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
assign InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
assign UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
assign Res = {ResSgn, ResExp, ResFrac};
end else if (`FPSIZES == 2) begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
end else begin
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
assign InfRes = OutFmt ? (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
(~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
assign Res = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
end else begin
NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {ResSgn, ResExp, ResFrac};
end
`FMT1: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
end else begin
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end
`FMT2: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, XManE[`NF-2:`NF-`NF2]};
end else begin
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, {`NF2-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
end
default: begin
NaNRes = 1'bx;
InfRes = 1'bx;
UfRes = 1'bx;
Res = 1'bx;
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, XManE[`Q_NF-2:0]};
end else begin
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`Q_NE-1{1'b1}}, 1'b0, {`Q_NF{1'b1}}} : {ResSgn, {`Q_NE{1'b1}}, {`Q_NF{1'b0}}};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {ResSgn, (`Q_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {ResSgn, ResExp, ResFrac};
end
2'h1: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`D_NF]};
end else begin
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`D_NF]};
end
2'h0: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`S_NF]};
end else begin
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`S_NF]};
end
2'h2: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`H_NF]};
end else begin
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`H_NF]};
end
endcase
end
// choose the floating point result
// - if the input is NaN (and using the NaN input) output the NaN result
// - if the input is infinity or the output overflows
// - kill the InfE signal if the input isn't a floating point value
// - if killing the result output the underflow result
// - otherwise output the normal result
assign CvtResE = XNaNE&~IntToFp ? NaNRes :
(XInfE&~IntToFp)|Overflow ? InfRes :
KillRes ? UfRes :
Res;
// *** probably can optimize the negation
// select the overflow integer result
// - negitive infinity and out of range negitive input
// | int | long |
// signed | -2^31 | -2^63 |
// unsigned | 0 | 0 |
//
// - positive infinity and out of range negitive input and NaNs
// | int | long |
// signed | 2^31-1 | 2^63-1 |
// unsigned | 2^32-1 | 2^64-1 |
//
// other: 32 bit unsinged result should be sign extended as if it were a signed number
assign OfIntRes = Signed ? XSgnE&~XNaNE ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
XSgnE&~XNaNE ? {`XLEN{1'b0}} : // unsigned negitive
{`XLEN{1'b1}};// unsigned positive
// round and negate the positive result if needed
assign NegRes = XSgnE ? -({2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
// select the integer output
// - if the input is invalid (out of bounds NaN or Inf) then output overflow result
// - if the input underflows
// - if rounding and signed opperation and negitive input, output -1
// - otherwise output a rounded 0
// - otherwise output the normal result (trmined and sign extended if nessisary)
assign CvtIntResE = Invalid ? OfIntRes :
CalcExp[`NE] ? XSgnE&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
endmodule

View File

@ -43,8 +43,7 @@ module fma(
input logic XSgnM, YSgnM, // input signs - memory stage
input logic [`NE-1:0] ZExpM, // input exponents - memory stage
input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage
input logic ZOrigDenormE, // is the original precision denormalized
input logic XDenormE, YDenormE, ZDenormE, // is denorm
input logic ZDenormE, // is denorm
input logic XZeroE, YZeroE, ZZeroE, // is zero - execute stage
input logic XNaNM, YNaNM, ZNaNM, // is NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // is signaling NaN
@ -73,10 +72,10 @@ module fma(
logic PSgnE, PSgnM;
logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM;
logic Mult;
logic ZOrigDenormM;
logic ZDenormM;
fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
.ProdExpE, .AddendStickyE, .KillProdE);
@ -84,10 +83,10 @@ module fma(
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZOrigDenormE},
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZOrigDenormM});
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE},
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM});
fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZOrigDenormM,
fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
.FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult,
.FMAResM, .FMAFlgM);
@ -101,7 +100,6 @@ module fma1(
input logic XSgnE, YSgnE, ZSgnE, // input's signs
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
input logic [`NF:0] XManE, YManE, ZManE, // fractions in U(0.NF) format
input logic XDenormE, YDenormE, ZDenormE, // is the input denormal
input logic XZeroE, YZeroE, ZZeroE, // is the input zero
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [`FPSIZES/3:0] FmtE, // precision 1 = double 0 = single
@ -116,13 +114,11 @@ module fma1(
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift cnt
);
logic [`NE-1:0] Denorm; // value of a denormaized number based on precision
logic [2*`NF+1:0] ProdManE; // 1.X frac * 1.Y frac in U(2.2Nf) format
logic [3*`NF+5:0] AlignedAddendE; // Z aligned for addition in U(NF+5.2NF+1)
logic [3*`NF+6:0] AlignedAddendInv; // aligned addend possibly inverted
logic [2*`NF+1:0] ProdManKilled; // the product's mantissa possibly killed
logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum
logic [`NE-1:0] XExpVal, YExpVal; // exponent value after taking into accound denormals
///////////////////////////////////////////////////////////////////////////////
// Calculate the product
// - When multipliying two fp numbers, add the exponents
@ -133,8 +129,8 @@ module fma1(
// calculate the product's exponent
expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE, .XDenormE, .YDenormE, .XExpVal, .YExpVal,
.Denorm, .ProdExpE);
expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE,
.ProdExpE);
// multiplication of the mantissa's
mult mult(.XManE, .YManE, .ProdManE);
@ -143,7 +139,7 @@ module fma1(
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
align align(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm, .XExpVal, .YExpVal,
align align(.ZExpE, .ZManE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .XExpE, .YExpE,
.AlignedAddendE, .AddendStickyE, .KillProdE);
// calculate the signs and take the opperation into account
@ -167,51 +163,12 @@ endmodule
module expadd(
input logic [`FPSIZES/3:0] FmtE, // precision
input logic [`NE-1:0] XExpE, YExpE, // input exponents
input logic XDenormE, YDenormE, // are the inputs denormalized
input logic XZeroE, YZeroE, // are the inputs zero
output logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals
output logic [`NE-1:0] Denorm, // value of denormalized exponent
output logic [`NE+1:0] ProdExpE // product's exponent B^(1023)NE+2
);
// denormalized numbers have diffrent values depending on which precison it is.
// FLEN - 1
// Other - BIAS - other bias + 1
if (`FPSIZES == 1) begin
assign Denorm = 1;
end else if (`FPSIZES == 2) begin
assign Denorm = FmtE ? (`NE)'(1) : (`NE)'(`BIAS)-(`NE)'(`BIAS1)+(`NE)'(1);
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtE)
`FMT: Denorm = 1;
`FMT1: Denorm = `BIAS-`BIAS1+1;
`FMT2: Denorm = `BIAS-`BIAS2+1;
default: Denorm = 1'bx;
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtE)
2'h3: Denorm = 1;
2'h1: Denorm = `BIAS-`D_BIAS+1;
2'h0: Denorm = `BIAS-`S_BIAS+1;
2'h2: Denorm = `BIAS-`H_BIAS+1;
endcase
end
end
// pick denormalized value or exponent
assign XExpVal = XDenormE ? Denorm : XExpE;
assign YExpVal = YDenormE ? Denorm : YExpE;
// kill the exponent if the product is zero - either X or Y is 0
assign ProdExpE = ({2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};
assign ProdExpE = ({2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};
endmodule
@ -258,13 +215,10 @@ endmodule
module align(
input logic [`NE-1:0] ZExpE, // biased exponents in B(NE.0) format
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
input logic [`NF:0] ZManE, // fractions in U(0.NF) format]
input logic ZDenormE, // is the input denormal
input logic XZeroE, YZeroE, ZZeroE, // is the input zero
input logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals
input logic [`NE+1:0] ProdExpE, // the product's exponent
input logic [`NE-1:0] Denorm, // the biased value of a denormalized number
output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
output logic AddendStickyE, // Sticky bit calculated from the aliged addend
output logic KillProdE // should the product be set to zero
@ -273,7 +227,6 @@ module align(
logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter U(NF+5.3NF+1)
logic [`NE-1:0] ZExpVal; // Exponent value after taking into account denormals
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
@ -282,11 +235,9 @@ module align(
// determine the shift count for alignment
// - negitive means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// - Denormal numbers have a diffrent exponent value depending on the precision
assign ZExpVal = ZDenormE ? Denorm : ZExpE;
// assign AlignCnt = ProdExpE - {2'b0, ZExpVal} + (`NF+3);
// *** can we use ProdExpE instead of XExp/YExp to save an adder? DH 5/12/22
assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpVal};
// KP- yes we used ProdExpE originally but we did this for timing
assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpE};
// Defualt Addition without shifting
// | 54'b0 | 106'b(product) | 2'b0 |
@ -409,22 +360,10 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
lzc lzc(.f, .NormCntE);
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE));
endmodule
module lzc(
input logic [3*`NF+6:0] f,
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift
);
logic [$clog2(3*`NF+7)-1:0] i;
always_comb begin
i = 0;
while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned($clog2(3*`NF+7)'(3)*($clog2(3*`NF+7))'(`NF)+($clog2(3*`NF+7))'(6))) i = i+1; // search for leading one
NormCntE = i;
end
endmodule
@ -450,7 +389,7 @@ module fma2(
input logic [3*`NF+5:0] SumM, // the positive sum
input logic NegSumM, // was the sum negitive
input logic InvZM, // do you invert Z
input logic ZOrigDenormM, // is the original precision denormalized
input logic ZDenormM, // is the original precision denormalized
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic Mult, // multiply opperation
@ -465,7 +404,7 @@ module fma2(
logic ResultSgn, ResultSgnTmp; // Result sign
logic [`NE+1:0] SumExp; // exponent of the normalized sum
logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow
logic [`NF+2:0] NormSum; // normalized sum
logic [`NF+1:0] NormSum; // normalized sum
logic NormSumSticky; // sticky bit calulated from the normalized sum
logic SumZero; // is the sum zero
logic ResultDenorm; // is the result denormalized
@ -486,7 +425,7 @@ module fma2(
///////////////////////////////////////////////////////////////////////////////
normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
.ZOrigDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
.ZDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
@ -533,7 +472,7 @@ module fma2(
// Select the result
///////////////////////////////////////////////////////////////////////////////
resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZOrigDenormM,
resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
.FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
.ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow,
.ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
@ -580,9 +519,9 @@ module normalize(
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // normalization shift count
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
input logic KillProdM, // is the product set to zero
input logic ZOrigDenormM,
input logic ZDenormM,
input logic AddendStickyM, // the sticky bit caclulated from the aligned addend
output logic [`NF+2:0] NormSum, // normalized sum
output logic [`NF+1:0] NormSum, // normalized sum
output logic SumZero, // is the sum zero
output logic NormSumSticky, UfSticky, // sticky bits
output logic [`NE+1:0] SumExp, // exponent of the normalized sum
@ -599,12 +538,12 @@ module normalize(
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
//*** insert bias-bias simplification in fcvt.sv/phone pictures
// Determine if the sum is zero
assign SumZero = ~(|SumM);
// calculate the sum's exponent
assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZOrigDenormM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
//convert the sum's exponent into the propper percision
if (`FPSIZES == 1) begin
@ -707,27 +646,27 @@ module normalize(
assign LZAPlus2 = SumShifted[3*`NF+8];
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3];
assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4];
// Calculate the sticky bit
if (`FPSIZES == 1) begin
assign NormSumSticky = |CorrSumShifted[2*`NF+2:0];
assign NormSumSticky = |CorrSumShifted[2*`NF+3:0];
end else if (`FPSIZES == 2) begin
// 3*NF+5 - NF1 - 3
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) |
(|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&~FmtM);
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM);
end else if (`FPSIZES == 3) begin
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) |
(|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&((FmtM==`FMT1)|(FmtM==`FMT2))) |
(|CorrSumShifted[3*`NF+2-`NF2:3*`NF+3-`NF1]&(FmtM==`FMT2));
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) |
(|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2));
end else if (`FPSIZES == 4) begin
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) |
(|CorrSumShifted[3*`NF+2-`D_NF:2*`NF+3]&((FmtM==1)|(FmtM==0)|(FmtM==2))) |
(|CorrSumShifted[3*`NF+2-`S_NF:3*`NF+3-`D_NF]&((FmtM==0)|(FmtM==2))) |
(|CorrSumShifted[3*`NF+2-`H_NF:3*`NF+3-`S_NF]&(FmtM==2));
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
(|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) |
(|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) |
(|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2));
end
@ -745,7 +684,7 @@ module fmaround(
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic UfSticky, // sticky bit for underlow calculation
input logic [`NF+2:0] NormSum, // normalized sum
input logic [`NF+1:0] NormSum, // normalized sum
input logic AddendStickyM, // addend's sticky bit
input logic NormSumSticky, // normalized sum's sticky bit
input logic ZZeroM, // is Z zero
@ -799,83 +738,53 @@ module fmaround(
if (`FPSIZES == 1) begin
// determine guard, round, and least significant bit of the result
assign Guard = NormSum[2];
assign Round = NormSum[1];
assign LSBNormSum = NormSum[3];
assign LSBNormSum = NormSum[2];
// used to determine underflow flag
assign UfGuard = NormSum[1];
assign UfRound = NormSum[0];
assign UfLSBNormSum = NormSum[2];
// determine sticky
assign Sticky = UfSticky | NormSum[0];
end else if (`FPSIZES == 2) begin
// \/-------------NF---------------,
// | NF1 | 3 | |
// | NF1 | 2 | |
// '-------NF1------^
// determine guard, round, and least significant bit of the result
assign Guard = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
assign LSBNormSum = FmtM ? NormSum[3] : NormSum[`NF-`NF1+3];
assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
// used to determine underflow flag
assign UfGuard = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
// determine sticky
assign Sticky = UfSticky | (FmtM ? NormSum[0] : NormSum[`NF-`NF1]);
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: begin
// determine guard, round, and least significant bit of the result
Guard = NormSum[2];
Round = NormSum[1];
LSBNormSum = NormSum[3];
LSBNormSum = NormSum[2];
// used to determine underflow flag
UfGuard = NormSum[1];
UfRound = NormSum[0];
UfLSBNormSum = NormSum[2];
// determine sticky
Sticky = UfSticky | NormSum[0];
end
`FMT1: begin
// determine guard, round, and least significant bit of the result
Guard = NormSum[`NF-`NF1+2];
Round = NormSum[`NF-`NF1+1];
LSBNormSum = NormSum[`NF-`NF1+3];
LSBNormSum = NormSum[`NF-`NF1+2];
// used to determine underflow flag
UfGuard = NormSum[`NF-`NF1+1];
UfRound = NormSum[`NF-`NF1];
UfLSBNormSum = NormSum[`NF-`NF1+2];
// determine sticky
Sticky = UfSticky | NormSum[`NF-`NF1];
end
`FMT2: begin
// determine guard, round, and least significant bit of the result
Guard = NormSum[`NF-`NF2+2];
Round = NormSum[`NF-`NF2+1];
LSBNormSum = NormSum[`NF-`NF2+3];
LSBNormSum = NormSum[`NF-`NF2+2];
// used to determine underflow flag
UfGuard = NormSum[`NF-`NF2+1];
UfRound = NormSum[`NF-`NF2];
UfLSBNormSum = NormSum[`NF-`NF2+2];
// determine sticky
Sticky = UfSticky | NormSum[`NF-`NF2];
end
default: begin
Guard = 1'bx;
Round = 1'bx;
LSBNormSum = 1'bx;
UfGuard = 1'bx;
UfRound = 1'bx;
UfLSBNormSum = 1'bx;
Sticky = 1'bx;
end
endcase
end
@ -885,56 +794,40 @@ module fmaround(
case (FmtM)
2'h3: begin
// determine guard, round, and least significant bit of the result
Guard = NormSum[2];
Round = NormSum[1];
LSBNormSum = NormSum[3];
LSBNormSum = NormSum[2];
// used to determine underflow flag
UfGuard = NormSum[1];
UfRound = NormSum[0];
UfLSBNormSum = NormSum[2];
// determine sticky
Sticky = UfSticky | NormSum[0];
end
2'h1: begin
// determine guard, round, and least significant bit of the result
Guard = NormSum[`NF-`D_NF+2];
Round = NormSum[`NF-`D_NF+1];
LSBNormSum = NormSum[`NF-`D_NF+3];
LSBNormSum = NormSum[`NF-`D_NF+2];
// used to determine underflow flag
UfGuard = NormSum[`NF-`D_NF+1];
UfRound = NormSum[`NF-`D_NF];
UfLSBNormSum = NormSum[`NF-`D_NF+2];
// determine sticky
Sticky = UfSticky | NormSum[`NF-`D_NF];
end
2'h0: begin
// determine guard, round, and least significant bit of the result
Guard = NormSum[`NF-`S_NF+2];
Round = NormSum[`NF-`S_NF+1];
LSBNormSum = NormSum[`NF-`S_NF+3];
LSBNormSum = NormSum[`NF-`S_NF+2];
// used to determine underflow flag
UfGuard = NormSum[`NF-`S_NF+1];
UfRound = NormSum[`NF-`S_NF];
UfLSBNormSum = NormSum[`NF-`S_NF+2];
// determine sticky
Sticky = UfSticky | NormSum[`NF-`S_NF];
end
2'h2: begin
// determine guard, round, and least significant bit of the result
Guard = NormSum[`NF-`H_NF+2];
Round = NormSum[`NF-`H_NF+1];
LSBNormSum = NormSum[`NF-`H_NF+3];
LSBNormSum = NormSum[`NF-`H_NF+2];
// used to determine underflow flag
UfGuard = NormSum[`NF-`H_NF+1];
UfRound = NormSum[`NF-`H_NF];
UfLSBNormSum = NormSum[`NF-`H_NF+2];
// determine sticky
Sticky = UfSticky | NormSum[`NF-`H_NF];
end
endcase
end
end
// used to determine underflow flag
assign UfLSBNormSum = Round;
// determine sticky
assign Sticky = UfSticky | UfRound;
// Deterimine if a small number was supposed to be subtrated
@ -944,28 +837,28 @@ module fmaround(
always_comb begin
// Determine if you add 1
case (FrmM)
3'b000: CalcPlus1 = Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&LSBNormSum&~SubBySmallNum));//round to nearest even
3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round down
3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round up
3'b100: CalcPlus1 = (Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&~SubBySmallNum)));//round to nearest max magnitude
3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down
3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up
3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (FrmM)
3'b000: UfCalcPlus1 = UfGuard & (UfRound | (UfSticky&UfRound|~UfSubBySmallNum) | (~Sticky&UfLSBNormSum&~UfSubBySmallNum));//round to nearest even
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round down
3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round up
3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (UfSticky&~(~UfRound&UfSubBySmallNum)) | (~Sticky&~UfSubBySmallNum)));//round to nearest max magnitude
3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down
3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero
3'b010: CalcMinus1 = ~ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round down
3'b011: CalcMinus1 = ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round up
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down
3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
@ -973,9 +866,9 @@ module fmaround(
end
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (Sticky | Guard | Round);
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard);//UfRound is part of sticky
assign Minus1 = CalcMinus1 & (Sticky | Guard | Round);
assign Plus1 = CalcPlus1 & (Sticky | Round);
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky
assign Minus1 = CalcMinus1 & (Sticky | Round);
// Compute rounded result
if (`FPSIZES == 1) begin
@ -1011,7 +904,7 @@ module fmaround(
end
assign NormSumTruncated = NormSum[`NF+2:3];
assign NormSumTruncated = NormSum[`NF+1:2];
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
assign ResultExp = FullResultExp[`NE-1:0];
@ -1083,12 +976,12 @@ module fmaflags(
// Set Underflow flag if the number is too small to be represented in normal numbers
// - Don't set the underflow flag if the result is exact
assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// exp is negitive result is denorm exp was denorm but rounded to norm and if given an unbounded exponent it would stay denormal
assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
// - Don't set the underflow flag if an underflowed result isn't outputed
assign Inexact = (Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
assign Inexact = (Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Combine flags
// - FMA can't set the Divide by zero flag
@ -1108,7 +1001,7 @@ module resultselect(
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic ZOrigDenormM, // is the original precision denormalized
input logic ZDenormM, // is the original precision denormalized
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic ResultSgn, // the result's sign
@ -1134,7 +1027,7 @@ module resultselect(
end
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
assign NormResult = {ResultSgn, ResultExp, ResultFrac};
@ -1153,7 +1046,7 @@ module resultselect(
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
@ -1173,7 +1066,7 @@ module resultselect(
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
NormResult = {ResultSgn, ResultExp, ResultFrac};
@ -1189,7 +1082,7 @@ module resultselect(
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
@ -1206,7 +1099,7 @@ module resultselect(
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
{{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
@ -1244,7 +1137,7 @@ module resultselect(
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
NormResult = {ResultSgn, ResultExp, ResultFrac};
@ -1260,7 +1153,7 @@ module resultselect(
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
{{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
@ -1277,7 +1170,7 @@ module resultselect(
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
{{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
@ -1295,7 +1188,7 @@ module resultselect(
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
{{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};

View File

@ -72,7 +72,7 @@ module fpu (
logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register
logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register
logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component
logic [2:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage
logic [1:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage
logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
@ -104,7 +104,6 @@ module fpu (
logic XInfQ, YInfQ; // is the input infinity - divide
logic XExpMaxE; // is the exponent all ones (max value)
logic XNormE; // is normal
logic ZOrigDenormE;
logic FmtQ;
logic FOpCtrlQ;
@ -114,9 +113,8 @@ module fpu (
logic [63:0] FMAResM, FMAResW; // FMA/multiply result
logic [4:0] FMAFlgM; // FMA/multiply result
logic [63:0] ReadResW; // read result (load instruction)
logic [63:0] CvtFpResE; // add/FP -> FP convert result
logic [4:0] CvtFpFlgE; // add/FP -> FP convert flags
logic [63:0] CvtResE; // FP <-> int convert result
logic [`XLEN-1:0] CvtIntResE; // FP <-> int convert result
logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this
logic [63:0] ClassResE; // classify result
logic [63:0] CmpResE; // compare result
@ -152,7 +150,7 @@ module fpu (
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
flopenrc #(16) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
@ -177,7 +175,7 @@ module fpu (
// unpack unit
// - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, .ZOrigDenormE,
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
@ -189,11 +187,11 @@ module fpu (
// - handles FMA and multiply instructions
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM,
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FOpCtrlE, .ZOrigDenormE,
.FOpCtrlE,
.FmtE, .FmtM, .FrmM,
.FMAFlgM, .FMAResM);
@ -214,13 +212,12 @@ module fpu (
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
// other FP execution units
fcvtfp fcvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE);
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
fcvtint fcvtint (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE,
.CvtResE, .CvtFlgE);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE,
.XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE);
// data to be stored in memory - to IEU
// - FP uses NaN-blocking format
@ -231,13 +228,15 @@ module fpu (
mux2 #(64) SrcAMux({{32{1'b1}}, ForwardedSrcAE[31:0]}, {{64-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
// select a result that may be written to the FP register
mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE);
mux5 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE);
mux4 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
mux4 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
// select the result that may be written to the integer register - to IEU
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0],
CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
CvtIntResE, FIntResSelE, FIntResE);
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
// *** make sure the fpu matches the chapter diagram
// E/M pipe registers

View File

@ -12,7 +12,6 @@ module unpack (
output logic XDenormE, YDenormE, ZDenormE, // is XYZ denormalized
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
output logic ZOrigDenormE, // is the original precision denormalized
output logic XExpMaxE // does X have the maximum exponent (NaN or Inf)
);
@ -30,9 +29,9 @@ module unpack (
assign ZSgnE = Z[`FLEN-1];
// exponent
assign XExpE = X[`FLEN-2:`NF];
assign YExpE = Y[`FLEN-2:`NF];
assign ZExpE = Z[`FLEN-2:`NF];
assign XExpE = {X[`FLEN-2:`NF+1], X[`NF]|XDenormE};
assign YExpE = {Y[`FLEN-2:`NF+1], Y[`NF]|YDenormE};
assign ZExpE = {Z[`FLEN-2:`NF+1], Z[`NF]|ZDenormE};
// fraction (no assumed 1)
assign XFracE = X[`NF-1:0];
@ -49,7 +48,11 @@ module unpack (
assign YExpMaxE = &YExpE;
assign ZExpMaxE = &ZExpE;
assign ZOrigDenormE = 1'b0;
// is the input (in it's original format) denormalized
assign XDenormE = ~|X[`FLEN-2:`NF] & ~XFracZero;
assign YDenormE = ~|Y[`FLEN-2:`NF] & ~YFracZero;
assign ZDenormE = ~|Z[`FLEN-2:`NF] & ~ZFracZero;
end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported
@ -73,7 +76,6 @@ module unpack (
// double and half
logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed
logic XOrigDenormE, YOrigDenormE; // the original value of XYZ is denormalized
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN
assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
@ -95,14 +97,16 @@ module unpack (
// extract the exponent, converting the smaller exponent into the larger precision if nessisary
// - if the original precision had a denormal number convert the exponent value 1
assign XExpE = FmtE ? X[`FLEN-2:`NF] : XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]};
assign YExpE = FmtE ? Y[`FLEN-2:`NF] : YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]};
assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]};
assign XExpE = FmtE ? {X[`FLEN-2:`NF+1], X[`NF]|XDenormE} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]}}, XLen1[`LEN1-3:`NF1+1], XLen1[`NF1]|XDenormE};
assign YExpE = FmtE ? {Y[`FLEN-2:`NF+1], Y[`NF]|YDenormE} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]}}, YLen1[`LEN1-3:`NF1+1], YLen1[`NF1]|YDenormE};
assign ZExpE = FmtE ? {Z[`FLEN-2:`NF+1], Z[`NF]|ZDenormE} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]}}, ZLen1[`LEN1-3:`NF1+1], ZLen1[`NF1]|ZDenormE};
// is the input (in it's original format) denormalized
assign XOrigDenormE = FmtE ? 0 : ~|XLen1[`LEN1-2:`NF1] & ~XFracZero;
assign YOrigDenormE = FmtE ? 0 : ~|YLen1[`LEN1-2:`NF1] & ~YFracZero;
assign ZOrigDenormE = FmtE ? 0 : ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero;
// is the input (in it's original format) denormalized
assign XDenormE = (FmtE ? ~|X[`FLEN-2:`NF] : ~|XLen1[`LEN1-2:`NF1]) & ~XFracZero;
assign YDenormE = (FmtE ? ~|Y[`FLEN-2:`NF] : ~|YLen1[`LEN1-2:`NF1]) & ~YFracZero;
assign ZDenormE = (FmtE ? ~|Z[`FLEN-2:`NF] : ~|ZLen1[`LEN1-2:`NF1]) & ~ZFracZero;
// extract the fraction, add trailing zeroes to the mantissa if nessisary
assign XFracE = FmtE ? X[`NF-1:0] : {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
@ -141,7 +145,6 @@ module unpack (
logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision
logic [`LEN2-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision
logic XOrigDenormE, YOrigDenormE; // the original value of XYZ is denormalized
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision
assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
@ -156,14 +159,15 @@ module unpack (
// There are 2 case statements
// - one for other singals and one for sgn/exp/frac
// - need two for the dependencies in the expoenent calculation
//*** pull out the ~FracZero and and it at the end
always_comb begin
case (FmtE)
`FMT: begin // if input is largest precision (`FLEN - ie quad or double)
// This is the original format so set OrigDenorm to 0
XOrigDenormE = 1'b0;
YOrigDenormE = 1'b0;
ZOrigDenormE = 1'b0;
XDenormE = ~|X[`FLEN-2:`NF] & ~XFracZero;
YDenormE = ~|Y[`FLEN-2:`NF] & ~YFracZero;
ZDenormE = ~|Z[`FLEN-2:`NF] & ~ZFracZero;
// is the exponent non-zero
XExpNonzero = |X[`FLEN-2:`NF];
@ -178,9 +182,9 @@ module unpack (
`FMT1: begin // if input is larger precsion (`LEN1 - double or single)
// is the input (in it's original format) denormalized
XOrigDenormE = ~|XLen1[`LEN1-2:`NF1] & ~XFracZero;
YOrigDenormE = ~|YLen1[`LEN1-2:`NF1] & ~YFracZero;
ZOrigDenormE = ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero;
XDenormE = ~|XLen1[`LEN1-2:`NF1] & ~XFracZero;
YDenormE = ~|YLen1[`LEN1-2:`NF1] & ~YFracZero;
ZDenormE = ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero;
// is the exponent non-zero
XExpNonzero = |XLen1[`LEN1-2:`NF1];
@ -195,9 +199,9 @@ module unpack (
`FMT2: begin // if input is smallest precsion (`LEN2 - single or half)
// is the input (in it's original format) denormalized
XOrigDenormE = ~|XLen2[`LEN2-2:`NF2] & ~XFracZero;
YOrigDenormE = ~|YLen2[`LEN2-2:`NF2] & ~YFracZero;
ZOrigDenormE = ~|ZLen2[`LEN2-2:`NF2] & ~ZFracZero;
XDenormE = ~|XLen2[`LEN2-2:`NF2] & ~XFracZero;
YDenormE = ~|YLen2[`LEN2-2:`NF2] & ~YFracZero;
ZDenormE = ~|ZLen2[`LEN2-2:`NF2] & ~ZFracZero;
// is the exponent non-zero
XExpNonzero = |XLen2[`LEN2-2:`NF2];
@ -210,9 +214,9 @@ module unpack (
ZExpMaxE = &ZLen2[`LEN2-2:`NF2];
end
default: begin
XOrigDenormE = 0;
YOrigDenormE = 0;
ZOrigDenormE = 0;
XDenormE = 0;
YDenormE = 0;
ZDenormE = 0;
XExpNonzero = 0;
YExpNonzero = 0;
ZExpNonzero = 0;
@ -231,9 +235,9 @@ module unpack (
ZSgnE = Z[`FLEN-1];
// extract the exponent
XExpE = X[`FLEN-2:`NF];
YExpE = Y[`FLEN-2:`NF];
ZExpE = Z[`FLEN-2:`NF];
XExpE = {X[`FLEN-2:`NF+1], X[`NF]|XDenormE};
YExpE = {Y[`FLEN-2:`NF+1], Y[`NF]|YDenormE};
ZExpE = {Z[`FLEN-2:`NF+1], Z[`NF]|ZDenormE};
// extract the fraction
XFracE = X[`NF-1:0];
@ -256,9 +260,9 @@ module unpack (
// also need to take into account possible zero/denorm/inf/NaN values
// convert the larger precision's exponent to use the largest precision's bias
XExpE = XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]};
YExpE = YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]};
ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]};
XExpE = {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]}}, XLen1[`LEN1-3:`NF1+1], XLen1[`NF1]|XDenormE};
YExpE = {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]}}, YLen1[`LEN1-3:`NF1+1], YLen1[`NF1]|YDenormE};
ZExpE = {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]}}, ZLen1[`LEN1-3:`NF1+1], ZLen1[`NF1]|ZDenormE};
// extract the fraction and add the nessesary trailing zeros
XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
@ -281,9 +285,9 @@ module unpack (
// also need to take into account possible zero/denorm/inf/NaN values
// convert the smallest precision's exponent to use the largest precision's bias
XExpE = XOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]&~XExpZero|XExpMaxE}}, XLen2[`LEN2-3:`NF2]};
YExpE = YOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]&~YExpZero|YExpMaxE}}, YLen2[`LEN2-3:`NF2]};
ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]};
XExpE = XDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]}}, XLen2[`LEN2-3:`NF2]};
YExpE = YDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]}}, YLen2[`LEN2-3:`NF2]};
ZExpE = ZDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]}}, ZLen2[`LEN2-3:`NF2]};
// extract the fraction and add the nessesary trailing zeros
XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)};
@ -318,7 +322,6 @@ module unpack (
logic [`D_LEN-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision
logic [`S_LEN-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision
logic [`H_LEN-1:0] XLen3, YLen3, ZLen3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision
logic XOrigDenormE, YOrigDenormE; // the original value of XYZ is denormalized
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision
assign XLen1 = &X[`Q_LEN-1:`D_LEN] ? X[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
@ -343,10 +346,10 @@ module unpack (
case (FmtE)
2'b11: begin // if input is quad percision
// This is the original format so set OrigDenorm to 0
XOrigDenormE = 1'b0;
YOrigDenormE = 1'b0;
ZOrigDenormE = 1'b0;
// is the input (in it's original format) denormalized
XDenormE = ~|X[`Q_LEN-2:`Q_NF] & ~XFracZero;
YDenormE = ~|Y[`Q_LEN-2:`Q_NF] & ~YFracZero;
ZDenormE = ~|Z[`Q_LEN-2:`Q_NF] & ~ZFracZero;
// is the exponent non-zero
XExpNonzero = |X[`Q_LEN-2:`Q_NF];
@ -366,9 +369,9 @@ module unpack (
ZExpMaxE = &ZLen1[`D_LEN-2:`D_NF];
// is the input (in it's original format) denormalized
XOrigDenormE = ~|XLen1[`D_LEN-2:`D_NF] & ~XFracZero;
YOrigDenormE = ~|YLen1[`D_LEN-2:`D_NF] & ~YFracZero;
ZOrigDenormE = ~|ZLen1[`D_LEN-2:`D_NF] & ~ZFracZero;
XDenormE = ~|XLen1[`D_LEN-2:`D_NF] & ~XFracZero;
YDenormE = ~|YLen1[`D_LEN-2:`D_NF] & ~YFracZero;
ZDenormE = ~|ZLen1[`D_LEN-2:`D_NF] & ~ZFracZero;
// is the exponent non-zero
XExpNonzero = |XLen1[`D_LEN-2:`D_NF];
@ -383,9 +386,9 @@ module unpack (
ZExpMaxE = &ZLen2[`S_LEN-2:`S_NF];
// is the input (in it's original format) denormalized
XOrigDenormE = ~|XLen2[`S_LEN-2:`S_NF] & ~XFracZero;
YOrigDenormE = ~|YLen2[`S_LEN-2:`S_NF] & ~YFracZero;
ZOrigDenormE = ~|ZLen2[`S_LEN-2:`S_NF] & ~ZFracZero;
XDenormE = ~|XLen2[`S_LEN-2:`S_NF] & ~XFracZero;
YDenormE = ~|YLen2[`S_LEN-2:`S_NF] & ~YFracZero;
ZDenormE = ~|ZLen2[`S_LEN-2:`S_NF] & ~ZFracZero;
// is the exponent non-zero
XExpNonzero = |XLen2[`S_LEN-2:`S_NF];
@ -400,9 +403,9 @@ module unpack (
ZExpMaxE = &ZLen3[`H_LEN-2:`H_NF];
// is the input (in it's original format) denormalized
XOrigDenormE = ~|XLen3[`H_LEN-2:`H_NF] & ~XFracZero;
YOrigDenormE = ~|YLen3[`H_LEN-2:`H_NF] & ~YFracZero;
ZOrigDenormE = ~|ZLen3[`H_LEN-2:`H_NF] & ~ZFracZero;
XDenormE = ~|XLen3[`H_LEN-2:`H_NF] & ~XFracZero;
YDenormE = ~|YLen3[`H_LEN-2:`H_NF] & ~YFracZero;
ZDenormE = ~|ZLen3[`H_LEN-2:`H_NF] & ~ZFracZero;
// is the exponent non-zero
XExpNonzero = |XLen3[`H_LEN-2:`H_NF];
@ -421,9 +424,9 @@ module unpack (
ZSgnE = Z[`Q_LEN-1];
// extract the exponent
XExpE = X[`Q_LEN-2:`Q_NF];
YExpE = Y[`Q_LEN-2:`Q_NF];
ZExpE = Z[`Q_LEN-2:`Q_NF];
XExpE = {X[`Q_LEN-2:`Q_NF+1], X[`Q_NF]|XDenormE};
YExpE = {Y[`Q_LEN-2:`Q_NF+1], Y[`Q_NF]|YDenormE};
ZExpE = {Z[`Q_LEN-2:`Q_NF+1], Z[`Q_NF]|ZDenormE};
// extract the fraction
XFracE = X[`Q_NF-1:0];
@ -446,9 +449,9 @@ module unpack (
// convert the double precsion exponent into quad precsion
XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]&~XExpZero|XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]};
YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]&~YExpZero|YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]};
ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]};
XExpE = {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]}}, XLen1[`D_LEN-3:`D_NF+1], XLen1[`D_NF]|XDenormE};
YExpE = {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]}}, YLen1[`D_LEN-3:`D_NF+1], YLen1[`D_NF]|YDenormE};
ZExpE = {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]}}, ZLen1[`D_LEN-3:`D_NF+1], ZLen1[`D_NF]|ZDenormE};
// extract the fraction and add the nessesary trailing zeros
XFracE = {XLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
@ -470,9 +473,9 @@ module unpack (
// also need to take into account possible zero/denorm/inf/NaN values
// convert the single precsion exponent into quad precsion
XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]&~XExpZero|XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]};
YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]&~YExpZero|YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]};
ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]};
XExpE = {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]}}, XLen2[`S_LEN-3:`S_NF+1], XLen2[`S_NF]|XDenormE};
YExpE = {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]}}, YLen2[`S_LEN-3:`S_NF+1], YLen2[`S_NF]|YDenormE};
ZExpE = {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]}}, ZLen2[`S_LEN-3:`S_NF+1], ZLen2[`S_NF]|ZDenormE};
// extract the fraction and add the nessesary trailing zeros
XFracE = {XLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
@ -494,9 +497,9 @@ module unpack (
// also need to take into account possible zero/denorm/inf/NaN values
// convert the half precsion exponent into quad precsion
XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]&~XExpZero|XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]};
YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]&~YExpZero|YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]};
ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]};
XExpE = {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]}}, XLen3[`H_LEN-3:`H_NF+1], XLen3[`H_NF]|XDenormE};
YExpE = {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]}}, YLen3[`H_LEN-3:`H_NF+1], YLen3[`H_NF]|YDenormE};
ZExpE = {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]}}, ZLen3[`H_LEN-3:`H_NF+1], ZLen3[`H_NF]|ZDenormE};
// extract the fraction and add the nessesary trailing zeros
XFracE = {XLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
@ -537,10 +540,10 @@ module unpack (
assign YSNaNE = YNaNE&~YFracE[`NF-1];
assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];
// is the input denormalized
assign XDenormE = XExpZero & ~XFracZero;
assign YDenormE = YExpZero & ~YFracZero;
assign ZDenormE = ZExpZero & ~ZFracZero;
// // is the input denormalized
// assign XDenormE = XExpZero & ~XFracZero;
// assign YDenormE = YExpZero & ~YFracZero;
// assign ZDenormE = ZExpZero & ~ZFracZero;
// is the input infinity
assign XInfE = XExpMaxE & XFracZero;

View File

@ -0,0 +1,15 @@
//leading zero counter i.e. priority encoder
module lzc #(parameter WIDTH=1) (
input logic [WIDTH-1:0] num,
output logic [$clog2(WIDTH)-1:0] ZeroCnt
);
/* verilator lint_off CMPCONST */
logic [$clog2(WIDTH)-1:0] i;
always_comb begin
i = 0;
while (~num[WIDTH-1-(32)'(i)] & $unsigned(i) <= $unsigned(($clog2(WIDTH))'(WIDTH-1))) i = i+1; // search for leading one
ZeroCnt = i;
end
/* verilator lint_on CMPCONST */
endmodule

View File

@ -126,6 +126,16 @@ module ppa_mult_128 #(parameter WIDTH=128) (
assign y = a * b;
endmodule
module ppa_alu_8 #(parameter WIDTH=8) (
input logic [WIDTH-1:0] A, B,
input logic [2:0] ALUControl,
input logic [2:0] Funct3,
output logic [WIDTH-1:0] Result,
output logic [WIDTH-1:0] Sum);
ppa_alu #(WIDTH) alu (.*);
endmodule
module ppa_alu_16 #(parameter WIDTH=16) (
input logic [WIDTH-1:0] A, B,
input logic [2:0] ALUControl,
@ -133,7 +143,7 @@ module ppa_alu_16 #(parameter WIDTH=16) (
output logic [WIDTH-1:0] Result,
output logic [WIDTH-1:0] Sum);
ppa_alu #(WIDTH) alu_16 (.*);
ppa_alu #(WIDTH) alu (.*);
endmodule
module ppa_alu_32 #(parameter WIDTH=32) (
@ -143,7 +153,7 @@ module ppa_alu_32 #(parameter WIDTH=32) (
output logic [WIDTH-1:0] Result,
output logic [WIDTH-1:0] Sum);
ppa_alu #(WIDTH) alu_32 (.*);
ppa_alu #(WIDTH) alu (.*);
endmodule
module ppa_alu_64 #(parameter WIDTH=64) (
@ -153,7 +163,17 @@ module ppa_alu_64 #(parameter WIDTH=64) (
output logic [WIDTH-1:0] Result,
output logic [WIDTH-1:0] Sum);
ppa_alu #(WIDTH) alu_64 (.*);
ppa_alu #(WIDTH) alu (.*);
endmodule
module ppa_alu_128 #(parameter WIDTH=128) (
input logic [WIDTH-1:0] A, B,
input logic [2:0] ALUControl,
input logic [2:0] Funct3,
output logic [WIDTH-1:0] Result,
output logic [WIDTH-1:0] Sum);
ppa_alu #(WIDTH) alu (.*);
endmodule
module ppa_alu #(parameter WIDTH=32) (
@ -209,9 +229,11 @@ module ppa_alu #(parameter WIDTH=32) (
3'b111: FullResult = A & B; // and
endcase
// support W-type RV64I ADDW/SUBW/ADDIW/Shifts that sign-extend 32-bit result to 64 bits
if (WIDTH==64) assign Result = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
else assign Result = FullResult;
assign Result = FullResult;
// not using W64 so it has the same architecture regardless of width
// // support W-type RV64I ADDW/SUBW/ADDIW/Shifts that sign-extend 32-bit result to 64 bits
// if (WIDTH==64) assign Result = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
// else assign Result = FullResult;
endmodule
module ppa_shiftleft_8 #(parameter WIDTH=8) (
@ -254,51 +276,6 @@ module ppa_shiftleft_128 #(parameter WIDTH=128) (
assign y = a << amt;
endmodule
module ppa_shifter_8 #(parameter WIDTH=8) (
input logic [WIDTH-1:0] A,
input logic [$clog2(WIDTH)-1:0] Amt,
input logic Right, Arith, W64,
output logic [WIDTH-1:0] Y);
ppa_shifter #(WIDTH) sh (.*);
endmodule
module ppa_shifter_16 #(parameter WIDTH=16) (
input logic [WIDTH-1:0] A,
input logic [$clog2(WIDTH)-1:0] Amt,
input logic Right, Arith, W64,
output logic [WIDTH-1:0] Y);
ppa_shifter #(WIDTH) sh (.*);
endmodule
module ppa_shifter_32 #(parameter WIDTH=32) (
input logic [WIDTH-1:0] A,
input logic [$clog2(WIDTH)-1:0] Amt,
input logic Right, Arith, W64,
output logic [WIDTH-1:0] Y);
ppa_shifter #(WIDTH) sh (.*);
endmodule
module ppa_shifter_64 #(parameter WIDTH=64) (
input logic [WIDTH-1:0] A,
input logic [$clog2(WIDTH)-1:0] Amt,
input logic Right, Arith, W64,
output logic [WIDTH-1:0] Y);
ppa_shifter #(WIDTH) sh (.*);
endmodule
module ppa_shifter_128 #(parameter WIDTH=128) (
input logic [WIDTH-1:0] A,
input logic [$clog2(WIDTH)-1:0] Amt,
input logic Right, Arith, W64,
output logic [WIDTH-1:0] Y);
ppa_shifter #(WIDTH) sh (.*);
endmodule
module ppa_shifter #(parameter WIDTH=32) (
input logic [WIDTH-1:0] A,
input logic [$clog2(WIDTH)-1:0] Amt,
@ -313,29 +290,35 @@ module ppa_shifter #(parameter WIDTH=32) (
// For RV64, 32 and 64-bit shifts are needed, with sign extension.
// funnel shifter input (see CMOS VLSI Design 4e Section 11.8.1, note Table 11.11 shift types wrong)
if (WIDTH == 64 | WIDTH ==128) begin:shifter // RV64 or 128
always_comb // funnel mux
if (W64) begin // 32-bit shifts
if (Right)
if (Arith) z = {{WIDTH{1'b0}}, {WIDTH/2 -1{A[WIDTH/2 -1]}}, A[WIDTH/2 -1:0]};
else z = {{WIDTH*3/2-1{1'b0}}, A[WIDTH/2 -1:0]};
else z = {{WIDTH/2{1'b0}}, A[WIDTH/2 -1:0], {WIDTH-1{1'b0}}};
end else begin
if (Right)
if (Arith) z = {{WIDTH-1{A[WIDTH-1]}}, A};
else z = {{WIDTH-1{1'b0}}, A};
else z = {A, {WIDTH-1{1'b0}}};
end
assign amttrunc = W64 ? {1'b0, Amt[$clog2(WIDTH)-2:0]} : Amt; // 32 or 64-bit shift
end else begin:shifter // RV32 or less
always_comb // funnel mux
// if (WIDTH == 64 | WIDTH ==128) begin:shifter // RV64 or 128
// always_comb // funnel mux
// if (W64) begin // 32-bit shifts
// if (Right)
// if (Arith) z = {{WIDTH{1'b0}}, {WIDTH/2 -1{A[WIDTH/2 -1]}}, A[WIDTH/2 -1:0]};
// else z = {{WIDTH*3/2-1{1'b0}}, A[WIDTH/2 -1:0]};
// else z = {{WIDTH/2{1'b0}}, A[WIDTH/2 -1:0], {WIDTH-1{1'b0}}};
// end else begin
// if (Right)
// if (Arith) z = {{WIDTH-1{A[WIDTH-1]}}, A};
// else z = {{WIDTH-1{1'b0}}, A};
// else z = {A, {WIDTH-1{1'b0}}};
// end
// assign amttrunc = W64 ? {1'b0, Amt[$clog2(WIDTH)-2:0]} : Amt; // 32 or 64-bit shift
// end else begin:shifter // RV32 or less
// always_comb // funnel mux
// if (Right)
// if (Arith) z = {{WIDTH-1{A[WIDTH-1]}}, A};
// else z = {{WIDTH-1{1'b0}}, A};
// else z = {A, {WIDTH-1{1'b0}}};
// assign amttrunc = Amt; // shift amount
// end
always_comb // funnel mux
if (Right)
if (Arith) z = {{WIDTH-1{A[WIDTH-1]}}, A};
else z = {{WIDTH-1{1'b0}}, A};
else z = {A, {WIDTH-1{1'b0}}};
assign amttrunc = Amt; // shift amount
end
// opposite offset for right shfits
assign offset = Right ? amttrunc : ~amttrunc;
@ -345,7 +328,51 @@ module ppa_shifter #(parameter WIDTH=32) (
assign Y = zshift[WIDTH-1:0];
endmodule
// just report one hot
// module ppa_shifter_8 #(parameter WIDTH=8) (
// input logic [WIDTH-1:0] A,
// input logic [$clog2(WIDTH)-1:0] Amt,
// input logic Right, Arith, W64,
// output logic [WIDTH-1:0] Y);
// ppa_shifter #(WIDTH) sh (.*);
// endmodule
// module ppa_shifter_16 #(parameter WIDTH=16) (
// input logic [WIDTH-1:0] A,
// input logic [$clog2(WIDTH)-1:0] Amt,
// input logic Right, Arith, W64,
// output logic [WIDTH-1:0] Y);
// ppa_shifter #(WIDTH) sh (.*);
// endmodule
// module ppa_shifter_32 #(parameter WIDTH=32) (
// input logic [WIDTH-1:0] A,
// input logic [$clog2(WIDTH)-1:0] Amt,
// input logic Right, Arith, W64,
// output logic [WIDTH-1:0] Y);
// ppa_shifter #(WIDTH) sh (.*);
// endmodule
// module ppa_shifter_64 #(parameter WIDTH=64) (
// input logic [WIDTH-1:0] A,
// input logic [$clog2(WIDTH)-1:0] Amt,
// input logic Right, Arith, W64,
// output logic [WIDTH-1:0] Y);
// ppa_shifter #(WIDTH) sh (.*);
// endmodule
// module ppa_shifter_128 #(parameter WIDTH=128) (
// input logic [WIDTH-1:0] A,
// input logic [$clog2(WIDTH)-1:0] Amt,
// input logic Right, Arith, W64,
// output logic [WIDTH-1:0] Y);
// ppa_shifter #(WIDTH) sh (.*);
// endmodule
module ppa_prioritythermometer #(parameter N = 8) (
input logic [N-1:0] a,
output logic [N-1:0] y);
@ -538,7 +565,7 @@ endmodule
// *** some way to express data-critical inputs
module ppa_flop_8 #(parameter WIDTH = 8) (
module ppa_flop #(parameter WIDTH = 8) (
input logic clk,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
@ -547,13 +574,26 @@ module ppa_flop_8 #(parameter WIDTH = 8) (
q <= #1 d;
endmodule
module ppa_flop_8 #(parameter WIDTH = 8) (
input logic clk,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
logic [WIDTH-1:0] q1;
ppa_flop #(WIDTH) f1(clk, d, q1);
ppa_flop #(WIDTH) f2(clk, q1, q);
endmodule
module ppa_flop_16 #(parameter WIDTH = 16) (
input logic clk,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_flop #(WIDTH) f1(clk, d, q1);
ppa_flop #(WIDTH) f2(clk, q1, q);
endmodule
module ppa_flop_32 #(parameter WIDTH = 32) (
@ -561,8 +601,10 @@ module ppa_flop_32 #(parameter WIDTH = 32) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_flop #(WIDTH) f1(clk, d, q1);
ppa_flop #(WIDTH) f2(clk, q1, q);
endmodule
module ppa_flop_64 #(parameter WIDTH = 64) (
@ -570,8 +612,10 @@ module ppa_flop_64 #(parameter WIDTH = 64) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_flop #(WIDTH) f1(clk, d, q1);
ppa_flop #(WIDTH) f2(clk, q1, q);
endmodule
module ppa_flop_128 #(parameter WIDTH = 128) (
@ -579,8 +623,20 @@ module ppa_flop_128 #(parameter WIDTH = 128) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
logic [WIDTH-1:0] q1;
ppa_flop #(WIDTH) f1(clk, d, q1);
ppa_flop #(WIDTH) f2(clk, q1, q);
endmodule
module ppa_flopr #(parameter WIDTH = 8) (
input logic clk, reset,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
q <= #1 d;
if (reset) q <= #1 0;
else q <= #1 d;
endmodule
module ppa_flopr_8 #(parameter WIDTH = 8) (
@ -588,9 +644,10 @@ module ppa_flopr_8 #(parameter WIDTH = 8) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
if (reset) q <= #1 0;
else q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
endmodule
module ppa_flopr_16 #(parameter WIDTH = 16) (
@ -598,9 +655,10 @@ module ppa_flopr_16 #(parameter WIDTH = 16) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
if (reset) q <= #1 0;
else q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
endmodule
module ppa_flopr_32 #(parameter WIDTH = 32) (
@ -608,9 +666,10 @@ module ppa_flopr_32 #(parameter WIDTH = 32) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
if (reset) q <= #1 0;
else q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
endmodule
module ppa_flopr_64 #(parameter WIDTH = 64) (
@ -618,9 +677,10 @@ module ppa_flopr_64 #(parameter WIDTH = 64) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
if (reset) q <= #1 0;
else q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
endmodule
module ppa_flopr_128 #(parameter WIDTH = 128) (
@ -628,7 +688,18 @@ module ppa_flopr_128 #(parameter WIDTH = 128) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
logic [WIDTH-1:0] q1;
ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
endmodule
module ppa_floprasync #(parameter WIDTH = 8) (
input logic clk, reset,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk or posedge reset)
if (reset) q <= #1 0;
else q <= #1 d;
endmodule
@ -638,9 +709,10 @@ module ppa_floprasync_8 #(parameter WIDTH = 8) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk or posedge reset)
if (reset) q <= #1 0;
else q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
endmodule
module ppa_floprasync_16 #(parameter WIDTH = 16) (
@ -648,9 +720,10 @@ module ppa_floprasync_16 #(parameter WIDTH = 16) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk or posedge reset)
if (reset) q <= #1 0;
else q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
endmodule
module ppa_floprasync_32 #(parameter WIDTH = 32) (
@ -658,9 +731,10 @@ module ppa_floprasync_32 #(parameter WIDTH = 32) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk or posedge reset)
if (reset) q <= #1 0;
else q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
endmodule
module ppa_floprasync_64 #(parameter WIDTH = 64) (
@ -668,9 +742,10 @@ module ppa_floprasync_64 #(parameter WIDTH = 64) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk or posedge reset)
if (reset) q <= #1 0;
else q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
endmodule
module ppa_floprasync_128 #(parameter WIDTH = 128) (
@ -678,9 +753,20 @@ module ppa_floprasync_128 #(parameter WIDTH = 128) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk or posedge reset)
if (reset) q <= #1 0;
else q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
endmodule
module ppa_flopenr #(parameter WIDTH = 8) (
input logic clk, reset, en,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
if (reset) q <= #1 0;
else if (en) q <= #1 d;
endmodule
module ppa_flopenr_8 #(parameter WIDTH = 8) (
@ -688,9 +774,10 @@ module ppa_flopenr_8 #(parameter WIDTH = 8) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
if (reset) q <= #1 0;
else if (en) q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
endmodule
module ppa_flopenr_16 #(parameter WIDTH = 16) (
@ -698,9 +785,10 @@ module ppa_flopenr_16 #(parameter WIDTH = 16) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
if (reset) q <= #1 0;
else if (en) q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
endmodule
module ppa_flopenr_32 #(parameter WIDTH = 32) (
@ -708,9 +796,10 @@ module ppa_flopenr_32 #(parameter WIDTH = 32) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
if (reset) q <= #1 0;
else if (en) q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
endmodule
module ppa_flopenr_64 #(parameter WIDTH = 64) (
@ -718,9 +807,10 @@ module ppa_flopenr_64 #(parameter WIDTH = 64) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
if (reset) q <= #1 0;
else if (en) q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
endmodule
module ppa_flopenr_128 #(parameter WIDTH = 128) (
@ -728,9 +818,10 @@ module ppa_flopenr_128 #(parameter WIDTH = 128) (
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
always_ff @(posedge clk)
if (reset) q <= #1 0;
else if (en) q <= #1 d;
logic [WIDTH-1:0] q1;
ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
endmodule
module ppa_csa_8 #(parameter WIDTH = 8) (

View File

@ -8,3 +8,9 @@ testgen: testgen.c
qst2: qst2.c
gcc qst2.c -lm -o qst2
gcc -lm -o testgen testgen.c
./testgen
exptestgen: exptestgen.c
gcc -lm -o exptestgen exptestgen.c
./exptestgen

View File

@ -21,7 +21,7 @@
/* Prototypes */
void output(FILE *fptr, int e1, double a, int e2, double b, int r_exp, double r_mantissa);
void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac);
void printhex(FILE *fptr, double x);
double random_input(void);
double random_input_e(void);
@ -31,12 +31,13 @@ double random_input_e(void);
void main(void)
{
FILE *fptr;
// e1 & e2 are exponents
// a & b are mantissas
// r_mantissa is result of mantissa divsion
// r_exp is result of exponent division
double a, b, r_mantissa, r_exp;
int e1, e2;
// aExp & bExp are exponents
// aFrac & bFrac are mantissas
// rFrac is result of fractional divsion
// rExp is result of exponent division
double aFrac, bFrac, rFrac;
int aExp, bExp, rExp;
int aSign, bSign, rSign;
double mantissa[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
1.75, 1.875, 1.99999,
1.1, 1.2, 1.01, 1.001, 1.0001,
@ -51,25 +52,28 @@ void main(void)
}
for (i=0; i<ENTRIES; i++) {
b = mantissa[i];
e2 = exponent[i] + bias;
bFrac = mantissa[i];
bExp = exponent[i] + bias;
bSign = i%2;
for (j=0; j<ENTRIES; j++) {
a = mantissa[j];
e1 = exponent[j] + bias;
r_mantissa = a/b;
r_exp = e1 - e2 + bias;
output(fptr, e1, a, e2, b, r_exp, r_mantissa);
aFrac = mantissa[j];
aExp = exponent[j] + bias;
aSign = j%2;
rFrac = aFrac/bFrac;
rExp = aExp - bExp + bias;
rSign = (i+j)%2;
output(fptr, aSign, aExp, aFrac, bSign, bExp, bFrac, rSign, rExp, rFrac);
}
}
// for (i = 0; i< RANDOM_VECS; i++) {
// a = random_input();
// b = random_input();
// e1 = random_input_e() + BIAS; // make new random input function for exponents
// e2 = random_input_e() + BIAS;
// r_mantissa = a/b;
// r_exp = e1 - e2 + BIAS;
// output(fptr, e1, a, e2, b, r_exp, r_mantissa);
// aFrac = random_input();
// bFrac = random_input();
// aExp = random_input_e() + BIAS; // make new random input function for exponents
// bExp = random_input_e() + BIAS;
// rFrac = a/b;
// rEx[] = e1 - e2 + BIAS;
// output(fptr, aExp, aFrac, bExp, bFrac, rExp, rFrac);
// }
fclose(fptr);
@ -77,19 +81,21 @@ void main(void)
/* Functions */
void output(FILE *fptr, int e1, double a, int e2, double b, int r_exp, double r_mantissa)
void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac)
{
fprintf(fptr, "%03x", e1);
//printhex(fptr, e1, exp);
printhex(fptr, a);
// Print a in standard double format
fprintf(fptr, "%03x", aExp|(aSign<<11));
printhex(fptr, aFrac);
fprintf(fptr, "_");
fprintf(fptr, "%03x", e2);
//printhex(fptr, e2, exp);
printhex(fptr, b);
// Print b in standard double format
fprintf(fptr, "%03x", bExp|(bSign<<11));
printhex(fptr, bFrac);
fprintf(fptr, "_");
fprintf(fptr, "%03x", r_exp);
//printhex(fptr, r_exp, exp);
printhex(fptr, r_mantissa);
// Print r in standard double format
fprintf(fptr, "%03x", rExp|(rSign<<11));
printhex(fptr, rFrac);
fprintf(fptr, "\n");
}

View File

@ -37,6 +37,8 @@ module srt #(parameter Nf=52) (
input logic Flush, // *** multiple pipe stages
// Floating Point Inputs
// later add exponents, signs, special cases
input logic XSign, YSign,
input logic [`NE-1:0] XExp, YExp,
input logic [Nf-1:0] SrcXFrac, SrcYFrac,
input logic [`XLEN-1:0] SrcA, SrcB,
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
@ -44,14 +46,18 @@ module srt #(parameter Nf=52) (
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputss
input logic Sqrt, // perform square root, not divide
output logic [Nf-1:0] Quot, Rem, // *** later handle integers
output logic rsign,
output logic [Nf-1:0] Quot, Rem, QuotOTFC, // *** later handle integers
output logic [`NE-1:0] rExp,
output logic [3:0] Flags
);
logic qp, qz, qm; // quotient is +1, 0, or -1
logic [Nf-1:0] X, Dpreproc;
logic [Nf+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
logic [Nf+2:0] rp, rm;
logic [`NE-1:0] calcExp;
logic calcSign;
logic [Nf-1:0] X, Dpreproc;
logic [Nf+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
logic [Nf+2:0] rp, rm;
srtpreproc #(Nf) preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc);
@ -70,6 +76,8 @@ module srt #(parameter Nf=52) (
// Accumulate quotient digits in a shift register
qsel #(Nf) qsel(WS[55:52], WC[55:52], qp, qz, qm);
qacc #(Nf+3) qacc(clk, Start, qp, qz, qm, rp, rm);
flopen #(`NE) expflop(clk, Start, calcExp, rExp);
flopen #(1) signflop(clk, Start, calcSign, rsign);
// Divisor Selection logic
inv dinv(D, Db);
@ -77,6 +85,12 @@ module srt #(parameter Nf=52) (
// Partial Product Generation
csa csa(WS, WC, Dsel, qp, WSA, WCA);
otfc2 otfc2(clk, Start, qp, qz, qm, QuotOTFC);
expcalc expcalc(.XExp, .YExp, .calcExp);
signcalc signcalc(.XSign, .YSign, .calcSign);
srtpostproc postproc(rp, rm, Quot);
endmodule
@ -198,9 +212,57 @@ module qacc #(parameter N=55) (
end */
endmodule
//////////
// otfc //
//////////
module otfc2 #(parameter N=52) (
input logic clk,
input logic Start,
input logic qp, qz, qm,
output logic [N-1:0] r
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-2 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
logic [N+2:0] Q, QM, QNext, QMNext;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [N+1:0] QR, QMR;
flopr #(N+3) Qreg(clk, Start, QNext, Q);
flopr #(N+3) QMreg(clk, Start, QMNext, QM);
always_comb begin
QR = Q[N+1:0];
QMR = QM[N+1:0]; // Shift Q and QM
if (qp) begin
QNext = {QR, 1'b1};
QMNext = {QR, 1'b0};
end else if (qz) begin
QNext = {QR, 1'b0};
QMNext = {QMR, 1'b1};
end else begin // If qp and qz are not true, then qm is
QNext = {QMR, 1'b1};
QMNext = {QMR, 1'b0};
end
end
assign r = Q[54] ? Q[53:2] : Q[52:1];
endmodule
/////////
// inv //
/////////
module inv(input logic [55:0] in,
output logic [55:0] out);
@ -247,6 +309,33 @@ module csa #(parameter N=56) (
(in2[54:0] & in3[54:0]), cin};
endmodule
//////////////
// expcalc //
//////////////
module expcalc(
input logic [`NE-1:0] XExp, YExp,
output logic [`NE-1:0] calcExp
);
assign calcExp = XExp - YExp + 11'b01111111111;
endmodule
//////////////
// signcalc //
//////////////
module signcalc(
input logic XSign, YSign,
output logic calcSign
);
assign calcSign = XSign ^ YSign;
endmodule
//////////////
// finaladd //
//////////////

View File

@ -38,35 +38,41 @@ endmodule
//////////
module testbench;
logic clk;
logic req;
logic req;
logic done;
logic [51:0] a;
logic [51:0] b;
logic [51:0] r;
logic [54:0] rp, rm; // positive quotient digits
logic [63:0] a, b;
logic [51:0] afrac, bfrac;
logic [10:0] aExp, bExp;
logic asign, bsign;
logic [51:0] r, rOTFC;
logic [54:0] rp, rm; // positive quotient digits
// Test parameters
parameter MEM_SIZE = 40000;
parameter MEM_WIDTH = 52+52+52;
parameter MEM_WIDTH = 64+64+64;
`define memr 51:0
`define memb 103:52
`define mema 155:104
`define memr 63:0
`define memb 127:64
`define mema 191:128
// Test logicisters
logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file
logic [MEM_WIDTH-1:0] Vec; // Verilog doesn't allow direct access to a
// bit field of an array
logic [51:0] correctr, nextr, diffn, diffp;
logic [63:0] correctr, nextr, diffn, diffp;
logic [10:0] rExp;
logic rsign;
integer testnum, errors;
// Divider
srt #(52) srt(.clk, .Start(req),
.Stall(1'b0), .Flush(1'b0),
.SrcXFrac(a), .SrcYFrac(b),
.XExp(aExp), .YExp(bExp), .rExp,
.XSign(asign), .YSign(bsign), .rsign,
.SrcXFrac(afrac), .SrcYFrac(bfrac),
.SrcA('0), .SrcB('0), .Fmt(2'b00),
.W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0),
.Quot(r), .Rem(), .Flags());
.Quot(r), .QuotOTFC(rOTFC), .Rem(), .Flags());
// Counter
counter counter(clk, req, done);
@ -88,7 +94,9 @@ module testbench;
$readmemh ("testvectors", Tests);
Vec = Tests[testnum];
a = Vec[`mema];
{asign, aExp, afrac} = a;
b = Vec[`memb];
{bsign, bExp, bfrac} = b;
nextr = Vec[`memr];
req <= #5 1;
end
@ -100,16 +108,23 @@ module testbench;
if (done)
begin
req <= #5 1;
diffp = correctr - r;
diffn = r - correctr;
if (($signed(diffn) > 1) | ($signed(diffp) > 1)) // check if accurate to 1 ulp
diffp = correctr[51:0] - r;
diffn = r - correctr[51:0];
if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1)) // check if accurate to 1 ulp
begin
errors = errors+1;
$display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp);
$display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);
$display("failed\n");
$stop;
end
if (a === 52'hxxxxxxxxxxxxx)
if (r !== rOTFC) // Check if OTFC works
begin
errors = errors+1;
$display("OTFC is %h, should be %h\n", rOTFC, r);
$display("failed\n");
// $stop;
end
if (afrac === 52'hxxxxxxxxxxxxx)
begin
$display("%d Tests completed successfully", testnum);
$stop;
@ -122,9 +137,11 @@ module testbench;
testnum = testnum+1;
Vec = Tests[testnum];
$display("a = %h b = %h",a,b);
a = Vec[`mema];
b = Vec[`memb];
nextr = Vec[`memr];
a = Vec[`mema];
{asign, aExp, afrac} = a;
b = Vec[`memb];
{bsign, bExp, bfrac} = b;
nextr = Vec[`memr];
end
end

File diff suppressed because it is too large Load Diff

View File

@ -1,123 +0,0 @@
// testbench
module testbench ();
logic [63:0] op1;
logic [63:0] op2;
logic [2:0] FOpCtrlE;
logic [2:0] FrmE;
logic op_type;
logic FmtE;
logic OvEn;
logic UnEn;
logic XSgnE, YSgnE, ZSgnE;
logic XSgnM, YSgnM;
logic [10:0] XExpE, YExpE, ZExpE;
logic [10:0] XExpM, YExpM, ZExpM;
logic [52:0] XManE, YManE, ZManE;
logic [52:0] XManM, YManM, ZManM;
logic [10:0] BiasE;
logic XNaNE, YNaNE, ZNaNE;
logic XNaNM, YNaNM, ZNaNM;
logic XSNaNE, YSNaNE, ZSNaNE;
logic XSNaNM, YSNaNM, ZSNaNM;
logic XDenormE, YDenormE, ZDenormE;
logic XZeroE, YZeroE, ZZeroE;
logic XZeroM, YZeroM, ZZeroM;
logic XInfE, YInfE, ZInfE;
logic XInfM, YInfM, ZInfM;
logic XExpMaxE;
logic XNormE;
logic FDivBusyE;
logic start;
logic reset;
logic XDenorm;
logic YDenorm;
logic [63:0] AS_Result;
logic [4:0] Flags;
logic Denorm;
logic done;
logic clk;
logic [63:0] yexpected;
logic [63:0] vectornum, errors; // bookkeeping variables
logic [199:0] testvectors[50000:0]; // array of testvectors
logic [7:0] flags_expected;
integer handle3;
integer desc3;
// instantiate device under test
unpack unpack(.X(op1), .Y(op2), .Z(64'h0), .FOpCtrlE, .FmtE,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
fpdiv fdivsqrt (.op1, .op2, .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
.reset, .clk, .start, .P(~FmtE), .OvEn(1'b0), .UnEn(1'b0),
.XNaNQ(XNaNE), .YNaNQ(YNaNE), .XInfQ(XInfE), .YInfQ(YInfE), .XZeroQ(XZeroE), .YZeroQ(YZeroE),
.FDivBusyE, .done(done), .AS_Result(AS_Result), .Flags(Flags));
// current fpdivsqrt does not operation on denorms yet
assign Denorm = XDenormE | YDenormE | Flags[3];
// generate clock to sequence tests
always
begin
clk = 1; # 5; clk = 0; # 5;
end
initial
begin
handle3 = $fopen("f64_div_rne.out");
$readmemh("../testbench/fp/vectors/f64_div_rne.tv", testvectors);
vectornum = 0; errors = 0;
start = 1'b0;
// reset
reset = 1; #27; reset = 0;
end
initial
begin
desc3 = handle3;
// Operation (if applicable)
#0 op_type = 1'b0;
// Precision (32-bit or 64-bit)
#0 FmtE = 1'b1;
// From fctrl logic to dictate operation
#0 FOpCtrlE = 3'b000;
// Rounding Mode
#0 FrmE = 3'b000;
// Trap masking (n/a for RISC-V)
#0 OvEn = 1'b0;
#0 UnEn = 1'b0;
end
always @(posedge clk)
begin
if (~reset)
begin
#0; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
#50 start = 1'b1;
repeat (2)
@(posedge clk);
// deassert start after 2 cycles
start = 1'b0;
repeat (10)
@(posedge clk);
$fdisplay(desc3, "%h_%h_%h_%b_%b | %h_%b", op1, op2, AS_Result, Flags, Denorm, yexpected, (AS_Result==yexpected));
vectornum = vectornum + 1;
if (testvectors[vectornum] === 200'bx) begin
$display("%d tests completed", vectornum);
$finish;
end
end // if (~reset)
$display("%d vectors processed", vectornum);
end // always @ (posedge clk)
endmodule // tb

View File

@ -17,6 +17,7 @@ module testbenchfp;
string FmaRnmTests[]; // list of FMA round to nearest max magnitude
logic [2:0] OpCtrl[]; // list of op controls
logic [2:0] Unit[]; // list of units being tested
logic WriteInt[]; // Is being written to integer resgiter
logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
logic [1:0] Fmt[]; // list of formats for the other units
logic [1:0] FmaFmt[]; // list of formats for the FMA
@ -37,6 +38,7 @@ module testbenchfp;
logic [1:0] FmaFmtVal, FmtVal; // value of the current Fmt
logic [2:0] UnitVal, OpCtrlVal, FrmVal; // vlaue of the currnet Unit/OpCtrl/FrmVal
logic WriteIntVal; // value of the current WriteInt
logic [`FLEN-1:0] X, Y, Z; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRneX, FmaRneY, FmaRneZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRzX, FmaRzY, FmaRzZ; // inputs read from TestFloat
@ -53,8 +55,9 @@ module testbenchfp;
logic [4:0] ResFlg; // Result flags
logic [4:0] FmaRneResFlg, FmaRzResFlg, FmaRuResFlg, FmaRdResFlg, FmaRnmResFlg; // flags read form testfloat
logic [`FPSIZES/3:0] ModFmt, FmaModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad
logic [`FLEN-1:0] FmaRes, DivRes, CmpRes, CvtRes, CvtFpRes; // Results from each unit
logic [4:0] FmaFlg, CvtFpFlg, DivFlg, CvtIntFlg, CmpFlg; // Outputed flags
logic [`FLEN-1:0] FmaRes, DivRes, CmpRes, CvtRes; // Results from each unit
logic [`XLEN-1:0] CvtIntRes; // Results from each unit
logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags
logic ResNaN, FmaRneResNaN, FmaRzResNaN, FmaRuResNaN, FmaRdResNaN, FmaRnmResNaN; // is the outputed result NaN
logic AnsNaN, FmaRneAnsNaN, FmaRzAnsNaN, FmaRuAnsNaN, FmaRdAnsNaN, FmaRnmAnsNaN; // is the correct answer NaN
logic NaNGood, FmaRneNaNGood, FmaRzNaNGood, FmaRuNaNGood, FmaRdNaNGood, FmaRnmNaNGood; // is the NaN answer correct
@ -108,7 +111,6 @@ module testbenchfp;
logic FmaRdXZero, FmaRdYZero, FmaRdZZero;
logic FmaRnmXZero, FmaRnmYZero, FmaRnmZZero;
logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones
logic ZOrigDenorm, FmaRneZOrigDenorm, FmaRzZOrigDenorm, FmaRuZOrigDenorm, FmaRdZOrigDenorm, FmaRnmZOrigDenorm; // is the original precision dnormalized
// in-between FMA signals
logic Mult;
@ -150,6 +152,7 @@ module testbenchfp;
Tests = {Tests, f128rv32cvtint};
// add the op-codes for these tests to the op-code list
OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
// add what unit is used and the fmt to their lists (one for each test)
for(int i = 0; i<20; i++) begin
Unit = {Unit, `CVTINTUNIT};
@ -159,6 +162,7 @@ module testbenchfp;
Tests = {Tests, f128rv64cvtint};
// add the op-codes for these tests to the op-code list
OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
// add what unit is used and the fmt to their lists (one for each test)
for(int i = 0; i<20; i++) begin
Unit = {Unit, `CVTINTUNIT};
@ -172,39 +176,55 @@ module testbenchfp;
Tests = {Tests, f128f64cvt};
// add the op-ctrls (i.e. the format of the result)
OpCtrl = {OpCtrl, 3'b01, 3'b11};
WriteInt = {WriteInt, 1'b0, 1'b0};
// add the unit being tested and fmt (input format)
for(int i = 0; i<10; i++) begin
for(int i = 0; i<5; i++) begin
Unit = {Unit, `CVTFPUNIT};
Fmt = {Fmt, 2'b11};
end
for(int i = 0; i<5; i++) begin
Unit = {Unit, `CVTFPUNIT};
Fmt = {Fmt, 2'b01};
end
end
if(`F_SUPPORTED) begin // if single precision is supported
// add the 128 <-> 32 bit conversions to the to-be-tested list
Tests = {Tests, f128f32cvt};
// add the op-ctrls (i.e. the format of the result)
OpCtrl = {OpCtrl, 3'b00, 3'b11};
WriteInt = {WriteInt, 1'b0, 1'b0};
// add the unit being tested and fmt (input format)
for(int i = 0; i<10; i++) begin
for(int i = 0; i<5; i++) begin
Unit = {Unit, `CVTFPUNIT};
Fmt = {Fmt, 2'b11};
end
for(int i = 0; i<5; i++) begin
Unit = {Unit, `CVTFPUNIT};
Fmt = {Fmt, 2'b00};
end
end
if(`ZFH_SUPPORTED) begin // if half precision is supported
// add the 128 <-> 16 bit conversions to the to-be-tested list
Tests = {Tests, f128f16cvt};
// add the op-ctrls (i.e. the format of the result)
OpCtrl = {OpCtrl, 3'b10, 3'b11};
WriteInt = {WriteInt, 1'b0, 1'b0};
// add the unit being tested and fmt (input format)
for(int i = 0; i<10; i++) begin
for(int i = 0; i<5; i++) begin
Unit = {Unit, `CVTFPUNIT};
Fmt = {Fmt, 2'b11};
end
for(int i = 0; i<5; i++) begin
Unit = {Unit, `CVTFPUNIT};
Fmt = {Fmt, 2'b10};
end
end
end
if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested
// add the compare tests/op-ctrls/unit/fmt
Tests = {Tests, f128cmp};
OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
for(int i = 0; i<15; i++) begin
Unit = {Unit, `CMPUNIT};
Fmt = {Fmt, 2'b11};
@ -214,6 +234,7 @@ module testbenchfp;
// add the addition tests/op-ctrls/unit/fmt
Tests = {Tests, f128add};
OpCtrl = {OpCtrl, `ADD_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b11};
@ -223,6 +244,7 @@ module testbenchfp;
// add the subtraction tests/op-ctrls/unit/fmt
Tests = {Tests, f128sub};
OpCtrl = {OpCtrl, `SUB_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b11};
@ -232,6 +254,7 @@ module testbenchfp;
// add the multiply tests/op-ctrls/unit/fmt
Tests = {Tests, f128mul};
OpCtrl = {OpCtrl, `MUL_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b11};
@ -241,6 +264,7 @@ module testbenchfp;
// add the divide tests/op-ctrls/unit/fmt
Tests = {Tests, f128div};
OpCtrl = {OpCtrl, `DIV_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b11};
@ -250,6 +274,7 @@ module testbenchfp;
// add the square-root tests/op-ctrls/unit/fmt
Tests = {Tests, f128sqrt};
OpCtrl = {OpCtrl, `SQRT_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b11};
@ -264,9 +289,7 @@ module testbenchfp;
FmaRdTests = {FmaRdTests, "f128_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f128_mulAdd_rnm.tv"};
// add the format for the Fma
for(int i = 0; i<5; i++) begin
FmaFmt = {FmaFmt, 2'b11};
end
FmaFmt = {FmaFmt, 2'b11};
end
end
if (`D_SUPPORTED) begin // if double precision is supported
@ -274,6 +297,7 @@ module testbenchfp;
Tests = {Tests, f64rv32cvtint};
// add the op-codes for these tests to the op-code list
OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
// add what unit is used and the fmt to their lists (one for each test)
for(int i = 0; i<20; i++) begin
Unit = {Unit, `CVTINTUNIT};
@ -281,9 +305,10 @@ module testbenchfp;
end
if (`XLEN == 64) begin // if 64-bit integers are being supported
Tests = {Tests, f64rv64cvtint};
// add the op-codes for these tests to the op-code list
// add the op-codes for these tests to the op-code list
OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
// add what unit is used and the fmt to their lists (one for each test)
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
// add what unit is used and the fmt to their lists (one for each test)
for(int i = 0; i<20; i++) begin
Unit = {Unit, `CVTINTUNIT};
Fmt = {Fmt, 2'b01};
@ -296,28 +321,39 @@ module testbenchfp;
Tests = {Tests, f64f32cvt};
// add the op-ctrls (i.e. the format of the result)
OpCtrl = {OpCtrl, 3'b00, 3'b01};
WriteInt = {WriteInt, 1'b0, 1'b0};
// add the unit being tested and fmt (input format)
for(int i = 0; i<10; i++) begin
for(int i = 0; i<5; i++) begin
Unit = {Unit, `CVTFPUNIT};
Fmt = {Fmt, 2'b01};
end
for(int i = 0; i<5; i++) begin
Unit = {Unit, `CVTFPUNIT};
Fmt = {Fmt, 2'b00};
end
end
if(`ZFH_SUPPORTED) begin // if half precision is supported
// add the 64 <-> 16 bit conversions to the to-be-tested list
Tests = {Tests, f64f16cvt};
// add the op-ctrls (i.e. the format of the result)
OpCtrl = {OpCtrl, 3'b10, 3'b01};
WriteInt = {WriteInt, 1'b0, 1'b0};
// add the unit being tested and fmt (input format)
for(int i = 0; i<10; i++) begin
for(int i = 0; i<5; i++) begin
Unit = {Unit, `CVTFPUNIT};
Fmt = {Fmt, 2'b01};
end
for(int i = 0; i<5; i++) begin
Unit = {Unit, `CVTFPUNIT};
Fmt = {Fmt, 2'b10};
end
end
end
if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f64cmp};
OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
for(int i = 0; i<15; i++) begin
Unit = {Unit, `CMPUNIT};
Fmt = {Fmt, 2'b01};
@ -327,6 +363,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f64add};
OpCtrl = {OpCtrl, `ADD_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b01};
@ -336,6 +373,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f64sub};
OpCtrl = {OpCtrl, `SUB_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b01};
@ -345,6 +383,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f64mul};
OpCtrl = {OpCtrl, `MUL_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b01};
@ -354,6 +393,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f64div};
OpCtrl = {OpCtrl, `DIV_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b01};
@ -363,6 +403,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f64sqrt};
OpCtrl = {OpCtrl, `SQRT_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b01};
@ -376,9 +417,7 @@ module testbenchfp;
FmaRuTests = {FmaRuTests, "f64_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f64_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f64_mulAdd_rnm.tv"};
for(int i = 0; i<5; i++) begin
FmaFmt = {FmaFmt, 2'b01};
end
FmaFmt = {FmaFmt, 2'b01};
end
end
if (`F_SUPPORTED) begin // if single precision being supported
@ -386,6 +425,7 @@ module testbenchfp;
Tests = {Tests, f32rv32cvtint};
// add the op-codes for these tests to the op-code list
OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
// add what unit is used and the fmt to their lists (one for each test)
for(int i = 0; i<20; i++) begin
Unit = {Unit, `CVTINTUNIT};
@ -393,9 +433,10 @@ module testbenchfp;
end
if (`XLEN == 64) begin // if 64-bit integers are supported
Tests = {Tests, f32rv64cvtint};
// add the op-codes for these tests to the op-code list
// add the op-codes for these tests to the op-code list
OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
// add what unit is used and the fmt to their lists (one for each test)
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
// add what unit is used and the fmt to their lists (one for each test)
for(int i = 0; i<20; i++) begin
Unit = {Unit, `CVTINTUNIT};
Fmt = {Fmt, 2'b00};
@ -408,17 +449,23 @@ module testbenchfp;
Tests = {Tests, f32f16cvt};
// add the op-ctrls (i.e. the format of the result)
OpCtrl = {OpCtrl, 3'b10, 3'b00};
WriteInt = {WriteInt, 1'b0, 1'b0};
// add the unit being tested and fmt (input format)
for(int i = 0; i<10; i++) begin
for(int i = 0; i<5; i++) begin
Unit = {Unit, `CVTFPUNIT};
Fmt = {Fmt, 2'b00};
end
for(int i = 0; i<5; i++) begin
Unit = {Unit, `CVTFPUNIT};
Fmt = {Fmt, 2'b10};
end
end
end
if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f32cmp};
OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
for(int i = 0; i<15; i++) begin
Unit = {Unit, `CMPUNIT};
Fmt = {Fmt, 2'b00};
@ -428,6 +475,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f32add};
OpCtrl = {OpCtrl, `ADD_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b00};
@ -437,6 +485,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f32sub};
OpCtrl = {OpCtrl, `SUB_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b00};
@ -446,6 +495,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f32mul};
OpCtrl = {OpCtrl, `MUL_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b00};
@ -455,6 +505,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f32div};
OpCtrl = {OpCtrl, `DIV_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b00};
@ -464,6 +515,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f32sqrt};
OpCtrl = {OpCtrl, `SQRT_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b00};
@ -473,13 +525,11 @@ module testbenchfp;
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f32_mulAdd_rne.tv"};
// FmaRzTests = {FmaRzTests, "f32_mulAdd_rz.tv"};
// FmaRuTests = {FmaRuTests, "f32_mulAdd_ru.tv"};
// FmaRdTests = {FmaRdTests, "f32_mulAdd_rd.tv"};
// FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
// for(int i = 0; i<5; i++) begin
FmaFmt = {FmaFmt, 2'b00};
// end
FmaRzTests = {FmaRzTests, "f32_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f32_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f32_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
FmaFmt = {FmaFmt, 2'b00};
end
end
if (`ZFH_SUPPORTED) begin // if half precision supported
@ -487,26 +537,29 @@ module testbenchfp;
Tests = {Tests, f16rv32cvtint};
// add the op-codes for these tests to the op-code list
OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
// add what unit is used and the fmt to their lists (one for each test)
for(int i = 0; i<20; i++) begin
Unit = {Unit, `CVTINTUNIT};
Fmt = {Fmt, 2'b10};
end
if (`XLEN == 64) begin // if 64-bit integers are supported
Tests = {Tests, f16rv64cvtint, f16rv32cvtint};
// add the op-codes for these tests to the op-code list
OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
// add what unit is used and the fmt to their lists (one for each test)
for(int i = 0; i<20; i++) begin
Unit = {Unit, `CVTINTUNIT};
Fmt = {Fmt, 2'b10};
end
Tests = {Tests, f16rv64cvtint};
// add the op-codes for these tests to the op-code list
OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
// add what unit is used and the fmt to their lists (one for each test)
for(int i = 0; i<20; i++) begin
Unit = {Unit, `CVTINTUNIT};
Fmt = {Fmt, 2'b10};
end
end
end
if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f16cmp};
OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
for(int i = 0; i<15; i++) begin
Unit = {Unit, `CMPUNIT};
Fmt = {Fmt, 2'b10};
@ -516,6 +569,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f16add};
OpCtrl = {OpCtrl, `ADD_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b10};
@ -525,6 +579,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f16sub};
OpCtrl = {OpCtrl, `SUB_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b10};
@ -534,6 +589,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f16mul};
OpCtrl = {OpCtrl, `MUL_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b10};
@ -543,6 +599,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f16div};
OpCtrl = {OpCtrl, `DIV_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b10};
@ -552,6 +609,7 @@ module testbenchfp;
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f16sqrt};
OpCtrl = {OpCtrl, `SQRT_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b10};
@ -561,13 +619,11 @@ module testbenchfp;
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f16_mulAdd_rne.tv"};
// FmaRzTests = {FmaRzTests, "f16_mulAdd_rz.tv"};
// FmaRuTests = {FmaRuTests, "f16_mulAdd_ru.tv"};
// FmaRdTests = {FmaRdTests, "f16_mulAdd_rd.tv"};
// FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
// for(int i = 0; i<5; i++) begin
FmaFmt = {FmaFmt, 2'b10};
// end
FmaRzTests = {FmaRzTests, "f16_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f16_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f16_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
FmaFmt = {FmaFmt, 2'b10};
end
end
@ -606,6 +662,7 @@ module testbenchfp;
always_comb UnitVal = Unit[TestNum];
always_comb FmtVal = Fmt[TestNum];
always_comb OpCtrlVal = OpCtrl[OpCtrlNum];
always_comb WriteIntVal = WriteInt[OpCtrlNum];
always_comb FrmVal = Frm[FrmNum];
assign Mult = OpCtrlVal === 3'b100;
@ -624,7 +681,7 @@ module testbenchfp;
.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
.XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN), .ZOrigDenormE(FmaRneZOrigDenorm),
.XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
.XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN),
.XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm),
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
@ -634,7 +691,7 @@ module testbenchfp;
.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .FmaModFmt,
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
.XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN), .ZOrigDenormE(FmaRzZOrigDenorm),
.XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
.XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN),
.XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm),
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
@ -644,7 +701,7 @@ module testbenchfp;
.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .FmaModFmt,
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
.XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN), .ZOrigDenormE(FmaRuZOrigDenorm),
.XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
.XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN),
.XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm),
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
@ -654,7 +711,7 @@ module testbenchfp;
.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .FmaModFmt,
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
.XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN), .ZOrigDenormE(FmaRdZOrigDenorm),
.XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
.XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN),
.XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm),
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
@ -663,7 +720,7 @@ module testbenchfp;
readfmavectors readfmarnmvectors (.clk, .TestVector(FmaRnmVectors[VectorNum]), .Ans(FmaRnmAns), .AnsFlg(FmaRnmAnsFlg),
.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .FmaModFmt,
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan), .ZOrigDenormE(FmaRnmZOrigDenorm),
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
.XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
.XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN),
.XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm),
@ -673,7 +730,7 @@ module testbenchfp;
readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA,
.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
.XManE(XMan), .YManE(YMan), .ZManE(ZMan), .ZOrigDenormE(ZOrigDenorm),
.XManE(XMan), .YManE(YMan), .ZManE(ZMan),
.XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
.XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN),
.XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm),
@ -699,13 +756,12 @@ module testbenchfp;
fma1 fma1rne(.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
.XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm),
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRneSum), .NegSumE(FmaRneNegSum), .InvZE(FmaRneInvZ),
.NormCntE(FmaRneNormCnt), .ZSgnEffE(FmaRneZSgnEff), .PSgnE(FmaRnePSgn),
.ProdExpE(FmaRneProdExp), .AddendStickyE(FmaRneAddendSticky), .KillProdE(FmaRneSumKillProd));
fma2 fma2rne(.XSgnM(FmaRneXSgn), .YSgnM(FmaRneYSgn),
.ZExpM(FmaRneZExp), .ZOrigDenormM(FmaRneZOrigDenorm),
.ZExpM(FmaRneZExp), .ZDenormM(FmaRneZDenorm),
.XManM(FmaRneXMan), .YManM(FmaRneYMan), .ZManM(FmaRneZMan),
.XNaNM(FmaRneXNaN), .YNaNM(FmaRneYNaN), .ZNaNM(FmaRneZNaN),
.XZeroM(FmaRneXZero), .YZeroM(FmaRneYZero), .ZZeroM(FmaRneZZero),
@ -718,13 +774,12 @@ module testbenchfp;
fma1 fma1rz(.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn),
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
.XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm),
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRzSum), .NegSumE(FmaRzNegSum), .InvZE(FmaRzInvZ),
.NormCntE(FmaRzNormCnt), .ZSgnEffE(FmaRzZSgnEff), .PSgnE(FmaRzPSgn),
.ProdExpE(FmaRzProdExp), .AddendStickyE(FmaRzAddendSticky), .KillProdE(FmaRzSumKillProd));
fma2 fma2rz(.XSgnM(FmaRzXSgn), .YSgnM(FmaRzYSgn),
.ZExpM(FmaRzZExp), .ZOrigDenormM(FmaRzZOrigDenorm),
.ZExpM(FmaRzZExp), .ZDenormM(FmaRzZDenorm),
.XManM(FmaRzXMan), .YManM(FmaRzYMan), .ZManM(FmaRzZMan),
.XNaNM(FmaRzXNaN), .YNaNM(FmaRzYNaN), .ZNaNM(FmaRzZNaN),
.XZeroM(FmaRzXZero), .YZeroM(FmaRzYZero), .ZZeroM(FmaRzZZero),
@ -737,13 +792,12 @@ module testbenchfp;
fma1 fma1ru(.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn),
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
.XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm),
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRuSum), .NegSumE(FmaRuNegSum), .InvZE(FmaRuInvZ),
.NormCntE(FmaRuNormCnt), .ZSgnEffE(FmaRuZSgnEff), .PSgnE(FmaRuPSgn),
.ProdExpE(FmaRuProdExp), .AddendStickyE(FmaRuAddendSticky), .KillProdE(FmaRuSumKillProd));
fma2 fma2ru(.XSgnM(FmaRuXSgn), .YSgnM(FmaRuYSgn),
.ZExpM(FmaRuZExp), .ZOrigDenormM(FmaRuZOrigDenorm),
.ZExpM(FmaRuZExp), .ZDenormM(FmaRuZDenorm),
.XManM(FmaRuXMan), .YManM(FmaRuYMan), .ZManM(FmaRuZMan),
.XNaNM(FmaRuXNaN), .YNaNM(FmaRuYNaN), .ZNaNM(FmaRuZNaN),
.XZeroM(FmaRuXZero), .YZeroM(FmaRuYZero), .ZZeroM(FmaRuZZero),
@ -755,14 +809,13 @@ module testbenchfp;
.FMAFlgM(FmaRuResFlg), .FMAResM(FmaRuRes), .Mult(1'b0));
fma1 fma1rd(.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn),
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
.XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm),
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRdSum), .NegSumE(FmaRdNegSum), .InvZE(FmaRdInvZ),
.NormCntE(FmaRdNormCnt), .ZSgnEffE(FmaRdZSgnEff), .PSgnE(FmaRdPSgn),
.ProdExpE(FmaRdProdExp), .AddendStickyE(FmaRdAddendSticky), .KillProdE(FmaRdSumKillProd));
fma2 fma2rd(.XSgnM(FmaRdXSgn), .YSgnM(FmaRdYSgn),
.ZExpM(FmaRdZExp), .ZOrigDenormM(FmaRdZOrigDenorm),
.ZExpM(FmaRdZExp), .ZDenormM(FmaRdZDenorm),
.XManM(FmaRdXMan), .YManM(FmaRdYMan), .ZManM(FmaRdZMan),
.XNaNM(FmaRdXNaN), .YNaNM(FmaRdYNaN), .ZNaNM(FmaRdZNaN),
.XZeroM(FmaRdXZero), .YZeroM(FmaRdYZero), .ZZeroM(FmaRdZZero),
@ -775,13 +828,12 @@ module testbenchfp;
fma1 fma1rnm(.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn),
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
.XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm),
.XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRnmSum), .NegSumE(FmaRnmNegSum), .InvZE(FmaRnmInvZ),
.NormCntE(FmaRnmNormCnt), .ZSgnEffE(FmaRnmZSgnEff), .PSgnE(FmaRnmPSgn),
.ProdExpE(FmaRnmProdExp), .AddendStickyE(FmaRnmAddendSticky), .KillProdE(FmaRnmSumKillProd));
fma2 fma2rnm(.XSgnM(FmaRnmXSgn), .YSgnM(FmaRnmYSgn),
.ZExpM(FmaRnmZExp), .ZOrigDenormM(FmaRnmZOrigDenorm),
.ZExpM(FmaRnmZExp), .ZDenormM(FmaRnmZDenorm),
.XManM(FmaRnmXMan), .YManM(FmaRnmYMan), .ZManM(FmaRnmZMan),
.XNaNM(FmaRnmXNaN), .YNaNM(FmaRnmYNaN), .ZNaNM(FmaRnmZNaN),
.XZeroM(FmaRnmXZero), .YZeroM(FmaRnmYZero), .ZZeroM(FmaRnmZZero),
@ -794,12 +846,11 @@ module testbenchfp;
fma1 fma1(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn),
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp),
.XManE(XMan), .YManE(YMan), .ZManE(ZMan),
.XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm),
.XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
.ProdExpE, .AddendStickyE, .KillProdE);
fma2 fma2(.XSgnM(XSgn), .YSgnM(YSgn),
.ZExpM(ZExp), .ZOrigDenormM(ZOrigDenorm),
.ZExpM(ZExp), .ZDenormM(ZDenorm),
.XManM(XMan), .YManM(YMan), .ZManM(ZMan),
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN),
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero),
@ -809,7 +860,12 @@ module testbenchfp;
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
.FMAFlgM(FmaFlg), .FMAResM(FmaRes), .Mult);
// fcvtfp fcvtfp (.XExpE(XExp), .XManE(XMan), .XSgnE(XSgn), .XZeroE(XZero), .XDenormE(XDenorm), .XInfE(XInf),
// .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(Frmal), .FmtE(ModFmt), .CvtFpRes, .CvtFpFlgE);
// .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), .CvtFpResE(CvtFpRes), .CvtFpFlgE(CvtFpFlg));
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal),
.XInfE(XInf), .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt),
.CvtResE(CvtRes), .CvtIntResE(CvtIntRes), .CvtFlgE(CvtFlg));
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpResE(CmpRes));
@ -901,38 +957,55 @@ module testbenchfp;
AnsNaN = 1'b0;
ResNaN = 1'b0;
end
else begin
case (FmtVal)
else if (UnitVal === `CVTFPUNIT) begin
case (OpCtrlVal[1:0])
4'b11: begin // quad
AnsNaN = &Ans[`FLEN-2:`NF]&(|Ans[`NF-1:0]);
ResNaN = &FmaRes[`FLEN-2:`NF]&(|FmaRes[`NF-1:0]);
AnsNaN = &Ans[`Q_LEN-2:`NF]&(|Ans[`Q_NF-1:0]);
ResNaN = &Res[`Q_LEN-2:`NF]&(|Res[`Q_NF-1:0]);
end
4'b01: begin // double
AnsNaN = &Ans[`LEN1-2:`NF1]&(|Ans[`NF1-1:0]);
ResNaN = &FmaRes[`LEN1-2:`NF1]&(|FmaRes[`NF1-1:0]);
AnsNaN = &Ans[`D_LEN-2:`D_NF]&(|Ans[`D_NF-1:0]);
ResNaN = &Res[`D_LEN-2:`D_NF]&(|Res[`D_NF-1:0]);
end
4'b00: begin // single
AnsNaN = &Ans[`LEN2-2:`NF2]&(|Ans[`NF2-1:0]);
ResNaN = &FmaRes[`LEN2-2:`NF2]&(|FmaRes[`NF2-1:0]);
AnsNaN = &Ans[`S_LEN-2:`S_NF]&(|Ans[`S_NF-1:0]);
ResNaN = &Res[`S_LEN-2:`S_NF]&(|Res[`S_NF-1:0]);
end
4'b10: begin // half
AnsNaN = &Ans[`H_LEN-2:`H_NF]&(|Ans[`H_NF-1:0]);
ResNaN = &FmaRes[`H_LEN-2:`H_NF]&(|FmaRes[`H_NF-1:0]);
ResNaN = &Res[`H_LEN-2:`H_NF]&(|Res[`H_NF-1:0]);
end
endcase
end
else begin
case (FmtVal)
4'b11: begin // quad
AnsNaN = &Ans[`Q_LEN-2:`Q_NF]&(|Ans[`Q_NF-1:0]);
ResNaN = &Res[`Q_LEN-2:`Q_NF]&(|Res[`Q_NF-1:0]);
end
4'b01: begin // double
AnsNaN = &Ans[`D_LEN-2:`D_NF]&(|Ans[`D_NF-1:0]);
ResNaN = &Res[`D_LEN-2:`D_NF]&(|Res[`D_NF-1:0]);
end
4'b00: begin // single
AnsNaN = &Ans[`S_LEN-2:`S_NF]&(|Ans[`S_NF-1:0]);
ResNaN = &Res[`S_LEN-2:`S_NF]&(|Res[`S_NF-1:0]);
end
4'b10: begin // half
AnsNaN = &Ans[`H_LEN-2:`H_NF]&(|Ans[`H_NF-1:0]);
ResNaN = &Res[`H_LEN-2:`H_NF]&(|Res[`H_NF-1:0]);
end
endcase
end
end
// check results on falling edge of clk
always @(negedge clk) begin
always_comb begin
// select the result to check
case (UnitVal)
`FMAUNIT: Res = FmaRes;
`DIVUNIT: Res = DivRes;
`CMPUNIT: Res = CmpRes;
`CVTINTUNIT: Res = CvtRes;
`CVTFPUNIT: Res = CvtFpRes;
`CVTINTUNIT: if(WriteIntVal) Res = CvtIntRes; else Res = CvtRes;
`CVTFPUNIT: Res = CvtRes;
endcase
// select the flag to check
@ -940,9 +1013,13 @@ module testbenchfp;
`FMAUNIT: ResFlg = FmaFlg;
`DIVUNIT: ResFlg = DivFlg;
`CMPUNIT: ResFlg = CmpFlg;
`CVTINTUNIT: ResFlg = CvtIntFlg;
`CVTFPUNIT: ResFlg = CvtFpFlg;
`CVTINTUNIT: ResFlg = CvtFlg;
`CVTFPUNIT: ResFlg = CvtFlg;
endcase
end
// check results on falling edge of clk
always @(negedge clk) begin
// check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify:
// - the sign of the NaN does not matter for the opperations being tested
@ -1060,15 +1137,19 @@ module testbenchfp;
else if (UnitVal === `CVTFPUNIT) // if converting from floating point to floating point OpCtrl contains the final FP format
case (OpCtrlVal[1:0])
2'b11: NaNGood = ((AnsFlg[4]&(Res[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(AnsNaN&(Res[`Q_LEN-2:0] === Ans[`Q_LEN-2:0])) |
(XNaN&(Res[`Q_LEN-2:0] === {X[`Q_LEN-2:`Q_NF],1'b1,X[`Q_NF-2:0]})) |
(YNaN&(Res[`Q_LEN-2:0] === {Y[`Q_LEN-2:`Q_NF],1'b1,Y[`Q_NF-2:0]})));
2'b01: NaNGood = ((AnsFlg[4]&(Res[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(AnsNaN&(Res[`D_LEN-2:0] === Ans[`D_LEN-2:0])) |
(XNaN&(Res[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) |
(YNaN&(Res[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]})));
2'b00: NaNGood = ((AnsFlg[4]&(Res[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(AnsNaN&(Res[`S_LEN-2:0] === Ans[`S_LEN-2:0])) |
(XNaN&(Res[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) |
(YNaN&(Res[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]})));
2'b10: NaNGood = ((AnsFlg[4]&(Res[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(AnsNaN&(Res[`H_LEN-2:0] === Ans[`H_LEN-2:0])) |
(XNaN&(Res[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) |
(YNaN&(Res[`H_LEN-2:0] === {Y[`H_LEN-2:`H_NF],1'b1,Y[`H_NF-2:0]})));
endcase
@ -1086,15 +1167,23 @@ module testbenchfp;
///////////////////////////////////////////////////////////////////////////////////////////////
// check if the non-fma test is correct
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CMPUNIT)) begin
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
errors += 1;
$display("There is an error in %s", Tests[TestNum]);
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
$stop;
end
// in The RISC-V Instruction Set Manual (2019) section 11.8 specifies that
// if a any of the inputs to the EQ LT LE opperations then the opperation should return a 0
else if ((UnitVal === `CMPUNIT)&(XNaN|YNaN)&(Res !== (`FLEN)'(0))) begin
// TestFloat sets the result to all 1's when there is an invalid result, however in
// http://www.jhauser.us/arithmetic/TestFloat-3/doc/TestFloat-general.html it says
// for an unsigned integer result 0 is also okay
// Testfloat outputs 800... for both the largest integer values for both positive and negitive numbers but
// the riscv spec specifies 2^31-1 for positive values out of range and NaNs ie 7fff...
else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&(Res[`XLEN-1:0] === (`XLEN)'(0))) |
(WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) |
(WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) |
(Res === Ans | NaNGood | NaNGood === 1'bx)) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in %s", Tests[TestNum]);
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@ -1147,6 +1236,8 @@ module testbenchfp;
// increment the test
TestNum += 1;
// clear the vectors
for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
// read next files
$readmemh({`PATH, Tests[TestNum]}, TestVectors);
$readmemh({`PATH, FmaRneTests[TestNum]}, FmaRneVectors);
@ -1197,7 +1288,6 @@ module readfmavectors (
input logic [1:0] FmaFmt, // the format of the FMA inputs
input logic [`FLEN*4+7:0] TestVector, // the test vector
output logic [`FLEN-1:0] Ans, // the correct answer
output logic ZOrigDenormE, // is z denormalized in it's original precision
output logic [4:0] AnsFlg, // the correct flag
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
@ -1244,10 +1334,10 @@ module readfmavectors (
endcase
end
unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
.XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
.XExpMaxE, .ZOrigDenormE);
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
.XExpMaxE, .ZDenormE);
endmodule
@ -1287,7 +1377,6 @@ module readvectors (
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
output logic XNormE, XExpMaxE,
output logic ZOrigDenormE,
output logic [`FLEN-1:0] X, Y, Z
);
@ -1371,7 +1460,7 @@ module readvectors (
Ans = TestVector[8];
end
2'b10: begin // half
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+3*(`H_LEN)-1:12+(`H_LEN)]};
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+2*(`H_LEN)-1:12+(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+(`H_LEN)-1:12]};
Ans = TestVector[8];
end
@ -1464,89 +1553,105 @@ module readvectors (
case (Fmt)
2'b11: begin // quad
// {is the integer a long, is the opperation to an integer}
casex ({OpCtrl[2], OpCtrl[0]})
casex ({OpCtrl[2:1]})
2'b11: begin // long -> quad
X = {`FLEN{1'bx}};
SrcA = TestVector[8+`Q_LEN+`XLEN-1:8+(`Q_LEN)];
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // int -> quad
2'b10: begin // int -> quad
// correctly sign extend the integer depending on if it's a signed/unsigned test
SrcA = {{`XLEN-32{TestVector[8+`Q_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`Q_LEN+`XLEN-1:8+(`Q_LEN)]};
X = {`FLEN{1'bx}};
SrcA = {{`XLEN-32{TestVector[8+`Q_LEN+32-1]}}, TestVector[8+`Q_LEN+32-1:8+(`Q_LEN)]};
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b10: begin // quad -> long
2'b01: begin // quad -> long
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
SrcA = {`XLEN{1'bx}};
Ans = {TestVector[8+(`XLEN-1):8]};
end
2'b00: begin // double -> long
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}},TestVector[8+(`XLEN-1):8]};
2'b00: begin // quad -> int
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+32+`Q_LEN-1:8+(32)]};
SrcA = {`XLEN{1'bx}};
Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
end
endcase
end
2'b01: begin // double
// {is the integer a long, is the opperation to an integer}
casex ({OpCtrl[2], OpCtrl[0]})
// {Int->Fp?, is the integer a long}
casex ({OpCtrl[2:1]})
2'b11: begin // long -> double
X = {`FLEN{1'bx}};
SrcA = TestVector[8+`D_LEN+`XLEN-1:8+(`D_LEN)];
Ans = TestVector[8+(`D_LEN-1):8];
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
end
2'b01: begin // int -> double
2'b10: begin // int -> double
// correctly sign extend the integer depending on if it's a signed/unsigned test
SrcA = {{`XLEN-32{TestVector[8+`D_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`D_LEN+`XLEN-1:8+(`D_LEN)]};
Ans = TestVector[8+(`D_LEN-1):8];
X = {`FLEN{1'bx}};
SrcA = {{`XLEN-32{TestVector[8+`D_LEN+32-1]}}, TestVector[8+`D_LEN+32-1:8+(`D_LEN)]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
end
2'b10: begin // double -> long
2'b01: begin // double -> long
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`XLEN+`D_LEN-1:8+(`XLEN)]};
SrcA = {`XLEN{1'bx}};
Ans = {TestVector[8+(`XLEN-1):8]};
end
2'b00: begin // double -> int
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`XLEN+`D_LEN-1:8+(`XLEN)]};
Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}},TestVector[8+(`XLEN-1):8]};
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+32+`D_LEN-1:8+(32)]};
SrcA = {`XLEN{1'bx}};
Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
end
endcase
end
2'b00: begin // single
// {is the integer a long, is the opperation to an integer}
casex ({OpCtrl[2], OpCtrl[0]})
casex ({OpCtrl[2:1]})
2'b11: begin // long -> single
X = {`FLEN{1'bx}};
SrcA = TestVector[8+`S_LEN+`XLEN-1:8+(`S_LEN)];
Ans = TestVector[8+(`S_LEN-1):8];
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
end
2'b01: begin // int -> single
2'b10: begin // int -> single
// correctly sign extend the integer depending on if it's a signed/unsigned test
SrcA = {{`XLEN-32{TestVector[8+`S_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`S_LEN+`XLEN-1:8+(`S_LEN)]};
Ans = TestVector[8+(`S_LEN-1):8];
X = {`FLEN{1'bx}};
SrcA = {{`XLEN-32{TestVector[8+`S_LEN+32-1]}}, TestVector[8+`S_LEN+32-1:8+(`S_LEN)]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
end
2'b10: begin // single -> long
2'b01: begin // single -> long
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`XLEN+`S_LEN-1:8+(`XLEN)]};
SrcA = {`XLEN{1'bx}};
Ans = {TestVector[8+(`XLEN-1):8]};
end
2'b00: begin // single -> int
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`XLEN+`S_LEN-1:8+(`XLEN)]};
Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}},TestVector[8+(`XLEN-1):8]};
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+32+`S_LEN-1:8+(32)]};
SrcA = {`XLEN{1'bx}};
Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
end
endcase
end
2'b10: begin // half
// {is the integer a long, is the opperation to an integer}
casex ({OpCtrl[2], OpCtrl[0]})
casex ({OpCtrl[2:1]})
2'b11: begin // long -> half
X = {`FLEN{1'bx}};
SrcA = TestVector[8+`H_LEN+`XLEN-1:8+(`H_LEN)];
Ans = TestVector[8+(`H_LEN-1):8];
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
end
2'b01: begin // int -> half
2'b10: begin // int -> half
// correctly sign extend the integer depending on if it's a signed/unsigned test
SrcA = {{`XLEN-32{TestVector[8+`H_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`H_LEN+`XLEN-1:8+(`H_LEN)]};
Ans = TestVector[8+(`H_LEN-1):8];
X = {`FLEN{1'bx}};
SrcA = {{`XLEN-32{TestVector[8+`H_LEN+32-1]}}, TestVector[8+`H_LEN+32-1:8+(`H_LEN)]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
end
2'b10: begin // half -> long
2'b01: begin // half -> long
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`XLEN+`H_LEN-1:8+(`XLEN)]};
SrcA = {`XLEN{1'bx}};
Ans = {TestVector[8+(`XLEN-1):8]};
end
2'b00: begin // half -> int
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`XLEN+`H_LEN-1:8+(`XLEN)]};
Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}}, TestVector[8+(`XLEN-1):8]};
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+32+`H_LEN-1:8+(32)]};
SrcA = {`XLEN{1'bx}};
Ans = {{`XLEN-32{TestVector[8+32-1]}}, TestVector[8+(32-1):8]};
end
endcase
end
@ -1557,5 +1662,5 @@ module readvectors (
unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
.XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
.XExpMaxE, .ZOrigDenormE);
.XExpMaxE);
endmodule

View File

@ -87,7 +87,7 @@ logic [3:0] dummy;
"arch64m": if (`M_SUPPORTED) tests = arch64m;
"arch64d": if (`D_SUPPORTED) tests = arch64d;
"imperas64i": tests = imperas64i;
// "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
//"imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
"imperas64f": if (`F_SUPPORTED) tests = imperas64f;
"imperas64d": if (`D_SUPPORTED) tests = imperas64d;
"imperas64m": if (`M_SUPPORTED) tests = imperas64m;
@ -110,7 +110,7 @@ logic [3:0] dummy;
"arch32m": if (`M_SUPPORTED) tests = arch32m;
"arch32f": if (`F_SUPPORTED) tests = arch32f;
"imperas32i": tests = imperas32i;
// "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
//"imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
"imperas32f": if (`F_SUPPORTED) tests = imperas32f;
"imperas32m": if (`M_SUPPORTED) tests = imperas32m;
"wally32a": if (`A_SUPPORTED) tests = wally32a;
@ -183,7 +183,7 @@ logic [3:0] dummy;
// read test vectors into memory
pathname = tvpaths[tests[0].atoi()];
/* if (tests[0] == `IMPERASTEST)
/* if (tests[0] == `IMPERASTEST)
pathname = tvpaths[0];
else pathname = tvpaths[1]; */
memfilename = {pathname, tests[test], ".elf.memfile"};
@ -255,7 +255,7 @@ logic [3:0] dummy;
//if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] &
(signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin // ***i+1?
if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
// if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
// if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
// report errors unless they are garbage at the end of the sim
// kind of hacky test for garbage right now
$display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
@ -368,11 +368,12 @@ module riscvassertions;
assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported.");
assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported");
assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported");
// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache");
assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache");
//assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS.");
//assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS.");
assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
end
endmodule
@ -408,47 +409,45 @@ module DCacheFlushFSM
logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0];
logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0];
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0];
for(index = 0; index < numlines; index++) begin
for(way = 0; way < numways; way++) begin
for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
copyShadow #(.tagstart(tagstart),
.loglinebytelen(loglinebytelen))
copyShadow(.clk,
.start,
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
.index(index),
.cacheWord(cacheWord),
.CacheData(CacheData[way][index][cacheWord]),
.CacheAdr(CacheAdr[way][index][cacheWord]),
.CacheTag(CacheTag[way][index][cacheWord]),
.CacheValid(CacheValid[way][index][cacheWord]),
.CacheDirty(CacheDirty[way][index][cacheWord]));
end
end
for(index = 0; index < numlines; index++) begin
for(way = 0; way < numways; way++) begin
for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
copyShadow #(.tagstart(tagstart),
.loglinebytelen(loglinebytelen))
copyShadow(.clk,
.start,
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
.index(index),
.cacheWord(cacheWord),
.CacheData(CacheData[way][index][cacheWord]),
.CacheAdr(CacheAdr[way][index][cacheWord]),
.CacheTag(CacheTag[way][index][cacheWord]),
.CacheValid(CacheValid[way][index][cacheWord]),
.CacheDirty(CacheDirty[way][index][cacheWord]));
end
end
end
integer i, j, k;
integer i, j, k;
always @(posedge clk) begin
if (start) begin #1
#1
for(i = 0; i < numlines; i++) begin
for(j = 0; j < numways; j++) begin
for(k = 0; k < numwords; k++) begin
if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
end
end
end
end
end
end
end
always @(posedge clk) begin
if (start) begin #1
#1
for(i = 0; i < numlines; i++) begin
for(j = 0; j < numways; j++) begin
for(k = 0; k < numwords; k++) begin
if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
end
end
end
end
end
end
end
flop #(1) doneReg(.clk, .d(start), .q(done));
endmodule

View File

@ -34,14 +34,14 @@
`define LE_OPCTRL 3'b011
`define LT_OPCTRL 3'b001
`define EQ_OPCTRL 3'b010
`define TO_UI_OPCTRL 3'b011
`define TO_UI_OPCTRL 3'b000
`define TO_I_OPCTRL 3'b001
`define TO_UL_OPCTRL 3'b111
`define TO_L_OPCTRL 3'b101
`define FROM_UI_OPCTRL 3'b010
`define FROM_I_OPCTRL 3'b000
`define TO_UL_OPCTRL 3'b010
`define TO_L_OPCTRL 3'b011
`define FROM_UI_OPCTRL 3'b100
`define FROM_I_OPCTRL 3'b101
`define FROM_UL_OPCTRL 3'b110
`define FROM_L_OPCTRL 3'b100
`define FROM_L_OPCTRL 3'b111
`define RNE 3'b000
`define RZ 3'b001
`define RU 3'b011
@ -54,16 +54,6 @@
`define CMPUNIT 4
string f16rv32cvtint[] = '{
"f16_to_i32_rne.tv",
"f16_to_i32_rz.tv",
"f16_to_i32_ru.tv",
"f16_to_i32_rd.tv",
"f16_to_i32_rnm.tv",
"f16_to_ui32_rne.tv",
"f16_to_ui32_rz.tv",
"f16_to_ui32_ru.tv",
"f16_to_ui32_rd.tv",
"f16_to_ui32_rnm.tv",
"ui32_to_f16_rne.tv",
"ui32_to_f16_rz.tv",
"ui32_to_f16_ru.tv",
@ -73,20 +63,20 @@ string f16rv32cvtint[] = '{
"i32_to_f16_rz.tv",
"i32_to_f16_ru.tv",
"i32_to_f16_rd.tv",
"i32_to_f16_rnm.tv"
"i32_to_f16_rnm.tv",
"f16_to_ui32_rne.tv",
"f16_to_ui32_rz.tv",
"f16_to_ui32_ru.tv",
"f16_to_ui32_rd.tv",
"f16_to_ui32_rnm.tv",
"f16_to_i32_rne.tv",
"f16_to_i32_rz.tv",
"f16_to_i32_ru.tv",
"f16_to_i32_rd.tv",
"f16_to_i32_rnm.tv"
};
string f16rv64cvtint[] = '{
"f16_to_ui64_rne.tv",
"f16_to_ui64_rz.tv",
"f16_to_ui64_ru.tv",
"f16_to_ui64_rd.tv",
"f16_to_ui64_rnm.tv",
"f16_to_i64_rne.tv",
"f16_to_i64_rz.tv",
"f16_to_i64_ru.tv",
"f16_to_i64_rd.tv",
"f16_to_i64_rnm.tv",
"ui64_to_f16_rne.tv",
"ui64_to_f16_rz.tv",
"ui64_to_f16_ru.tv",
@ -96,7 +86,17 @@ string f16rv64cvtint[] = '{
"i64_to_f16_rz.tv",
"i64_to_f16_ru.tv",
"i64_to_f16_rd.tv",
"i64_to_f16_rnm.tv"
"i64_to_f16_rnm.tv",
"f16_to_ui64_rne.tv",
"f16_to_ui64_rz.tv",
"f16_to_ui64_ru.tv",
"f16_to_ui64_rd.tv",
"f16_to_ui64_rnm.tv",
"f16_to_i64_rne.tv",
"f16_to_i64_rz.tv",
"f16_to_i64_ru.tv",
"f16_to_i64_rd.tv",
"f16_to_i64_rnm.tv"
};
string f32rv32cvtint[] = '{
@ -307,16 +307,16 @@ string f128f32cvt[] = '{
string f128f64cvt[] = '{
"f64_to_f128_rne.tv",
"f64_to_f128_rz.tv",
"f64_to_f128_ru.tv",
"f64_to_f128_rd.tv",
"f64_to_f128_rnm.tv",
"f128_to_f64_rne.tv",
"f128_to_f64_rz.tv",
"f128_to_f64_ru.tv",
"f128_to_f64_rd.tv",
"f128_to_f64_rnm.tv"
"f128_to_f64_rnm.tv",
"f64_to_f128_rne.tv",
"f64_to_f128_rz.tv",
"f64_to_f128_ru.tv",
"f64_to_f128_rd.tv",
"f64_to_f128_rnm.tv"
};
string f16add[] = '{

View File

@ -17,6 +17,9 @@ if {$tech == "sky130"} {
} elseif {$tech == "sky90"} {
set s9lib $timing_lib/sky90/sky90_sc/V1.7.4/lib
lappend search_path $s9lib
} elseif {$tech == "tsmc28"} {
set s10lib /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a
lappend search_path $s10lib
}
# Synthetic libraries
@ -30,6 +33,8 @@ if {$tech == "sky130"} {
lappend target_library $s8lib/sky130_osu_sc_12T_ms_TT_1P8_25C.ccs.db
} elseif {$tech == "sky90"} {
lappend target_library $s9lib/scc9gena_tt_1.2v_25C.db
} elseif {$tech == "tsmc28"} {
lappend target_library $s10lib/tcbn28hpcplusbwp30p140tt0p9v25c.db
}
# Set Link Library

View File

@ -1,111 +1,135 @@
#!/usr/bin/python3
# Madeleine Masser-Frye mmasserfrye@hmc.edu 5/22
from distutils.log import error
from statistics import median
from operator import index
import subprocess
import statistics
import csv
import re
from matplotlib.cbook import flatten
import matplotlib.pyplot as plt
import matplotlib.lines as lines
import matplotlib.axes as axes
import numpy as np
from collections import namedtuple
def getData(mod=None, width=None):
specStr = ''
if mod != None:
specStr = mod
if width != None:
specStr += ('_'+str(width))
specStr += '*'
def synthsfromcsv(filename):
Synth = namedtuple("Synth", "module tech width freq delay area lpower denergy")
with open(filename, newline='') as csvfile:
csvreader = csv.reader(csvfile)
global allSynths
allSynths = list(csvreader)
for i in range(len(allSynths)):
for j in range(len(allSynths[0])):
try: allSynths[i][j] = int(allSynths[i][j])
except:
try: allSynths[i][j] = float(allSynths[i][j])
except: pass
allSynths[i] = Synth(*allSynths[i])
def synthsintocsv():
''' writes a CSV with one line for every available synthesis
each line contains the module, tech, width, target freq, and resulting metrics
'''
print("This takes a moment...")
bashCommand = "find . -path '*runs/ppa*rv32e*' -prune"
output = subprocess.check_output(['bash','-c', bashCommand])
allSynths = output.decode("utf-8").split('\n')[:-1]
bashCommand = "grep 'Critical Path Length' runs/ppa_{}/reports/*qor*".format(specStr)
outputCPL = subprocess.check_output(['bash','-c', bashCommand])
linesCPL = outputCPL.decode("utf-8").split('\n')[:-1]
specReg = re.compile('[a-zA-Z0-9]+')
metricReg = re.compile('\d+\.\d+[e]?[-+]?\d*')
bashCommand = "grep 'Design Area' runs/ppa_{}/reports/*qor*".format(specStr)
outputDA = subprocess.check_output(['bash','-c', bashCommand])
linesDA = outputDA.decode("utf-8").split('\n')[:-1]
bashCommand = "grep '100' runs/ppa_{}/reports/*power*".format(specStr)
outputP = subprocess.check_output(['bash','-c', bashCommand])
linesP = outputP.decode("utf-8").split('\n')[:-1]
cpl = re.compile('\d{1}\.\d{6}')
f = re.compile('_\d*_MHz')
wm = re.compile('ppa_\w*_\d*_qor')
da = re.compile('\d*\.\d{6}')
p = re.compile('\d+\.\d+[e-]*\d+')
allSynths = []
for i in range(len(linesCPL)):
line = linesCPL[i]
mwm = wm.findall(line)[0][4:-4].split('_')
freq = int(f.findall(line)[0][1:-4])
delay = float(cpl.findall(line)[0])
area = float(da.findall(linesDA[i])[0])
mod = mwm[0]
width = int(mwm[1])
power = p.findall(linesP[i])
lpower = float(power[2])
denergy = float(power[1])*delay
oneSynth = [mod, width, freq, delay, area, lpower, denergy]
allSynths += [oneSynth]
return allSynths
def getVals(module, var, freq=None):
allSynths = getData(mod=module)
if (var == 'delay'):
ind = 3
units = " (ns)"
elif (var == 'area'):
ind = 4
units = " (sq microns)"
elif (var == 'lpower'):
ind = 5
units = " (nW)"
elif (var == 'denergy'):
ind = 6
units = " (pJ)"
else:
error
widths = []
metric = []
if (freq != None):
for oneSynth in allSynths:
if (oneSynth[2] == freq):
widths += [oneSynth[1]]
metric += [oneSynth[ind]]
else:
widths = [8, 16, 32, 64, 128]
for w in widths:
m = 10000 # large number to start
for oneSynth in allSynths:
if (oneSynth[1] == w):
if (oneSynth[3] < m):
m = oneSynth[3]
met = oneSynth[ind]
metric += [met]
return widths, metric, units
def writeCSV():
allSynths = getData()
file = open("ppaData.csv", "w")
writer = csv.writer(file)
writer.writerow(['Module', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (mJ)'])
writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (mJ)'])
for one in allSynths:
writer.writerow(one)
for oneSynth in allSynths:
module, width, risc, tech, freq = specReg.findall(oneSynth)[2:7]
tech = tech[:-2]
metrics = []
for phrase in [['Path Length', 'qor'], ['Design Area', 'qor'], ['100', 'power']]:
bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*'
bashCommand = bashCommand.format(*phrase)
try: output = subprocess.check_output(['bash','-c', bashCommand])
except: print("At least one synth run doesn't have reports, try cleanup() first")
nums = metricReg.findall(str(output))
nums = [float(m) for m in nums]
metrics += nums
delay = metrics[0]
area = metrics[1]
lpower = metrics[4]
denergy = (metrics[2] + metrics[3])*delay # (switching + internal powers)*delay
writer.writerow([module, tech, width, freq, delay, area, lpower, denergy])
file.close()
def genLegend(fits, coefs, module, r2):
def cleanup():
''' removes runs that didn't work
'''
bashCommand = 'grep -r "Error" runs/ppa*/reports/*qor*'
try:
output = subprocess.check_output(['bash','-c', bashCommand])
allSynths = output.decode("utf-8").split('\n')[:-1]
for run in allSynths:
run = run.split('MHz')[0]
bc = 'rm -r '+ run + '*'
output = subprocess.check_output(['bash','-c', bc])
except: pass
bashCommand = "find . -path '*runs/ppa*rv32e*' -prune"
output = subprocess.check_output(['bash','-c', bashCommand])
allSynths = output.decode("utf-8").split('\n')[:-1]
for oneSynth in allSynths:
for phrase in [['Path Length', 'qor'], ['Design Area', 'qor'], ['100', 'power']]:
bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*'
bashCommand = bashCommand.format(*phrase)
try: output = subprocess.check_output(['bash','-c', bashCommand])
except:
bc = 'rm -r '+ oneSynth[2:]
try: output = subprocess.check_output(['bash','-c', bc])
except: pass
print("All cleaned up!")
def getVals(tech, module, var, freq=None):
''' for a specified tech, module, and variable/metric
returns a list of values for that metric in ascending width order
works at a specified target frequency or if none is given, uses the synthesis with the best achievable delay for each width
'''
global widths
metric = []
widthL = []
if (freq != None):
for oneSynth in allSynths:
if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module):
widthL += [oneSynth.width]
osdict = oneSynth._asdict()
metric += [osdict[var]]
metric = [x for _, x in sorted(zip(widthL, metric))] # ordering
else:
for w in widths:
m = 100000 # large number to start
for oneSynth in allSynths:
if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == module):
if (oneSynth.delay < m) & (1000/oneSynth.delay > oneSynth.freq):
m = oneSynth.delay
osdict = oneSynth._asdict()
met = osdict[var]
try: metric += [met]
except: pass
if ('flop' in module) & (var == 'area'):
metric = [m/2 for m in metric] # since two flops in each module
if (var == 'denergy'):
metric = [m*1000 for m in metric] # more practical units for regression coefs
return metric
def genLegend(fits, coefs, r2, spec):
''' generates a list of two legend elements
labels line with fit equation and dots with tech and r squared of the fit
'''
coefsr = [str(round(c, 3)) for c in coefs]
@ -127,26 +151,17 @@ def genLegend(fits, coefs, module, r2):
eq += " + " + coefsr[ind] + "*Nlog2(N)"
ind += 1
legend_elements = [lines.Line2D([0], [0], color='orange', label=eq),
lines.Line2D([0], [0], color='steelblue', ls='', marker='o', label=' R^2='+ str(round(r2, 4)))]
legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq),
lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label=spec.tech +' $R^2$='+ str(round(r2, 4)))]
return legend_elements
def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn'):
def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True):
''' module: string module name
freq: int freq (MHz)
var: string delay, area, lpower, or denergy
fits: constant, linear, square, log2, Nlog2
plots given variable vs width for all matching syntheses with regression
'''
module: string module name
freq: int freq (MHz)
var: string delay, area, lpower, or denergy
fits: constant, linear, square, log2, Nlog2
plots chosen variable vs width for all matching syntheses with regression
'''
widths, metric, units = getVals(module, var, freq=freq)
coefs, r2, funcArr = regress(widths, metric, fits)
xp = np.linspace(8, 140, 200)
pred = []
for x in xp:
y = [func(x) for func in funcArr]
pred += [sum(np.multiply(coefs, y))]
if ax is None:
singlePlot = True
@ -154,21 +169,48 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn'):
else:
singlePlot = False
ax.scatter(widths, metric)
ax.plot(xp, pred, color='orange')
fullLeg = []
global techSpecs
global widths
legend_elements = genLegend(fits, coefs, module, r2)
ax.legend(handles=legend_elements)
global norms
for spec in techSpecs:
metric = getVals(spec.tech, module, var, freq=freq)
if norm:
techdict = spec._asdict()
norm = techdict[var]
metric = [m/norm for m in metric] # comment out to not normalize
if len(metric) == 5:
xp, pred, leg = regress(widths, metric, spec, fits)
fullLeg += leg
ax.scatter(widths, metric, color=spec.color, marker=spec.shape)
ax.plot(xp, pred, color=spec.color)
ax.legend(handles=fullLeg)
ax.set_xticks(widths)
ax.set_xlabel("Width (bits)")
ax.set_ylabel(str.title(var) + units)
if norm:
ylabeldic = {"lpower": "Normalized Leakage Power", "denergy": "Normalized Dynamic Energy", "area": "INVx1 Areas", "delay": "FO4 Delays"}
else:
ylabeldic = {"lpower": "Leakage Power (nW)", "denergy": "Dynamic Energy (nJ-CHECK)", "area": "Area (sq microns)", "delay": "Delay (ns)"}
ax.set_ylabel(ylabeldic[var])
if singlePlot:
ax.set_title(module + " (target " + str(freq) + "MHz)")
titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)"
ax.set_title(module + titleStr)
plt.show()
def regress(widths, var, fits='clsgn'):
def regress(widths, var, spec, fits='clsgn'):
''' fits a curve to the given points
returns lists of x and y values to plot that curve and legend elements with the equation
'''
funcArr = genFuncs(fits)
@ -187,9 +229,22 @@ def regress(widths, var, fits='clsgn'):
except:
resid = 0
r2 = 1 - resid / (y.size * y.var())
return coefs, r2, funcArr
def makeCoefTable():
xp = np.linspace(8, 140, 200)
pred = []
for x in xp:
n = [func(x) for func in funcArr]
pred += [sum(np.multiply(coefs, n))]
leg = genLegend(fits, coefs, r2, spec)
return xp, pred, leg
def makeCoefTable(tech):
''' not currently in use, may salvage later
writes CSV with each line containing the coefficients for a regression fit
to a particular combination of module, metric, and target frequency
'''
file = open("ppaFitting.csv", "w")
writer = csv.writer(file)
writer.writerow(['Module', 'Metric', 'Freq', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2'])
@ -198,7 +253,8 @@ def makeCoefTable():
for comb in [['delay', 5000], ['area', 5000], ['area', 10]]:
var = comb[0]
freq = comb[1]
widths, metric, units = getVals(mod, freq, var)
metric = getVals(tech, mod, freq, var)
global widths
coefs, r2, funcArr = regress(widths, metric)
row = [mod] + comb + np.ndarray.tolist(coefs) + [r2]
writer.writerow(row)
@ -206,6 +262,9 @@ def makeCoefTable():
file.close()
def genFuncs(fits='clsgn'):
''' helper function for regress()
returns array of functions with one for each term desired in the regression fit
'''
funcArr = []
if 'c' in fits:
funcArr += [lambda x: 1]
@ -220,78 +279,188 @@ def genFuncs(fits='clsgn'):
return funcArr
def noOutliers(freqs, delays, areas):
''' returns a pared down list of freqs, delays, and areas
cuts out any syntheses in which target freq isn't within 75% of the min delay target to focus on interesting area
helper function to freqPlot()
'''
f=[]
d=[]
a=[]
try:
med = statistics.median(freqs)
for i in range(len(freqs)):
norm = freqs[i]/med
if (norm > 0.25) & (norm<1.75):
f += [freqs[i]]
d += [delays[i]]
a += [areas[i]]
except: pass
ind = delays.index(min(delays))
med = freqs[ind]
for i in range(len(freqs)):
norm = freqs[i]/med
if (norm > 0.25) & (norm<1.75):
f += [freqs[i]]
d += [delays[i]]
a += [areas[i]]
return f, d, a
def freqPlot(mod, width):
allSynths = getData(mod=mod, width=width)
freqsV, delaysV, areasV, freqsA, delaysA, areasA = ([] for i in range(6))
def freqPlot(tech, mod, width):
''' plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width
'''
global allSynths
freqsL, delaysL, areasL = ([[], []] for i in range(3))
for oneSynth in allSynths:
if (mod == oneSynth[0]) & (width == oneSynth[1]):
if (1000/oneSynth[3] < oneSynth[2]):
freqsV += [oneSynth[2]]
delaysV += [oneSynth[3]]
areasV += [oneSynth[4]]
else:
freqsA += [oneSynth[2]]
delaysA += [oneSynth[3]]
areasA += [oneSynth[4]]
if (mod == oneSynth.module) & (width == oneSynth.width) & (tech == oneSynth.tech):
ind = (1000/oneSynth.delay < oneSynth.freq) # when delay is within target clock period
freqsL[ind] += [oneSynth.freq]
delaysL[ind] += [oneSynth.delay]
areasL[ind] += [oneSynth.area]
freqsV, delaysV, areasV = noOutliers(freqsV, delaysV, areasV)
freqsA, delaysA, areasA = noOutliers(freqsA, delaysA, areasA)
f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True)
adprodA = np.multiply(areasA, delaysA)
adsqA = np.multiply(adprodA, delaysA)
adprodV = np.multiply(areasV, delaysV)
adsqV = np.multiply(adprodV, delaysV)
for ind in [0,1]:
areas = areasL[ind]
delays = delaysL[ind]
freqs = freqsL[ind]
if ('flop' in mod): areas = [m/2 for m in areas] # since two flops in each module
freqs, delays, areas = noOutliers(freqs, delays, areas) # comment out to see all syntheses
c = 'blue' if ind else 'green'
adprod = adprodpow(areas, delays, 1)
adpow = adprodpow(areas, delays, 2)
ax1.scatter(freqs, delays, color=c)
ax2.scatter(freqs, areas, color=c)
ax3.scatter(freqs, adprod, color=c)
ax4.scatter(freqs, adpow, color=c)
legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='o', label='timing achieved'),
lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')]
f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True)
ax1.scatter(freqsA, delaysA, color='green')
ax1.scatter(freqsV, delaysV, color='blue')
ax2.scatter(freqsA, areasA, color='green')
ax2.scatter(freqsV, areasV, color='blue')
ax3.scatter(freqsA, adprodA, color='green')
ax3.scatter(freqsV, adprodV, color='blue')
ax4.scatter(freqsA, adsqA, color='green')
ax4.scatter(freqsV, adsqV, color='blue')
ax1.legend(handles=legend_elements)
ax4.set_xlabel("Target Freq (MHz)")
ax1.set_ylabel('Delay (ns)')
ax2.set_ylabel('Area (sq microns)')
ax3.set_ylabel('Area * Delay')
ax4.set_ylabel('Area * Delay^2')
ax4.set_ylabel('Area * $Delay^2$')
ax1.set_title(mod + '_' + str(width))
plt.show()
def plotPPA(mod, freq=None):
fig, axs = plt.subplots(2, 2)
oneMetricPlot(mod, 'delay', ax=axs[0,0], fits='clg', freq=freq)
oneMetricPlot(mod, 'area', ax=axs[0,1], fits='s', freq=freq)
oneMetricPlot(mod, 'lpower', ax=axs[1,0], fits='c', freq=freq)
oneMetricPlot(mod, 'denergy', ax=axs[1,1], fits='s', freq=freq)
titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " min delay"
plt.suptitle(mod + titleStr)
def squareAreaDelay(tech, mod, width):
''' plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width
'''
global allSynths
freqsL, delaysL, areasL = ([[], []] for i in range(3))
for oneSynth in allSynths:
if (mod == oneSynth.module) & (width == oneSynth.width) & (tech == oneSynth.tech):
ind = (1000/oneSynth.delay < oneSynth.freq) # when delay is within target clock period
freqsL[ind] += [oneSynth.freq]
delaysL[ind] += [oneSynth.delay]
areasL[ind] += [oneSynth.area]
f, (ax1) = plt.subplots(1, 1)
ax2 = ax1.twinx()
for ind in [0,1]:
areas = areasL[ind]
delays = delaysL[ind]
targets = freqsL[ind]
targets = [1000/f for f in targets]
if ('flop' in mod): areas = [m/2 for m in areas] # since two flops in each module
targets, delays, areas = noOutliers(targets, delays, areas) # comment out to see all
if not ind:
achievedDelays = delays
c = 'blue' if ind else 'green'
ax1.scatter(targets, delays, marker='^', color=c)
ax2.scatter(targets, areas, marker='s', color=c)
bestAchieved = min(achievedDelays)
legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='^', label='delay (timing achieved)'),
lines.Line2D([0], [0], color='green', ls='', marker='s', label='area (timing achieved)'),
lines.Line2D([0], [0], color='blue', ls='', marker='^', label='delay (timing violated)'),
lines.Line2D([0], [0], color='blue', ls='', marker='s', label='area (timing violated)')]
ax2.legend(handles=legend_elements, loc='upper left')
ax1.set_xlabel("Delay Targeted (ns)")
ax1.set_ylabel("Delay Achieved (ns)")
ax2.set_ylabel('Area (sq microns)')
ax1.set_title(mod + '_' + str(width))
squarify(f)
xvals = np.array(ax1.get_xlim())
frac = (min(flatten(delaysL))-xvals[0])/(xvals[1]-xvals[0])
areaLowerLim = min(flatten(areasL))-100
areaUpperLim = max(flatten(areasL))/frac + areaLowerLim
ax2.set_ylim([areaLowerLim, areaUpperLim])
ax1.plot(xvals, xvals, ls="--", c=".3")
ax1.hlines(y=bestAchieved, xmin=xvals[0], xmax=xvals[1], color="black", ls='--')
plt.show()
# writeCSV()
# makeCoefTable()
def squarify(fig):
''' helper function for squareAreaDelay()
forces matplotlib figure to be a square
'''
w, h = fig.get_size_inches()
if w > h:
t = fig.subplotpars.top
b = fig.subplotpars.bottom
axs = h*(t-b)
l = (1.-axs/w)/2
fig.subplots_adjust(left=l, right=1-l)
else:
t = fig.subplotpars.right
b = fig.subplotpars.left
axs = w*(t-b)
l = (1.-axs/h)/2
fig.subplots_adjust(bottom=l, top=1-l)
freqPlot('flopr', 128)
def adprodpow(areas, delays, pow):
''' for each value in [areas] returns area*delay^pow
helper function for freqPlot'''
result = []
# plotPPA('add')
for i in range(len(areas)):
result += [(areas[i])*(delays[i])**pow]
return result
def plotPPA(mod, freq=None, norm=True):
''' for the module specified, plots width vs delay, area, leakage power, and dynamic energy with fits
if no freq specified, uses the synthesis with best achievable delay for each width
overlays data from both techs
'''
fig, axs = plt.subplots(2, 2)
global fitDict
modFit = fitDict[mod]
oneMetricPlot(mod, 'delay', ax=axs[0,0], fits=modFit[0], freq=freq, norm=norm)
oneMetricPlot(mod, 'area', ax=axs[0,1], fits=modFit[1], freq=freq, norm=norm)
oneMetricPlot(mod, 'lpower', ax=axs[1,0], fits=modFit[1], freq=freq, norm=norm)
oneMetricPlot(mod, 'denergy', ax=axs[1,1], fits=modFit[1], freq=freq, norm=norm)
titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)"
plt.suptitle(mod + titleStr)
plt.show()
if __name__ == '__main__':
# set up stuff, global variables
widths = [8, 16, 32, 64, 128]
# fitDict in progress
fitDict = {'add': ['cg', 'cl'], 'mult': ['clg', 's'], 'comparator': ['clsgn', 'clsgn'], 'csa': ['clsgn', 'clsgn'], 'shiftleft': ['clsgn', 'clsgn'], 'flop': ['cl', 'cl'], 'priorityencoder': ['clsgn', 'clsgn']}
TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy")
techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1.96, 1.98, 1], ['gf32', 'purple', 's', 15e-3, .351, .3116, 1], ['tsmc28', 'blue', '^', 12.2e-3, .252, 1.09, 1]]
techSpecs = [TechSpec(*t) for t in techSpecs]
# cleanup()
# synthsintocsv() # slow, run only when new synth runs to add to csv
synthsfromcsv('ppaData.csv') # your csv here!
### examples
# for mod in ['comparator', 'priorityencoder', 'shiftleft']:
# for w in [16, 32]:
# freqPlot('sky90', mod, w) # the weird ones
# squareAreaDelay('sky90', 'add', 32)
# oneMetricPlot('add', 'delay')
for mod in ['add', 'csa', 'mult', 'comparator', 'priorityencoder', 'shiftleft', 'flop']:
plotPPA(mod, norm=False) # no norm input now defaults to normalized

File diff suppressed because it is too large Load Diff

View File

@ -42,21 +42,22 @@ def getData():
return allSynths
allSynths = getData()
arr = [-40, -20, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 14, 20, 40]
arr = [-40, -20, -8, -6, -4, -2, 0, 2, 4, 6, 8, 12, 20, 40]
widths = [32, 64, 128]
modules = ['flopr']
tech = 'sky90'
widths = [16, 8, 32, 64, 128]
modules = ['add']
tech = 'tsmc28'
LoT = []
# # # initial sweep to get estimate of min delay
# freqs = ['7500']
# freqs = [25000, 35000]
# for module in modules:
# for width in widths:
# for freq in freqs:
# LoT += [[module, width, tech, freq]]
# thorough sweep based on estimate of min delay
# # thorough sweep based on estimate of min delay
for m in modules:
for w in widths:
delays = []
@ -69,7 +70,6 @@ for m in modules:
LoT += [[m, w, tech, freq]]
deleteRedundant(LoT)
pool = Pool()
pool.starmap(runCommand, LoT)
pool.close()

View File

@ -117,6 +117,10 @@ if {$tech == "sky130"} {
} else {
set_driving_cell -lib_cell scc9gena_dfxbp_1 -pin Q $all_in_ex_clk
}
} elseif {$tech == "tsmc28"} {
if ($drive == "INV") {
set_driving_cell -lib_cell INVD1BWP30P140 -pin ZN $all_in_ex_clk
}
}
# Set input/output delay
@ -132,6 +136,10 @@ if {$tech == "sky130"} {
} else {
set_load [expr [load_of scc9gena_tt_1.2v_25C/scc9gena_dfxbp_1/D] * 1] [all_outputs]
}
} elseif {$tech == "tsmc28"} {
if ($drive == "INV") {
set_load [expr [load_of tcbn28hpcplusbwp30p140tt0p9v25c/INVD4BWP30P140/I] * 1] [all_outputs]
}
}
# Set the wire load model

View File

@ -98,101 +98,101 @@ $BUILD/testfloat_gen -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
echo "Creating f16_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
echo "Creating f32_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
echo "Creating f64_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
echo "Creating f128_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
echo "Creating f16_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
echo "Creating f32_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
echo "Creating f64_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
echo "Creating f128_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
echo "Creating f16_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
$BUILD/testfloat_gen -rmax f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
$BUILD/testfloat_gen -rmin f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
echo "Creating f32_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
$BUILD/testfloat_gen -rmax f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
$BUILD/testfloat_gen -rmin f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
echo "Creating f64_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
$BUILD/testfloat_gen -rmax f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
$BUILD/testfloat_gen -rmin f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
echo "Creating f128_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
$BUILD/testfloat_gen -rmax f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
$BUILD/testfloat_gen -rmin f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
echo "Creating f16_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
$BUILD/testfloat_gen -rmax f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
$BUILD/testfloat_gen -rmin f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
echo "Creating f32_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
$BUILD/testfloat_gen -rmax f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
$BUILD/testfloat_gen -rmin f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
echo "Creating f64_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
$BUILD/testfloat_gen -rmax f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
$BUILD/testfloat_gen -rmin f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
echo "Creating f128_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
$BUILD/testfloat_gen -rmax f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
$BUILD/testfloat_gen -rmin f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
$BUILD/testfloat_gen -rnear_even -exact f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
echo "Creating f16_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv