This commit is contained in:
DTowersM 2022-06-13 23:34:35 +00:00
commit 7c0f4dd954
56 changed files with 94818 additions and 3406 deletions

@ -1 +1 @@
Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
Subproject commit be67c99bd461742aa1c100bcc0732657faae2230

View File

@ -0,0 +1,23 @@
# Makefile
CC = gcc
CFLAGS = -O3
LIBS = -lm
LFLAGS = -L.
# Link against the riscv-isa-sim version of SoftFloat rather than
# the regular version to get RISC-V NaN behavior
IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat
LIBS = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a
#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
#LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
SRCS = $(wildcard *.c)
PROGS = $(patsubst %.c,%,$(SRCS))
all: $(PROGS)
%: %.c
$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)
clean:
rm -f $(PROGS)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,23 @@
# fma.do
#
# run with vsim -do "do fma.do"
# add -c before -do for batch simulation
onbreak {resume}
# create library
vlib worklib
vlog -lint -sv -work worklib fma16.v testbench.v
vopt +acc worklib.testbench_fma16 -work worklib -o testbenchopt
vsim -lib worklib testbenchopt
add wave sim:/testbench_fma16/clk
add wave sim:/testbench_fma16/reset
add wave sim:/testbench_fma16/x
add wave sim:/testbench_fma16/y
add wave sim:/testbench_fma16/z
add wave sim:/testbench_fma16/result
add wave sim:/testbench_fma16/rexpected
run -all

View File

@ -0,0 +1,268 @@
// fma16.sv
// David_Harris@hmc.edu 26 February 2022
// 16-bit floating-point multiply-accumulate
// Operation: general purpose multiply, add, fma, with optional negation
// If mul=1, p = x * y. Else p = x.
// If add=1, result = p + z. Else result = p.
// If negr or negz = 1, negate result or z to handle negations and subtractions
// fadd: mul = 0, add = 1, negr = negz = 0
// fsub: mul = 0, add = 1, negr = 0, negz = 1
// fmul: mul = 1, add = 0, negr = 0, negz = 0
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
`define FFLEN 16
`define Nf 10
`define Ne 5
`define BIAS 15
`define EMIN (-(2**(`Ne-1)-1))
`define EMAX (2**(`Ne-1)-1)
`define NaN 16'h7E00
`define INF 15'h7C00
// rounding modes *** update
`define RZ 3'b00
`define RNE 3'b01
`define RM 3'b10
`define RP 3'b11
module fma16(
input logic [`FFLEN-1:0] x, y, z,
input logic mul, add, negr, negz,
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rp, 11: rn
output logic [`FFLEN-1:0] result);
logic [`Nf:0] xm, ym, zm; // U1.Nf
logic [`Ne-1:0] xe, ye, ze; // B_Ne
logic xs, ys, zs;
logic zs1; // sign before optional negation
logic [2*`Nf+1:0] pm; // U2.2Nf
logic [`Ne:0] pe; // B_Ne+1
logic ps; // sign of product
logic [22:0] rm;
logic [`Ne+1:0] re;
logic rs;
logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
logic [`Ne+1:0] re2;
unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan); // unpack inputs
//signadj16 signadj(negr, negz, xs, ys, zs1, ps, zs); // handle negations
mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps); // p = x * y
add16 add16(add, pm, zm, pe, ze, ps, zs, negz, rm, re, re2, rs); // r = z + p
postproc16 post(roundmode, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result); // normalize, round, pack
endmodule
module mult16(
input logic mul,
input logic [`Nf:0] xm, ym,
input logic [`Ne-1:0] xe, ye,
input logic xs, ys,
output logic [2*`Nf+1:0] pm,
output logic [`Ne:0] pe,
output logic ps);
// only multiply if mul = 1
assign pm = mul ? xm * ym : {1'b0, xm, 10'b0}; // multiply mantiassas
assign pe = mul ? xe + ye - `BIAS : {1'b0, xe}; // add exponents, account for bias
assign ps = xs ^ ys; // negative if X xor Y are negative
endmodule
module add16(
input logic add,
input logic [2*`Nf+1:0] pm, // U2.2Nf
input logic [`Nf:0] zm, // U1.Nf
input logic [`Ne:0] pe, // B_Ne+1
input logic [`Ne-1:0] ze, // B_Ne
input logic ps, zs,
input logic negz,
output logic [22:0] rm,
output logic [`Ne+1:0] re, // B_Ne+2
output logic [`Ne+1:0] re2,
output logic rs);
logic [`Nf*3+7:0] paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
logic signed [`Ne:0] ExpDiff; // Q(Ne+2).0
logic [`Ne:0] AlignCnt; // U(Ne+3) bits to right shift Z for alignment *** check size.
logic [`Nf-1:0] prezsticky;
logic zsticky;
logic effectivesub;
logic rs0;
logic [`Ne:0] leadingzeros, NormCnt; // *** should paramterize size
logic [`Ne:0] re1;
// Alignment shift
assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
assign ExpDiff = pe - {1'b0, ze}; // Compute exponent difference as signed number
always_comb // AlignCount mux; see Muller page 254
if (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7; re = {1'b0, pe}; end
else if (ExpDiff <= 2) begin AlignCnt = `Nf + 4 - ExpDiff; re = {1'b0, pe}; end
else if (ExpDiff <= `Nf+3) begin AlignCnt = `Nf + 4 - ExpDiff; re = {2'b0, ze}; end
else begin AlignCnt = 0; re = {2'b0, ze}; end
// Shift Zm right by AlignCnt. Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; //Right shift
assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1
// Effective subtraction
assign effectivesub = ps ^ zs ^ negz; // subtract |z| from |p|
assign zalignedaddsub = effectivesub ? ~zaligned : zaligned; // invert zaligned for subtraction
// Adder
assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
assign rs0 = r[`Nf*3+7]; // sign of the initial result
assign r2 = rs0 ? ~r+1 : r; // invert sum if negative; could optimize with end-around carry?
// Sign Logic
assign rs = ps ^ rs0; // flip the sign if necessary
// Leading zero counter
lzc lzc(r2, leadingzeros); // count number of leading zeros in 2Nf+5 lower digits of r2
assign re1 = pe +2 - leadingzeros; // *** declare, # of bits
// Normalization shift
always_comb // NormCount mux
if (ExpDiff < 3) begin
if (re1 >= `EMIN) begin NormCnt = `Nf + 3 + leadingzeros; re2 = {1'b0, re1}; end
else begin NormCnt = `Nf + 5 + pe - `EMIN; re2 = `EMIN; end
end else begin NormCnt = AlignCnt; re = {2'b00, ze}; end
assign rnormed = r2 << NormCnt; // *** update sticky
/* temporarily comment out to start synth
// One-bit secondary normalization
if (ExpDiff <= 2) begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
else begin // *** handle sticky
if (rnormed[***]) begin rnormed2 = rnormed >> 1; re2 = re+1; end
else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re; end
else begin rnormed2 = rnormed << 1; re2 = re-1; end
end
// round
assign l = rnormed2[***]; // least significant bit
assign r = rnormed2[***-1]; // rounding bit
assign s = ***; // sticky bit
always_comb
case (roundmode)
RZ: roundup = 0;
RP: roundup = ~rs & (r | s);
RM: roundup = rs & (r | s);
RNE: roundup = r & (s | l);
default: roundup = 0;
endcase
assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
*/
// *** need to handle rounding to MAXNUM vs. INFINITY
// add or pass product through
/* assign rm = add ? arm : {1'b0, pm};
assign re = add ? are : {1'b0, pe};
assign rs = add ? ars : ps; */
endmodule
module lzc(
input logic [`Nf*3+7:0] r2,
output logic [`Ne:0] leadingzeros
);
endmodule
module postproc16(
input logic [1:0] roundmode,
input logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
input logic [22:0] rm,
input logic [`Nf:0] zm, // U1.Nf
input logic [6:0] re,
input logic [`Ne-1:0] ze, // B_Ne
input logic rs, zs, ps,
input logic [`Ne+1:0] re2,
output logic [15:0] result);
logic [9:0] uf, uff;
logic [6:0] ue;
logic [6:0] ueb, uebiased;
logic invalid;
// Special cases
// *** not handling signaling NaN
// *** also add overflow/underflow/inexact
always_comb begin
if (xnan | ynan | znan) begin result = `NaN; invalid = 0; end // propagate NANs
else if ((xinf | yinf) & zinf & (ps ^ zs)) begin result = `NaN; invalid = 1; end // infinity - infinity
else if (xzero & yinf | xinf & yzero) begin result = `NaN; invalid = 1; end // zero times infinity
else if (xinf | yinf) begin result = {ps, `INF}; invalid = 0; end // X or Y
else if (zinf) begin result = {zs, `INF}; invalid = 0; end // infinite Z
else if (xzero | yzero) begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end
else if (re2 >= `EMAX) begin result = {rs, `INF}; invalid = 0; end
else begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end
end
always_comb
if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
ue = re + 7'b1;
uf = rm[20:11];
end else begin // no normalization shift needed
ue = re;
uf = rm[19:10];
end
// overflow
always_comb begin
ueb = ue-7'd15;
if (ue >= 7'd46) begin // overflow
/* uebiased = 7'd30;
uff = 10'h3ff; */
end else begin
uebiased = ue-7'd15;
uff = uf;
end
end
assign result = {rs, uebiased[4:0], uff};
// add special case handling for zeros, NaN, Infinity
endmodule
module signadj16(
input logic negr, negz,
input logic xs, ys, zs1,
output logic ps, zs);
assign ps = xs ^ ys; // sign of product
assign zs = zs1 ^ negz; // sign of addend
endmodule
module unpack16(
input logic [15:0] x, y, z,
output logic [10:0] xm, ym, zm,
output logic [4:0] xe, ye, ze,
output logic xs, ys, zs,
output logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);
unpacknum16 upx(x, xm, xe, xs, xzero, xinf, xnan);
unpacknum16 upy(y, ym, ye, ys, yzero, yinf, ynan);
unpacknum16 upz(z, zm, ze, zs, zzero, zinf, znan);
endmodule
module unpacknum16(
input logic [15:0] num,
output logic [10:0] m,
output logic [4:0] e,
output logic s,
output logic zero, inf, nan);
logic [9:0] f; // fraction without leading 1
logic [4:0] eb; // biased exponent
assign {s, eb, f} = num; // pull bit fields out of floating-point number
assign m = {1'b1, f}; // prepend leading 1 to fraction
assign e = eb; // leave bias in exponent ***
assign zero = (e == 0 && f == 0);
assign inf = (e == 31 && f == 0);
assign nan = (e == 31 && f != 0);
endmodule

View File

@ -0,0 +1,24 @@
// fma16.sv
// David_Harris@hmc.edu 26 February 2022
// 16-bit floating-point multiply-accumulate
// Operation: general purpose multiply, add, fma, with optional negation
// If mul=1, p = x * y. Else p = x.
// If add=1, result = p + z. Else result = p.
// If negr or negz = 1, negate result or z to handle negations and subtractions
// fadd: mul = 0, add = 1, negr = negz = 0
// fsub: mul = 0, add = 1, negr = 0, negz = 1
// fmul: mul = 1, add = 0, negr = 0, negz = 0
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
module fma16(
input logic [15:0] x, y, z,
input logic mul, add, negr, negz,
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rp, 11: rn
output logic [15:0] result);
endmodule

View File

@ -0,0 +1,240 @@
#include <stdio.h>
#include <stdint.h>
#include "softfloat.h"
#include "softfloat_types.h"
typedef union sp {
float32_t v;
float f;
} sp;
// lists of tests, terminated with 0x8000
uint16_t easyExponents[] = {15, 0x8000};
uint16_t medExponents[] = {1, 14, 15, 16, 20, 30, 0x8000};
uint16_t allExponents[] = {1, 15, 16, 30, 31, 0x8000};
uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
uint16_t medFracts[] = {0, 0x200, 0x001, 0x3FF, 0x8000};
uint16_t zeros[] = {0x0000, 0x8000};
uint16_t infs[] = {0x7C00, 0xFC00};
uint16_t nans[] = {0x7D00, 0x7D01};
void softfloatInit(void) {
softfloat_roundingMode = softfloat_round_minMag;
softfloat_exceptionFlags = 0;
softfloat_detectTininess = softfloat_tininess_beforeRounding;
}
float convFloat(float16_t f16) {
float32_t f32;
float res;
sp r;
f32 = f16_to_f32(f16);
r.v = f32;
res = r.f;
return res;
}
void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
float16_t result;
int op, flagVals;
char calc[80], flags[80];
float32_t x32, y32, z32, r32;
float xf, yf, zf, rf;
float16_t smallest;
if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
if (!add) z.v = 0x0000; // force z to 0 to avoid add
if (negp) x.v ^= 0x8000; // flip sign of x to negate p
if (negz) z.v ^= 0x8000; // flip sign of z to negate z
op = roundingMode << 4 | mul<<3 | add<<2 | negp<<1 | negz;
// printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);
softfloat_exceptionFlags = 0; // clear exceptions
result = f16_mulAdd(x, y, z);
sprintf(flags, "NV: %d OF: %d UF: %d NX: %d",
(softfloat_exceptionFlags >> 4) % 2,
(softfloat_exceptionFlags >> 2) % 2,
(softfloat_exceptionFlags >> 1) % 2,
(softfloat_exceptionFlags) % 2);
// pack these four flags into one nibble, discarding DZ flag
flagVals = softfloat_exceptionFlags & 0x7 | ((softfloat_exceptionFlags >> 1) & 0x8);
// convert to floats for printing
xf = convFloat(x);
yf = convFloat(y);
zf = convFloat(z);
rf = convFloat(result);
if (mul)
if (add) sprintf(calc, "%f * %f + %f = %f", xf, yf, zf, rf);
else sprintf(calc, "%f * %f = %f", xf, yf, rf);
else sprintf(calc, "%f + %f = %f", xf, zf, rf);
// omit denorms, which aren't required for this project
smallest.v = 0x0400;
float16_t resultmag = result;
resultmag.v &= 0x7FFF; // take absolute value
if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed) fprintf(fptr, "// Skip inf: ");
if (resultmag.v > 0x7C00 && !nanAllowed) fprintf(fptr, "// Skip NaN: ");
fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
}
void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases,
FILE *fptr, int *numCases) {
int i, j;
fprintf(fptr, desc); fprintf(fptr, "\n");
*numCases=0;
for (i=0; e[i] != 0x8000; i++)
for (j=0; f[j] != 0x8000; j++) {
cases[*numCases].v = f[j] | e[i]<<10;
*numCases = *numCases + 1;
}
}
void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
int i, j, k, numCases;
float16_t x, y, z;
float16_t cases[100000];
FILE *fptr;
char fn[80];
sprintf(fn, "work/%s.tv", testName);
fptr = fopen(fn, "w");
prepTests(e, f, testName, desc, cases, fptr, &numCases);
z.v = 0x0000;
for (i=0; i < numCases; i++) {
x.v = cases[i].v;
for (j=0; j<numCases; j++) {
y.v = cases[j].v;
for (k=0; k<=sgn; k++) {
y.v ^= (k<<15);
genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
}
}
}
fclose(fptr);
}
void genAddTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
int i, j, k, numCases;
float16_t x, y, z;
float16_t cases[100000];
FILE *fptr;
char fn[80];
sprintf(fn, "work/%s.tv", testName);
fptr = fopen(fn, "w");
prepTests(e, f, testName, desc, cases, fptr, &numCases);
y.v = 0x0000;
for (i=0; i < numCases; i++) {
x.v = cases[i].v;
for (j=0; j<numCases; j++) {
z.v = cases[j].v;
for (k=0; k<=sgn; k++) {
z.v ^= (k<<15);
genCase(fptr, x, y, z, 0, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
}
}
}
fclose(fptr);
}
void genFMATests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
int i, j, k, l, numCases;
float16_t x, y, z;
float16_t cases[100000];
FILE *fptr;
char fn[80];
sprintf(fn, "work/%s.tv", testName);
fptr = fopen(fn, "w");
prepTests(e, f, testName, desc, cases, fptr, &numCases);
for (i=0; i < numCases; i++) {
x.v = cases[i].v;
for (j=0; j<numCases; j++) {
y.v = cases[j].v;
for (k=0; k<numCases; k++) {
z.v = cases[k].v;
for (l=0; l<=sgn; l++) {
z.v ^= (l<<15);
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
}
}
}
}
fclose(fptr);
}
void genSpecialTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
int i, j, k, sx, sy, sz, numCases;
float16_t x, y, z;
float16_t cases[100000];
FILE *fptr;
char fn[80];
sprintf(fn, "work/%s.tv", testName);
fptr = fopen(fn, "w");
prepTests(e, f, testName, desc, cases, fptr, &numCases);
cases[numCases].v = 0x0000; // add +0 case
cases[numCases+1].v = 0x8000; // add -0 case
numCases += 2;
for (i=0; i < numCases; i++) {
x.v = cases[i].v;
for (j=0; j<numCases; j++) {
y.v = cases[j].v;
for (k=0; k<numCases; k++) {
z.v = cases[k].v;
for (sx=0; sx<=sgn; sx++) {
x.v ^= (sx<<15);
for (sy=0; sy<=sgn; sy++) {
y.v ^= (sy<<15);
for (sz=0; sz<=sgn; sz++) {
z.v ^= (sz<<15);
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
}
}
}
}
}
}
fclose(fptr);
}
int main()
{
softfloatInit(); // configure softfloat modes
// Test cases: multiplication
genMulTests(easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
genMulTests(medExponents, medFracts, 0, "fmul_1", "// Multiply with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
genMulTests(medExponents, medFracts, 1, "fmul_2", "// Multiply with various exponents and signed fractions, RZ", 0, 0, 0, 0);
// Test cases: addition
genAddTests(easyExponents, easyFracts, 0, "fadd_0", "// Add with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
genAddTests(medExponents, medFracts, 0, "fadd_1", "// Add with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
genAddTests(medExponents, medFracts, 1, "fadd_2", "// Add with various exponents and signed fractions, RZ", 0, 0, 0, 0);
// Test cases: FMA
genFMATests(easyExponents, easyFracts, 0, "fma_0", "// FMA with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
genFMATests(medExponents, medFracts, 0, "fma_1", "// FMA with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
genFMATests(medExponents, medFracts, 1, "fma_2", "// FMA with various exponents and signed fractions, RZ", 0, 0, 0, 0);
// Test cases: Zero, Infinity, NaN
genSpecialTests(allExponents, medFracts, 1, "fma_special_rz", "// FMA with special cases, RZ", 0, 1, 1, 1);
// Full test cases with other rounding modes
softfloat_roundingMode = softfloat_round_near_even;
genSpecialTests(allExponents, medFracts, 1, "fma_special_rne", "// FMA with special cases, RNE", 1, 1, 1, 1);
softfloat_roundingMode = softfloat_round_min;
genSpecialTests(allExponents, medFracts, 1, "fma_special_rm", "// FMA with special cases, RM", 2, 1, 1, 1);
softfloat_roundingMode = softfloat_round_max;
genSpecialTests(allExponents, medFracts, 1, "fma_special_rp", "// FMA with special cases, RP", 3, 1, 1, 1);
return 0;
}

8
examples/verilog/fma/lint-fma Executable file
View File

@ -0,0 +1,8 @@
#!/bin/bash
# check for warnings in Verilog code
# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
export PATH=$PATH:/usr/local/bin/
verilator=`which verilator`
basepath=$(dirname $0)/..
$verilator --lint-only --top-module fma16 fma16.v

2
examples/verilog/fma/sim-fma Executable file
View File

@ -0,0 +1,2 @@
vsim -do "do fma.do"

View File

@ -0,0 +1 @@
vsim -c -do "do fma.do"

1
examples/verilog/fma/synth Executable file
View File

@ -0,0 +1 @@
make -C ../../../synthDC synth DESIGN=fma16

View File

@ -0,0 +1,52 @@
/* verilator lint_off STMTDLY */
module testbench_fma16;
reg clk, reset;
reg [15:0] x, y, z, rexpected;
wire [15:0] result;
reg [7:0] ctrl;
reg [3:0] flagsexpected;
reg mul, add, negp, negz;
reg [1:0] roundmode;
reg [31:0] vectornum, errors;
reg [75:0] testvectors[10000:0];
// instantiate device under test
fma16 dut(x, y, z, mul, add, negp, negz, roundmode, result);
// generate clock
always
begin
clk = 1; #5; clk = 0; #5;
end
// at start of test, load vectors and pulse reset
initial
begin
$readmemh("work/fmul_0.tv", testvectors);
vectornum = 0; errors = 0;
reset = 1; #22; reset = 0;
end
// apply test vectors on rising edge of clk
always @(posedge clk)
begin
#1; {x, y, z, ctrl, rexpected, flagsexpected} = testvectors[vectornum];
{roundmode, mul, add, negp, negz} = ctrl[5:0];
end
// check results on falling edge of clk
always @(negedge clk)
if (~reset) begin // skip during reset
if (result !== rexpected) begin // check result // *** should also add tests on flags eventually
$display("Error: inputs %h * %h + %h", x, y, z);
$display(" result = %h (%h expected)", result, rexpected);
errors = errors + 1;
end
vectornum = vectornum + 1;
if (testvectors[vectornum] === 'x) begin
$display("%d tests completed with %d errors",
vectornum, errors);
$stop;
end
end
endmodule

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,130 @@
#!/usr/bin/perl -w
# torturegen.pl
# David_Harris@hmc.edu 19 April 2022
# Convert TestFloat cases into format for fma16 project torture test
# Strip out cases involving denorms
use strict;
my @basenames = ("add", "mul", "mulAdd");
my @roundingmodes = ("rz", "rd", "ru", "rne");
my @names = ();
foreach my $name (@basenames) {
foreach my $mode (@roundingmodes) {
push(@names, "f16_${name}_$mode.tv");
}
}
open(TORTURE, ">work/torture.tv") || die("Can't write torture.tv");
my $datestring = localtime();
print(TORTURE "// Torture tests generated $datestring by $0\n");
foreach my $tv (@names) {
open(TV, "work/$tv") || die("Can't read $tv");
my $type = &getType($tv); # is it mul, add, mulAdd
my $rm = &getRm($tv); # rounding mode
# if ($rm != 0) { next; } # only do rz
print (TORTURE "\n////////// Testcases from $tv of type $type rounding mode $rm\n");
print ("\n////////// Testcases from $tv of type $type rounding mode $rm\n");
my $linecount = 0;
my $babyTorture = 0;
while (<TV>) {
my $line = $_;
$linecount++;
my $density = 10;
if ($type eq "mulAdd") {$density = 500;}
if ($babyTorture) {
$density = 100;
if ($type eq "mulAdd") {$density = 50000;}
}
if ((($linecount + $rm) % $density) != 0) { next }; # too many tests to use
chomp($line); # strip off newline
my @parts = split(/_/, $line);
my ($x, $y, $z, $op, $w, $flags);
$x = $parts[0];
if ($type eq "add") { $y = "0000"; } else {$y = $parts[1]};
if ($type eq "mul") { $z = "3CFF"; } elsif ($type eq "add") {$z = $parts[1]} else { $z = $parts[2]};
$op = $rm << 4;
if ($type eq "mul" || $type eq "mulAdd") { $op = $op + 8; }
if ($type eq "add" || $type eq "mulAdd") { $op = $op + 4; }
my $opname = sprintf("%02x", $op);
if ($type eq "mulAdd") {$w = $parts[3];} else {$w = $parts[2]};
if ($type eq "mulAdd") {$flags = $parts[4];} else {$flags = $parts[3]};
$flags = substr($flags, -1); # take last character
if (&fpval($w) eq "NaN") { $w = "7e00"; }
my $vec = "${x}_${y}_${z}_${opname}_${w}_${flags}";
my $skip = "";
if (&isdenorm($x) || &isdenorm($y) || &isdenorm($z) || &isdenorm($w)) {
$skip = "Skipped denorm";
}
my $summary = &summary($x, $y, $z, $w, $type);
if ($skip ne "") {
print TORTURE "// $skip $tv line $linecount $line $summary\n"
}
else { print TORTURE "$vec // $tv line $linecount $line $summary\n";}
}
close(TV);
}
close(TORTURE);
sub fpval {
my $val = shift;
$val = hex($val); # convert hex string to number
my $frac = $val & 0x3FF;
my $exp = ($val >> 10) & 0x1F;
my $sign = $val >> 15;
my $res;
if ($exp == 31 && $frac != 0) { return "NaN"; }
elsif ($exp == 31) { $res = "INF"; }
elsif ($val == 0) { $res = 0; }
elsif ($exp == 0) { $res = "Denorm"; }
else { $res = sprintf("1.%011b x 2^%d", $frac, $exp-15); }
if ($sign == 1) { $res = "-$res"; }
return $res;
}
sub summary {
my $x = shift; my $y = shift; my $z = shift; my $w = shift; my $type = shift;
my $xv = &fpval($x);
my $yv = &fpval($y);
my $zv = &fpval($z);
my $wv = &fpval($w);
if ($type eq "add") { return "$xv + $zv = $wv"; }
elsif ($type eq "mul") { return "$xv * $yv = $wv"; }
else {return "$xv * $yv + $zv = $wv"; }
}
sub getType {
my $tv = shift;
if ($tv =~ /mulAdd/) { return("mulAdd"); }
elsif ($tv =~ /mul/) { return "mul"; }
else { return "add"; }
}
sub getRm {
my $tv = shift;
if ($tv =~ /rz/) { return 0; }
elsif ($tv =~ /rne/) { return 1; }
elsif ($tv =~ /rd/) {return 2; }
elsif ($tv =~ /ru/) { return 3; }
else { return "bad"; }
}
sub isdenorm {
my $fp = shift;
my $val = hex($fp);
my $expv = $val >> 10;
$expv = $expv & 0x1F;
my $denorm = 0;
if ($expv == 0 && $val != 0) { $denorm = 1;}
# my $e0 = ($expv == 0);
# my $vn0 = ($val != 0);
# my $denorm = 0; #($exp == 0 && $val != 0); # denorm exponent but not all zero
# print("Num $fp Exp $expv Denorm $denorm Done\n");
return $denorm;
}

View File

@ -0,0 +1,62 @@
onerror {resume}
quietly WaveActivateNextPane {} 0
add wave -noupdate /testbench_fma16/clk
add wave -noupdate /testbench_fma16/reset
add wave -noupdate /testbench_fma16/x
add wave -noupdate /testbench_fma16/y
add wave -noupdate /testbench_fma16/z
add wave -noupdate /testbench_fma16/result
add wave -noupdate /testbench_fma16/rexpected
add wave -noupdate /testbench_fma16/dut/x
add wave -noupdate /testbench_fma16/dut/y
add wave -noupdate /testbench_fma16/dut/z
add wave -noupdate /testbench_fma16/dut/mul
add wave -noupdate /testbench_fma16/dut/add
add wave -noupdate /testbench_fma16/dut/negr
add wave -noupdate /testbench_fma16/dut/negz
add wave -noupdate /testbench_fma16/dut/roundmode
add wave -noupdate /testbench_fma16/dut/result
add wave -noupdate /testbench_fma16/dut/XManE
add wave -noupdate /testbench_fma16/dut/YManE
add wave -noupdate /testbench_fma16/dut/ZManE
add wave -noupdate /testbench_fma16/dut/XExpE
add wave -noupdate /testbench_fma16/dut/YExpE
add wave -noupdate /testbench_fma16/dut/ZExpE
add wave -noupdate /testbench_fma16/dut/PExpE
add wave -noupdate /testbench_fma16/dut/Ne
add wave -noupdate /testbench_fma16/dut/upOneExt
add wave -noupdate /testbench_fma16/dut/XSgnE
add wave -noupdate /testbench_fma16/dut/YSgnE
add wave -noupdate /testbench_fma16/dut/ZSgnE
add wave -noupdate /testbench_fma16/dut/PSgnE
add wave -noupdate /testbench_fma16/dut/ProdManE
add wave -noupdate /testbench_fma16/dut/NfracS
add wave -noupdate /testbench_fma16/dut/ProdManAl
add wave -noupdate /testbench_fma16/dut/ZManExt
add wave -noupdate /testbench_fma16/dut/ZManAl
add wave -noupdate /testbench_fma16/dut/Nfrac
add wave -noupdate /testbench_fma16/dut/res
add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt
add wave -noupdate /testbench_fma16/dut/NSamt
add wave -noupdate /testbench_fma16/dut/ZExpGreater
add wave -noupdate /testbench_fma16/dut/ACLess
add wave -noupdate /testbench_fma16/dut/upOne
add wave -noupdate /testbench_fma16/dut/KillProd
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
quietly wave cursor active 2
configure wave -namecolwidth 237
configure wave -valuecolwidth 64
configure wave -justifyvalue left
configure wave -signalnamewidth 0
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 1
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {4083 ns} {4235 ns}

View File

@ -55,20 +55,22 @@
`define Q_NE 32'd15
`define Q_NF 32'd112
`define Q_BIAS 32'd16383
`define Q_FMT 2'd3
`define D_LEN 32'd64
`define D_NE 32'd11
`define D_NF 32'd52
`define D_BIAS 32'd1023
`define D_FMT 32'd1
`define D_FMT 2'd1
`define S_LEN 32'd32
`define S_NE 32'd8
`define S_NF 32'd23
`define S_BIAS 32'd127
`define S_FMT 32'd1
`define S_FMT 2'd0
`define H_LEN 32'd16
`define H_NE 32'd5
`define H_NF 32'd10
`define H_BIAS 32'd15
`define H_FMT 2'd2
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
`define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `F_SUPPORTED ? `S_LEN : `H_LEN)
@ -91,6 +93,12 @@
`define FMT2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 2'd0 : 2'd2)
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
// largest length in IEU/FPU
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))
// Disable spurious Verilator warnings
/* verilator lint_off STMTDLY */

View File

@ -9,4 +9,4 @@
# sqrt - test square ro
# all - test everything
vsim -do "do testfloat.do rv64fpquad cmp"
vsim -do "do testfloat.do rv64fp mul"

View File

@ -67,6 +67,7 @@ add wave -hex /testbench/dut/core/ebu/HTRANS
add wave -hex /testbench/dut/core/ebu/HRDATA
add wave -hex /testbench/dut/core/ebu/HWRITE
add wave -hex /testbench/dut/core/ebu/HWDATA
add wave -hex /testbench/dut/core/ebu/HBURST
add wave -hex /testbench/dut/core/ebu/CaptureDataM
add wave -divider

View File

@ -1,102 +1,9 @@
add wave -noupdate /testbenchfp/clk
add wave -noupdate -radix decimal /testbenchfp/VectorNum
add wave -group Other -noupdate /testbenchfp/FrmNum
add wave -group Other -noupdate /testbenchfp/X
add wave -group Other -noupdate /testbenchfp/Y
add wave -group Other -noupdate /testbenchfp/Z
add wave -group Other -noupdate /testbenchfp/Res
add wave -group Other -noupdate /testbenchfp/Ans
add wave -group Rne -noupdate /testbenchfp/FmaRneX
add wave -group Rne -noupdate /testbenchfp/FmaRneY
add wave -group Rne -noupdate /testbenchfp/FmaRneZ
add wave -group Rne -noupdate /testbenchfp/FmaRneRes
add wave -group Rne -noupdate /testbenchfp/FmaRneAns
add wave -group Rz -noupdate /testbenchfp/FmaRzX
add wave -group Rz -noupdate /testbenchfp/FmaRzY
add wave -group Rz -noupdate /testbenchfp/FmaRzZ
add wave -group Rz -noupdate /testbenchfp/FmaRzRes
add wave -group Rz -noupdate /testbenchfp/FmaRzAns
add wave -group Ru -noupdate /testbenchfp/FmaRuX
add wave -group Ru -noupdate /testbenchfp/FmaRuY
add wave -group Ru -noupdate /testbenchfp/FmaRuZ
add wave -group Ru -noupdate /testbenchfp/FmaRuRes
add wave -group Ru -noupdate /testbenchfp/FmaRuAns
add wave -group Rd -noupdate /testbenchfp/FmaRdX
add wave -group Rd -noupdate /testbenchfp/FmaRdY
add wave -group Rd -noupdate /testbenchfp/FmaRdZ
add wave -group Rd -noupdate /testbenchfp/FmaRdRes
add wave -group Rd -noupdate /testbenchfp/FmaRdAns
add wave -group Rnm -noupdate /testbenchfp/FmaRnmX
add wave -group Rnm -noupdate /testbenchfp/FmaRnmY
add wave -group Rnm -noupdate /testbenchfp/FmaRnmZ
add wave -group Rnm -noupdate /testbenchfp/FmaRnmRes
add wave -group Rnm -noupdate /testbenchfp/FmaRnmAns
add wave -group AllSignals -noupdate /*
add wave -group AllSignals -noupdate /testbenchfp/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultselect/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultselect/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultselect/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultselect/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultselect/*
add wave -noupdate /testbenchfp/FrmNum
add wave -noupdate /testbenchfp/X
add wave -noupdate /testbenchfp/Y
add wave -noupdate /testbenchfp/Z
add wave -noupdate /testbenchfp/Res
add wave -noupdate /testbenchfp/Ans

View File

@ -473,6 +473,7 @@ add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusRead
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAdr
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAck
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusHRDATA
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUTransComplete
add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillF
add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/CurrState
add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillDataLine0

View File

@ -45,6 +45,10 @@ module ahblite (
input logic IFUBusRead,
output logic [`XLEN-1:0] IFUBusHRDATA,
output logic IFUBusAck,
output logic IFUBusInit,
input logic [2:0] IFUBurstType,
input logic [1:0] IFUTransType,
input logic IFUTransComplete,
// Signals from Data Cache
input logic [`PA_BITS-1:0] LSUBusAdr,
input logic LSUBusRead,
@ -52,7 +56,11 @@ module ahblite (
input logic [`XLEN-1:0] LSUBusHWDATA,
output logic [`XLEN-1:0] LSUBusHRDATA,
input logic [2:0] LSUBusSize,
input logic [2:0] LSUBurstType,
input logic [1:0] LSUTransType,
input logic LSUTransComplete,
output logic LSUBusAck,
output logic LSUBusInit,
// AHB-Lite external signals
(* mark_debug = "true" *) input logic [`AHBW-1:0] HRDATA,
(* mark_debug = "true" *) input logic HREADY, HRESP,
@ -87,6 +95,9 @@ module ahblite (
// Data accesses have priority over instructions. However, if a data access comes
// while an instruction read is occuring, the instruction read finishes before
// the data access can take place.
// *** This is no longer true when adding burst mode. We need to finish the current
// read before doing another read. Need to work this out, but preliminarily we can
// store the current read type in a flop and use that to figure out what burst type to use.
flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextBusState, IDLE, BusState);
@ -100,19 +111,21 @@ module ahblite (
// interface that might be used in place of the ahblite.
always_comb
case (BusState)
IDLE: if (LSUBusRead) NextBusState = MEMREAD; // Memory has priority over instructions
else if (LSUBusWrite)NextBusState = MEMWRITE;
else if (IFUBusRead) NextBusState = INSTRREAD;
else NextBusState = IDLE;
MEMREAD: if (~HREADY) NextBusState = MEMREAD;
else if (IFUBusRead) NextBusState = INSTRREAD;
else NextBusState = IDLE;
MEMWRITE: if (~HREADY) NextBusState = MEMWRITE;
else if (IFUBusRead) NextBusState = INSTRREAD;
else NextBusState = IDLE;
INSTRREAD: if (~HREADY) NextBusState = INSTRREAD;
else NextBusState = IDLE; // if (IFUBusRead still high) *** need to wait?
default: NextBusState = IDLE;
IDLE: if (LSUBusRead) NextBusState = MEMREAD; // Memory has priority over instructions
else if (LSUBusWrite) NextBusState = MEMWRITE;
else if (IFUBusRead) NextBusState = INSTRREAD;
else NextBusState = IDLE;
MEMREAD: if (LSUTransComplete & IFUBusRead) NextBusState = INSTRREAD;
else if (LSUTransComplete) NextBusState = IDLE;
else NextBusState = MEMREAD;
MEMWRITE: if (LSUTransComplete & IFUBusRead) NextBusState = INSTRREAD;
else if (LSUTransComplete) NextBusState = IDLE;
else NextBusState = MEMWRITE;
INSTRREAD: if (IFUTransComplete & LSUBusRead) NextBusState = MEMREAD;
else if (IFUTransComplete & LSUBusWrite) NextBusState = MEMWRITE;
else if (IFUTransComplete) NextBusState = IDLE;
else NextBusState = INSTRREAD;
default: NextBusState = IDLE;
endcase
@ -122,7 +135,7 @@ module ahblite (
assign #1 HADDR = AccessAddress;
assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway
assign HSIZE = (GrantData) ? {1'b0, LSUBusSize[1:0]} : ISize;
assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfH
assign HBURST = (GrantData) ? LSUBurstType : IFUBurstType; // If doing memory accesses, use LSUburst, else use Instruction burst.
/* Cache burst read/writes case statement (hopefully) WRAPS only have access to 4 wraps. X changes position based on HSIZE.
000: Single (SINGLE)
@ -133,15 +146,16 @@ module ahblite (
101: 8-beat incrementing burst (INCR8)
110: 16-beat wrapping burst (WRAP16) [wraps if X in 0X000000]
111: 16-beat incrementing burst (INCR16)
*/
*** Remove if not necessary
*/
assign HPROT = 4'b0011; // not used; see Section 3.7
assign HTRANS = (NextBusState != IDLE) ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise
assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise
assign HMASTLOCK = 0; // no locking supported
assign HWRITE = NextBusState == MEMWRITE;
assign HWRITE = (NextBusState == MEMWRITE);
// delay write data by one cycle for
flop #(`XLEN) wdreg(HCLK, LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
flopen #(`XLEN) wdreg(HCLK, (LSUBusAck | LSUBusInit), LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
// delay signals for subword writes
flop #(3) adrreg(HCLK, HADDR[2:0], HADDRD);
flop #(4) sizereg(HCLK, {UnsignedLoadM, HSIZE}, HSIZED);
@ -153,7 +167,9 @@ module ahblite (
assign IFUBusHRDATA = HRDATA;
assign LSUBusHRDATA = HRDATA;
assign IFUBusAck = (BusState == INSTRREAD) & (NextBusState != INSTRREAD);
assign LSUBusAck = (BusState == MEMREAD) & (NextBusState != MEMREAD) | (BusState == MEMWRITE) & (NextBusState != MEMWRITE);
assign IFUBusInit = (BusState != INSTRREAD) & (NextBusState == INSTRREAD);
assign LSUBusInit = (((BusState != MEMREAD) & (NextBusState == MEMREAD)) | (BusState != MEMWRITE) & (NextBusState == MEMWRITE));
assign IFUBusAck = HREADY & (BusState == INSTRREAD);
assign LSUBusAck = HREADY & ((BusState == MEMREAD) | (BusState == MEMWRITE));
endmodule

View File

@ -0,0 +1,69 @@
`include "wally-config.vh"
module cvtshiftcalc(
input logic XZeroM,
input logic ToInt,
input logic IntToFp,
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic [`NF:0] XManM, // input mantissas
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic CvtResDenormUfM,
output logic CvtResUf,
output logic [`LGLEN+`NF:0] CvtShiftIn // number to be shifted
);
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
///////////////////////////////////////////////////////////////////////////
// shifter
///////////////////////////////////////////////////////////////////////////
// seclect the input to the shifter
// fp -> int:
// | `XLEN zeros | Mantissa | 0's if nessisary |
// Other problems:
// - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
// - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
// - ex: for the case 0010000.... (double)
// ??? -> fp:
// - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
// | `NF-1 zeros | Mantissa | 0's if nessisary |
// - otherwise:
// | LzcInM | 0's if nessisary |
assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} :
{CvtLzcInM, {`NF+1{1'b0}}};
// choose the negative of the fraction size
if (`FPSIZES == 1) begin
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
end else if (`FPSIZES == 2) begin
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
default: ResNegNF = 1'bx;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
endcase
end
// determine if the result underflows ??? -> fp
// - if the first 1 is shifted out of the result then the result underflows
// - can't underflow an integer to fp conversions
assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroM&~IntToFp;
endmodule

View File

@ -2,13 +2,12 @@
`include "wally-config.vh"
// FOpCtrlE values
// 111 min
// 110 min
// 101 max
// 010 equal
// 001 less than
// 011 less than or equal
module fcmp (
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlE, // see above table
@ -20,12 +19,13 @@ module fcmp (
input logic XSNaNE, YSNaNE, // is signaling NaN
input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs
output logic CmpNVE, // invalid flag
output logic [`FLEN-1:0] CmpResE // compare resilt
output logic [`FLEN-1:0] CmpFpResE, // compare resilt
output logic [`XLEN-1:0] CmpIntResE // compare resilt
);
logic LTabs, LT, EQ; // is X < or > or = Y
logic [`FLEN-1:0] NaNRes;
logic BothZeroE, EitherNaNE, EitherSNaNE;
logic BothZero, EitherNaN, EitherSNaN;
assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
@ -36,9 +36,9 @@ module fcmp (
// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
assign EQ = (FSrcXE == FSrcYE);
assign BothZeroE = XZeroE&YZeroE;
assign EitherNaNE = XNaNE|YNaNE;
assign EitherSNaNE = XSNaNE|YSNaNE;
assign BothZero = XZeroE&YZeroE;
assign EitherNaN = XNaNE|YNaNE;
assign EitherSNaN = XSNaNE|YSNaNE;
// flags
@ -47,11 +47,11 @@ module fcmp (
// EQ - quiet - sets invalid if signaling NaN input
always_comb begin
case (FOpCtrlE[2:0])
3'b111: CmpNVE = EitherSNaNE;//min
3'b101: CmpNVE = EitherSNaNE;//max
3'b010: CmpNVE = EitherSNaNE;//equal
3'b001: CmpNVE = EitherNaNE;//less than
3'b011: CmpNVE = EitherNaNE;//less than or equal
3'b110: CmpNVE = EitherSNaN;//min
3'b101: CmpNVE = EitherSNaN;//max
3'b010: CmpNVE = EitherSNaN;//equal
3'b001: CmpNVE = EitherNaN;//less than
3'b011: CmpNVE = EitherNaN;//less than or equal
default: CmpNVE = 1'b0;
endcase
end
@ -112,16 +112,12 @@ module fcmp (
endcase
// when one input is a NaN -output the non-NaN
always_comb
case (FOpCtrlE[2:0])
3'b111: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
3'b101: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE;
3'b010: CmpResE = {(`FLEN-1)'(0), (EQ|BothZeroE) & ~EitherNaNE}; // Equal
3'b001: CmpResE = {(`FLEN-1)'(0), LT & ~BothZeroE & ~EitherNaNE}; // Less than
3'b011: CmpResE = {(`FLEN-1)'(0), (LT|EQ|BothZeroE) & ~EitherNaNE}; // Less than or equal
default: CmpResE = (`FLEN)'(0);
endcase
assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE :
XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN};
endmodule

View File

@ -10,99 +10,99 @@ module fctrl (
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic FRegWriteD, // FP register write enable
output logic FDivStartD, // Start division or squareroot
output logic [1:0] FResultSelD, // select result to be written to fp register
output logic [1:0] FResSelD, // select result to be written to fp register
output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit
output logic [1:0] FResSelD, // select one of the results done in the memory stage
output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
output logic [1:0] PostProcSelD,
output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
output logic FWriteIntD // is the result written to the integer register
);
`define FCTRLW 13
`define FCTRLW 11
logic [`FCTRLW-1:0] ControlsD;
//*** will putting x for don't cares reduce area in synthisis???
// FPU Instruction Decoder
always_comb
if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1;
ControlsD = `FCTRLW'b0_0_00_00_000_0_1;
else case(OpD)
// FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr
7'b0000111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b1_0_00_000_00_00_0_0; // flw
3'b011: ControlsD = `FCTRLW'b1_0_00_001_00_00_0_0; // fld
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b010: ControlsD = `FCTRLW'b1_0_10_00_000_0_0; // flw
3'b011: ControlsD = `FCTRLW'b1_0_10_00_000_0_0; // fld
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
endcase
7'b0100111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_0_00_010_00_00_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_00_011_00_00_0_0; // fsd
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b010: ControlsD = `FCTRLW'b0_0_00_00_000_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_00_00_000_0_0; // fsd
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
endcase
7'b1000011: ControlsD = `FCTRLW'b1_0_01_000_00_00_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_01_001_00_00_0_0; // fmsub
7'b1001011: ControlsD = `FCTRLW'b1_0_01_010_00_00_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_01_011_00_00_0_0; // fnmadd
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0; // fmsub
7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0; // fnmadd
7'b1010011: casez(Funct7D)
7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_00_00_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_00_00_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_00_00_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_00_00_1_0; // fdiv
7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_00_00_1_0; // fsqrt
7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_000_1_0; // fdiv
7'b01011??: ControlsD = `FCTRLW'b1_0_01_01_001_1_0; // fsqrt
7'b00100??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_11_000_01_00_0_0; // fsgnj
3'b001: ControlsD = `FCTRLW'b1_0_11_001_01_00_0_0; // fsgnjn
3'b010: ControlsD = `FCTRLW'b1_0_11_010_01_00_0_0; // fsgnjx
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0; // fsgnj
3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0; // fsgnjn
3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0; // fsgnjx
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
endcase
7'b00101??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_11_111_10_00_0_0; // fmin
3'b001: ControlsD = `FCTRLW'b1_0_11_101_10_00_0_0; // fmax
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0; // fmin
3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0; // fmax
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
endcase
7'b10100??: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_1_11_010_10_00_0_0; // feq
3'b001: ControlsD = `FCTRLW'b0_1_11_001_10_00_0_0; // flt
3'b000: ControlsD = `FCTRLW'b0_1_11_011_10_00_0_0; // fle
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0; // feq
3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0; // flt
3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0; // fle
default: ControlsD = `FCTRLW'b0_0_00_00_000__0_1; // non-implemented instruction
endcase
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_00_10_0_0; // fclass
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_00_01_0_0; // fmv.x.w
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_00_01_0_0; // fmv.x.d
else ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
7'b1101000: case(Rs2D[1:0])//***reduce resSel
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.s.w w->s
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.s.wu wu->s
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.s.l l->s
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.s.lu lu->s
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_10_00_000_0_0; // fclass
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_00_000_0_0; // fmv.x.w to int reg
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_00_000_0_0; // fmv.x.d to int reg
else ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
7'b1101000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.s.w w->s
2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.s.wu wu->s
2'b10: ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.s.l l->s
2'b11: ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.s.lu lu->s
endcase
7'b1100000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.s s->w
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.s s->wu
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.s s->l
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.s s->lu
2'b00: ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.s s->w
2'b01: ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.s s->wu
2'b10: ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.s s->l
2'b11: ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.s s->lu
endcase
7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_00_00_0_0; // fmv.w.x
7'b0100000: ControlsD = `FCTRLW'b1_0_11_000_11_00_0_0; // fcvt.s.d
7'b1111000: ControlsD = `FCTRLW'b1_0_00_00_011_0_0; // fmv.w.x to fp reg
7'b0100000: ControlsD = `FCTRLW'b1_0_01_00_000_0_0; // fcvt.s.d
7'b1101001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.d.w w->d
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.d.wu wu->d
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.d.l l->d
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.d.lu lu->d
2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.d.w w->d
2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.d.wu wu->d
2'b10: ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.d.l l->d
2'b11: ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.d.lu lu->d
endcase
7'b1100001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.d d->w
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.d d->wu
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.d d->l
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.d d->lu
2'b00: ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.d d->w
2'b01: ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.d d->wu
2'b10: ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.d d->l
2'b11: ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.d d->lu
endcase
7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_00_00_0_0; // fmv.d.x
7'b0100001: ControlsD = `FCTRLW'b1_0_11_001_11_00_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
7'b1111001: ControlsD = `FCTRLW'b1_0_00_00_011_0_0; // fmv.d.x to fp reg
7'b0100001: ControlsD = `FCTRLW'b1_0_01_00_001_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
endcase
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
endcase
// unswizzle control bits
assign {FRegWriteD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD;
// rounding modes:
// 000 - round to nearest, ties to even
@ -121,82 +121,61 @@ module fctrl (
assign FmtD = 0;
else if (`FPSIZES == 2)begin
logic [1:0] FmtTmp;
assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtTmp = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtD = (`FMT == FmtTmp);
end
else if (`FPSIZES == 3|`FPSIZES == 4)
assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtD = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
// assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
// FResultSel:
// 000 - ReadRes - load
// 001 - FMARes - FMA and multiply
// 010 - FAddRes - add and fp to fp
// 011 - FDivRes - divide and squareroot
// 100 - FRes - anything that is written to the fp register and is ready in the memory stage
// FResSel:
// 00 - SrcA - move to fp register
// 01 - SgnRes - sign injection
// 10 - CmpRes - min/max
// 11 - CvtRes - convert to fp
// FIntResSel:
// 00 - CmpRes - less than, equal, or less than or equal
// 01 - FSrcX - move to int register
// 10 - ClassRes - classify
// 11 - CvtRes - convert to signed/unsigned int
// Final Res Sel:
// fp int
// 00 other cmp
// 01 postproc cvt
// 10 store class
// 11 mv
// OpCtrl values:
// div/sqrt
// fdiv = ???0
// fsqrt = ???1
// post processing Sel:
// 00 cvt
// 01 div
// 10 fma
// cmp
// fmin = ?111
// fmax = ?101
// feq = ?010
// flt = ?001
// fle = ?011
// {?, is min or max, is eq or le, is lt or le}
// Other Sel:
// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]}
// 000 - sign 00
// 001 - negate sign 00
// 010 - xor sign 00
// 011 - mv to fp 01
// 110 - min 10
// 101 - max 10
//fma/mult
// fmadd = ?000
// fmsub = ?001
// fnmsub = ?010 -(a*b)+c
// fnmadd = ?011 -(a*b)-c
// fmul = ?100
// {?, is mul, negate product, negate addend}
// sgn inj
// fsgnj = ??00
// fsgnjn = ??01
// fsgnjx = ??10
// add/sub/cnvt
// fadd = 0000
// fsub = 0001
// fcvt.s.d = 0111
// fcvt.d.s = 0111
// Fmt controls the output for fp -> fp
// convert
// fcvt.w.s = 0010
// fcvt.wu.s = 0110
// fcvt.s.w = 0001
// fcvt.s.wu = 0101
// fcvt.l.s = 1010
// fcvt.lu.s = 1110
// fcvt.s.l = 1001
// fcvt.s.lu = 1101
// fcvt.w.d = 0010
// fcvt.wu.d = 0110
// fcvt.d.w = 0001
// fcvt.d.wu = 0101
// fcvt.l.d = 1010
// fcvt.lu.d = 1110
// fcvt.d.l = 1001
// fcvt.d.lu = 1101
// {long, unsigned, to int, from int}
// OpCtrl:
// Fma: {not multiply-add?, negate prod?, negate Z?}
// 000 - fmadd
// 001 - fmsub
// 010 - fnmsub
// 011 - fnmadd
// 100 - mul
// 110 - add
// 111 - sub
// Div:
// 0 - ???
// 1 - ???
// Cvt Int: {Int to Fp?, 64 bit int?, signed int?}
// Cvt Fp: output format
// 10 - to half
// 00 - to single
// 01 - to double
// 11 - to quad
// Cmp: {equal?, less than?}
// 010 - eq
// 001 - lt
// 011 - le
// 110 - min
// 101 - max
// Sgn:
// 00 - sign
// 01 - negate sign
// 10 - xor sign
endmodule

View File

@ -1,8 +1,5 @@
`include "wally-config.vh"
// largest length in IEU/FPU
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
module fcvt (
input logic XSgnE, // input's sign
@ -13,14 +10,13 @@ module fcvt (
input logic FWriteIntE, // is fp->int (since it's writting to the integer register)
input logic XZeroE, // is the input zero
input logic XDenormE, // is the input denormalized
input logic XInfE, // is the input infinity
input logic XNaNE, // is the input a NaN
input logic XSNaNE, // is the input a signaling NaN
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half)
output logic [`FLEN-1:0] CvtResE, // the fp conversion result
output logic [`XLEN-1:0] CvtIntResE, // the int conversion result
output logic [4:0] CvtFlgE // the conversion's flags
output logic [`NE:0] CvtCalcExpE, // the calculated expoent
output logic [`LOGLGLEN-1:0] CvtShiftAmtE, // how much to shift by
output logic CvtResDenormUfE,// does the result underflow or is denormalized
output logic CvtResSgnE, // the result's sign
output logic IntZeroE, // is the integer zero?
output logic [`LGLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder)
);
// OpCtrls:
@ -41,34 +37,8 @@ module fcvt (
logic [`FMTBITS-1:0] OutFmt; // format of the output
logic [`XLEN-1:0] PosInt; // the positive integer input
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder)
logic [`NE:0] CalcExp; // the calculated expoent
logic [`LOGLGLEN-1:0] ShiftAmt; // how much to shift by
logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted
logic ResDenormUf;// does the result underflow or is denormalized
logic ResUf; // does the result underflow
logic [`LGLEN+`NF:0] Shifted; // the shifted result
logic [`NE-2:0] NewBias; // the bias of the final result
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
logic [`NE-1:0] OldExp; // the old exponent
logic ResSgn; // the result's sign
logic Sticky; // sticky bit - for rounding
logic Round; // round bit - for rounding
logic LSBFrac; // the least significant bit of the fraction - for rounding
logic CalcPlus1; // the calculated plus 1
logic Plus1; // add one to the final result?
logic [`FLEN-1:0] ShiftedPlus1; // plus one shifted to the proper position
logic [`NE:0] FullResExp; // the full result exponent (with the overflow bit)
logic [`NE-1:0] ResExp; // the result's exponent (trimmed to the correct size)
logic [`NF-1:0] ResFrac; // the result's fraction
logic [`XLEN+1:0] NegRes; // the negation of the result
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
logic Overflow, Underflow, Inexact, Invalid; // flags
logic IntInexact, FpInexact, IntInvalid, FpInvalid; // flags for FP and int outputs
logic [`NE-1:0] MaxExp; // the maximum exponent before overflow
logic [1:0] NegResMSBS; // the negitive integer result's most significant bits
logic [`FLEN-1:0] NaNRes, InfRes, Res, UfRes; //various special results
logic KillRes; // kill the result?
logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
@ -97,8 +67,9 @@ module fcvt (
// 1) negate the input if the input is a negitive singed integer
// 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE;
assign PosInt = CvtResSgnE ? -ForwardedSrcAE : ForwardedSrcAE;
assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
assign IntZeroE = ~|TrimInt;
///////////////////////////////////////////////////////////////////////////
// lzc
@ -107,32 +78,16 @@ module fcvt (
// choose the input to the leading zero counter i.e. priority encoder
// int -> fp : | positive integer | 00000... (if needed) |
// fp -> fp : | fraction | 00000... (if needed) |
assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
{XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt);
lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
///////////////////////////////////////////////////////////////////////////
// shifter
///////////////////////////////////////////////////////////////////////////
// seclect the input to the shifter
// fp -> int:
// | `XLEN zeros | Mantissa | 0's if nessisary |
// Other problems:
// - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
// - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
// - ex: for the case 0010000.... (double)
// ??? -> fp:
// - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
// | `NF-1 zeros | Mantissa | 0's if nessisary |
// - otherwise:
// | lzcIn | 0's if nessisary |
assign ShiftIn = ToInt ? {{`XLEN{1'b0}}, XManE[`NF]&~CalcExp[`NE], XManE[`NF-1]|(CalcExp[`NE]&XManE[`NF]), XManE[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
ResDenormUf ? {{`NF-1{1'b0}}, XManE, {`LGLEN-`NF+1{1'b0}}} :
{LzcIn, {`NF+1{1'b0}}};
// kill the shift if it's negitive
// kill the shift if it's negitive
// select the amount to shift by
// fp -> int:
// - shift left by CalcExp - essentially shifting until the unbiased exponent = 0
@ -144,47 +99,10 @@ module fcvt (
// - only shift fp -> fp if the intital value is denormalized
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
assign ShiftAmt = ToInt ? CalcExp[`LOGLGLEN-1:0]&{`LOGLGLEN{~CalcExp[`NE]}} :
ResDenormUf&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CalcExp[`LOGLGLEN-1:0] :
assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} :
CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] :
(ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
// shift
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
// process:
// - start - CalcExp = 1 + XExp - Largest Bias
// | `XLEN zeros | Mantissa | 0's if nessisary |
//
// - shift left 1 (1)
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
// . <- binary point
//
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
// | 0's | Mantissa | 0's if nessisary |
// | keep |
//
// fp -> fp:
// - if result is denormalized or underflowed:
// | `NF-1 zeros | Mantissa | 0's if nessisary | << NF+CalcExp-1
// process:
// - start
// | mantissa | 0's |
//
// - shift right by NF-1 (NF-1)
// | `NF-1 zeros | mantissa | 0's |
//
// - shift left by CalcExp = XExp - Largest bias + new bias
// | 0's | mantissa | 0's |
// | keep |
//
// - if the input is denormalized:
// | lzcIn | 0's if nessisary | << ZeroCnt+1
// - plus 1 to shift out the first 1
//
// int -> fp: | lzcIn | 0's if nessisary | << ZeroCnt+1
// - plus 1 to shift out the first 1
assign Shifted = ShiftIn << ShiftAmt;
///////////////////////////////////////////////////////////////////////////
// exp calculations
///////////////////////////////////////////////////////////////////////////
@ -262,40 +180,11 @@ module fcvt (
// - shift left to normilize (-1-ZeroCnt)
// - newBias to make the biased exponent
//
assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
// find if the result is dnormal or underflows
// - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
// - can't underflow an integer to Fp conversion
assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp;
// choose the negative of the fraction size
if (`FPSIZES == 1) begin
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
end else if (`FPSIZES == 2) begin
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
default: ResNegNF = 1'bx;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
endcase
end
// determine if the result underflows ??? -> fp
// - if the first 1 is shifted out of the result then the result underflows
// - can't underflow an integer to fp conversions
assign ResUf = ($signed(CalcExp) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroE&~IntToFp;
assign CvtResDenormUfE = (~|CvtCalcExpE | CvtCalcExpE[`NE])&~XZeroE&~IntToFp;
///////////////////////////////////////////////////////////////////////////
@ -307,498 +196,7 @@ module fcvt (
// - if 64-bit : check the msb of the 64-bit integer input and if it's signed
// - if 32-bit : check the msb of the 32-bit integer input and if it's signed
// - otherwise: the floating point input's sign
assign ResSgn = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
assign CvtResSgnE = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
///////////////////////////////////////////////////////////////////////////
// rounding
///////////////////////////////////////////////////////////////////////////
endmodule
// round to nearest even
// {Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
// 11 - Plus1
// round to zero - do nothing
// round to -infinity - Plus1 if negative
// round to infinity - Plus1 if positive
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0x - do nothing
// 1x - Plus1
// ResUf is used when a fp->fp result underflows but all the bits get shifted out, which leaves nothing for the sticky bit
if (`FPSIZES == 1) begin
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : |Shifted[`LGLEN+`NF-`NF-1:0]|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : Shifted[`LGLEN+`NF-`NF];
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : Shifted[`LGLEN+`NF-`NF+1];
end else if (`FPSIZES == 2) begin
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] :
(OutFmt ? |Shifted[`LGLEN+`NF-`NF-1:0] : |Shifted[`LGLEN+`NF-`NF1-1:0])|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] :
OutFmt ? Shifted[`LGLEN+`NF-`NF] : Shifted[`LGLEN+`NF-`NF1];
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] :
OutFmt ? Shifted[`LGLEN+`NF-`NF+1] : Shifted[`LGLEN+`NF-`NF1+1];
end else if (`FPSIZES == 3) begin
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
always_comb
case (OutFmt)
`FMT: begin
ToFpSticky = |Shifted[`LGLEN+`NF-`NF-1:0];
ToFpRound = Shifted[`LGLEN+`NF-`NF];
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF+1];
end
`FMT1: begin
ToFpSticky = |Shifted[`LGLEN+`NF-`NF1-1:0];
ToFpRound = Shifted[`LGLEN+`NF-`NF1];
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF1+1];
end
`FMT2: begin
ToFpSticky = |Shifted[`LGLEN+`NF-`NF2-1:0];
ToFpRound = Shifted[`LGLEN+`NF-`NF2];
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF2+1];
end
default: begin
ToFpSticky = 1'bx;
ToFpRound = 1'bx;
ToFpLSBFrac = 1'bx;
end
endcase
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
end else if (`FPSIZES == 4) begin
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
always_comb
case (OutFmt)
2'h3: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`Q_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`Q_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`Q_NF+1];
end
2'h1: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`D_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`D_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`D_NF+1];
end
2'h0: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`S_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`S_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`S_NF+1];
end
2'h2: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`H_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`H_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`H_NF+1];
end
endcase
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
end
always_comb
// Determine if you add 1
case (FrmE)
3'b000: CalcPlus1 = Round & (Sticky | LSBFrac);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResSgn;//round down
3'b011: CalcPlus1 = ~ResSgn;//round up
3'b100: CalcPlus1 = Round;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// dont round if exact
assign Plus1 = CalcPlus1&(Round|Sticky);
// shift the 1 to the propper position for rounding
// - dont round it converting to integer
if (`FPSIZES == 1) begin
assign ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
end else if (`FPSIZES == 2) begin
assign ShiftedPlus1 = OutFmt ? {{`FLEN-1{1'b0}},Plus1&~ToInt} : {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
`FMT1: ShiftedPlus1 = {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
`FMT2: ShiftedPlus1 = {{`NE+`NF2{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF2-1{1'b0}}};
default: ShiftedPlus1 = 0;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ShiftedPlus1 = {{`Q_LEN-1{1'b0}},Plus1&~ToInt};
2'h1: ShiftedPlus1 = {{`Q_NE+`D_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`D_NF-1{1'b0}}};
2'h0: ShiftedPlus1 = {{`Q_NE+`S_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`S_NF-1{1'b0}}};
2'h2: ShiftedPlus1 = {{`Q_NE+`H_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`H_NF-1{1'b0}}};
endcase
end
// kill calcExp if the result is denormalized
assign {FullResExp, ResFrac} = {CalcExp&{`NE+1{~ResDenormUf}}, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`NF]} + ShiftedPlus1;
// trim the result's expoent to size
assign ResExp = FullResExp[`NE-1:0];
///////////////////////////////////////////////////////////////////////////
// flags
///////////////////////////////////////////////////////////////////////////
// calculate the flags
// find the maximum exponent (the exponent and larger overflows)
if (`FPSIZES == 1) begin
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
end else if (`FPSIZES == 2) begin
assign MaxExp = ToInt ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
end else if (`FPSIZES == 3) begin
logic [`NE-1:0] MaxExpFp;
always_comb
case (OutFmt)
`FMT: begin
MaxExpFp = {`NE{1'b1}};
end
`FMT1: begin
MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
end
`FMT2: begin
MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
end
default: begin
MaxExpFp = 1'bx;
end
endcase
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
end else if (`FPSIZES == 4) begin
logic [`NE-1:0] MaxExpFp;
always_comb
case (OutFmt)
2'h3: begin
MaxExpFp = {`Q_NE{1'b1}};
end
2'h1: begin
MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
end
2'h0: begin
MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
end
2'h2: begin
MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
end
endcase
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
end
// if the result exponent is larger then the maximum possible exponent
// | and the exponent is positive
// | | and the input is not NaN or Infinity
// | | |
assign Overflow = ((ResExp >= MaxExp)&~CalcExp[`NE]&(~(XNaNE|XInfE)|IntToFp));
// if the result is denormalized or underflowed
// | and the result did not round into normal values
// | | and the result is not exact
// | | | and the result isn't NaN
// | | | |
assign Underflow = ResDenormUf & ~(ResExp==1 & CalcExp == 0) & (Sticky|Round)&~(XNaNE);
// we are using the IEEE convertToIntegerExact opperations (rather then the exact ones) which do singal the inexact flag
// if there were bits thrown away
// | if overflowed or underflowed
// | | and if not a NaN
// | | |
assign FpInexact = (Sticky|Round|Underflow|Overflow)&(~XNaNE|IntToFp);
// if the result is too small to be represented and not 0
// | and if the result is not invalid (outside the integer bounds)
// | |
assign IntInexact = ((CalcExp[`NE]&~XZeroE)|Sticky|Round)&~Invalid;
// select the inexact flag to output
assign Inexact = ToInt ? IntInexact : FpInexact;
// if an input was a singaling NaN(and we're using a FP input)
// |
assign FpInvalid = (XSNaNE&~IntToFp);
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
// if the input is NaN or infinity
// | if the integer result overflows (out of range)
// | | if the input was negitive but ouputing to a unsigned number
// | | | the result doesn't round to zero
// | | | | or the result rounds up out of bounds
// | | | | and the result didn't underflow
// | | | | |
assign IntInvalid = XNaNE|XInfE|Overflow|((XSgnE&~Signed)&(~((CalcExp[`NE]|(~|CalcExp))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
// |
// or when the positive result rounds up out of range
// select the inexact flag to output
assign Invalid = ToInt ? IntInvalid : FpInvalid;
// pack the flags together
// - fp -> int does not set the overflow or underflow flags
assign CvtFlgE = {Invalid, 1'b0, Overflow&~ToInt, Underflow&~ToInt, Inexact};
///////////////////////////////////////////////////////////////////////////
// result selection
///////////////////////////////////////////////////////////////////////////
// determine if you shoould kill the result
// - do so if the result underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp));
if (`FPSIZES == 1) begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
assign NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
end else begin
assign NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
assign InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
assign UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
assign Res = {ResSgn, ResExp, ResFrac};
end else if (`FPSIZES == 2) begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
end else begin
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
assign InfRes = OutFmt ? (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
(~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
assign Res = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
end else begin
NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {ResSgn, ResExp, ResFrac};
end
`FMT1: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
end else begin
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end
`FMT2: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, XManE[`NF-2:`NF-`NF2]};
end else begin
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, {`NF2-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
end
default: begin
NaNRes = 1'bx;
InfRes = 1'bx;
UfRes = 1'bx;
Res = 1'bx;
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, XManE[`Q_NF-2:0]};
end else begin
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`Q_NE-1{1'b1}}, 1'b0, {`Q_NF{1'b1}}} : {ResSgn, {`Q_NE{1'b1}}, {`Q_NF{1'b0}}};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {ResSgn, (`Q_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {ResSgn, ResExp, ResFrac};
end
2'h1: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`D_NF]};
end else begin
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`D_NF]};
end
2'h0: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`S_NF]};
end else begin
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`S_NF]};
end
2'h2: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`H_NF]};
end else begin
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`H_NF]};
end
endcase
end
// choose the floating point result
// - if the input is NaN (and using the NaN input) output the NaN result
// - if the input is infinity or the output overflows
// - kill the InfE signal if the input isn't a floating point value
// - if killing the result output the underflow result
// - otherwise output the normal result
assign CvtResE = XNaNE&~IntToFp ? NaNRes :
(XInfE&~IntToFp)|Overflow ? InfRes :
KillRes ? UfRes :
Res;
// *** probably can optimize the negation
// select the overflow integer result
// - negitive infinity and out of range negitive input
// | int | long |
// signed | -2^31 | -2^63 |
// unsigned | 0 | 0 |
//
// - positive infinity and out of range negitive input and NaNs
// | int | long |
// signed | 2^31-1 | 2^63-1 |
// unsigned | 2^32-1 | 2^64-1 |
//
// other: 32 bit unsinged result should be sign extended as if it were a signed number
assign OfIntRes = Signed ? XSgnE&~XNaNE ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
XSgnE&~XNaNE ? {`XLEN{1'b0}} : // unsigned negitive
{`XLEN{1'b1}};// unsigned positive
// round and negate the positive result if needed
assign NegRes = XSgnE ? -({2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
// select the integer output
// - if the input is invalid (out of bounds NaN or Inf) then output overflow result
// - if the input underflows
// - if rounding and signed opperation and negitive input, output -1
// - otherwise output a rounded 0
// - otherwise output the normal result (trmined and sign extended if nessisary)
assign CvtIntResE = Invalid ? OfIntRes :
CalcExp[`NE] ? XSgnE&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
endmodule

View File

@ -34,7 +34,7 @@ module fhazard(
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
input logic [4:0] RdM, RdW, // the adress being written to
input logic [1:0] FResultSelM, // the result being selected
input logic [1:0] FResSelM, // the result being selected
output logic FStallD, // stall the decode stage
output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value
);
@ -47,10 +47,12 @@ module fhazard(
FForwardZE = 2'b00; // choose FRD3E
FStallD = 0;
//*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait
// if the needed value is in the memory stage - input 1
if ((Adr1E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResultSelM == 2'b11) FForwardXE = 2'b10; // choose FResM
if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
@ -59,7 +61,7 @@ module fhazard(
// if the needed value is in the memory stage - input 2
if ((Adr2E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResultSelM == 2'b11) FForwardYE = 2'b10; // choose FResM
if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
@ -68,7 +70,7 @@ module fhazard(
// if the needed value is in the memory stage - input 3
if ((Adr3E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResultSelM == 2'b11) FForwardZE = 2'b10; // choose FResM
if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W

152
pipelined/src/fpu/flags.sv Normal file
View File

@ -0,0 +1,152 @@
`include "wally-config.vh"
module flags(
input logic XSgnM,
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic Plus1,
input logic InfIn, // is a Inf input being used
input logic XZeroM, YZeroM, // inputs are zero
input logic XNaNM, YNaNM, // inputs are NaN
input logic NaNIn, // is a NaN input being used
input logic Sqrt, // Sqrt?
input logic ToInt, // convert to integer
input logic IntToFp, // convert integer to floating point
input logic Int64, // convert to 64 bit integer
input logic Signed, // convert to a signed integer
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [`NE:0] CvtCalcExpM, // the calculated expoent - Cvt
input logic CvtOp, // conversion opperation?
input logic DivOp, // conversion opperation?
input logic FmaOp, // Fma opperation?
input logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
input logic [`NE+1:0] RoundExp, // exponent of the normalized sum
input logic [1:0] NegResMSBS, // the negitive integer result's most significant bits
input logic ZSgnEffM, PSgnM, // the product and modified Z signs
input logic Round, UfLSBRes, Sticky, UfPlus1, // bits used to determine rounding
output logic Invalid, Overflow, Underflow, // flags used to select the res
output logic [4:0] PostProcFlgM // flags
);
logic SigNaN; // is an input a signaling NaN
logic Inexact; // inexact flag
logic FpInexact; // floating point inexact flag
logic IntInexact; // integer inexact flag
logic IntInvalid; // integer invalid flag
logic FmaInvalid; // integer invalid flag
logic DivInvalid; // integer invalid flag
logic DivByZero;
logic [`NE-1:0] MaxExp; // the maximum exponent before overflow
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
if (`FPSIZES == 1) begin
assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
end else if (`FPSIZES == 2) begin
assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
end else if (`FPSIZES == 3) begin
logic [`NE-1:0] MaxExpFp;
always_comb
case (OutFmt)
`FMT: begin
MaxExpFp = {`NE{1'b1}};
end
`FMT1: begin
MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
end
`FMT2: begin
MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
end
default: begin
MaxExpFp = 1'bx;
end
endcase
assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
end else if (`FPSIZES == 4) begin
logic [`NE-1:0] MaxExpFp;
always_comb
case (OutFmt)
2'h3: begin
MaxExpFp = {`Q_NE{1'b1}};
end
2'h1: begin
MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
end
2'h0: begin
MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
end
2'h2: begin
MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
end
endcase
assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
end
// if the result is greater than or equal to the max exponent
// | and the exponent isn't negitive
// | | if the input isnt infinity or NaN
// | | |
assign Overflow = (FullResExp>={2'b0, MaxExp}) & ~FullResExp[`NE+1]&~(InfIn|NaNIn);
// detecting tininess after rounding
// the exponent is negitive
// | the result is denormalized
// | | the result is normal and rounded from a denorm
// | | | and if given an unbounded exponent the result does not round
// | | | | and if the result is not exact
// | | | | | and if the input isnt infinity or NaN
// | | | | | |
assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn);
// Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
// - Don't set the underflow flag if an underflowed res isn't outputed
assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn);
// if the res is too small to be represented and not 0
// | and if the res is not invalid (outside the integer bounds)
// | |
assign IntInexact = ((CvtCalcExpM[`NE]&~XZeroM)|Sticky|Round)&~Invalid;
// select the inexact flag to output
assign Inexact = ToInt ? IntInexact : FpInexact;
// Set Invalid flag for following cases:
// 1) any input is a signaling NaN
// 2) Inf - Inf (unless x or y is NaN)
// 3) 0 * Inf
// if the input is NaN or infinity
// | if the integer res overflows (out of range)
// | | if the input was negitive but ouputing to a unsigned number
// | | | the res doesn't round to zero
// | | | | or the res rounds up out of bounds
// | | | | and the res didn't underflow
// | | | | |
assign IntInvalid = XNaNM|XInfM|Overflow|((XSgnM&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
// |
// or when the positive res rounds up out of range
assign SigNaN = (XSNaNM&~(IntToFp&CvtOp)) | (YSNaNM&~CvtOp) | (ZSNaNM&FmaOp);
assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
assign DivInvalid = ((XInfM & YInfM) | (XZeroM & YZeroM))&~Sqrt | (XSgnM&Sqrt);
assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp) | (IntInvalid&CvtOp&ToInt);
assign DivByZero = YZeroM&DivOp;
// Combine flags
// - to integer results do not set the underflow or overflow flags
assign PostProcFlgM = {Invalid, DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact};
endmodule

View File

@ -30,73 +30,6 @@
`include "wally-config.vh"
module fma(
input logic clk,
input logic reset,
input logic FlushM, // flush the memory stage
input logic StallM, // stall memory stage
input logic [`FMTBITS-1:0] FmtE, FmtM, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic XSgnE, YSgnE, ZSgnE, // input signs - execute stage
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // input exponents - execute stage
input logic [`NF:0] XManE, YManE, ZManE, // input mantissa - execute stage
input logic XSgnM, YSgnM, // input signs - memory stage
input logic [`NE-1:0] ZExpM, // input exponents - memory stage
input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage
input logic ZDenormE, // is denorm
input logic XZeroE, YZeroE, ZZeroE, // is zero - execute stage
input logic XNaNM, YNaNM, ZNaNM, // is NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // is signaling NaN
input logic XZeroM, YZeroM, ZZeroM, // is zero - memory stage
input logic XInfM, YInfM, ZInfM, // is infinity
output logic [`FLEN-1:0] FMAResM, // FMA result
output logic [4:0] FMAFlgM); // FMA flags
//fma/mult/add
// fmadd = 000
// fmsub = 001
// fnmsub = 010 -(a*b)+c
// fnmadd = 011 -(a*b)-c
// fmul = 100
// fadd = 110
// fsub = 111
// signals transfered between pipeline stages
logic [3*`NF+5:0] SumE, SumM;
logic [`NE+1:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic InvZE, InvZM;
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM;
logic Mult;
logic ZDenormM;
fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
.ProdExpE, .AddendStickyE, .KillProdE);
// E/M pipeline registers
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE},
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM});
fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
.FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult,
.FMAResM, .FMAFlgM);
endmodule
//*** in al units before putting into : ? put in a seperate signal
module fma1(
input logic XSgnE, YSgnE, ZSgnE, // input's signs
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
input logic [`NF:0] XManE, YManE, ZManE, // fractions in U(0.NF) format
@ -111,7 +44,7 @@ module fma1(
output logic InvZE, // intert Z
output logic ZSgnEffE, // the modified Z sign
output logic PSgnE, // the product's sign
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift cnt
output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift cnt
);
logic [2*`NF+1:0] ProdManE; // 1.X frac * 1.Y frac in U(2.2Nf) format
@ -151,7 +84,7 @@ module fma1(
add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE);
loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .NormCntE);
loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .FmaNormCntE);
// Choose the positive sum and accompanying LZA result.
assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
@ -332,7 +265,7 @@ endmodule
module loa( //https://ieeexplore.ieee.org/abstract/document/930098
input logic [3*`NF+6:0] A, // addend
input logic [2*`NF+1:0] P, // product
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift count for the positive result
output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift count for the positive result
);
logic [3*`NF+6:0] T;
@ -360,861 +293,6 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE));
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(FmaNormCntE));
endmodule
module fma2(
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic [3*`NF+5:0] SumM, // the positive sum
input logic NegSumM, // was the sum negitive
input logic InvZM, // do you invert Z
input logic ZDenormM, // is the original precision denormalized
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic Mult, // multiply opperation
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // the normalization shift count
output logic [`FLEN-1:0] FMAResM, // FMA final result
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
logic [`NF-1:0] ResultFrac; // Result fraction
logic [`NE-1:0] ResultExp; // Result exponent
logic ResultSgn, ResultSgnTmp; // Result sign
logic [`NE+1:0] SumExp; // exponent of the normalized sum
logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow
logic [`NF+1:0] NormSum; // normalized sum
logic NormSumSticky; // sticky bit calulated from the normalized sum
logic SumZero; // is the sum zero
logic ResultDenorm; // is the result denormalized
logic Sticky, UfSticky; // Sticky bit
logic CalcPlus1; // do you add or subtract one for rounding
logic UfPlus1; // do you add one (for determining underflow flag)
logic Invalid,Underflow,Overflow; // flags
logic Guard, Round; // bits needed to determine rounding
logic UfLSBNormSum; // bits needed to determine rounding for underflow flag
logic [`FLEN:0] RoundAdd; // how much to add to the result
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
.ZDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// round to zero
// round to -infinity
// round to infinity
// round to nearest max magnitude
fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgnTmp, .SumExp,
.CalcPlus1, .UfPlus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .RoundAdd, .UfLSBNormSum);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .Underflow, .InvZM, .NegSumM, .SumZero, .Mult, .ResultSgnTmp, .ResultSgn);
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
.XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfLSBNormSum, .Sticky, .UfPlus1,
.FmtM, .Invalid, .Overflow, .Underflow, .FMAFlgM);
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM,
.FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
.ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow,
.ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
// *** use NF where needed
endmodule
module resultsign(
input logic [2:0] FrmM,
input logic PSgnM, ZSgnEffM,
input logic Underflow,
input logic InvZM,
input logic NegSumM,
input logic SumZero,
input logic Mult,
output logic ResultSgnTmp,
output logic ResultSgn
);
logic ZeroSgn;
// logic ResultSgnTmp;
// Determine the sign if the sum is zero
// if cancelation then 0 unless round to -infinity
// if multiply then Psgn
// otherwise psign
assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM;
// is the result negitive
// if p - z is the Sum negitive
// if -p + z is the Sum positive
// if -p - z then the Sum is negitive
assign ResultSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | ((ZSgnEffM)&PSgnM);
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
endmodule
module normalize(
input logic [3*`NF+5:0] SumM, // the positive sum
input logic [`NE-1:0] ZExpM, // exponent of Z
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // normalization shift count
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic KillProdM, // is the product set to zero
input logic ZDenormM,
input logic AddendStickyM, // the sticky bit caclulated from the aligned addend
output logic [`NF+1:0] NormSum, // normalized sum
output logic SumZero, // is the sum zero
output logic NormSumSticky, UfSticky, // sticky bits
output logic [`NE+1:0] SumExp, // exponent of the normalized sum
output logic ResultDenorm // is the result denormalized
);
logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic [3*`NF+8:0] SumShifted; // the shifted sum before LZA correction
logic [`NE+1:0] SumExpTmpTmp; // the exponent of the normalized sum with the `FLEN bias
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
//*** insert bias-bias simplification in fcvt.sv/phone pictures
// Determine if the sum is zero
assign SumZero = ~(|SumM);
// calculate the sum's exponent
assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, NormCntM} + 1 - (`NE+2)'(`NF+4));
//convert the sum's exponent into the propper percision
if (`FPSIZES == 1) begin
assign SumExpTmp = SumExpTmpTmp;
end else if (`FPSIZES == 2) begin
assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: SumExpTmp = SumExpTmpTmp;
`FMT1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
`FMT2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|SumExpTmpTmp}};
default: SumExpTmp = `NE+2'bx;
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: SumExpTmp = SumExpTmpTmp;
2'h1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|SumExpTmpTmp}};
2'h0: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|SumExpTmpTmp}};
2'h2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|SumExpTmpTmp}};
endcase
end
end
// determine if the result is denormalized
if (`FPSIZES == 1) begin
logic Sum0LEZ, Sum0GEFL;
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
end else if (`FPSIZES == 2) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
end else if (`FPSIZES == 3) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
assign Sum2LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|SumExpTmpTmp;
always_comb begin
case (FmtM)
`FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
`FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
`FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
default: PreResultDenorm = 1'bx;
endcase
end
end else if (`FPSIZES == 4) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2));
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|SumExpTmpTmp;
assign Sum2LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|SumExpTmpTmp;
assign Sum3LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
assign Sum3GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|SumExpTmpTmp;
always_comb begin
case (FmtM)
2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
endcase
end
end
// 010. when should be 001.
// - shift left one
// - add one from exp
// - if kill prod dont add to exp
// Determine if the result is denormal
// assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
// Determine the shift needed for denormal results
// - if not denorm add 1 to shift out the leading 1
assign DenormShift = PreResultDenorm ? SumExpTmp[$clog2(3*`NF+7)-1:0] : 1;
// Normalize the sum
assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift;
// LZA correction
assign LZAPlus1 = SumShifted[3*`NF+7];
assign LZAPlus2 = SumShifted[3*`NF+8];
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4];
// Calculate the sticky bit
if (`FPSIZES == 1) begin
assign NormSumSticky = |CorrSumShifted[2*`NF+3:0];
end else if (`FPSIZES == 2) begin
// 3*NF+5 - NF1 - 3
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM);
end else if (`FPSIZES == 3) begin
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) |
(|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2));
end else if (`FPSIZES == 4) begin
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
(|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) |
(|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) |
(|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2));
end
assign UfSticky = AddendStickyM | NormSumSticky;
// Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign SumExp = (SumExpTmp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResultDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
// recalculate if the result is denormalized
assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
endmodule
module fmaround(
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic UfSticky, // sticky bit for underlow calculation
input logic [`NF+1:0] NormSum, // normalized sum
input logic AddendStickyM, // addend's sticky bit
input logic NormSumSticky, // normalized sum's sticky bit
input logic ZZeroM, // is Z zero
input logic InvZM, // invert Z
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic ResultSgnTmp, // the result's sign
output logic CalcPlus1, UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow
output logic [`NF-1:0] ResultFrac, // Result fraction
output logic [`NE-1:0] ResultExp, // Result exponent
output logic Sticky, // sticky bit
output logic [`FLEN:0] RoundAdd, // how much to add to the result
output logic Round, Guard, UfLSBNormSum // bits needed to calculate rounding
);
logic LSBNormSum; // bit used for rounding - least significant bit of the normalized sum
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
logic UfGuard; // guard bit used to caluculate underflow
logic UfCalcPlus1, CalcMinus1, Plus1, Minus1; // do you add or subtract on from the result
logic [`NF-1:0] NormSumTruncated; // the normalized sum trimed to fit the mantissa
logic UfRound;
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// {Guard, Round, Sticky}
// 0xx - do nothing
// 100 - tie - Plus1 if result is odd (LSBNormSum = 1)
// - don't add 1 if a small number was supposed to be subtracted
// 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to -infinity
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to infinity
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0xx - do nothing
// 100 - tie - Plus1
// - don't add 1 if a small number was supposed to be subtracted
// 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1
if (`FPSIZES == 1) begin
// determine guard, round, and least significant bit of the result
assign Round = NormSum[1];
assign LSBNormSum = NormSum[2];
// used to determine underflow flag
assign UfRound = NormSum[0];
end else if (`FPSIZES == 2) begin
// \/-------------NF---------------,
// | NF1 | 2 | |
// '-------NF1------^
// determine guard, round, and least significant bit of the result
assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
// used to determine underflow flag
assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[1];
LSBNormSum = NormSum[2];
// used to determine underflow flag
UfRound = NormSum[0];
end
`FMT1: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`NF1+1];
LSBNormSum = NormSum[`NF-`NF1+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`NF1];
end
`FMT2: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`NF2+1];
LSBNormSum = NormSum[`NF-`NF2+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`NF2];
end
default: begin
Round = 1'bx;
LSBNormSum = 1'bx;
UfRound = 1'bx;
end
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[1];
LSBNormSum = NormSum[2];
// used to determine underflow flag
UfRound = NormSum[0];
end
2'h1: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`D_NF+1];
LSBNormSum = NormSum[`NF-`D_NF+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`D_NF];
end
2'h0: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`S_NF+1];
LSBNormSum = NormSum[`NF-`S_NF+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`S_NF];
end
2'h2: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`H_NF+1];
LSBNormSum = NormSum[`NF-`H_NF+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`H_NF];
end
endcase
end
end
// used to determine underflow flag
assign UfLSBNormSum = Round;
// determine sticky
assign Sticky = UfSticky | UfRound;
// Deterimine if a small number was supposed to be subtrated
assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here
always_comb begin
// Determine if you add 1
case (FrmM)
3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down
3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up
3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (FrmM)
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down
3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down
3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
end
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (Sticky | Round);
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky
assign Minus1 = CalcMinus1 & (Sticky | Round);
// Compute rounded result
if (`FPSIZES == 1) begin
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, Plus1};
end else if (`FPSIZES == 2) begin
// \/FLEN+1
// | NE+2 | NF |
// '-NE+2-^----NF1----^
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), Plus1, (`FLEN-1-`NE-`NF2)'(0)};
default: RoundAdd = (`FLEN+1)'(0);
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), Plus1, (`FLEN-1-`NE-`D_NF)'(0)};
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), Plus1, (`FLEN-1-`NE-`S_NF)'(0)};
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), Plus1, (`FLEN-1-`NE-`H_NF)'(0)};
endcase
end
end
assign NormSumTruncated = NormSum[`NF+1:2];
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
assign ResultExp = FullResultExp[`NE-1:0];
endmodule
module fmaflags(
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XZeroM, YZeroM, // inputs are zero
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic ZSgnEffM, PSgnM, // the product and modified Z signs
input logic Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
output logic Invalid, Overflow, Underflow, // flags used to select the result
output logic [4:0] FMAFlgM // FMA flags
);
logic SigNaN; // is an input a signaling NaN
logic GtMaxExp; // is exponent greater than the maximum
logic UnderflowFlag, Inexact; // flags
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
// Set Invalid flag for following cases:
// 1) any input is a signaling NaN
// 2) Inf - Inf (unless x or y is NaN)
// 3) 0 * Inf
assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
assign Invalid = SigNaN | ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
// Set Overflow flag if the number is too big to be represented
// - Don't set the overflow flag if an overflowed result isn't outputed
if (`FPSIZES == 1) begin
assign GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
end else if (`FPSIZES == 2) begin
assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
`FMT1: GtMaxExp = &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
`FMT2: GtMaxExp = &FullResultExp[`NE2-1:0] | FullResultExp[`NE2];
default: GtMaxExp = 1'bx;
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
2'h1: GtMaxExp = &FullResultExp[`D_NE-1:0] | FullResultExp[`D_NE];
2'h0: GtMaxExp = &FullResultExp[`S_NE-1:0] | FullResultExp[`S_NE];
2'h2: GtMaxExp = &FullResultExp[`H_NE-1:0] | FullResultExp[`H_NE];
endcase
end
end
assign Overflow = GtMaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Underflow flag if the number is too small to be represented in normal numbers
// - Don't set the underflow flag if the result is exact
assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// exp is negitive result is denorm exp was denorm but rounded to norm and if given an unbounded exponent it would stay denormal
assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
// - Don't set the underflow flag if an underflowed result isn't outputed
assign Inexact = (Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Combine flags
// - FMA can't set the Divide by zero flag
// - Don't set the underflow flag if the result was rounded up to a normal number
assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
endmodule
module resultselect(
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic ZDenormM, // is the original precision denormalized
input logic ZZeroM,
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic ResultSgn, // the result's sign
input logic CalcPlus1, // rounding bits
input logic [`FLEN:0] RoundAdd, // how much to add to the result
input logic Invalid, Overflow, Underflow, // flags
input logic ResultDenorm, // is the result denormalized
input logic [`NE-1:0] ResultExp, // Result exponent
input logic [`NF-1:0] ResultFrac, // Result fraction
output logic [`FLEN-1:0] FMAResM // FMA final result
);
logic InfSgn;
logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InfResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult, NormResult; // possible results
assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
if (`FPSIZES == 1) begin
if(`IEEE754) begin
assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
assign InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
assign NormResult = {ResultSgn, ResultExp, ResultFrac};
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
if(`IEEE754) begin
assign XNaNResult = FmtM ? {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
assign YNaNResult = FmtM ? {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
assign ZNaNResult = FmtM ? {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
assign InvalidResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
assign XNaNResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: begin
if(`IEEE754) begin
XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
NormResult = {ResultSgn, ResultExp, ResultFrac};
end
`FMT1: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
YNaNResult = {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
ZNaNResult = {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
InvalidResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
end
`FMT2: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`LEN2{1'b1}}, XSgnM, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
YNaNResult = {{`FLEN-`LEN2{1'b1}}, YSgnM, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
ZNaNResult = {{`FLEN-`LEN2{1'b1}}, ZSgnEffM, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
InvalidResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
{{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
end
default: begin
if(`IEEE754) begin
XNaNResult = (`FLEN)'(0);
YNaNResult = (`FLEN)'(0);
ZNaNResult = (`FLEN)'(0);
InvalidResult = (`FLEN)'(0);
end else begin
XNaNResult = (`FLEN)'(0);
end
OverflowResult = (`FLEN)'(0);
KillProdResult = (`FLEN)'(0);
UnderflowResult = (`FLEN)'(0);
InfResult = (`FLEN)'(0);
NormResult = (`FLEN)'(0);
end
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: begin
if(`IEEE754) begin
XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
NormResult = {ResultSgn, ResultExp, ResultFrac};
end
2'h1: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`D_LEN{1'b1}}, XSgnM, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
YNaNResult = {{`FLEN-`D_LEN{1'b1}}, YSgnM, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
ZNaNResult = {{`FLEN-`D_LEN{1'b1}}, ZSgnEffM, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
InvalidResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
{{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
end
2'h0: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`S_LEN{1'b1}}, XSgnM, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
YNaNResult = {{`FLEN-`S_LEN{1'b1}}, YSgnM, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
ZNaNResult = {{`FLEN-`S_LEN{1'b1}}, ZSgnEffM, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
InvalidResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
{{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
end
2'h2: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`H_LEN{1'b1}}, XSgnM, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
YNaNResult = {{`FLEN-`H_LEN{1'b1}}, YSgnM, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
ZNaNResult = {{`FLEN-`H_LEN{1'b1}}, ZSgnEffM, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
InvalidResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
{{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
end
endcase
end
end
if(`IEEE754) begin
assign FMAResM = XNaNM ? XNaNResult :
YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult :
Invalid ? InvalidResult :
XInfM|YInfM|ZInfM ? InfResult :
KillProdM ? KillProdResult :
Overflow ? OverflowResult :
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
NormResult;
end else begin
assign FMAResM = XNaNM|YNaNM|ZNaNM|Invalid ? XNaNResult :
XInfM|YInfM|ZInfM ? InfResult :
KillProdM ? KillProdResult :
Overflow ? OverflowResult :
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
NormResult;
end
endmodule

View File

@ -0,0 +1,127 @@
`include "wally-config.vh"
module fmashiftcalc(
input logic [3*`NF+5:0] SumM, // the positive sum
input logic [`NE-1:0] ZExpM, // exponent of Z
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // normalization shift count
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic KillProdM, // is the product set to zero
input logic ZDenormM,
output logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
output logic SumZero, // is the result denormalized - calculated before LZA corection
output logic PreResultDenorm, // is the result denormalized - calculated before LZA corection
output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count
output logic [3*`NF+8:0] FmaShiftIn // is the sum zero
);
logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later
logic [`NE+1:0] NormSumExp; // the exponent of the normalized sum with the `FLEN bias
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
//*** insert bias-bias simplification in fcvt.sv/phone pictures
// Determine if the sum is zero
assign SumZero = ~(|SumM);
// calculate the sum's exponent
assign NormSumExp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} + 1 - (`NE+2)'(`NF+4));
//convert the sum's exponent into the propper percision
if (`FPSIZES == 1) begin
assign ConvNormSumExp = NormSumExp;
end else if (`FPSIZES == 2) begin
assign ConvNormSumExp = FmtM ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: ConvNormSumExp = NormSumExp;
`FMT1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
`FMT2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
default: ConvNormSumExp = `NE+2'bx;
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: ConvNormSumExp = NormSumExp;
2'h1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
2'h0: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
2'h2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
endcase
end
end
// determine if the result is denormalized
if (`FPSIZES == 1) begin
logic Sum0LEZ, Sum0GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
end else if (`FPSIZES == 2) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
end else if (`FPSIZES == 3) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
always_comb begin
case (FmtM)
`FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
`FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
`FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
default: PreResultDenorm = 1'bx;
endcase
end
end else if (`FPSIZES == 4) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|NormSumExp;
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|NormSumExp;
assign Sum3LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
always_comb begin
case (FmtM)
2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
endcase
end
end
// 010. when should be 001.
// - shift left one
// - add one from exp
// - if kill prod dont add to exp
// Determine if the result is denormal
// assign PreResultDenorm = $signed(ConvNormSumExp)<=0 & ($signed(ConvNormSumExp)>=$signed(-FracLen)) & ~SumZero;
// Determine the shift needed for denormal results
// - if not denorm add 1 to shift out the leading 1
assign DenormShift = PreResultDenorm ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
// set and calculate the shift input and amount
assign FmaShiftIn = {3'b0, SumM};
assign FmaShiftAmt = FmaNormCntM+DenormShift;
endmodule

View File

@ -45,6 +45,8 @@ module fpu (
output logic FWriteIntE, // integer register write enables
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
output logic [1:0] FResSelW,
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
@ -68,24 +70,24 @@ module fpu (
logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register
logic FWriteIntM; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register
logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register
logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage
logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
// regfile signals
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
logic XSgnM, YSgnM; // input's sign - memory stage
logic XSgnM; // input's sign - memory stage
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
logic [`NE-1:0] ZExpM; // input's exponent - memory stage
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage
@ -95,7 +97,7 @@ module fpu (
logic XNaNQ, YNaNQ; // is the input a NaN - divide
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, ZDenormE; // is the input denormalized
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XZeroQ, YZeroQ; // is the input zero - divide
@ -104,24 +106,43 @@ module fpu (
logic XInfQ, YInfQ; // is the input infinity - divide
logic XExpMaxE; // is the exponent all ones (max value)
logic FmtQ;
logic FOpCtrlQ;
logic FOpCtrlQ;
// Fma Signals
logic [3*`NF+5:0] SumE, SumM;
logic [`NE+1:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic InvZE, InvZM;
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
// Cvt Signals
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent
logic [`LOGLGLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
logic CvtResSgnE, CvtResSgnM; // the result's sign
logic IntZeroE, IntZeroM; // is the integer zero?
logic [`LGLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [4:0] FDivFlgM; // divide/squareroot flags
logic [`FLEN-1:0] FMAResM, FMAResW; // FMA/multiply result
logic [4:0] FMAFlgM; // FMA/multiply result
logic [`FLEN-1:0] ReadResW; // read result (load instruction)
logic [`FLEN-1:0] CvtResE; // FP <-> int convert result
logic [`XLEN-1:0] CvtIntResE; // FP <-> int convert result
logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this
logic [`XLEN-1:0] ClassResE; // classify result
logic [`FLEN-1:0] CmpResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`XLEN-1:0] FIntResE; // classify result
logic [`FLEN-1:0] FpResM, FpResW; // classify result
logic [`FLEN-1:0] PostProcResM; // classify result
logic [4:0] PostProcFlgM; // classify result
logic [`XLEN-1:0] FCvtIntResM;
logic [`FLEN-1:0] CmpFpResE; // compare result
logic [`XLEN-1:0] CmpIntResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`FLEN-1:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
logic [`XLEN-1:0] FIntResE;
logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
// other signals
logic FDivSqrtDoneE; // is divide done
@ -133,10 +154,20 @@ module fpu (
// DECODE STAGE
//////////////////////////////////////////////////////////////////////////////////////////
// |||||||||||
// ||| |||
// ||| |||
// ||| |||
// ||| |||
// ||| |||
// |||||||||||
//////////////////////////////////////////////////////////////////////////////////////////
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD,
.FmtD, .FrmD, .FWriteIntD);
// FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
@ -150,20 +181,31 @@ module fpu (
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(16+int'(`FMTBITS-1)) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
flopenrc #(13+int'(`FMTBITS)) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
// EXECUTION STAGE
//////////////////////////////////////////////////////////////////////////////////////////
// ||||||||||||
// |||
// |||
// |||||||||
// |||
// |||
// ||||||||||||
//////////////////////////////////////////////////////////////////////////////////////////
// Hazard unit for FPU
// - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM,
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
// forwarding muxs
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, FpResM, FForwardXE, FSrcXE);
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, FpResM, FForwardYE, FPreSrcYE);
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, FpResM, FForwardZE, FPreSrcZE);
generate
@ -178,7 +220,7 @@ module fpu (
endgenerate
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), FSrcYE); // Force Z to be 0 for multiply instructions
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions
// Force Z to be 0 for multiply instructions
generate
@ -201,21 +243,12 @@ module fpu (
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
// FMA
// - two stage FMA
// - execute stage - multiplication and addend shifting
// - memory stage - addition and rounding
// - handles FMA and multiply instructions
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM,
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FOpCtrlE,
.FmtE, .FmtM, .FrmM,
.FMAFlgM, .FMAResM);
// fma - does multiply, add, and multiply-add instructions
fma fma (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
.XManE, .YManE, .ZManE, .XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .FmaNormCntE,
.ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE);
// fpdivsqrt using Goldschmidt's iteration
if(`FLEN == 64) begin
@ -245,11 +278,14 @@ module fpu (
// other FP execution units
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE);
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE,
.XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE,
.FWriteIntE, .XZeroE, .XDenormE, .FmtE, .CvtCalcExpE,
.CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .IntZeroE,
.CvtLzcInE);
// data to be stored in memory - to IEU
// - FP uses NaN-blocking format
@ -269,16 +305,16 @@ module fpu (
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
endgenerate
// select a result that may be written to the FP register
mux4 #(`FLEN) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
mux4 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
// select the result that may be written to the integer register - to IEU
if (`FLEN>`XLEN)
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE,
CvtIntResE, FIntResSelE, FIntResE);
assign IntSrcXE = FSrcXE[`XLEN-1:0];
else
mux4 #(`XLEN) IntResMux({{`XLEN-`FLEN{CmpResE[`FLEN-1:0]}}, CmpResE}, {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}, ClassResE,
CvtIntResE, FIntResSelE, FIntResE);
assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
// *** make sure the fpu matches the chapter diagram
@ -286,33 +322,68 @@ module fpu (
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM);
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(`FLEN) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM);
flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(7+int'(`FMTBITS-1)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FrmE, FmtE},
{FRegWriteM, FResultSelM, FrmM, FmtM});
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
{AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
// BEGIN MEMORY STAGE
//////////////////////////////////////////////////////////////////////////////////////////
// ||| |||
// |||||| ||||||
// ||| ||| ||| |||
// ||| ||||| |||
// ||| ||| |||
// ||| |||
// ||| |||
//////////////////////////////////////////////////////////////////////////////////////////
postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM,
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM,
.NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM,
.CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM,
.CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);
// FPU flag selection - to privileged
mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM);
mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
mux2 #(`FLEN) FPUResMux (PreFpResM, PostProcResM, FResSelM[0], FpResM);
// M/W pipe registers
flopenrc #(`FLEN) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(`FLEN) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM},
{FRegWriteW, FResultSelW, FmtW});
{FRegWriteM, FResSelM, FmtM},
{FRegWriteW, FResSelW, FmtW});
// BEGIN WRITEBACK STAGE
//////////////////////////////////////////////////////////////////////////////////////////
// ||| |||
// ||| |||
// ||| ||| |||
// ||| ||||| |||
// ||| ||| ||| |||
// |||||| ||||||
// ||| |||
//////////////////////////////////////////////////////////////////////////////////////////
// put ReadData into NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
// - for load instruction
@ -328,6 +399,6 @@ module fpu (
endgenerate
// select the result to be written to the FP register
if(`FLEN>=64)
mux4 #(`FLEN) FPUResultMux (ReadResW, FMAResW, {{`FLEN-64{1'b0}},FDivResW}, FResW, FResultSelW, FPUResultW);
mux2 #(`FLEN) FPUResultMux (FpResW, ReadResW, FResSelW[1], FPUResultW);
endmodule // fpu

View File

@ -0,0 +1,29 @@
`include "wally-config.vh"
module lzacorrection(
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
input logic FmaOp,
input logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
input logic PreResultDenorm, // is the result denormalized - calculated before LZA corection
input logic KillProdM, // is the product set to zero
input logic SumZero,
output logic [`CORRSHIFTSZ-1:0] CorrShifted, // the shifted sum before LZA correction
output logic [`NE+1:0] SumExp // exponent of the normalized sum
);
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic ResDenorm; // is the result denormalized
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
// LZA correction
assign LZAPlus1 = Shifted[`NORMSHIFTSZ-2];
assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1];
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign SumExp = (ConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &ConvNormSumExp&Shifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResDenorm)}};
// recalculate if the result is denormalized
assign ResDenorm = PreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
endmodule

View File

@ -0,0 +1,46 @@
`include "wally-config.vh"
// convert shift
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
// process:
// - start - CalcExp = 1 + XExp - Largest Bias
// | `XLEN zeros | Mantissa | 0's if nessisary |
//
// - shift left 1 (1)
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
// . <- binary point
//
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
// | 0's | Mantissa | 0's if nessisary |
// | keep |
//
// fp -> fp:
// - if result is denormalized or underflowed:
// | `NF-1 zeros | Mantissa | 0's if nessisary | << NF+CalcExp-1
// process:
// - start
// | mantissa | 0's |
//
// - shift right by NF-1 (NF-1)
// | `NF-1 zeros | mantissa | 0's |
//
// - shift left by CalcExp = XExp - Largest bias + new bias
// | 0's | mantissa | 0's |
// | keep |
//
// - if the input is denormalized:
// | lzcIn | 0's if nessisary | << ZeroCnt+1
// - plus 1 to shift out the first 1
//
// int -> fp: | lzcIn | 0's if nessisary | << ZeroCnt+1
// - plus 1 to shift out the first 1
module normshift(
input logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt, // normalization shift count
input logic [`NORMSHIFTSZ-1:0] ShiftIn, // is the sum zero
output logic [`NORMSHIFTSZ-1:0] Shifted // is the sum zero
);
assign Shifted = ShiftIn << ShiftAmt;
endmodule

View File

@ -0,0 +1,203 @@
///////////////////////////////////////////
//
// Written: Katherine Parry, David Harris
// Modified: 6/23/2021
//
// Purpose: Floating point multiply-accumulate of configurable size
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module postprocess(
input logic XSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic [3*`NF+5:0] SumM, // the positive sum
input logic NegSumM, // was the sum negitive
input logic InvZM, // do you invert Z
input logic ZDenormM, // is the original precision denormalized
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic CvtResDenormUfM,
input logic [`LOGLGLEN-1:0] CvtShiftAmtM, // how much to shift by
input logic CvtResSgnM, // the result's sign
input logic FWriteIntM, // is fp->int (since it's writting to the integer register)
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic IntZeroM, // is the input zero
input logic [1:0] PostProcSelM, // select result to be written to fp register
output logic [`FLEN-1:0] PostProcResM, // FMA final result
output logic [4:0] PostProcFlgM,
output logic [`XLEN-1:0] FCvtIntResM // the int conversion result
);
logic [`NF-1:0] ResFrac; // Result fraction
logic [`NE-1:0] ResExp; // Result exponent
logic [`CORRSHIFTSZ-1:0] CorrShifted; // the shifted sum before LZA correction
logic [`NE+1:0] SumExp; // exponent of the normalized sum
logic [`NE+1:0] FullResExp; // ResExp with bits to determine sign and overflow
logic SumZero; // is the sum zero
logic Sticky; // Sticky bit
logic [3*`NF+8:0] FmaShiftIn; // is the sum zero
logic UfPlus1; // do you add one (for determining underflow flag)
logic Round; // bits needed to determine rounding
logic [`LGLEN+`NF:0] CvtShiftIn; // number to be shifted
logic Mult; // multiply opperation
logic [`FLEN:0] RoundAdd; // how much to add to the result
logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
logic [3*`NF+8:0] ShiftIn; // is the sum zero
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
logic Plus1; // add one to the final result?
logic Overflow, Underflow, Invalid; // flags
logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
logic ToInt; // is the opperation an fp->int conversion?
logic [`NE+1:0] RoundExp;
logic [1:0] NegResMSBS;
logic CvtOp;
logic FmaOp;
logic CvtResUf;
logic DivOp;
logic InfIn;
logic ResSgn;
logic NaNIn;
logic UfLSBRes;
logic Sqrt;
logic [`FMTBITS-1:0] OutFmt;
// signals to help readability
assign Signed = FOpCtrlM[0];
assign Int64 = FOpCtrlM[1];
assign IntToFp = FOpCtrlM[2];
assign ToInt = FWriteIntM;
assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0];
assign CvtOp = (PostProcSelM == 2'b00);
assign FmaOp = (PostProcSelM == 2'b10);
assign DivOp = (PostProcSelM == 2'b01);
assign Sqrt = FOpCtrlM[0];
// is there an input of infinity or NaN being used
assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp);
assign NaNIn = (XNaNM&~(IntToFp&CvtOp))|(YNaNM&~CvtOp)|(ZNaNM&FmaOp);
// choose the ouptut format depending on the opperation
// - fp -> fp: OpCtrl contains the percision of the output
// - otherwise: FmtM contains the percision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp|~CvtOp ? FmtM : (FOpCtrlM[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp|~CvtOp ? FmtM : FOpCtrlM[1:0];
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .XManM, .CvtLzcInM,
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
always_comb
case(PostProcSelM)
2'b10: begin // fma
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(3*`NF+7){1'b0}}, FmaShiftAmt};
ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
end
2'b00: begin // cvt
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM};
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}};
end
2'b01: begin //div
ShiftAmt = 0;//{DivShiftAmt};
ShiftIn = 0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn};
end
default: begin
ShiftAmt = 0;
ShiftIn = 0;
end
endcase
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp,
.SumZero, .Shifted, .SumExp, .CorrShifted);
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// round to zero
// round to -infinity
// round to infinity
// round to nearest max magnitude
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM,
.InvZM, .ResSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf,
.UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
resultsign resultsign(.FrmM, .PSgnM, .PostProcSelM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky,
.ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, .CvtResSgnM, .ResSgn);
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZeroM, .YZeroM,
.XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
.XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round,
.UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
.RoundExp, .NegResMSBS, .Invalid, .Overflow, .Underflow, .PostProcFlgM);
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
resultselect resultselect(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM, .XZeroM,
.IntZeroM, .FrmM, .OutFmt, .AddendStickyM, .KillProdM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .CvtResUf,
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegResMSBS,
.FullResExp, .Shifted, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM);
endmodule

View File

@ -0,0 +1,282 @@
`include "wally-config.vh"
module resultselect(
input logic XSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic InfIn,
input logic XZeroM,
input logic IntZeroM,
input logic NaNIn,
input logic IntToFp,
input logic Int64,
input logic Signed,
input logic CvtOp,
input logic [`NORMSHIFTSZ-1:0] Shifted, // is the sum zero
input logic FmaOp,
input logic Plus1,
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic ZDenormM, // is the original precision denormalized
input logic ZZeroM,
input logic ResSgn, // the res's sign
input logic [`FLEN:0] RoundAdd, // how much to add to the res
input logic Invalid, Overflow, // flags
input logic CvtResUf,
input logic [`NE-1:0] ResExp, // Res exponent
input logic [`NE+1:0] FullResExp, // Res exponent
input logic [`NF-1:0] ResFrac, // Res fraction
output logic [`FLEN-1:0] PostProcResM, // final res
output logic [1:0] NegResMSBS,
output logic [`XLEN-1:0] FCvtIntResM // final res
);
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, KillProdRes, UfRes, NormRes; // possible results
logic OfResMax;
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
logic [`XLEN+1:0] NegRes; // the negation of the result
logic KillRes;
// does the overflow result output the maximum normalized floating point number
// output infinity if the input is infinity
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
if (`FPSIZES == 1) begin
//NaN res selection depending on standard
if(`IEEE754) begin
assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
assign OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
assign NormRes = {ResSgn, ResExp, ResFrac};
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
if(`IEEE754) begin
assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
assign OfRes = OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
NormRes = {ResSgn, ResExp, ResFrac};
end
`FMT1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end
`FMT2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
end
default: begin
if(`IEEE754) begin
XNaNRes = (`FLEN)'(0);
YNaNRes = (`FLEN)'(0);
ZNaNRes = (`FLEN)'(0);
InvalidRes = (`FLEN)'(0);
end else begin
InvalidRes = (`FLEN)'(0);
end
OfRes = (`FLEN)'(0);
KillProdRes = (`FLEN)'(0);
UfRes = (`FLEN)'(0);
NormRes = (`FLEN)'(0);
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
NormRes = {ResSgn, ResExp, ResFrac};
end
2'h1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
end
2'h0: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
end
2'h2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
end
endcase
end
// determine if you shoould kill the res - Cvt
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1];//Underflow & ~ResDenorm & (ResExp!=1);
if(`IEEE754) begin
assign PostProcResM = XNaNM&~(IntToFp&CvtOp) ? XNaNRes :
YNaNM&~CvtOp ? YNaNRes :
ZNaNM&FmaOp ? ZNaNRes :
Invalid ? InvalidRes :
Overflow|InfIn ? OfRes :
KillProdM&FmaOp ? KillProdRes :
KillRes ? UfRes :
NormRes;
end else begin
assign PostProcResM = NaNIn|Invalid ? InvalidRes :
Overflow|InfIn ? OfRes :
KillProdM&FmaOp ? KillProdRes :
KillRes ? UfRes :
NormRes;
end
///////////////////////////////////////////////////////////////////////////////////////
//
// ||||||||||| ||| ||| |||||||||||||
// ||| |||||| ||| |||
// ||| ||| ||| ||| |||
// ||| ||| |||||| |||
// ||||||||||| ||| ||| |||
//
///////////////////////////////////////////////////////////////////////////////////////
// *** probably can optimize the negation
// select the overflow integer res
// - negitive infinity and out of range negitive input
// | int | long |
// signed | -2^31 | -2^63 |
// unsigned | 0 | 0 |
//
// - positive infinity and out of range negitive input and NaNs
// | int | long |
// signed | 2^31-1 | 2^63-1 |
// unsigned | 2^32-1 | 2^64-1 |
//
// other: 32 bit unsinged res should be sign extended as if it were a signed number
assign OfIntRes = Signed ? XSgnM&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
XSgnM&~XNaNM ? {`XLEN{1'b0}} : // unsigned negitive
{`XLEN{1'b1}};// unsigned positive
// round and negate the positive res if needed
assign NegRes = XSgnM ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
//*** false critical path probably
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
// select the integer output
// - if the input is invalid (out of bounds NaN or Inf) then output overflow res
// - if the input underflows
// - if rounding and signed opperation and negitive input, output -1
// - otherwise output a rounded 0
// - otherwise output the normal res (trmined and sign extended if nessisary)
assign FCvtIntResM = Invalid ? OfIntRes :
CvtCalcExpM[`NE] ? XSgnM&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
endmodule

View File

@ -0,0 +1,50 @@
`include "wally-config.vh"
module resultsign(
input logic [2:0] FrmM,
input logic PSgnM, ZSgnEffM,
input logic InvZM,
input logic ZInfM,
input logic InfIn,
input logic NegSumM,
input logic [1:0] PostProcSelM,
input logic [`NE+1:0] SumExp,
input logic SumZero,
input logic Mult,
input logic Round,
input logic Sticky,
input logic CvtResSgnM,
output logic ResSgn
);
logic ZeroSgn;
logic InfSgn;
logic FmaResSgn;
logic FmaResSgnTmp;
logic Underflow;
// logic ResultSgnTmp;
// Determine the sign if the sum is zero
// if cancelation then 0 unless round to -infinity
// if multiply then Psgn
// otherwise psign
assign Underflow = SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky));
assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM;
// is the result negitive
// if p - z is the Sum negitive
// if -p + z is the Sum positive
// if -p - z then the Sum is negitive
assign FmaResSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | (ZSgnEffM&PSgnM);
assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
assign FmaResSgn = InfIn ? InfSgn : SumZero ? ZeroSgn : FmaResSgnTmp;
always_comb
case(PostProcSelM)
2'b10: ResSgn = FmaResSgn; // fma
2'b00: ResSgn = CvtResSgnM; // cvt
2'b01: ResSgn = 0; // divide
default: ResSgn = 1'bx;
endcase
endmodule

316
pipelined/src/fpu/round.sv Normal file
View File

@ -0,0 +1,316 @@
`include "wally-config.vh"
// what position is XLEN in?
// options:
// 1: XLEN > NF > NF1
// 2: NF > XLEN > NF1
// 3: NF > NF1 > XLEN
// single and double will always be smaller than XLEN
`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
module round(
input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic FmaOp,
input logic [1:0] PostProcSelM,
input logic CvtResDenormUfM,
input logic ToInt,
input logic CvtOp,
input logic CvtResUf,
input logic [`CORRSHIFTSZ-1:0] CorrShifted,
input logic AddendStickyM, // addend's sticky bit
input logic ZZeroM, // is Z zero
input logic InvZM, // invert Z
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic ResSgn, // the result's sign
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
output logic UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
output logic [`NF-1:0] ResFrac, // Result fraction
output logic [`NE-1:0] ResExp, // Result exponent
output logic Sticky, // sticky bit
output logic [`NE+1:0] RoundExp,
output logic Plus1,
output logic [`FLEN:0] RoundAdd, // how much to add to the result
output logic Round, UfLSBRes // bits needed to calculate rounding
);
logic LSBRes; // bit used for rounding - least significant bit of the normalized sum
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
logic NormSumSticky; // normalized sum's sticky bit
logic UfSticky; // sticky bit for underlow calculation
logic [`NF-1:0] RoundFrac;
logic FpRes, IntRes;
logic UfRound;
logic FpRound, FpLSBRes, FpUfRound;
logic CalcPlus1, FpPlus1;
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// {Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
// - don't add 1 if a small number was supposed to be subtracted
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// - plus 1 otherwise
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to -infinity
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to infinity
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1
// - don't add 1 if a small number was supposed to be subtracted
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// - Plus 1 otherwise
assign IntRes = CvtOp & ToInt;
assign FpRes = ~IntRes;
// sticky bit calculation
if (`FPSIZES == 1) begin
// 1: XLEN > NF
// | XLEN |
// | NF |1|1|
// ^ ^ if floating point result
// ^ if not an FMA result
if (`XLENPOS == 1)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN
if (`XLENPOS == 2)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 2) begin
// XLEN is either 64 or 32
// so half and single are always smaller then XLEN
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 3) begin
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 4) begin
// Quad precision will always be greater than XLEN
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
// 3: NF > NF1 > XLEN
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
(|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
end
// only add the Addend sticky if doing an FMA opperation
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp;
// determine round and LSB of the rounded value
// - underflow round bit is used to determint the underflow flag
if (`FPSIZES == 1) begin
assign FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
assign FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
assign FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
end else if (`FPSIZES == 2) begin
assign FpRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-1] : CorrShifted[`CORRSHIFTSZ-`NF1-1];
assign FpLSBRes = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF] : CorrShifted[`CORRSHIFTSZ-`NF1];
assign FpUfRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-2] : CorrShifted[`CORRSHIFTSZ-`NF1-2];
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
end
`FMT1: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`NF1-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF1];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF1-2];
end
`FMT2: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`NF2-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF2];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF2-2];
end
default: begin
FpRound = 1'bx;
FpLSBRes = 1'bx;
FpUfRound = 1'bx;
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`Q_NF];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-2];
end
2'h1: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`D_NF-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`D_NF];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`D_NF-2];
end
2'h0: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`S_NF-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`S_NF];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`S_NF-2];
end
2'h2: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`H_NF-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`H_NF];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`H_NF-2];
end
endcase
end
assign Round = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-1] : FpRound;
assign LSBRes = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
assign UfRound = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
// used to determine underflow flag
assign UfLSBRes = FpRound;
// determine sticky
assign Sticky = UfSticky | UfRound;
// Deterimine if a small number was supposed to be subtrated - For Fma calculation only
assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM & FmaOp;
assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM & FmaOp;
always_comb begin
// Determine if you add 1
case (FrmM)
3'b000: CalcPlus1 = Round & ((Sticky| LSBRes)&~SubBySmallNum);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResSgn & ~(SubBySmallNum & ~Round);//round down
3'b011: CalcPlus1 = ~ResSgn & ~(SubBySmallNum & ~Round);//round up
3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (FrmM)
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = ResSgn & ~(UfSubBySmallNum & ~UfRound);//round down
3'b011: UfCalcPlus1 = ~ResSgn & ~(UfSubBySmallNum & ~UfRound);//round up
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
3'b010: CalcMinus1 = ~ResSgn & ~Round & SubBySmallNum;//round down
3'b011: CalcMinus1 = ResSgn & ~Round & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
end
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (Sticky | Round);
assign FpPlus1 = Plus1&~(ToInt&CvtOp);
assign UfPlus1 = UfCalcPlus1 & Sticky; // UfRound is part of sticky
assign Minus1 = CalcMinus1 & (Sticky | Round);
// Compute rounded result
if (`FPSIZES == 1) begin
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1};
end else if (`FPSIZES == 2) begin
// \/FLEN+1
// | NE+2 | NF |
// '-NE+2-^----NF1----^
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} :
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
end else if (`FPSIZES == 3) begin
always_comb begin
case (OutFmt)
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
default: RoundAdd = (`FLEN+1)'(0);
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (OutFmt)
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
endcase
end
end
// determine the result to be roundned
assign RoundFrac = CorrShifted[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
always_comb
case(PostProcSelM)
2'b10: RoundExp = SumExp; // fma
2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
2'b01: RoundExp = 0; // divide
default: RoundExp = 0;
endcase
// round the result
// - if the fraction overflows one should be added to the exponent
assign {FullResExp, ResFrac} = {RoundExp, RoundFrac} + RoundAdd;
assign ResExp = FullResExp[`NE-1:0];
endmodule

View File

@ -61,6 +61,8 @@ module datapath (
(* mark_debug = "true" *) input logic RegWriteW,
input logic SquashSCW,
input logic [2:0] ResultSrcW,
input logic [`XLEN-1:0] FCvtIntResW,
input logic [1:0] FResSelW,
output logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW,
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
@ -120,14 +122,17 @@ module datapath (
flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
flopen #(`XLEN) ReadDataWReg(clk, ~StallW, ReadDataM, ReadDataW);
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
// floating point interactions: fcvt, fp stores
if (`F_SUPPORTED) begin:fpmux
logic [`XLEN-1:0] IFCvtResultW;
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
end else begin:fpmux
assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE;
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
end
// handle Store Conditional result if atomic extension supported

View File

@ -61,6 +61,8 @@ module ieu (
// Writeback stage
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
input logic [1:0] FResSelW,
input logic [`XLEN-1:0] FCvtIntResW,
output logic [4:0] RdW,
output logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW,
@ -105,8 +107,8 @@ module ieu (
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .IllegalFPUInstrE,
.FWriteDataE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE,
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW,
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE, .FResSelW,
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
.CSRReadValW, .ReadDataM, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
forward fw(

View File

@ -38,9 +38,13 @@ module ifu (
// Bus interface
(* mark_debug = "true" *) input logic [`XLEN-1:0] IFUBusHRDATA,
(* mark_debug = "true" *) input logic IFUBusAck,
(* mark_debug = "true" *) input logic IFUBusInit,
(* mark_debug = "true" *) output logic [`PA_BITS-1:0] IFUBusAdr,
(* mark_debug = "true" *) output logic IFUBusRead,
(* mark_debug = "true" *) output logic IFUStallF,
(* mark_debug = "true" *) output logic [2:0] IFUBurstType,
(* mark_debug = "true" *) output logic [1:0] IFUTransType,
(* mark_debug = "true" *) output logic IFUTransComplete,
(* mark_debug = "true" *) output logic [`XLEN-1:0] PCF,
// Execute
output logic [`XLEN-1:0] PCLinkE,
@ -201,8 +205,8 @@ module ifu (
busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
busdp(.clk, .reset,
.LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusWrite(), .LSUBusWriteCrit(),
.LSUBusRead(IFUBusRead), .LSUBusSize(),
.LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusInit(IFUBusInit), .LSUBusWrite(), .LSUBusWriteCrit(),
.LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUBurstType(IFUBurstType), .LSUTransType(IFUTransType), .LSUTransComplete(IFUTransComplete),
.LSUFunct3M(3'b010), .LSUBusAdr(IFUBusAdr), .DCacheBusAdr(ICacheBusAdr),
.WordCount(),
.DCacheFetchLine(ICacheFetchLine),

View File

@ -40,9 +40,13 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
// bus interface
input logic [`XLEN-1:0] LSUBusHRDATA,
input logic LSUBusAck,
input logic LSUBusInit,
output logic LSUBusWrite,
output logic LSUBusRead,
output logic [2:0] LSUBusSize,
output logic [2:0] LSUBusSize,
output logic [2:0] LSUBurstType,
output logic [1:0] LSUTransType, // For AHBLite
output logic LSUTransComplete,
input logic [2:0] LSUFunct3M,
output logic [`PA_BITS-1:0] LSUBusAdr, // ** change name to HADDR to make ahb lite.
output logic [LOGWPL-1:0] WordCount,
@ -66,13 +70,15 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
localparam integer WordCountThreshold = CACHE_ENABLED ? WORDSPERLINE - 1 : 0;
logic [`PA_BITS-1:0] LocalLSUBusAdr;
logic [LOGWPL-1:0] WordCountDelayed;
// *** implement flops as an array if feasbile; DCacheBusWriteData might be a problem
// *** better name than DCacheBusWriteData
genvar index;
for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer
logic [WORDSPERLINE-1:0] CaptureWord;
assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCount);
assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCountDelayed);
flopen #(`XLEN) fb(.clk, .en(CaptureWord[index]), .d(LSUBusHRDATA),
.q(DCacheBusWriteData[(index+1)*`XLEN-1:index*`XLEN]));
end
@ -83,6 +89,6 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
busfsm #(WordCountThreshold, LOGWPL, CACHE_ENABLED) busfsm(
.clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine,
.LSUBusAck, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead,
.DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount);
.LSUBusAck, .LSUBusInit, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead,
.LSUBurstType, .LSUTransType, .LSUTransComplete, .DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount, .WordCountDelayed);
endmodule

View File

@ -41,6 +41,7 @@ module busfsm #(parameter integer WordCountThreshold,
input logic DCacheFetchLine,
input logic DCacheWriteLine,
input logic LSUBusAck,
input logic LSUBusInit, // This might be better as LSUBusLock, or to send this using LSUBusAck.
input logic CPUBusy,
input logic CacheableM,
@ -48,10 +49,13 @@ module busfsm #(parameter integer WordCountThreshold,
output logic LSUBusWrite,
output logic LSUBusWriteCrit,
output logic LSUBusRead,
output logic [2:0] LSUBurstType,
output logic LSUTransComplete,
output logic [1:0] LSUTransType,
output logic DCacheBusAck,
output logic BusCommittedM,
output logic SelUncachedAdr,
output logic [LOGWPL-1:0] WordCount);
output logic [LOGWPL-1:0] WordCount, WordCountDelayed);
@ -61,7 +65,8 @@ module busfsm #(parameter integer WordCountThreshold,
logic CntReset;
logic WordCountFlag;
logic [LOGWPL-1:0] NextWordCount;
logic UnCachedAccess;
logic UnCachedAccess, UnCachedRW;
logic [2:0] LocalBurstType;
typedef enum logic [2:0] {STATE_BUS_READY,
@ -75,18 +80,27 @@ module busfsm #(parameter integer WordCountThreshold,
(* mark_debug = "true" *) busstatetype BusCurrState, BusNextState;
// Used to send address for address stage of AHB.
flopenr #(LOGWPL)
WordCountReg(.clk(clk),
.reset(reset | CntReset),
.en(CntEn),
.d(NextWordCount),
.q(WordCount));
.q(WordCount));
// Used to store data from data phase of AHB.
flopenr #(LOGWPL)
WordCountDelayedReg(.clk(clk),
.reset(reset | CntReset),
.en(CntEn),
.d(WordCount),
.q(WordCountDelayed));
assign NextWordCount = WordCount + 1'b1;
assign WordCountFlag = (WordCount == WordCountThreshold[LOGWPL-1:0]);
assign CntEn = PreCntEn & LSUBusAck;
assign PreCntEn = (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_WRITE);
assign WordCountFlag = (WordCountDelayed == WordCountThreshold[LOGWPL-1:0]); // Detect when we are waiting on the final access.
assign CntEn = (PreCntEn & LSUBusAck | (LSUBusInit)) & ~WordCountFlag & ~UnCachedRW; // Want to count when doing cache accesses and we aren't wrapping up.
assign UnCachedAccess = ~CACHE_ENABLED | ~CacheableM;
@ -120,14 +134,29 @@ module busfsm #(parameter integer WordCountThreshold,
endcase
end
always_comb begin
case(WordCountThreshold)
0: LocalBurstType = 3'b000;
3: LocalBurstType = 3'b011; // INCR4
7: LocalBurstType = 3'b101; // INCR8
15: LocalBurstType = 3'b111; // INCR16
default: LocalBurstType = 3'b001; // INCR without end.
endcase
end
assign CntReset = BusCurrState == STATE_BUS_READY;
// Would these be better as always_comb statements or muxes?
assign LSUBurstType = (UnCachedRW) ? 3'b0 : LocalBurstType; // Don't want to use burst when doing an Uncached Access.
assign LSUTransComplete = (UnCachedRW) ? LSUBusAck : WordCountFlag & LSUBusAck;
// Use SEQ if not doing first word, NONSEQ if doing the first read/write, and IDLE if finishing up.
assign LSUTransType = (|WordCount) & ~UnCachedRW ? 2'b11 : (LSUBusRead | LSUBusWrite) & (~LSUTransComplete) ? 2'b10 : 2'b00;
// Reset if we aren't initiating a transaction or if we are finishing a transaction.
assign CntReset = BusCurrState == STATE_BUS_READY & ~(DCacheFetchLine | DCacheWriteLine) | LSUTransComplete;
assign BusStall = (BusCurrState == STATE_BUS_READY & ~IgnoreRequest & ((UnCachedAccess & (|LSURWM)) | DCacheFetchLine | DCacheWriteLine)) |
(BusCurrState == STATE_BUS_UNCACHED_WRITE) |
(BusCurrState == STATE_BUS_UNCACHED_READ) |
(BusCurrState == STATE_BUS_FETCH) |
(BusCurrState == STATE_BUS_WRITE);
assign PreCntEn = BusCurrState == STATE_BUS_FETCH | BusCurrState == STATE_BUS_WRITE;
assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0] & ~IgnoreRequest) |
(BusCurrState == STATE_BUS_UNCACHED_WRITE);
assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE);
@ -139,6 +168,10 @@ module busfsm #(parameter integer WordCountThreshold,
(BusCurrState == STATE_BUS_UNCACHED_READ);
assign LSUBusRead = UnCachedLSUBusRead | (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_READY & DCacheFetchLine);
// Makes bus only do uncached reads/writes when we actually do uncached reads/writes. Needed because CacheableM is 0 when flushing cache.
assign UnCachedRW = UnCachedLSUBusWrite | UnCachedLSUBusRead;
assign DCacheBusAck = (BusCurrState == STATE_BUS_FETCH & WordCountFlag & LSUBusAck) |
(BusCurrState == STATE_BUS_WRITE & WordCountFlag & LSUBusAck);
assign BusCommittedM = BusCurrState != STATE_BUS_READY;

View File

@ -66,9 +66,13 @@ module lsu (
(* mark_debug = "true" *) output logic LSUBusRead,
(* mark_debug = "true" *) output logic LSUBusWrite,
(* mark_debug = "true" *) input logic LSUBusAck,
(* mark_debug = "true" *) input logic LSUBusInit,
(* mark_debug = "true" *) input logic [`XLEN-1:0] LSUBusHRDATA,
(* mark_debug = "true" *) output logic [`XLEN-1:0] LSUBusHWDATA,
(* mark_debug = "true" *) output logic [2:0] LSUBusSize,
(* mark_debug = "true" *) output logic [2:0] LSUBurstType,
(* mark_debug = "true" *) output logic [1:0] LSUTransType,
(* mark_debug = "true" *) output logic LSUTransComplete,
// page table walker
input logic [`XLEN-1:0] SATP_REGW, // from csr
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
@ -211,7 +215,7 @@ module lsu (
busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) busdp(
.clk, .reset,
.LSUBusHRDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusSize,
.LSUBusHRDATA, .LSUBusAck, .LSUBusInit, .LSUBusWrite, .LSUBusRead, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
.WordCount, .LSUBusWriteCrit,
.LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .DCacheFetchLine,
.DCacheWriteLine, .DCacheBusAck, .DCacheBusWriteData, .LSUPAdrM,

View File

@ -525,7 +525,7 @@ module ppa_decoder #(parameter WIDTH = 8) (
end
endmodule
module ppa_mux2_1 #(parameter WIDTH = 1) (
module ppa_mux2d_1 #(parameter WIDTH = 1) (
input logic [WIDTH-1:0] d0, d1,
input logic s,
output logic [WIDTH-1:0] y);
@ -533,7 +533,7 @@ module ppa_mux2_1 #(parameter WIDTH = 1) (
assign y = s ? d1 : d0;
endmodule
module ppa_mux4_1 #(parameter WIDTH = 1) (
module ppa_mux4d_1 #(parameter WIDTH = 1) (
input logic [WIDTH-1:0] d0, d1, d2, d3,
input logic [1:0] s,
output logic [WIDTH-1:0] y);
@ -541,7 +541,7 @@ module ppa_mux4_1 #(parameter WIDTH = 1) (
assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0);
endmodule
module ppa_mux8_1 #(parameter WIDTH = 1) (
module ppa_mux8d_1 #(parameter WIDTH = 1) (
input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5, d6, d7,
input logic [2:0] s,
output logic [WIDTH-1:0] y);

View File

@ -48,7 +48,7 @@ module gpio (
logic [31:0] input0d, input1d, input2d, input3d;
logic [31:0] input_val, input_en, output_en, output_val;
logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip;
logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip, out_xor;
logic initTrans, memwrite;
logic [7:0] entry, entryd;
@ -91,6 +91,7 @@ module gpio (
high_ip <= #1 0;
low_ie <= #1 0;
low_ip <= #1 0;
out_xor <= #1 0;
end else begin
// writes
if (memwrite)
@ -104,7 +105,7 @@ module gpio (
8'h20: fall_ie <= #1 Din;
8'h28: high_ie <= #1 Din;
8'h30: low_ie <= #1 Din;
8'h40: output_val <= #1 output_val ^ Din; // OUT_XOR
8'h40: out_xor <= #1 Din;
endcase
/* verilator lint_on CASEINCOMPLETE */
// reads
@ -121,7 +122,7 @@ module gpio (
8'h2C: Dout <= #1 high_ip;
8'h30: Dout <= #1 low_ie;
8'h34: Dout <= #1 low_ip;
8'h40: Dout <= #1 0; // OUT_XOR reads as 0
8'h40: Dout <= #1 out_xor;
default: Dout <= #1 0;
endcase
// interrupts
@ -152,7 +153,7 @@ module gpio (
flop #(32) sync2(HCLK,input1d,input2d);
flop #(32) sync3(HCLK,input2d,input3d);
assign input_val = input3d;
assign GPIOPinsOut = output_val;
assign GPIOPinsOut = output_val ^ out_xor;
assign GPIOPinsEn = output_en;
assign GPIOIntr = |{(rise_ip & rise_ie),(fall_ip & fall_ie),(high_ip & high_ie),(low_ip & low_ie)};

View File

@ -43,77 +43,37 @@ module ram #(parameter BASE=0, RANGE = 65535) (
output logic HRESPRam, HREADYRam
);
// Desired changes.
// 1. find a way to merge read and write address into 1 port.
// 2. remove all unnecessary latencies. (HREADY needs to be able to constant high.)
// 3. implement burst.
// 4. remove the configurable latency.
localparam ADDR_WIDTH = $clog2(RANGE/8);
localparam OFFSET = $clog2(`XLEN/8);
logic [`XLEN/8-1:0] ByteMask;
logic [31:0] HADDRD, RamAddr;
//logic prevHREADYRam, risingHREADYRam;
logic initTrans;
logic memwrite, memwriteD, memread;
logic nextHREADYRam;
//logic [3:0] busycount;
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));
// a new AHB transactions starts when HTRANS requests a transaction,
// the peripheral is selected, and the previous transaction is completing
assign initTrans = HREADY & HSELRam & (HTRANS[1]);
assign memwrite = initTrans & HWRITE; // *** why is initTrans needed? See CLINT interface
assign memwrite = initTrans & HWRITE;
assign memread = initTrans & ~HWRITE;
flopenr #(1) memwritereg(HCLK, ~HRESETn, HREADY, memwrite, memwriteD);
flopenr #(32) haddrreg(HCLK, ~HRESETn, HREADY, HADDR, HADDRD);
/* // busy FSM to extend READY signal
always @(posedge HCLK, negedge HRESETn)
if (~HRESETn) begin
busycount <= 0;
HREADYRam <= #1 0;
end else begin
if (initTrans) begin
busycount <= 0;
HREADYRam <= #1 0;
end else if (~HREADYRam) begin
if (busycount == 0) begin // Ram latency, for testing purposes. *** test with different values such as 2
HREADYRam <= #1 1;
end else begin
busycount <= busycount + 1;
end
end
end */
// Stall on a read after a write because the RAM can't take both adddresses on the same cycle
assign nextHREADYRam = ~(memwriteD & memread);
// assign nextHREADYRam = ~(memwriteD & ~memwrite);
flopr #(1) readyreg(HCLK, ~HRESETn, nextHREADYRam, HREADYRam);
// assign HREADYRam = ~(memwriteD & ~memwrite);
assign HRESPRam = 0; // OK
localparam ADDR_WIDTH = $clog2(RANGE/8);
localparam OFFSET = $clog2(`XLEN/8);
/* // Rising HREADY edge detector
// Indicates when ram is finishing up
// Needed because HREADY may go high for other reasons,
// and we only want to write data when finishing up.
flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam);
assign risingHREADYRam = HREADYRam & ~prevHREADYRam;*/
/*
bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA)
memory(.clk(HCLK), .reA(1'b1),
.addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam),
.weB(memwrite & risingHREADYRam), .bweB(ByteMaskM),
.addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); */
// On writes or during a wait state, use address delayed by one cycle to sync RamAddr with HWDATA or hold stalled address
mux2 #(32) adrmux(HADDR, HADDRD, memwriteD | ~HREADY, RamAddr);
// Byte mask for subword writes
// ***the CLINT and other peripherals duplicate this hardware
// *** it shoudl be centralized and sent over HWSTRB
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));
// single-ported RAM
bram1p1rw #(`XLEN/8, 8, ADDR_WIDTH)
memory(.clk(HCLK), .we(memwriteD), .bwe(ByteMask), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA));

View File

@ -93,10 +93,12 @@ module wallypipelinedcore (
logic FWriteIntE;
logic [`XLEN-1:0] FWriteDataE;
logic [`XLEN-1:0] FIntResM;
logic [`XLEN-1:0] FCvtIntResW;
logic FDivBusyE;
logic IllegalFPUInstrD, IllegalFPUInstrE;
logic FRegWriteM;
logic FPUStallD;
logic [1:0] FResSelW;
logic [4:0] SetFflagsM;
// memory management unit signals
@ -134,13 +136,16 @@ module wallypipelinedcore (
logic [`PA_BITS-1:0] IFUBusAdr;
logic [`XLEN-1:0] IFUBusHRDATA;
logic IFUBusRead;
logic IFUBusAck;
logic IFUBusAck, IFUBusInit;
logic [2:0] IFUBurstType;
logic [1:0] IFUTransType;
logic IFUTransComplete;
// AHB LSU interface
logic [`PA_BITS-1:0] LSUBusAdr;
logic LSUBusRead;
logic LSUBusWrite;
logic LSUBusAck;
logic LSUBusAck, LSUBusInit;
logic [`XLEN-1:0] LSUBusHRDATA;
logic [`XLEN-1:0] LSUBusHWDATA;
@ -152,6 +157,9 @@ module wallypipelinedcore (
logic [4:0] InstrClassM;
logic InstrAccessFaultF;
logic [2:0] LSUBusSize;
logic [2:0] LSUBurstType;
logic [1:0] LSUTransType;
logic LSUTransComplete;
logic DCacheMiss;
logic DCacheAccess;
@ -166,8 +174,8 @@ module wallypipelinedcore (
.StallF, .StallD, .StallE, .StallM,
.FlushF, .FlushD, .FlushE, .FlushM,
// Fetch
.IFUBusHRDATA, .IFUBusAck, .PCF, .IFUBusAdr,
.IFUBusRead, .IFUStallF,
.IFUBusHRDATA, .IFUBusAck, .IFUBusInit, .PCF, .IFUBusAdr,
.IFUBusRead, .IFUStallF, .IFUBurstType, .IFUTransType, .IFUTransComplete,
.ICacheAccess, .ICacheMiss,
// Execute
@ -224,6 +232,8 @@ module wallypipelinedcore (
.CSRReadValW, .ReadDataM, .MDUResultW,
.RdW, .ReadDataW,
.InstrValidM,
.FCvtIntResW,
.FResSelW,
// hazards
.StallD, .StallE, .StallM, .StallW,
@ -247,8 +257,8 @@ module wallypipelinedcore (
.IEUAdrE, .IEUAdrM, .WriteDataE,
.ReadDataM, .FlushDCacheM,
// connected to ahb (all stay the same)
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck,
.LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize,
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit,
.LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
// connect to csr or privilege and stay the same.
.PrivilegeModeW, .BigEndianM, // connects to csr
@ -279,13 +289,22 @@ module wallypipelinedcore (
ahblite ebu(// IFU connections
.clk, .reset,
.UnsignedLoadM(1'b0), .AtomicMaskedM(2'b00),
.IFUBusAdr,
.IFUBusRead, .IFUBusHRDATA, .IFUBusAck,
.IFUBusAdr, .IFUBusRead,
.IFUBusHRDATA,
.IFUBurstType,
.IFUTransType,
.IFUTransComplete,
.IFUBusAck,
.IFUBusInit,
// Signals from Data Cache
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusHWDATA,
.LSUBusHRDATA,
.LSUBusSize,
.LSUBurstType,
.LSUTransType,
.LSUTransComplete,
.LSUBusAck,
.LSUBusInit,
.HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn,
.HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST,
@ -375,6 +394,8 @@ module wallypipelinedcore (
.FWriteIntE, // integer register write enable
.FWriteDataE, // Data to be written to memory
.FIntResM, // data to be written to integer register
.FCvtIntResW, // fp -> int conversion result to be stored in int register
.FResSelW, // fpu result selection
.FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
.IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
.SetFflagsM // FPU flags (to privileged unit)

View File

@ -1,289 +0,0 @@
4000000000000000_4000000000000000_3ff0000000000000
c018000000000000_4000000000000000_c008000000000000
4024000000000000_4000000000000000_4014000000000000
c032000000000000_4000000000000000_c022000000000000
4041000000000000_4000000000000000_4031000000000000
c05c000000000000_4000000000000000_c04c000000000000
406e000000000000_4000000000000000_405e000000000000
c07ffff583a53b8e_4000000000000000_c06ffff583a53b8e
408199999999999a_4000000000000000_407199999999999a
c093333333333333_4000000000000000_c083333333333333
40a028f5c28f5c29_4000000000000000_409028f5c28f5c29
c0b004189374bc6a_4000000000000000_c0a004189374bc6a
40c00068db8bac71_4000000000000000_40b00068db8bac71
c0dd1745d1745d17_4000000000000000_c0cd1745d1745d17
40e5555555555555_4000000000000000_40d5555555555555
c0f999999999999a_4000000000000000_c0e999999999999a
410c71c71c71c71c_4000000000000000_40fc71c71c71c71c
4000000000000000_c018000000000000_bfe5555555555555
c018000000000000_c018000000000000_3ff0000000000000
4024000000000000_c018000000000000_c00aaaaaaaaaaaab
c032000000000000_c018000000000000_4018000000000000
4041000000000000_c018000000000000_c026aaaaaaaaaaab
c05c000000000000_c018000000000000_4032aaaaaaaaaaab
406e000000000000_c018000000000000_c044000000000000
c07ffff583a53b8e_c018000000000000_4055554e57c37d09
408199999999999a_c018000000000000_c067777777777778
c093333333333333_c018000000000000_4079999999999999
40a028f5c28f5c29_c018000000000000_c0858bf258bf258c
c0b004189374bc6a_c018000000000000_40955acb6f46508d
40c00068db8bac71_c018000000000000_c0a555e124ba3b41
c0dd1745d1745d17_c018000000000000_40b364d9364d9365
40e5555555555555_c018000000000000_c0cc71c71c71c71c
c0f999999999999a_c018000000000000_40d1111111111111
410c71c71c71c71c_c018000000000000_c0e2f684bda12f68
4000000000000000_4024000000000000_3fd999999999999a
c018000000000000_4024000000000000_bfe3333333333333
4024000000000000_4024000000000000_3ff0000000000000
c032000000000000_4024000000000000_c00ccccccccccccd
4041000000000000_4024000000000000_401b333333333333
c05c000000000000_4024000000000000_c026666666666666
406e000000000000_4024000000000000_4038000000000000
c07ffff583a53b8e_4024000000000000_c0499991361dc93e
408199999999999a_4024000000000000_405c28f5c28f5c2a
c093333333333333_4024000000000000_c06eb851eb851eb8
40a028f5c28f5c29_4024000000000000_4079db22d0e56042
c0b004189374bc6a_4024000000000000_c089a027525460aa
40c00068db8bac71_4024000000000000_40999a415f45e0b5
c0dd1745d1745d17_4024000000000000_c0a745d1745d1746
40e5555555555555_4024000000000000_40b1111111111111
c0f999999999999a_4024000000000000_c0c47ae147ae147b
410c71c71c71c71c_4024000000000000_40d6c16c16c16c16
4000000000000000_c032000000000000_bfcc71c71c71c71c
c018000000000000_c032000000000000_3fd5555555555555
4024000000000000_c032000000000000_bfe1c71c71c71c72
c032000000000000_c032000000000000_3ff0000000000000
4041000000000000_c032000000000000_c00e38e38e38e38e
c05c000000000000_c032000000000000_4018e38e38e38e39
406e000000000000_c032000000000000_c02aaaaaaaaaaaab
c07ffff583a53b8e_c032000000000000_403c71bdca59fc0c
408199999999999a_c032000000000000_c04f49f49f49f4a0
c093333333333333_c032000000000000_4051111111111111
40a028f5c28f5c29_c032000000000000_c06cba9876543210
c0b004189374bc6a_c032000000000000_407c790f3f086b67
40c00068db8bac71_c032000000000000_c08c7281864da457
c0dd1745d1745d17_c032000000000000_4099dbcc48676f31
40e5555555555555_c032000000000000_c0a2f684bda12f68
c0f999999999999a_c032000000000000_40b6c16c16c16c17
410c71c71c71c71c_c032000000000000_c0c948b0fcd6e9e0
4000000000000000_4041000000000000_3fbe1e1e1e1e1e1e
c018000000000000_4041000000000000_bfc6969696969697
4024000000000000_4041000000000000_3fd2d2d2d2d2d2d3
c032000000000000_4041000000000000_bfe0f0f0f0f0f0f1
4041000000000000_4041000000000000_3ff0000000000000
c05c000000000000_4041000000000000_c00a5a5a5a5a5a5a
406e000000000000_4041000000000000_401c3c3c3c3c3c3c
c07ffff583a53b8e_4041000000000000_c02e1e143faa9268
408199999999999a_4041000000000000_4030909090909091
c093333333333333_4041000000000000_c042121212121212
40a028f5c28f5c29_4041000000000000_405e6b3804d19e6b
c0b004189374bc6a_4041000000000000_c06e25d3e863448b
40c00068db8bac71_4041000000000000_407e1ee37f25085c
c0dd1745d1745d17_4041000000000000_c08b6132a7041b61
40e5555555555555_4041000000000000_4094141414141414
c0f999999999999a_4041000000000000_c0a8181818181818
410c71c71c71c71c_4041000000000000_40bac5701ac5701a
4000000000000000_c05c000000000000_bfa2492492492492
c018000000000000_c05c000000000000_3fbb6db6db6db6db
4024000000000000_c05c000000000000_bfc6db6db6db6db7
c032000000000000_c05c000000000000_3fd4924924924925
4041000000000000_c05c000000000000_bfe36db6db6db6db
c05c000000000000_c05c000000000000_3ff0000000000000
406e000000000000_c05c000000000000_c001249249249249
c07ffff583a53b8e_c05c000000000000_4012491e945e6b2d
408199999999999a_c05c000000000000_c0241d41d41d41d5
c093333333333333_c05c000000000000_4035f15f15f15f16
40a028f5c28f5c29_c05c000000000000_c04277f44c118de6
c0b004189374bc6a_c05c000000000000_40524dd2f1a9fbe7
40c00068db8bac71_c05c000000000000_c062499c689fa081
c0dd1745d1745d17_c05c000000000000_40709f959c427e56
40e5555555555555_c05c000000000000_c088618618618618
c0f999999999999a_c05c000000000000_409d41d41d41d41e
410c71c71c71c71c_c05c000000000000_c0a0410410410410
4000000000000000_406e000000000000_3f91111111111111
c018000000000000_406e000000000000_bfa999999999999a
4024000000000000_406e000000000000_3fb5555555555555
c032000000000000_406e000000000000_bfc3333333333333
4041000000000000_406e000000000000_3fd2222222222222
c05c000000000000_406e000000000000_bfedddddddddddde
406e000000000000_406e000000000000_3ff0000000000000
c07ffff583a53b8e_406e000000000000_c001110b796930d4
408199999999999a_406e000000000000_4012c5f92c5f92c6
c093333333333333_406e000000000000_c0247ae147ae147b
40a028f5c28f5c29_406e000000000000_40313cc1e098ead6
c0b004189374bc6a_406e000000000000_c041156f8c384071
40c00068db8bac71_406e000000000000_40511180ea2e95ce
c0dd1745d1745d17_406e000000000000_c06f07c1f07c1f07
40e5555555555555_406e000000000000_4076c16c16c16c16
c0f999999999999a_406e000000000000_c08b4e81b4e81b4f
410c71c71c71c71c_406e000000000000_409e573ac901e573
4000000000000000_c07ffff583a53b8e_bf8000053e2f1a08
c018000000000000_c07ffff583a53b8e_3f980007dd46a70b
4024000000000000_c07ffff583a53b8e_bfa400068dbae089
c032000000000000_c07ffff583a53b8e_3fb20005e5f4fd48
4041000000000000_c07ffff583a53b8e_bfc1000592120ba8
c05c000000000000_c07ffff583a53b8e_3fdc00092cd26d8d
406e000000000000_c07ffff583a53b8e_bfee0009d49850ce
c07ffff583a53b8e_c07ffff583a53b8e_3ff0000000000000
408199999999999a_c07ffff583a53b8e_c001999f5e009ca2
c093333333333333_c07ffff583a53b8e_401333397dd21f3c
40a028f5c28f5c29_c07ffff583a53b8e_c02028fb0e2a73e4
c0b004189374bc6a_c07ffff583a53b8e_4030041dd2fb6fd0
40c00068db8bac71_c07ffff583a53b8e_c040006e19dd229c
c0dd1745d1745d17_c07ffff583a53b8e_405d174f59ca00c8
40e5555555555555_c07ffff583a53b8e_c065555c52e9780a
c0f999999999999a_c07ffff583a53b8e_407999a1fd1829a6
410c71c71c71c71c_c07ffff583a53b8e_c08c71d06e8ca00d
4000000000000000_408199999999999a_3f7d1745d1745d17
c018000000000000_408199999999999a_bf85d1745d1745d1
4024000000000000_408199999999999a_3f922e8ba2e8ba2e
c032000000000000_408199999999999a_bfa05d1745d1745d
4041000000000000_408199999999999a_3fbee8ba2e8ba2e8
c05c000000000000_408199999999999a_bfc9745d1745d174
406e000000000000_408199999999999a_3fdb45d1745d1745
c07ffff583a53b8e_408199999999999a_bfed173c4921d90c
408199999999999a_408199999999999a_3ff0000000000000
c093333333333333_408199999999999a_c001745d1745d174
40a028f5c28f5c29_408199999999999a_401d61bed61bed61
c0b004189374bc6a_408199999999999a_c02d1eb851eb851d
40c00068db8bac71_408199999999999a_403d180477e6ade4
c0dd1745d1745d17_408199999999999a_c04a723f789854a0
40e5555555555555_408199999999999a_405364d9364d9364
c0f999999999999a_408199999999999a_c06745d1745d1746
410c71c71c71c71c_408199999999999a_4079dbcc48676f30
4000000000000000_c093333333333333_bf6aaaaaaaaaaaab
c018000000000000_c093333333333333_3f74000000000000
4024000000000000_c093333333333333_bf80aaaaaaaaaaab
c032000000000000_c093333333333333_3f9e000000000000
4041000000000000_c093333333333333_bfac555555555556
c05c000000000000_c093333333333333_3fb7555555555556
406e000000000000_c093333333333333_bfc9000000000000
c07ffff583a53b8e_c093333333333333_3fdaaaa1edb45c4c
408199999999999a_c093333333333333_bfed555555555556
c093333333333333_c093333333333333_3ff0000000000000
40a028f5c28f5c29_c093333333333333_c00aeeeeeeeeeeef
c0b004189374bc6a_c093333333333333_401ab17e4b17e4b1
40c00068db8bac71_c093333333333333_c02aab596de8ca12
c0dd1745d1745d17_c093333333333333_40383e0f83e0f83e
40e5555555555555_c093333333333333_c041c71c71c71c72
c0f999999999999a_c093333333333333_4055555555555556
410c71c71c71c71c_c093333333333333_c067b425ed097b42
4000000000000000_40a028f5c28f5c29_3f5faee41e6a7498
c018000000000000_40a028f5c28f5c29_bf67c32b16cfd772
4024000000000000_40a028f5c28f5c29_3f73cd4e930288df
c032000000000000_40a028f5c28f5c29_bf81d260511be196
4041000000000000_40a028f5c28f5c29_3f90d4e930288df1
c05c000000000000_40a028f5c28f5c29_bfabb9079a9d2605
406e000000000000_40a028f5c28f5c29_3fbdb3f5dc83cd4f
c07ffff583a53b8e_40a028f5c28f5c29_bfcfaed9bca398bf
408199999999999a_40a028f5c28f5c29_3fd16cfd7720f354
c093333333333333_40a028f5c28f5c29_bfe30288df0cac5b
40a028f5c28f5c29_40a028f5c28f5c29_3ff0000000000000
c0b004189374bc6a_40a028f5c28f5c29_c00fb70081c635bb
40c00068db8bac71_40a028f5c28f5c29_401fafb3c1f3a182
c0dd1745d1745d17_40a028f5c28f5c29_c02ccd899003afd0
40e5555555555555_40a028f5c28f5c29_40351f42bef1a310
c0f999999999999a_40a028f5c28f5c29_c04958b67ebb907a
410c71c71c71c71c_40a028f5c28f5c29_405c29ae53ecd96a
4000000000000000_c0b004189374bc6a_bf4ff7d0f16c2e0a
c018000000000000_c0b004189374bc6a_3f57f9dcb5112287
4024000000000000_c0b004189374bc6a_bf63fae296e39cc6
c032000000000000_c0b004189374bc6a_3f71fb6587ccd9e5
4041000000000000_c0b004189374bc6a_bf80fba700417875
c05c000000000000_c0b004189374bc6a_3f9bf8d6d33ea848
406e000000000000_c0b004189374bc6a_bfadf853e2556b29
c07ffff583a53b8e_c0b004189374bc6a_3fbff7c677bfebb5
408199999999999a_c0b004189374bc6a_bfc1951951951953
c093333333333333_c0b004189374bc6a_3fd32e4a2a741b9f
40a028f5c28f5c29_c0b004189374bc6a_bfe024d3c19930d9
c0b004189374bc6a_c0b004189374bc6a_3ff0000000000000
40c00068db8bac71_c0b004189374bc6a_c00ff8a272e15ca2
c0dd1745d1745d17_c0b004189374bc6a_401d0fd53890e409
40e5555555555555_c0b004189374bc6a_c0254fe0a0f2c95b
c0f999999999999a_c0b004189374bc6a_4039930d8df024d5
410c71c71c71c71c_c0b004189374bc6a_c04c6a80d6990c7a
4000000000000000_40c00068db8bac71_3f3fff2e4e46e7a8
c018000000000000_40c00068db8bac71_bf47ff62bab52dbe
4024000000000000_40c00068db8bac71_3f53ff7cf0ec50c9
c032000000000000_40c00068db8bac71_bf61ff8a0c07e24f
4041000000000000_40c00068db8bac71_3f70ff909995ab11
c05c000000000000_40c00068db8bac71_bf8bff48847e0ab3
406e000000000000_40c00068db8bac71_3f9dff3b6962792e
c07ffff583a53b8e_40c00068db8bac71_bfafff23d230d9a4
408199999999999a_40c00068db8bac71_3fb1992644a6ff6a
c093333333333333_40c00068db8bac71_bfc332b5622a8afe
40a028f5c28f5c29_40c00068db8bac71_3fd0288bdd4a34fd
c0b004189374bc6a_40c00068db8bac71_bfe003af9fc0ed8b
40c00068db8bac71_40c00068db8bac71_3ff0000000000000
c0dd1745d1745d17_40c00068db8bac71_c00d16872fe35e3c
40e5555555555555_40c00068db8bac71_401554c989849a70
c0f999999999999a_40c00068db8bac71_c02998f1d838b954
410c71c71c71c71c_40c00068db8bac71_403c710cb75b7895
4000000000000000_c0dd1745d1745d17_bf2199999999999a
c018000000000000_c0dd1745d1745d17_3f3a666666666667
4024000000000000_c0dd1745d1745d17_bf46000000000000
c032000000000000_c0dd1745d1745d17_3f53cccccccccccd
4041000000000000_c0dd1745d1745d17_bf62b33333333333
c05c000000000000_c0dd1745d1745d17_3f7ecccccccccccd
406e000000000000_c0dd1745d1745d17_bf80800000000000
c07ffff583a53b8e_c0dd1745d1745d17_3f919993d5347a5b
408199999999999a_c0dd1745d1745d17_bfa35c28f5c28f5d
c093333333333333_c0dd1745d1745d17_3fb51eb851eb851f
40a028f5c28f5c29_c0dd1745d1745d17_bfc1c6a7ef9db22d
c0b004189374bc6a_c0dd1745d1745d17_3fd19e1b089a0275
40c00068db8bac71_c0dd1745d1745d17_bfe19a0cf1800a7c
c0dd1745d1745d17_c0dd1745d1745d17_3ff0000000000000
40e5555555555555_c0dd1745d1745d17_c007777777777777
c0f999999999999a_c0dd1745d1745d17_401c28f5c28f5c2a
410c71c71c71c71c_c0dd1745d1745d17_c02f49f49f49f49f
4000000000000000_40e5555555555555_3f18000000000000
c018000000000000_40e5555555555555_bf22000000000000
4024000000000000_40e5555555555555_3f3e000000000000
c032000000000000_40e5555555555555_bf4b000000000000
4041000000000000_40e5555555555555_3f59800000000000
c05c000000000000_40e5555555555555_bf65000000000000
406e000000000000_40e5555555555555_3f76800000000000
c07ffff583a53b8e_40e5555555555555_bf87fff822bbecab
408199999999999a_40e5555555555555_3f9a666666666667
c093333333333333_40e5555555555555_bfaccccccccccccd
40a028f5c28f5c29_40e5555555555555_3fb83d70a3d70a3e
c0b004189374bc6a_40e5555555555555_bfc80624dd2f1a9f
40c00068db8bac71_40e5555555555555_3fd8009d495182aa
c0dd1745d1745d17_40e5555555555555_bfe5d1745d1745d2
40e5555555555555_40e5555555555555_3ff0000000000000
c0f999999999999a_40e5555555555555_c003333333333334
410c71c71c71c71c_40e5555555555555_4015555555555555
4000000000000000_c0f999999999999a_bf04000000000000
c018000000000000_c0f999999999999a_3f1e000000000000
4024000000000000_c0f999999999999a_bf29000000000000
c032000000000000_c0f999999999999a_3f36800000000000
4041000000000000_c0f999999999999a_bf45400000000000
c05c000000000000_c0f999999999999a_3f51800000000000
406e000000000000_c0f999999999999a_bf62c00000000000
c07ffff583a53b8e_c0f999999999999a_3f73fff972474538
408199999999999a_c0f999999999999a_bf86000000000000
c093333333333333_c0f999999999999a_3f97ffffffffffff
40a028f5c28f5c29_c0f999999999999a_bfa4333333333333
c0b004189374bc6a_c0f999999999999a_3fb4051eb851eb84
40c00068db8bac71_c0f999999999999a_bfc40083126e978d
c0dd1745d1745d17_c0f999999999999a_3fd22e8ba2e8ba2e
40e5555555555555_c0f999999999999a_bfeaaaaaaaaaaaaa
c0f999999999999a_c0f999999999999a_3ff0000000000000
410c71c71c71c71c_c0f999999999999a_c001c71c71c71c71
4000000000000000_410c71c71c71c71c_3ef2000000000000
c018000000000000_410c71c71c71c71c_bf0b000000000000
4024000000000000_410c71c71c71c71c_3f16800000000000
c032000000000000_410c71c71c71c71c_bf24400000000000
4041000000000000_410c71c71c71c71c_3f33200000000000
c05c000000000000_410c71c71c71c71c_bf4f800000000000
406e000000000000_410c71c71c71c71c_3f50e00000000000
c07ffff583a53b8e_410c71c71c71c71c_bf61fffa1a0cf180
408199999999999a_410c71c71c71c71c_3f73ccccccccccce
c093333333333333_410c71c71c71c71c_bf8599999999999a
40a028f5c28f5c29_410c71c71c71c71c_3f922e147ae147ae
c0b004189374bc6a_410c71c71c71c71c_bfa2049ba5e353f8
40c00068db8bac71_410c71c71c71c71c_3fb20075f6fd21ff
c0dd1745d1745d17_410c71c71c71c71c_bfc05d1745d1745d
40e5555555555555_410c71c71c71c71c_3fd8000000000000
c0f999999999999a_410c71c71c71c71c_bfecccccccccccce
410c71c71c71c71c_410c71c71c71c71c_3ff0000000000000

View File

@ -10,120 +10,64 @@ module testbenchfp;
parameter TEST="none";
string Tests[]; // list of tests to be run
string FmaRneTests[]; // list of FMA round to nearest even tests to run
string FmaRuTests[]; // list of FMA round up tests to run
string FmaRdTests[]; // list of FMA round down tests to run
string FmaRzTests[]; // list of FMA round twords zero
string FmaRnmTests[]; // list of FMA round to nearest max magnitude
logic [2:0] OpCtrl[]; // list of op controls
logic [2:0] Unit[]; // list of units being tested
logic WriteInt[]; // Is being written to integer resgiter
logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
logic [1:0] Fmt[]; // list of formats for the other units
logic [1:0] FmaFmt[]; // list of formats for the FMA
logic clk=0;
logic [31:0] TestNum=0; // index for the test
logic [31:0] FmaTestNum=0; // index for the test
logic [31:0] OpCtrlNum=0; // index for OpCtrl
logic [31:0] errors=0; // how many errors
logic [31:0] VectorNum=0; // index for test vector
logic [31:0] FmaVectorNum=0; // index for test vector
logic [31:0] FrmNum=0; // index for rounding mode
logic [`FLEN*4+7:0] TestVectors[46464:0]; // list of test vectors
logic [`FLEN*4+7:0] FmaRneVectors[6133248:0]; // list of fma rne test vectors
logic [`FLEN*4+7:0] FmaRuVectors[6133248:0]; // list of fma ru test vectors
logic [`FLEN*4+7:0] FmaRdVectors[6133248:0]; // list of fma rd test vectors
logic [`FLEN*4+7:0] FmaRzVectors[6133248:0]; // list of fma rz test vectors
logic [`FLEN*4+7:0] FmaRnmVectors[6133248:0]; // list of fma rnm test vectors
logic [`FLEN*4+7:0] TestVectors[6133248:0]; // list of test vectors
logic [1:0] FmaFmtVal, FmtVal; // value of the current Fmt
logic [1:0] FmtVal; // value of the current Fmt
logic [2:0] UnitVal, OpCtrlVal, FrmVal; // vlaue of the currnet Unit/OpCtrl/FrmVal
logic WriteIntVal; // value of the current WriteInt
logic [`FLEN-1:0] X, Y, Z; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRneX, FmaRneY, FmaRneZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRzX, FmaRzY, FmaRzZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRuX, FmaRuY, FmaRuZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRdX, FmaRdY, FmaRdZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRnmX, FmaRnmY, FmaRnmZ; // inputs read from TestFloat
logic [`XLEN-1:0] SrcA; // integer input
logic [`FLEN-1:0] Ans; // correct answer from TestFloat
logic [`FLEN-1:0] FmaRneAns, FmaRzAns, FmaRuAns, FmaRdAns, FmaRnmAns; // flags read form testfloat
logic [`FLEN-1:0] Res; // result from other units
logic [`FLEN-1:0] FmaRneRes, FmaRzRes, FmaRuRes, FmaRdRes, FmaRnmRes; // results from FMA
logic [4:0] AnsFlg; // correct flags read from testfloat
logic [4:0] FmaRneAnsFlg, FmaRzAnsFlg, FmaRuAnsFlg, FmaRdAnsFlg, FmaRnmAnsFlg; // flags read form testfloat
logic [4:0] ResFlg; // Result flags
logic [4:0] FmaRneResFlg, FmaRzResFlg, FmaRuResFlg, FmaRdResFlg, FmaRnmResFlg; // flags read form testfloat
logic [`FMTBITS-1:0] ModFmt, FmaModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad
logic [`FLEN-1:0] FmaRes, DivRes, CmpRes, CvtRes; // Results from each unit
logic [`XLEN-1:0] CvtIntRes; // Results from each unit
logic [4:0] ResFlg, Flg; // Result flags
logic [`FMTBITS-1:0] ModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad
logic [`FLEN-1:0] FpRes, FpCmpRes; // Results from each unit
logic [`XLEN-1:0] IntRes, CmpRes; // Results from each unit
logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags
logic ResNaN, FmaRneResNaN, FmaRzResNaN, FmaRuResNaN, FmaRdResNaN, FmaRnmResNaN; // is the outputed result NaN
logic AnsNaN, FmaRneAnsNaN, FmaRzAnsNaN, FmaRuAnsNaN, FmaRdAnsNaN, FmaRnmAnsNaN; // is the correct answer NaN
logic NaNGood, FmaRneNaNGood, FmaRzNaNGood, FmaRuNaNGood, FmaRdNaNGood, FmaRnmNaNGood; // is the NaN answer correct
logic AnsNaN, ResNaN, NaNGood;
logic XSgn, YSgn, ZSgn; // sign of the inputs
logic FmaRneXSgn, FmaRneYSgn, FmaRneZSgn;
logic FmaRzXSgn, FmaRzYSgn, FmaRzZSgn;
logic FmaRuXSgn, FmaRuYSgn, FmaRuZSgn;
logic FmaRdXSgn, FmaRdYSgn, FmaRdZSgn;
logic FmaRnmXSgn, FmaRnmYSgn, FmaRnmZSgn;
logic [`NE-1:0] XExp, YExp, ZExp; // exponent of the inputs
logic [`NE-1:0] FmaRneXExp, FmaRneYExp, FmaRneZExp;
logic [`NE-1:0] FmaRzXExp, FmaRzYExp, FmaRzZExp;
logic [`NE-1:0] FmaRuXExp, FmaRuYExp, FmaRuZExp;
logic [`NE-1:0] FmaRdXExp, FmaRdYExp, FmaRdZExp;
logic [`NE-1:0] FmaRnmXExp, FmaRnmYExp, FmaRnmZExp;
logic [`NF:0] XMan, YMan, ZMan; // mantissas of the inputs
logic [`NF:0] FmaRneXMan, FmaRneYMan, FmaRneZMan;
logic [`NF:0] FmaRzXMan, FmaRzYMan, FmaRzZMan;
logic [`NF:0] FmaRuXMan, FmaRuYMan, FmaRuZMan;
logic [`NF:0] FmaRdXMan, FmaRdYMan, FmaRdZMan;
logic [`NF:0] FmaRnmXMan, FmaRnmYMan, FmaRnmZMan;
logic XNaN, YNaN, ZNaN; // is the input NaN
logic FmaRneXNaN, FmaRneYNaN, FmaRneZNaN;
logic FmaRzXNaN, FmaRzYNaN, FmaRzZNaN;
logic FmaRuXNaN, FmaRuYNaN, FmaRuZNaN;
logic FmaRdXNaN, FmaRdYNaN, FmaRdZNaN;
logic FmaRnmXNaN, FmaRnmYNaN, FmaRnmZNaN;
logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN
logic FmaRneXSNaN, FmaRneYSNaN, FmaRneZSNaN;
logic FmaRzXSNaN, FmaRzYSNaN, FmaRzZSNaN;
logic FmaRuXSNaN, FmaRuYSNaN, FmaRuZSNaN;
logic FmaRdXSNaN, FmaRdYSNaN, FmaRdZSNaN;
logic FmaRnmXSNaN, FmaRnmYSNaN, FmaRnmZSNaN;
logic XDenorm, ZDenorm; // is the input denormalized
logic FmaRneXDenorm, FmaRneZDenorm;
logic FmaRzXDenorm, FmaRzZDenorm;
logic FmaRuXDenorm, FmaRuZDenorm;
logic FmaRdXDenorm, FmaRdZDenorm;
logic FmaRnmXDenorm, FmaRnmZDenorm;
logic XInf, YInf, ZInf; // is the input infinity
logic FmaRneXInf, FmaRneYInf, FmaRneZInf;
logic FmaRzXInf, FmaRzYInf, FmaRzZInf;
logic FmaRuXInf, FmaRuYInf, FmaRuZInf;
logic FmaRdXInf, FmaRdYInf, FmaRdZInf;
logic FmaRnmXInf, FmaRnmYInf, FmaRnmZInf;
logic XZero, YZero, ZZero; // is the input zero
logic FmaRneXZero, FmaRneYZero, FmaRneZZero;
logic FmaRzXZero, FmaRzYZero, FmaRzZZero;
logic FmaRuXZero, FmaRuYZero, FmaRuZZero;
logic FmaRdXZero, FmaRdYZero, FmaRdZZero;
logic FmaRnmXZero, FmaRnmYZero, FmaRnmZZero;
logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones
logic [`LGLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder)
logic IntZeroE;
logic CvtResSgnE;
logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5;
logic [`NE:0] CvtCalcExpE; // the calculated expoent
logic [`LOGLGLEN-1:0] CvtShiftAmtE; // how much to shift by
logic CvtResDenormUfE;
// in-between FMA signals
logic Mult;
logic [`NE+1:0] ProdExpE, FmaRneProdExp, FmaRzProdExp, FmaRuProdExp, FmaRdProdExp, FmaRnmProdExp;
logic AddendStickyE, FmaRneAddendSticky, FmaRzAddendSticky, FmaRuAddendSticky, FmaRdAddendSticky, FmaRnmAddendSticky;
logic KillProdE, FmaRneKillProd, FmaRzKillProd, FmaRuKillProd, FmaRdKillProd, FmaRnmKillProd;
logic [$clog2(3*`NF+7)-1:0] NormCntE, FmaRneNormCnt, FmaRzNormCnt, FmaRuNormCnt, FmaRdNormCnt, FmaRnmNormCnt;
logic [3*`NF+5:0] SumE, FmaRneSum, FmaRzSum, FmaRuSum, FmaRdSum, FmaRnmSum;
logic InvZE, FmaRneInvZ, FmaRzInvZ, FmaRuInvZ, FmaRdInvZ, FmaRnmInvZ;
logic NegSumE, FmaRneNegSum, FmaRzNegSum, FmaRuNegSum, FmaRdNegSum, FmaRnmNegSum;
logic ZSgnEffE, FmaRneZSgnEff, FmaRzZSgnEff, FmaRuZSgnEff, FmaRdZSgnEff, FmaRnmZSgnEff;
logic PSgnE, FmaRnePSgn, FmaRzPSgn, FmaRuPSgn, FmaRdPSgn, FmaRnmPSgn;
logic [`NE+1:0] ProdExpE;
logic AddendStickyE;
logic KillProdE;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE;
logic [3*`NF+5:0] SumE;
logic InvZE;
logic NegSumE;
logic ZSgnEffE;
logic PSgnE;
///////////////////////////////////////////////////////////////////////////////////////////////
@ -282,15 +226,13 @@ module testbenchfp;
// end
// end
if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f128_mulAdd_rne.tv"};
FmaRzTests = {FmaRzTests, "f128_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f128_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f128_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f128_mulAdd_rnm.tv"};
// add the format for the Fma
FmaFmt = {FmaFmt, 2'b11};
Tests = {Tests, f128fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b11};
end
end
end
if (`D_SUPPORTED) begin // if double precision is supported
@ -411,14 +353,13 @@ module testbenchfp;
// end
// end
if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f64_mulAdd_rne.tv"};
FmaRzTests = {FmaRzTests, "f64_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f64_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f64_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f64_mulAdd_rnm.tv"};
FmaFmt = {FmaFmt, 2'b01};
Tests = {Tests, f64fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b01};
end
end
end
if (`F_SUPPORTED) begin // if single precision being supported
@ -523,14 +464,13 @@ module testbenchfp;
// end
// end
if (TEST === "fma" | TEST === "all") begin // if fma is being tested
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f32_mulAdd_rne.tv"};
FmaRzTests = {FmaRzTests, "f32_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f32_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f32_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
FmaFmt = {FmaFmt, 2'b00};
Tests = {Tests, f32fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b00};
end
end
end
if (`ZFH_SUPPORTED) begin // if half precision supported
@ -617,19 +557,18 @@ module testbenchfp;
// end
// end
if (TEST === "fma" | TEST === "all") begin // if fma is being tested
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f16_mulAdd_rne.tv"};
FmaRzTests = {FmaRzTests, "f16_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f16_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f16_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
FmaFmt = {FmaFmt, 2'b10};
Tests = {Tests, f16fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b10};
end
end
end
// check if nothing is being tested
if (Tests.size() == 0 & FmaRneTests.size() == 0 & FmaRuTests.size() == 0 & FmaRdTests.size() == 0 & FmaRzTests.size() == 0 & FmaRnmTests.size() == 0) begin
if (Tests.size() == 0) begin
$display("TEST %s not supported in this configuration", TEST);
$stop;
end
@ -648,26 +587,17 @@ module testbenchfp;
// Read the first test
initial begin
$display("\n\nRunning %s vectors", Tests[TestNum]);
$display("Running FMA precision %d", FmaTestNum);
$readmemh({`PATH, Tests[TestNum]}, TestVectors);
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
// set the test index to 0
TestNum = 0;
FmaTestNum = 0;
end
// set a the signals for all tests
always_comb FmaFmtVal = FmaFmt[FmaTestNum];
always_comb UnitVal = Unit[TestNum];
always_comb FmtVal = Fmt[TestNum];
always_comb OpCtrlVal = OpCtrl[OpCtrlNum];
always_comb WriteIntVal = WriteInt[OpCtrlNum];
always_comb FrmVal = Frm[FrmNum];
assign Mult = OpCtrlVal === 3'b100;
// modify the format signal if only 2 percisions supported
// - 1 for the larger precision
@ -675,61 +605,9 @@ module testbenchfp;
always_comb begin
if(`FMTBITS == 1) ModFmt = FmtVal == `FMT;
else ModFmt = FmtVal;
if(`FMTBITS == 1) FmaModFmt = FmaFmtVal == `FMT;
else FmaModFmt = FmaFmtVal;
end
// extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
readfmavectors readfmarnevectors (.clk, .TestVector(FmaRneVectors[FmaVectorNum]), .Ans(FmaRneAns), .AnsFlg(FmaRneAnsFlg),
.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
.XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
.XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN),
.XDenormE(FmaRneXDenorm), .ZDenormE(FmaRneZDenorm),
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
.XInfE(FmaRneXInf), .YInfE(FmaRneYInf), .ZInfE(FmaRneZInf), .FmaModFmt, .FmaFmt(FmaFmtVal),
.X(FmaRneX), .Y(FmaRneY), .Z(FmaRneZ));
readfmavectors readfmarzvectors (.clk, .TestVector(FmaRzVectors[FmaVectorNum]), .Ans(FmaRzAns), .AnsFlg(FmaRzAnsFlg),
.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .FmaModFmt,
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
.XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
.XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN),
.XDenormE(FmaRzXDenorm), .ZDenormE(FmaRzZDenorm),
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
.XInfE(FmaRzXInf), .YInfE(FmaRzYInf), .ZInfE(FmaRzZInf), .FmaFmt(FmaFmtVal),
.X(FmaRzX), .Y(FmaRzY), .Z(FmaRzZ));
readfmavectors readfmaruvectors (.clk, .TestVector(FmaRuVectors[FmaVectorNum]), .Ans(FmaRuAns), .AnsFlg(FmaRuAnsFlg),
.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .FmaModFmt,
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
.XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
.XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN),
.XDenormE(FmaRuXDenorm), .ZDenormE(FmaRuZDenorm),
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
.XInfE(FmaRuXInf), .YInfE(FmaRuYInf), .ZInfE(FmaRuZInf), .FmaFmt(FmaFmtVal),
.X(FmaRuX), .Y(FmaRuY), .Z(FmaRuZ));
readfmavectors readfmardvectors (.clk, .TestVector(FmaRdVectors[FmaVectorNum]), .Ans(FmaRdAns), .AnsFlg(FmaRdAnsFlg),
.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .FmaModFmt,
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
.XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
.XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN),
.XDenormE(FmaRdXDenorm), .ZDenormE(FmaRdZDenorm),
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
.XInfE(FmaRdXInf), .YInfE(FmaRdYInf), .ZInfE(FmaRdZInf), .FmaFmt(FmaFmtVal),
.X(FmaRdX), .Y(FmaRdY), .Z(FmaRdZ));
readfmavectors readfmarnmvectors (.clk, .TestVector(FmaRnmVectors[FmaVectorNum]), .Ans(FmaRnmAns), .AnsFlg(FmaRnmAnsFlg),
.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .FmaModFmt,
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
.XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
.XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN),
.XDenormE(FmaRnmXDenorm), .ZDenormE(FmaRnmZDenorm),
.XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
.XInfE(FmaRnmXInf), .YInfE(FmaRnmYInf), .ZInfE(FmaRnmZInf), .FmaFmt(FmaFmtVal),
.X(FmaRnmX), .Y(FmaRnmY), .Z(FmaRnmZ));
readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA,
.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
@ -754,124 +632,30 @@ module testbenchfp;
///////////////////////////////////////////////////////////////////////////////////////////////
// instantiate devices under test
// - one fma for each precison
// - all the units for the other tests (including fma for add/sub/mul)
fma1 fma1rne(.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRneSum), .NegSumE(FmaRneNegSum), .InvZE(FmaRneInvZ),
.NormCntE(FmaRneNormCnt), .ZSgnEffE(FmaRneZSgnEff), .PSgnE(FmaRnePSgn),
.ProdExpE(FmaRneProdExp), .AddendStickyE(FmaRneAddendSticky), .KillProdE(FmaRneSumKillProd));
fma2 fma2rne(.XSgnM(FmaRneXSgn), .YSgnM(FmaRneYSgn),
.ZExpM(FmaRneZExp), .ZDenormM(FmaRneZDenorm),
.XManM(FmaRneXMan), .YManM(FmaRneYMan), .ZManM(FmaRneZMan),
.XNaNM(FmaRneXNaN), .YNaNM(FmaRneYNaN), .ZNaNM(FmaRneZNaN),
.XZeroM(FmaRneXZero), .YZeroM(FmaRneYZero), .ZZeroM(FmaRneZZero),
.XInfM(FmaRneXInf), .YInfM(FmaRneYInf), .ZInfM(FmaRneZInf),
.XSNaNM(FmaRneXSNaN), .YSNaNM(FmaRneYSNaN), .ZSNaNM(FmaRneZSNaN),
.KillProdM(FmaRneSumKillProd), .AddendStickyM(FmaRneAddendSticky), .ProdExpM(FmaRneProdExp),
.SumM((FmaRneSum)), .NegSumM(FmaRneNegSum), .InvZM(FmaRneInvZ), .NormCntM(FmaRneNormCnt), .ZSgnEffM(FmaRneZSgnEff),
.PSgnM(FmaRnePSgn), .FmtM(FmaModFmt), .FrmM(`RNE),
.FMAFlgM(FmaRneResFlg), .FMAResM(FmaRneRes), .Mult(1'b0));
fma1 fma1rz(.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn),
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRzSum), .NegSumE(FmaRzNegSum), .InvZE(FmaRzInvZ),
.NormCntE(FmaRzNormCnt), .ZSgnEffE(FmaRzZSgnEff), .PSgnE(FmaRzPSgn),
.ProdExpE(FmaRzProdExp), .AddendStickyE(FmaRzAddendSticky), .KillProdE(FmaRzSumKillProd));
fma2 fma2rz(.XSgnM(FmaRzXSgn), .YSgnM(FmaRzYSgn),
.ZExpM(FmaRzZExp), .ZDenormM(FmaRzZDenorm),
.XManM(FmaRzXMan), .YManM(FmaRzYMan), .ZManM(FmaRzZMan),
.XNaNM(FmaRzXNaN), .YNaNM(FmaRzYNaN), .ZNaNM(FmaRzZNaN),
.XZeroM(FmaRzXZero), .YZeroM(FmaRzYZero), .ZZeroM(FmaRzZZero),
.XInfM(FmaRzXInf), .YInfM(FmaRzYInf), .ZInfM(FmaRzZInf),
.XSNaNM(FmaRzXSNaN), .YSNaNM(FmaRzYSNaN), .ZSNaNM(FmaRzZSNaN),
.KillProdM(FmaRzSumKillProd), .AddendStickyM(FmaRzAddendSticky), .ProdExpM(FmaRzProdExp),
.SumM((FmaRzSum)), .NegSumM(FmaRzNegSum), .InvZM(FmaRzInvZ), .NormCntM(FmaRzNormCnt), .ZSgnEffM(FmaRzZSgnEff),
.PSgnM(FmaRzPSgn), .FmtM(FmaModFmt), .FrmM(`RZ),
.FMAFlgM(FmaRzResFlg), .FMAResM(FmaRzRes), .Mult(1'b0));
fma1 fma1ru(.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn),
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRuSum), .NegSumE(FmaRuNegSum), .InvZE(FmaRuInvZ),
.NormCntE(FmaRuNormCnt), .ZSgnEffE(FmaRuZSgnEff), .PSgnE(FmaRuPSgn),
.ProdExpE(FmaRuProdExp), .AddendStickyE(FmaRuAddendSticky), .KillProdE(FmaRuSumKillProd));
fma2 fma2ru(.XSgnM(FmaRuXSgn), .YSgnM(FmaRuYSgn),
.ZExpM(FmaRuZExp), .ZDenormM(FmaRuZDenorm),
.XManM(FmaRuXMan), .YManM(FmaRuYMan), .ZManM(FmaRuZMan),
.XNaNM(FmaRuXNaN), .YNaNM(FmaRuYNaN), .ZNaNM(FmaRuZNaN),
.XZeroM(FmaRuXZero), .YZeroM(FmaRuYZero), .ZZeroM(FmaRuZZero),
.XInfM(FmaRuXInf), .YInfM(FmaRuYInf), .ZInfM(FmaRuZInf),
.XSNaNM(FmaRuXSNaN), .YSNaNM(FmaRuYSNaN), .ZSNaNM(FmaRuZSNaN),
.KillProdM(FmaRuSumKillProd), .AddendStickyM(FmaRuAddendSticky), .ProdExpM(FmaRuProdExp),
.SumM((FmaRuSum)), .NegSumM(FmaRuNegSum), .InvZM(FmaRuInvZ), .NormCntM(FmaRuNormCnt), .ZSgnEffM(FmaRuZSgnEff),
.PSgnM(FmaRuPSgn), .FmtM(FmaModFmt), .FrmM(`RU),
.FMAFlgM(FmaRuResFlg), .FMAResM(FmaRuRes), .Mult(1'b0));
fma1 fma1rd(.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn),
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRdSum), .NegSumE(FmaRdNegSum), .InvZE(FmaRdInvZ),
.NormCntE(FmaRdNormCnt), .ZSgnEffE(FmaRdZSgnEff), .PSgnE(FmaRdPSgn),
.ProdExpE(FmaRdProdExp), .AddendStickyE(FmaRdAddendSticky), .KillProdE(FmaRdSumKillProd));
fma2 fma2rd(.XSgnM(FmaRdXSgn), .YSgnM(FmaRdYSgn),
.ZExpM(FmaRdZExp), .ZDenormM(FmaRdZDenorm),
.XManM(FmaRdXMan), .YManM(FmaRdYMan), .ZManM(FmaRdZMan),
.XNaNM(FmaRdXNaN), .YNaNM(FmaRdYNaN), .ZNaNM(FmaRdZNaN),
.XZeroM(FmaRdXZero), .YZeroM(FmaRdYZero), .ZZeroM(FmaRdZZero),
.XInfM(FmaRdXInf), .YInfM(FmaRdYInf), .ZInfM(FmaRdZInf),
.XSNaNM(FmaRdXSNaN), .YSNaNM(FmaRdYSNaN), .ZSNaNM(FmaRdZSNaN),
.KillProdM(FmaRdSumKillProd), .AddendStickyM(FmaRdAddendSticky), .ProdExpM(FmaRdProdExp),
.SumM((FmaRdSum)), .NegSumM(FmaRdNegSum), .InvZM(FmaRdInvZ), .NormCntM(FmaRdNormCnt), .ZSgnEffM(FmaRdZSgnEff),
.PSgnM(FmaRdPSgn), .FmtM(FmaModFmt), .FrmM(`RD),
.FMAFlgM(FmaRdResFlg), .FMAResM(FmaRdRes), .Mult(1'b0));
fma1 fma1rnm(.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn),
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
.XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRnmSum), .NegSumE(FmaRnmNegSum), .InvZE(FmaRnmInvZ),
.NormCntE(FmaRnmNormCnt), .ZSgnEffE(FmaRnmZSgnEff), .PSgnE(FmaRnmPSgn),
.ProdExpE(FmaRnmProdExp), .AddendStickyE(FmaRnmAddendSticky), .KillProdE(FmaRnmSumKillProd));
fma2 fma2rnm(.XSgnM(FmaRnmXSgn), .YSgnM(FmaRnmYSgn),
.ZExpM(FmaRnmZExp), .ZDenormM(FmaRnmZDenorm),
.XManM(FmaRnmXMan), .YManM(FmaRnmYMan), .ZManM(FmaRnmZMan),
.XNaNM(FmaRnmXNaN), .YNaNM(FmaRnmYNaN), .ZNaNM(FmaRnmZNaN),
.XZeroM(FmaRnmXZero), .YZeroM(FmaRnmYZero), .ZZeroM(FmaRnmZZero),
.XInfM(FmaRnmXInf), .YInfM(FmaRnmYInf), .ZInfM(FmaRnmZInf),
.XSNaNM(FmaRnmXSNaN), .YSNaNM(FmaRnmYSNaN), .ZSNaNM(FmaRnmZSNaN),
.KillProdM(FmaRnmSumKillProd), .AddendStickyM(FmaRnmAddendSticky), .ProdExpM(FmaRnmProdExp),
.SumM((FmaRnmSum)), .NegSumM(FmaRnmNegSum), .InvZM(FmaRnmInvZ), .NormCntM(FmaRnmNormCnt), .ZSgnEffM(FmaRnmZSgnEff),
.PSgnM(FmaRnmPSgn), .FmtM(FmaModFmt), .FrmM(`RNM),
.FMAFlgM(FmaRnmResFlg), .FMAResM(FmaRnmRes), .Mult(1'b0));
fma1 fma1(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn),
fma fma(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn),
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp),
.XManE(XMan), .YManE(YMan), .ZManE(ZMan),
.XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE,
.ProdExpE, .AddendStickyE, .KillProdE);
fma2 fma2(.XSgnM(XSgn), .YSgnM(YSgn),
.ZExpM(ZExp), .ZDenormM(ZDenorm),
.XManM(XMan), .YManM(YMan), .ZManM(ZMan),
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN),
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero),
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf),
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN),
postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]),
.ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal),
.XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
.KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE),
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
.FMAFlgM(FmaFlg), .FMAResM(FmaRes), .Mult);
// fcvtfp fcvtfp (.XExpE(XExp), .XManE(XMan), .XSgnE(XSgn), .XZeroE(XZero), .XDenormE(XDenorm), .XInfE(XInf),
// .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), .CvtFpResE(CvtFpRes), .CvtFpFlgE(CvtFpFlg));
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
.PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal),
.XInfE(XInf), .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt),
.CvtResE(CvtRes), .CvtIntResE(CvtIntRes), .CvtFlgE(CvtFlg));
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE,
.FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE);
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpResE(CmpRes));
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
// fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf),
// .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal),
// .CvtRes, .CvtFlgE);
@ -900,60 +684,6 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
///////////////////////////////////////////////////////////////////////////////////////////////
//Check if the correct answer and result is a NaN
always_comb begin
case (FmaFmtVal)
4'b11: begin // quad
FmaRneAnsNaN = &FmaRneAns[`Q_LEN-2:`Q_NF]&(|FmaRneAns[`Q_NF-1:0]);
FmaRneResNaN = &FmaRneRes[`Q_LEN-2:`Q_NF]&(|FmaRneRes[`Q_NF-1:0]);
FmaRzAnsNaN = &FmaRzAns[`Q_LEN-2:`Q_NF]&(|FmaRzAns[`Q_NF-1:0]);
FmaRzResNaN = &FmaRzRes[`Q_LEN-2:`Q_NF]&(|FmaRzRes[`Q_NF-1:0]);
FmaRuAnsNaN = &FmaRuAns[`Q_LEN-2:`Q_NF]&(|FmaRuAns[`Q_NF-1:0]);
FmaRuResNaN = &FmaRuRes[`Q_LEN-2:`Q_NF]&(|FmaRuRes[`Q_NF-1:0]);
FmaRdAnsNaN = &FmaRdAns[`Q_LEN-2:`Q_NF]&(|FmaRdAns[`Q_NF-1:0]);
FmaRdResNaN = &FmaRdRes[`Q_LEN-2:`Q_NF]&(|FmaRdRes[`Q_NF-1:0]);
FmaRnmAnsNaN = &FmaRnmAns[`Q_LEN-2:`Q_NF]&(|FmaRnmAns[`Q_NF-1:0]);
FmaRnmResNaN = &FmaRnmRes[`Q_LEN-2:`Q_NF]&(|FmaRnmRes[`Q_NF-1:0]);
end
4'b01: begin // double
FmaRneAnsNaN = &FmaRneAns[`D_LEN-2:`D_NF]&(|FmaRneAns[`D_NF-1:0]);
FmaRneResNaN = &FmaRneRes[`D_LEN-2:`D_NF]&(|FmaRneRes[`D_NF-1:0]);
FmaRzAnsNaN = &FmaRzAns[`D_LEN-2:`D_NF]&(|FmaRzAns[`D_NF-1:0]);
FmaRzResNaN = &FmaRzRes[`D_LEN-2:`D_NF]&(|FmaRzRes[`D_NF-1:0]);
FmaRuAnsNaN = &FmaRuAns[`D_LEN-2:`D_NF]&(|FmaRuAns[`D_NF-1:0]);
FmaRuResNaN = &FmaRuRes[`D_LEN-2:`D_NF]&(|FmaRuRes[`D_NF-1:0]);
FmaRdAnsNaN = &FmaRdAns[`D_LEN-2:`D_NF]&(|FmaRdAns[`D_NF-1:0]);
FmaRdResNaN = &FmaRdRes[`D_LEN-2:`D_NF]&(|FmaRdRes[`D_NF-1:0]);
FmaRnmAnsNaN = &FmaRnmAns[`D_LEN-2:`D_NF]&(|FmaRnmAns[`D_NF-1:0]);
FmaRnmResNaN = &FmaRnmRes[`D_LEN-2:`D_NF]&(|FmaRnmRes[`D_NF-1:0]);
end
4'b00: begin // single
FmaRneAnsNaN = &FmaRneAns[`S_LEN-2:`S_NF]&(|FmaRneAns[`S_NF-1:0]);
FmaRneResNaN = &FmaRneRes[`S_LEN-2:`S_NF]&(|FmaRneRes[`S_NF-1:0]);
FmaRzAnsNaN = &FmaRzAns[`S_LEN-2:`S_NF]&(|FmaRzAns[`S_NF-1:0]);
FmaRzResNaN = &FmaRzRes[`S_LEN-2:`S_NF]&(|FmaRzRes[`S_NF-1:0]);
FmaRuAnsNaN = &FmaRuAns[`S_LEN-2:`S_NF]&(|FmaRuAns[`S_NF-1:0]);
FmaRuResNaN = &FmaRuRes[`S_LEN-2:`S_NF]&(|FmaRuRes[`S_NF-1:0]);
FmaRdAnsNaN = &FmaRdAns[`S_LEN-2:`S_NF]&(|FmaRdAns[`S_NF-1:0]);
FmaRdResNaN = &FmaRdRes[`S_LEN-2:`S_NF]&(|FmaRdRes[`S_NF-1:0]);
FmaRnmAnsNaN = &FmaRnmAns[`S_LEN-2:`S_NF]&(|FmaRnmAns[`S_NF-1:0]);
FmaRnmResNaN = &FmaRnmRes[`S_LEN-2:`S_NF]&(|FmaRnmRes[`S_NF-1:0]);
end
4'b10: begin // half
FmaRneAnsNaN = &FmaRneAns[`H_LEN-2:`H_NF]&(|FmaRneAns[`H_NF-1:0]);
FmaRneResNaN = &FmaRneRes[`H_LEN-2:`H_NF]&(|FmaRneRes[`H_NF-1:0]);
FmaRzAnsNaN = &FmaRzAns[`H_LEN-2:`H_NF]&(|FmaRzAns[`H_NF-1:0]);
FmaRzResNaN = &FmaRzRes[`H_LEN-2:`H_NF]&(|FmaRzRes[`H_NF-1:0]);
FmaRuAnsNaN = &FmaRuAns[`H_LEN-2:`H_NF]&(|FmaRuAns[`H_NF-1:0]);
FmaRuResNaN = &FmaRuRes[`H_LEN-2:`H_NF]&(|FmaRuRes[`H_NF-1:0]);
FmaRdAnsNaN = &FmaRdAns[`H_LEN-2:`H_NF]&(|FmaRdAns[`H_NF-1:0]);
FmaRdResNaN = &FmaRdRes[`H_LEN-2:`H_NF]&(|FmaRdRes[`H_NF-1:0]);
FmaRnmAnsNaN = &FmaRnmAns[`H_LEN-2:`H_NF]&(|FmaRnmAns[`H_NF-1:0]);
FmaRnmResNaN = &FmaRnmRes[`H_LEN-2:`H_NF]&(|FmaRnmRes[`H_NF-1:0]);
end
endcase
end
always_comb begin
if(UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin
// an integer output can't be a NaN
@ -1004,20 +734,20 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
always_comb begin
// select the result to check
case (UnitVal)
`FMAUNIT: Res = FmaRes;
`DIVUNIT: Res = DivRes;
`FMAUNIT: Res = FpRes;
`DIVUNIT: Res = FpRes;
`CMPUNIT: Res = CmpRes;
`CVTINTUNIT: if(WriteIntVal) Res = CvtIntRes; else Res = CvtRes;
`CVTFPUNIT: Res = CvtRes;
`CVTINTUNIT: if(WriteIntVal) Res = IntRes; else Res = FpRes;
`CVTFPUNIT: Res = FpRes;
endcase
// select the flag to check
case (UnitVal)
`FMAUNIT: ResFlg = FmaFlg;
`DIVUNIT: ResFlg = DivFlg;
`FMAUNIT: ResFlg = Flg;
`DIVUNIT: ResFlg = Flg;
`CMPUNIT: ResFlg = CmpFlg;
`CVTINTUNIT: ResFlg = CvtFlg;
`CVTFPUNIT: ResFlg = CvtFlg;
`CVTINTUNIT: ResFlg = Flg;
`CVTFPUNIT: ResFlg = Flg;
endcase
end
// check results on falling edge of clk
@ -1027,117 +757,6 @@ end
// check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify:
// - the sign of the NaN does not matter for the opperations being tested
// - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter
case (FmaFmtVal)
4'b11: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneX[`Q_LEN-2:`Q_NF],1'b1,FmaRneX[`Q_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneY[`Q_LEN-2:`Q_NF],1'b1,FmaRneY[`Q_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneZ[`Q_LEN-2:`Q_NF],1'b1,FmaRneZ[`Q_NF-2:0]})));
4'b01: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneX[`D_LEN-2:`D_NF],1'b1,FmaRneX[`D_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneY[`D_LEN-2:`D_NF],1'b1,FmaRneY[`D_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneZ[`D_LEN-2:`D_NF],1'b1,FmaRneZ[`D_NF-2:0]})));
4'b00: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneX[`S_LEN-2:`S_NF],1'b1,FmaRneX[`S_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneY[`S_LEN-2:`S_NF],1'b1,FmaRneY[`S_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneZ[`S_LEN-2:`S_NF],1'b1,FmaRneZ[`S_NF-2:0]})));
4'b10: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneX[`H_LEN-2:`H_NF],1'b1,FmaRneX[`H_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneY[`H_LEN-2:`H_NF],1'b1,FmaRneY[`H_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneZ[`H_LEN-2:`H_NF],1'b1,FmaRneZ[`H_NF-2:0]})));
endcase
case (FmaFmtVal)
4'b11: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzX[`Q_LEN-2:`Q_NF],1'b1,FmaRzX[`Q_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzY[`Q_LEN-2:`Q_NF],1'b1,FmaRzY[`Q_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzZ[`Q_LEN-2:`Q_NF],1'b1,FmaRzZ[`Q_NF-2:0]})));
4'b01: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzX[`D_LEN-2:`D_NF],1'b1,FmaRzX[`D_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzY[`D_LEN-2:`D_NF],1'b1,FmaRzY[`D_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzZ[`D_LEN-2:`D_NF],1'b1,FmaRzZ[`D_NF-2:0]})));
4'b00: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzX[`S_LEN-2:`S_NF],1'b1,FmaRzX[`S_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzY[`S_LEN-2:`S_NF],1'b1,FmaRzY[`S_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzZ[`S_LEN-2:`S_NF],1'b1,FmaRzZ[`S_NF-2:0]})));
4'b10: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzX[`H_LEN-2:`H_NF],1'b1,FmaRzX[`H_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzY[`H_LEN-2:`H_NF],1'b1,FmaRzY[`H_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzZ[`H_LEN-2:`H_NF],1'b1,FmaRzZ[`H_NF-2:0]})));
endcase
case (FmaFmtVal)
4'b11: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuX[`Q_LEN-2:`Q_NF],1'b1,FmaRuX[`Q_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuY[`Q_LEN-2:`Q_NF],1'b1,FmaRuY[`Q_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuZ[`Q_LEN-2:`Q_NF],1'b1,FmaRuZ[`Q_NF-2:0]})));
4'b01: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuX[`D_LEN-2:`D_NF],1'b1,FmaRuX[`D_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuY[`D_LEN-2:`D_NF],1'b1,FmaRuY[`D_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuZ[`D_LEN-2:`D_NF],1'b1,FmaRuZ[`D_NF-2:0]})));
4'b00: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuX[`S_LEN-2:`S_NF],1'b1,FmaRuX[`S_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuY[`S_LEN-2:`S_NF],1'b1,FmaRuY[`S_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuZ[`S_LEN-2:`S_NF],1'b1,FmaRuZ[`S_NF-2:0]})));
4'b10: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuX[`H_LEN-2:`H_NF],1'b1,FmaRuX[`H_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuY[`H_LEN-2:`H_NF],1'b1,FmaRuY[`H_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuZ[`H_LEN-2:`H_NF],1'b1,FmaRuZ[`H_NF-2:0]})));
endcase
case (FmaFmtVal)
4'b11: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdX[`Q_LEN-2:`Q_NF],1'b1,FmaRdX[`Q_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdY[`Q_LEN-2:`Q_NF],1'b1,FmaRdY[`Q_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdZ[`Q_LEN-2:`Q_NF],1'b1,FmaRdZ[`Q_NF-2:0]})));
4'b01: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdX[`D_LEN-2:`D_NF],1'b1,FmaRdX[`D_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdY[`D_LEN-2:`D_NF],1'b1,FmaRdY[`D_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdZ[`D_LEN-2:`D_NF],1'b1,FmaRdZ[`D_NF-2:0]})));
4'b00: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdX[`S_LEN-2:`S_NF],1'b1,FmaRdX[`S_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdY[`S_LEN-2:`S_NF],1'b1,FmaRdY[`S_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdZ[`S_LEN-2:`S_NF],1'b1,FmaRdZ[`S_NF-2:0]})));
4'b10: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdX[`H_LEN-2:`H_NF],1'b1,FmaRdX[`H_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdY[`H_LEN-2:`H_NF],1'b1,FmaRdY[`H_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdZ[`H_LEN-2:`H_NF],1'b1,FmaRdZ[`H_NF-2:0]})));
endcase
case (FmaFmtVal)
4'b11: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmX[`Q_LEN-2:`Q_NF],1'b1,FmaRnmX[`Q_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmY[`Q_LEN-2:`Q_NF],1'b1,FmaRnmY[`Q_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmZ[`Q_LEN-2:`Q_NF],1'b1,FmaRnmZ[`Q_NF-2:0]})));
4'b01: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmX[`D_LEN-2:`D_NF],1'b1,FmaRnmX[`D_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmY[`D_LEN-2:`D_NF],1'b1,FmaRnmY[`D_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmZ[`D_LEN-2:`D_NF],1'b1,FmaRnmZ[`D_NF-2:0]})));
4'b00: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmX[`S_LEN-2:`S_NF],1'b1,FmaRnmX[`S_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmY[`S_LEN-2:`S_NF],1'b1,FmaRnmY[`S_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmZ[`S_LEN-2:`S_NF],1'b1,FmaRnmZ[`S_NF-2:0]})));
4'b10: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmX[`H_LEN-2:`H_NF],1'b1,FmaRnmX[`H_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmY[`H_LEN-2:`H_NF],1'b1,FmaRnmY[`H_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmZ[`H_LEN-2:`H_NF],1'b1,FmaRnmZ[`H_NF-2:0]})));
endcase
if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT)
case (FmtVal)
4'b11: NaNGood = (((`IEEE754==0)&AnsNaN&(Res === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
@ -1221,77 +840,8 @@ end
$stop;
end
// check if the fma tests are correct
if(~((FmaRneRes === FmaRneAns | FmaRneNaNGood | FmaRneNaNGood === 1'bx) & (FmaRneResFlg === FmaRneAnsFlg | FmaRneAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RNE");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRneX, FmaRneY, FmaRneZ, FmaRneRes, FmaRneResFlg, FmaRneAns, FmaRneAnsFlg);
$stop;
end
if(~((FmaRzRes === FmaRzAns | FmaRzNaNGood | FmaRzNaNGood === 1'bx) & (FmaRzResFlg === FmaRzAnsFlg | FmaRzAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RZ");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRzX, FmaRzY, FmaRzZ, FmaRzRes, FmaRzResFlg, FmaRzAns, FmaRzAnsFlg);
$stop;
end
if(~((FmaRuRes === FmaRuAns | FmaRuNaNGood | FmaRuNaNGood === 1'bx) & (FmaRuResFlg === FmaRuAnsFlg | FmaRuAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RU");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRuX, FmaRuY, FmaRuZ, FmaRuRes, FmaRuResFlg, FmaRuAns, FmaRuAnsFlg);
$stop;
end
if(~((FmaRdRes === FmaRdAns | FmaRdNaNGood | FmaRdNaNGood === 1'bx) & (FmaRdResFlg === FmaRdAnsFlg | FmaRdAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RD");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRdX, FmaRdY, FmaRdZ, FmaRdRes, FmaRdResFlg, FmaRdAns, FmaRdAnsFlg);
$stop;
end
if(~((FmaRnmRes === FmaRnmAns | FmaRnmNaNGood | FmaRnmNaNGood === 1'bx) & (FmaRnmResFlg === FmaRnmAnsFlg | FmaRnmAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RNM");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRnmX, FmaRnmY, FmaRnmZ, FmaRnmRes, FmaRnmResFlg, FmaRnmAns, FmaRnmAnsFlg);
$stop;
end
VectorNum += 1; // increment the vector
FmaVectorNum += 1; // increment the vector
// check to see if there more vectors in this test
// *** fix this so that fma and other run sepratly - re-add fma num
if ((FmaRneVectors[FmaVectorNum][0] === 1'bx &
FmaRzVectors[FmaVectorNum][0] === 1'bx &
FmaRuVectors[FmaVectorNum][0] === 1'bx &
FmaRdVectors[FmaVectorNum][0] === 1'bx &
FmaRnmVectors[FmaVectorNum][0] === 1'bx & FmaRneTests[FmaTestNum] !== "" )) begin // if reached the end of file
// increment the test
FmaTestNum += 1;
// clear the vectors
for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
// read next files
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
// set the vector index back to 0
FmaVectorNum = 0;
// if no more Tests - finish
if(Tests[TestNum] === "" &
FmaRneTests[FmaTestNum] === "" &
FmaRzTests[FmaTestNum] === "" &
FmaRuTests[FmaTestNum] === "" &
FmaRdTests[FmaTestNum] === "" &
FmaRnmTests[FmaTestNum] === "") begin
$display("\nAll Tests completed with %d errors\n", errors);
$stop;
end
$display("Running FMA precision %d", FmaTestNum);
end
if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file
@ -1299,14 +849,9 @@ end
TestNum += 1;
// clear the vectors
for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
for(int i=0; i<6133248; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
// read next files
$readmemh({`PATH, Tests[TestNum]}, TestVectors);
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
// set the vector index back to 0
VectorNum = 0;
@ -1317,12 +862,7 @@ end
else FrmNum = 0;
// if no more Tests - finish
if(Tests[TestNum] === "" &
FmaRneTests[FmaTestNum] === "" &
FmaRzTests[FmaTestNum] === "" &
FmaRuTests[FmaTestNum] === "" &
FmaRdTests[FmaTestNum] === "" &
FmaRnmTests[FmaTestNum] === "") begin
if(Tests[TestNum] === "") begin
$display("\nAll Tests completed with %d errors\n", errors);
$stop;
end
@ -1335,89 +875,6 @@ endmodule
module readfmavectors (
input logic clk,
input logic [`FMTBITS-1:0] FmaModFmt, // the modified format
input logic [1:0] FmaFmt, // the format of the FMA inputs
input logic [`FLEN*4+7:0] TestVector, // the test vector
output logic [`FLEN-1:0] Ans, // the correct answer
output logic [4:0] AnsFlg, // the correct flag
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision)
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
output logic XDenormE, ZDenormE, // is XYZ denormalized
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
output logic [`FLEN-1:0] X, Y, Z // inputs
);
logic XExpMaxE; // signals the unpacker outputs but isn't used in FMA
// apply test vectors on rising edge of clk
// Format of vectors Inputs(1/2/3)_AnsFlg
always @(posedge clk) begin
#1;
AnsFlg = TestVector[4:0];
case (FmaFmt)
2'b11: begin // quad
X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // double
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
end
2'b00: begin // single
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
end
2'b10: begin // half
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
end
endcase
end
unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
.XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
.XExpMaxE, .ZDenormE);
endmodule
module readvectors (
input logic clk,
input logic [`FLEN*4+7:0] TestVector,
@ -1451,33 +908,61 @@ module readvectors (
`FMAUNIT:
case (Fmt)
2'b11: begin // quad
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
if(OpCtrl === `FMA_OPCTRL) begin
X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
end
else begin
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
end
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // double
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}};
else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
if(OpCtrl === `FMA_OPCTRL) begin
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
end
else begin
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}};
else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
end
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
end
2'b00: begin // single
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}};
else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
if(OpCtrl === `FMA_OPCTRL) begin
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
end
else begin
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}};
else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
end
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
end
2'b10: begin // half
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}};
else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
if(OpCtrl === `FMA_OPCTRL) begin
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
end
else begin
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}};
else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
end
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
end
endcase
@ -1532,19 +1017,19 @@ module readvectors (
2'b11: begin // quad
case (OpCtrl[1:0])
2'b11: begin // quad
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
X = {TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // double
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
X = {TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
end
2'b00: begin // single
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
X = {TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
end
2'b10: begin // half
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
X = {TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
end
endcase
@ -1628,12 +1113,12 @@ module readvectors (
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // quad -> long
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
X = {TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
SrcA = {`XLEN{1'bx}};
Ans = {TestVector[8+(`XLEN-1):8]};
end
2'b00: begin // quad -> int
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+32+`Q_LEN-1:8+(32)]};
X = {TestVector[8+32+`Q_LEN-1:8+(32)]};
SrcA = {`XLEN{1'bx}};
Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
end

View File

@ -396,6 +396,7 @@ module riscvassertions;
assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
assert (`FLEN<=`XLEN | `DMEM == `MEM_CACHE) else $error("Wally does not support FLEN > XLEN unleses data cache is supported");
assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");
@ -418,6 +419,7 @@ module riscvassertions;
//assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS.");
//assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS.");
assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words");
end
endmodule

View File

@ -2,7 +2,7 @@
`define ADD_OPCTRL 3'b110
`define MUL_OPCTRL 3'b100
`define SUB_OPCTRL 3'b111
`define FADD_OPCTRL 3'b000
`define FMA_OPCTRL 3'b000
`define DIV_OPCTRL 3'b000
`define SQRT_OPCTRL 3'b001
`define LE_OPCTRL 3'b011
@ -21,11 +21,11 @@
`define RU 3'b011
`define RD 3'b010
`define RNM 3'b100
`define FMAUNIT 0
`define FMAUNIT 2
`define DIVUNIT 1
`define CVTINTUNIT 2
`define CVTFPUNIT 3
`define CMPUNIT 4
`define CVTINTUNIT 0
`define CVTFPUNIT 4
`define CMPUNIT 3
string f16rv32cvtint[] = '{
"ui32_to_f16_rne.tv",

View File

@ -1102,11 +1102,11 @@ string imperas32f[] = '{
// "rv64i_m/D/d_fdiv_b20-01", // looks like flags
// "rv64i_m/D/d_fdiv_b2-01", // also flags
// "rv64i_m/D/d_fdiv_b21-01", // positive NaNs again
"rv64i_m/D/d_fdiv_b3-01",
// "rv64i_m/D/d_fdiv_b3-01",
// "rv64i_m/D/d_fdiv_b4-01", // flags
"rv64i_m/D/d_fdiv_b5-01",
// "rv64i_m/D/d_fdiv_b5-01",
// "rv64i_m/D/d_fdiv_b6-01", // flags
"rv64i_m/D/d_fdiv_b7-01",
// "rv64i_m/D/d_fdiv_b7-01",
// "rv64i_m/D/d_fdiv_b8-01", // flags
// "rv64i_m/D/d_fdiv_b9-01", might be a flag too
"rv64i_m/D/d_feq_b1-01",

View File

@ -106,7 +106,7 @@ def getVals(tech, module, var, freq=None):
if (freq != None):
for oneSynth in allSynths:
if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module):
if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module) & (oneSynth.width != 1):
widthL += [oneSynth.width]
osdict = oneSynth._asdict()
metric += [osdict[var]]
@ -151,33 +151,37 @@ def csvOfBest():
file.close()
return bestSynths
def genLegend(fits, coefs, r2, spec, ale=False):
''' generates a list of two legend elements
labels line with fit equation and dots with tech and r squared of the fit
def genLegend(fits, coefs, r2=None, spec=None, ale=False):
''' generates a list of two legend elements (or just an equation if no r2 or spec)
labels line with fit equation and dots with r squared of the fit
'''
coefsr = [str(round(c, 3)) for c in coefs]
eq = ''
ind = 0
eqDict = {'c': '', 'l': 'N', 's': '$N^2$', 'g': '$log_2$(N)', 'n': 'N$log_2$(N)'}
coefsr = [str(sigfig(c, 2)) for c in coefs]
if ale:
if (normAddWidth == 32):
eqDict = {'c': '', 'l': '(N/32)', 's': '$(N/32)^2$', 'g': '$log_2$(N/32)', 'n': '(N/32)$log_2$(N/32)'}
sub = 'S'
elif normAddWidth != 1:
print('Legend equations are wrong')
print('Equations are wrong, check normAddWidth')
else:
sub = 'N'
eqDict = {'c': '', 'l': sub, 's': '$'+sub+'^2$', 'g': '$log_2$('+sub+')', 'n': ''+sub+'$log_2$('+sub+')'}
eq = ''
ind = 0
for k in eqDict.keys():
if k in fits:
if str(coefsr[ind]) != '0.0': eq += " + " + coefsr[ind] + eqDict[k]
if str(coefsr[ind]) != '0': eq += " + " + coefsr[ind] + eqDict[k]
ind += 1
eq = eq[3:] # chop off leading ' + '
legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq)]
legend_elements += [lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label=spec.tech +' $R^2$='+ str(round(r2, 4)))]
return legend_elements
if (r2==None) or (spec==None):
return eq
else:
legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq)]
legend_elements += [lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label='$R^2$='+ str(round(r2, 4)))]
return legend_elements
def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, color=None):
''' module: string module name
@ -197,9 +201,14 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo
allMetrics = []
ale = (var != 'delay') # if not delay, must be area, leakage, or energy
modFit = fitDict[mod]
modFit = fitDict[module]
fits = modFit[ale]
if freq:
ls = '--'
else:
ls = '-'
for spec in techSpecs:
metric = getVals(spec.tech, module, var, freq=freq)
@ -209,26 +218,26 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo
metric = [m/norm for m in metric]
if len(metric) == 5: # don't include the spec if we don't have points for all widths
xp, pred, coefs, r2 = regress(widths, metric, fits)
xp, pred, coefs, r2 = regress(widths, metric, fits, ale)
fullLeg += genLegend(fits, coefs, r2, spec, ale=ale)
c = color if color else spec.color
ax.scatter(widths, metric, color=c, marker=spec.shape)
ax.plot(xp, pred, color=c)
ax.plot(xp, pred, color=c, linestyle=ls)
allWidths += widths
allMetrics += metric
combined = TechSpec('combined', 'red', '_', 0, 0, 0, 0)
xp, pred, coefs, r2 = regress(allWidths, allMetrics, fits)
leg = genLegend(fits, coefs, r2, combined, ale=ale)
fullLeg += leg
ax.plot(xp, pred, color='red')
ax.plot(xp, pred, color='red', linestyle=ls)
if norm:
ylabeldic = {"lpower": "Leakage Power (add32)", "denergy": "Energy/Op (add32)", "area": "Area (add32)", "delay": "Delay (FO4)"}
else:
ylabeldic = {"lpower": "Leakage Power (nW)", "denergy": "Dynamic Energy (fJ)", "area": "Area (sq microns)", "delay": "Delay (ns)"}
ax.legend(handles=fullLeg)
# fullLeg += genLegend(fits, coefs, r2, combined, ale=ale)
# legLoc = 'upper left' if ale else 'center right'
# ax.add_artist(ax.legend(handles=fullLeg, loc=legLoc))
ax.set_xticks(widths)
ax.set_xlabel("Width (bits)")
ax.set_ylabel(ylabeldic[var])
@ -243,15 +252,20 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo
ax.set_title(module + titleStr)
plt.savefig('./plots/PPA/'+ module + '_' + var + '.png')
# plt.show()
return fullLeg
return r2
def regress(widths, var, fits='clsgn'):
def regress(widths, var, fits='clsgn', ale=False):
''' fits a curve to the given points
returns lists of x and y values to plot that curve and legend elements with the equation
returns lists of x and y values to plot that curve and coefs for the eq with r2
'''
funcArr = genFuncs(fits)
widths = [w/normAddWidth for w in widths]
xp = np.linspace(4, 140, 200)
xpToCalc = xp
if ale:
widths = [w/normAddWidth for w in widths]
xpToCalc = [x/normAddWidth for x in xp]
mat = []
for w in widths:
@ -262,53 +276,91 @@ def regress(widths, var, fits='clsgn'):
y = np.array(var, dtype=np.float)
coefs = opt.nnls(mat, y)[0]
yp = []
for w in widths:
n = [func(w) for func in funcArr]
yp += [sum(np.multiply(coefs, n))]
r2 = skm.r2_score(y, yp)
xp = np.linspace(4, 140, 200)
pred = []
for x in xp:
n = [func(x/normAddWidth) for func in funcArr]
for x in xpToCalc:
n = [func(x) for func in funcArr]
pred += [sum(np.multiply(coefs, n))]
return xp, pred, coefs, r2
def makeCoefTable():
'''
writes CSV with each line containing the coefficients for a regression fit
''' writes CSV with each line containing the coefficients for a regression fit
to a particular combination of module, metric (including both techs, normalized)
'''
file = open("ppaFitting.csv", "w")
writer = csv.writer(file)
writer.writerow(['Module', 'Metric', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2'])
writer.writerow(['Module', 'Metric', 'Target', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2'])
for module in modules:
for var in ['delay', 'area', 'lpower', 'denergy']:
ale = (var != 'delay')
metL = []
modFit = fitDict[module]
fits = modFit[ale]
for freq in [10, None]:
target = 'easy' if freq else 'hard'
for var in ['delay', 'area', 'lpower', 'denergy']:
ale = (var != 'delay')
metL = []
modFit = fitDict[module]
fits = modFit[ale]
for spec in techSpecs:
metric = getVals(spec.tech, module, var)
techdict = spec._asdict()
norm = techdict[var]
metL += [m/norm for m in metric]
for spec in techSpecs:
metric = getVals(spec.tech, module, var, freq=freq)
techdict = spec._asdict()
norm = techdict[var]
metL += [m/norm for m in metric]
xp, pred, coefs, r2 = regress(widths*2, metL, fits)
coefs = np.ndarray.tolist(coefs)
coefsToWrite = [None]*5
fitTerms = 'clsgn'
ind = 0
for i in range(len(fitTerms)):
if fitTerms[i] in fits:
coefsToWrite[i] = coefs[ind]
ind += 1
row = [module, var] + coefsToWrite + [r2]
writer.writerow(row)
xp, pred, coefs, r2 = regress(widths*2, metL, fits, ale)
coefs = np.ndarray.tolist(coefs)
coefsToWrite = [None]*5
fitTerms = 'clsgn'
ind = 0
for i in range(len(fitTerms)):
if fitTerms[i] in fits:
coefsToWrite[i] = coefs[ind]
ind += 1
row = [module, var, target] + coefsToWrite + [r2]
writer.writerow(row)
file.close()
def sigfig(num, figs):
return '{:g}'.format(float('{:.{p}g}'.format(num, p=figs)))
def makeEqTable():
''' writes CSV with each line containing the equations for fits for each metric
to a particular module (including both techs, normalized)
'''
file = open("ppaEquations.csv", "w")
writer = csv.writer(file)
writer.writerow(['Element', 'Best delay', 'Fast area', 'Fast leakage', 'Fast energy', 'Small area', 'Small leakage', 'Small energy'])
for module in modules:
eqs = []
for freq in [None, 10]:
for var in ['delay', 'area', 'lpower', 'denergy']:
if (var == 'delay') and (freq == 10):
pass
else:
ale = (var != 'delay')
metL = []
modFit = fitDict[module]
fits = modFit[ale]
for spec in techSpecs:
metric = getVals(spec.tech, module, var, freq=freq)
techdict = spec._asdict()
norm = techdict[var]
metL += [m/norm for m in metric]
xp, pred, coefs, r2 = regress(widths*2, metL, fits, ale)
coefs = np.ndarray.tolist(coefs)
eqs += [genLegend(fits, coefs, ale=ale)]
row = [module] + eqs
writer.writerow(row)
file.close()
@ -369,7 +421,7 @@ def freqPlot(tech, mod, width):
delays = delaysL[ind]
freqs = freqsL[ind]
freqs, delays, areas = noOutliers(median, freqs, delays, areas) # comment out to see all syntheses
# freqs, delays, areas = noOutliers(median, freqs, delays, areas) # comment out to see all syntheses
c = 'blue' if ind else 'green'
# adprod = adprodpow(areas, delays, 1)
@ -383,14 +435,18 @@ def freqPlot(tech, mod, width):
lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')]
ax1.legend(handles=legend_elements)
width = str(width)
ax2.set_xlabel("Target Freq (MHz)")
ax1.set_ylabel('Delay (ns)')
ax2.set_ylabel('Area (sq microns)')
# ax3.set_ylabel('Area * Delay')
# ax4.set_ylabel('Area * $Delay^2$')
ax1.set_title(mod + '_' + str(width))
plt.savefig('./plots/freqBuckshot/' + tech + '/' + mod + '/' + str(width) + '.png')
ax1.set_title(mod + '_' + width)
if ('mux' in mod) & ('d' in mod):
width = mod
mod = 'muxd'
plt.savefig('./plots/freqBuckshot/' + tech + '/' + mod + '/' + width + '.png')
# plt.show()
def squareAreaDelay(tech, mod, width):
@ -485,30 +541,35 @@ def plotPPA(mod, freq=None, norm=True, aleOpt=False):
'''
plt.rcParams["figure.figsize"] = (10,7)
fig, axs = plt.subplots(2, 2)
# fig, axs = plt.subplots(4, 1)
# oneMetricPlot(mod, 'delay', ax=axs[0], fits=modFit[0], freq=freq, norm=norm)
# oneMetricPlot(mod, 'area', ax=axs[1], fits=modFit[1], freq=freq, norm=norm)
# oneMetricPlot(mod, 'lpower', ax=axs[2], fits=modFit[1], freq=freq, norm=norm)
# oneMetricPlot(mod, 'denergy', ax=axs[3], fits=modFit[1], freq=freq, norm=norm)
oneMetricPlot(mod, 'delay', ax=axs[0,0], freq=freq, norm=norm)
oneMetricPlot(mod, 'area', ax=axs[0,1], freq=freq, norm=norm)
oneMetricPlot(mod, 'lpower', ax=axs[1,0], freq=freq, norm=norm)
fullLeg = oneMetricPlot(mod, 'denergy', ax=axs[1,1], freq=freq, norm=norm)
arr = [['delay', 'area'], ['lpower', 'denergy']]
freqs = [freq]
if aleOpt: freqs += [10]
for i in [0, 1]:
for j in [0, 1]:
leg = []
for f in freqs:
if (arr[i][j]=='delay') and (f==10):
pass
else:
r2 = oneMetricPlot(mod, arr[i][j], ax=axs[i, j], freq=f, norm=norm)
ls = '--' if f else '-'
leg += [lines.Line2D([0], [0], color='red', label='$R^2$='+str(round(r2, 4)), linestyle=ls)]
axs[i, j].legend(handles=leg)
if aleOpt:
oneMetricPlot(mod, 'area', ax=axs[0,1], freq=10, norm=norm, color='black')
oneMetricPlot(mod, 'lpower', ax=axs[1,0], freq=10, norm=norm, color='black')
oneMetricPlot(mod, 'denergy', ax=axs[1,1], freq=10, norm=norm, color='black')
titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)"
n = 'normalized' if norm else 'unnormalized'
saveStr = './plots/PPA/'+ n + '/' + mod + '.png'
titleStr = " (target " + str(freq)+ "MHz)" if freq != None else ""
plt.suptitle(mod + titleStr)
# fig.legend(handles=fullLeg, ncol=3, loc='center', bbox_to_anchor=(0.3, 0.82, 0.4, 0.2))
fullLeg = [lines.Line2D([0], [0], color='black', label='fastest', linestyle='-')]
fullLeg += [lines.Line2D([0], [0], color='black', label='smallest', linestyle='--')]
fig.legend(handles=fullLeg, ncol=3, loc='center', bbox_to_anchor=(0.3, 0.82, 0.4, 0.2))
if freq != 10: plt.savefig(saveStr)
if freq != 10:
n = 'normalized' if norm else 'unnormalized'
saveStr = './plots/PPA/'+ n + '/' + mod + '.png'
plt.savefig(saveStr)
# plt.show()
def plotBestAreas(mod):
@ -533,16 +594,17 @@ if __name__ == '__main__':
##############################
# set up stuff, global variables
widths = [8, 16, 32, 64, 128]
modules = ['priorityencoder', 'add', 'csa', 'shiftleft', 'comparator', 'flop', 'mux2', 'mux4', 'mux8', 'mult']
modules = ['priorityencoder', 'add', 'csa', 'shiftleft', 'comparator', 'flop', 'mux2', 'mux4', 'mux8', 'mult'] #, 'mux2d', 'mux4d', 'mux8d',]
normAddWidth = 32 # divisor to use with N since normalizing to add_32
fitDict = {'add': ['cg', 'l', 'l'], 'mult': ['cg', 's', 'ls'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shiftleft': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'priorityencoder': ['cg', 'l', 'l']}
fitDict = {'add': ['cg', 'l', 'l'], 'mult': ['cg', 'ls', 'ls'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shiftleft': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'priorityencoder': ['cg', 'l', 'l']}
fitDict.update(dict.fromkeys(['mux2', 'mux4', 'mux8'], ['cg', 'l', 'l']))
leftblue = [['mux2', 'sky90', 32], ['mux2', 'sky90', 64], ['mux2', 'sky90', 128], ['mux8', 'sky90', 32], ['mux2', 'tsmc28', 8], ['mux2', 'tsmc28', 64]]
TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy")
techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1330.84, 582.81, 520.66], ['tsmc28', 'blue', '^', 12.2e-3, 209.29, 1060, 81.43]]
techSpecs = [TechSpec(*t) for t in techSpecs]
combined = TechSpec('combined fit', 'red', '_', 0, 0, 0, 0)
# invz1arealeakage = [['sky90', 1.96, 1.98], ['gf32', .351, .3116], ['tsmc28', .252, 1.09]] #['gf32', 'purple', 's', 15e-3]
##############################
@ -556,13 +618,14 @@ if __name__ == '__main__':
# squareAreaDelay('sky90', 'add', 32)
# oneMetricPlot('add', 'delay')
# freqPlot('sky90', 'mux4', 16)
# plotBestAreas('add')
# makeCoefTable()
# makeEqTable()
for mod in ['mux2']: #modules:
for mod in modules:
plotPPA(mod, norm=False)
plotPPA(mod) #, aleOpt=True)
# plotBestAreas(mod)
# for w in [8, 16, 32, 64, 128]:
# freqPlot('sky90', mod, w)
# freqPlot('tsmc28', mod, w)
plotPPA(mod, aleOpt=True)
for w in [8, 16, 32, 64, 128]:
freqPlot('sky90', mod, w)
freqPlot('tsmc28', mod, w)
plt.close('all')

11
synthDC/ppaEquations.csv Normal file
View File

@ -0,0 +1,11 @@
Element,Best delay,Fast area,Fast leakage,Fast energy,Small area,Small leakage,Small energy
priorityencoder,0.98$log_2$(N),0.33S,0.25S,0.093S,0.15S,0.046S,0.00046S
add,1.8 + 1.4$log_2$(N),1.1S,0.95S,1S,0.34S,0.16S,0.025S
csa,3.6,0.93S,1.5S,1.1S,0.34S,0.16S,0.00055S
shiftleft,0.48 + 1.6$log_2$(N),1.9S,2.3S,1.5S,0.8S,0.29S,0.0059S
comparator,2 + 0.94$log_2$(N),0.6S,0.47S,0.31S,0.34S,0.16S,0.00089S
flop,3.3,0.34S,0.37S,0.0012S,0.34S,0.37S,0.0012S
mux2,2.8 + 0.38$log_2$(N),0.2S,0.18S,0.16S,0.15S,0.12S,0.0011S
mux4,3.1 + 0.51$log_2$(N),0.36S,0.32S,0.28S,0.28S,0.11S,0.0021S
mux8,5 + 0.45$log_2$(N),0.76S,0.66S,0.45S,0.55S,0.24S,0.0029S
mult,6$log_2$(N),13S + 10$S^2$,26S + 7.3$S^2$,42S + 25$S^2$,1.1S + 7.9$S^2$,1S + 3.4$S^2$,2.1$S^2$
1 Element Best delay Fast area Fast leakage Fast energy Small area Small leakage Small energy
2 priorityencoder 0.98$log_2$(N) 0.33S 0.25S 0.093S 0.15S 0.046S 0.00046S
3 add 1.8 + 1.4$log_2$(N) 1.1S 0.95S 1S 0.34S 0.16S 0.025S
4 csa 3.6 0.93S 1.5S 1.1S 0.34S 0.16S 0.00055S
5 shiftleft 0.48 + 1.6$log_2$(N) 1.9S 2.3S 1.5S 0.8S 0.29S 0.0059S
6 comparator 2 + 0.94$log_2$(N) 0.6S 0.47S 0.31S 0.34S 0.16S 0.00089S
7 flop 3.3 0.34S 0.37S 0.0012S 0.34S 0.37S 0.0012S
8 mux2 2.8 + 0.38$log_2$(N) 0.2S 0.18S 0.16S 0.15S 0.12S 0.0011S
9 mux4 3.1 + 0.51$log_2$(N) 0.36S 0.32S 0.28S 0.28S 0.11S 0.0021S
10 mux8 5 + 0.45$log_2$(N) 0.76S 0.66S 0.45S 0.55S 0.24S 0.0029S
11 mult 6$log_2$(N) 13S + 10$S^2$ 26S + 7.3$S^2$ 42S + 25$S^2$ 1.1S + 7.9$S^2$ 1S + 3.4$S^2$ 2.1$S^2$

View File

@ -74,7 +74,7 @@ if { $saifpower == 1 } {
if {$drive != "INV"} {
set_false_path -from [get_ports reset]
}
if {(($::env(DESIGN) == "ppa_mux2_1") || ($::env(DESIGN) == "ppa_mux4_1") || ($::env(DESIGN) == "ppa_mux8_1"))} {
if {(($::env(DESIGN) == "ppa_mux2d_1") || ($::env(DESIGN) == "ppa_mux4d_1") || ($::env(DESIGN) == "ppa_mux8d_1"))} {
set_false_path -from {s}
}

View File

@ -2,482 +2,482 @@
BUILD="../../addins/TestFloat-3e/build/Linux-x86_64-GCC"
OUTPUT="./vectors"
echo "Creating ui32_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
$BUILD/testfloat_gen -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
$BUILD/testfloat_gen -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
echo "Creating ui32_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
$BUILD/testfloat_gen -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
$BUILD/testfloat_gen -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
echo "Creating ui32_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
$BUILD/testfloat_gen -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
$BUILD/testfloat_gen -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
echo "Creating ui32_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
$BUILD/testfloat_gen -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
$BUILD/testfloat_gen -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
echo "Creating ui64_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
$BUILD/testfloat_gen -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
$BUILD/testfloat_gen -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
echo "Creating ui64_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
$BUILD/testfloat_gen -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
$BUILD/testfloat_gen -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
echo "Creating ui64_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
$BUILD/testfloat_gen -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
$BUILD/testfloat_gen -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
echo "Creating ui64_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
$BUILD/testfloat_gen -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
$BUILD/testfloat_gen -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
echo "Creating i32_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
$BUILD/testfloat_gen -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
$BUILD/testfloat_gen -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
echo "Creating i32_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
$BUILD/testfloat_gen -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
$BUILD/testfloat_gen -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
echo "Creating i32_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
$BUILD/testfloat_gen -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
$BUILD/testfloat_gen -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
echo "Creating i32_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
$BUILD/testfloat_gen -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
$BUILD/testfloat_gen -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
echo "Creating i64_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
$BUILD/testfloat_gen -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
echo "Creating i64_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
$BUILD/testfloat_gen -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
echo "Creating i64_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
$BUILD/testfloat_gen -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
echo "Creating i64_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
$BUILD/testfloat_gen -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
echo "Creating f16_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
echo "Creating f32_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
echo "Creating f64_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
echo "Creating f128_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
echo "Creating f16_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
echo "Creating f32_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
echo "Creating f64_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
echo "Creating f128_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
echo "Creating f16_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
echo "Creating f32_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
echo "Creating f64_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
echo "Creating f128_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
echo "Creating f16_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
echo "Creating f32_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
echo "Creating f64_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
echo "Creating f128_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
echo "Creating f16_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
$BUILD/testfloat_gen -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
$BUILD/testfloat_gen -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
echo "Creating f16_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
$BUILD/testfloat_gen -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
$BUILD/testfloat_gen -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
echo "Creating f16_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
$BUILD/testfloat_gen -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
$BUILD/testfloat_gen -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
echo "Creating f32_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
$BUILD/testfloat_gen -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
$BUILD/testfloat_gen -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
echo "Creating f32_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
$BUILD/testfloat_gen -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
$BUILD/testfloat_gen -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
echo "Creating f32_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
$BUILD/testfloat_gen -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
$BUILD/testfloat_gen -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
echo "Creating f64_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
$BUILD/testfloat_gen -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
$BUILD/testfloat_gen -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
echo "Creating f64_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
$BUILD/testfloat_gen -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
$BUILD/testfloat_gen -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
echo "Creating f64_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
$BUILD/testfloat_gen -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
$BUILD/testfloat_gen -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
echo "Creating f128_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
$BUILD/testfloat_gen -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
$BUILD/testfloat_gen -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
echo "Creating f128_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
$BUILD/testfloat_gen -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
$BUILD/testfloat_gen -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
echo "Creating f128_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
$BUILD/testfloat_gen -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
$BUILD/testfloat_gen -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
echo "Creating f16_add vectors"
$BUILD/testfloat_gen -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
$BUILD/testfloat_gen -rminMag f16_add > $OUTPUT/f16_add_rz.tv
$BUILD/testfloat_gen -rmax f16_add > $OUTPUT/f16_add_ru.tv
$BUILD/testfloat_gen -rmin f16_add > $OUTPUT/f16_add_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_add > $OUTPUT/f16_add_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_add > $OUTPUT/f16_add_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_add > $OUTPUT/f16_add_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
echo "Creating f32_add vectors"
$BUILD/testfloat_gen -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
$BUILD/testfloat_gen -rminMag f32_add > $OUTPUT/f32_add_rz.tv
$BUILD/testfloat_gen -rmax f32_add > $OUTPUT/f32_add_ru.tv
$BUILD/testfloat_gen -rmin f32_add > $OUTPUT/f32_add_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_add > $OUTPUT/f32_add_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_add > $OUTPUT/f32_add_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_add > $OUTPUT/f32_add_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
echo "Creating f64_add vectors"
$BUILD/testfloat_gen -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
$BUILD/testfloat_gen -rminMag f64_add > $OUTPUT/f64_add_rz.tv
$BUILD/testfloat_gen -rmax f64_add > $OUTPUT/f64_add_ru.tv
$BUILD/testfloat_gen -rmin f64_add > $OUTPUT/f64_add_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_add > $OUTPUT/f64_add_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_add > $OUTPUT/f64_add_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_add > $OUTPUT/f64_add_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
echo "Creating f128_add vectors"
$BUILD/testfloat_gen -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
$BUILD/testfloat_gen -rminMag f128_add > $OUTPUT/f128_add_rz.tv
$BUILD/testfloat_gen -rmax f128_add > $OUTPUT/f128_add_ru.tv
$BUILD/testfloat_gen -rmin f128_add > $OUTPUT/f128_add_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_add > $OUTPUT/f128_add_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_add > $OUTPUT/f128_add_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_add > $OUTPUT/f128_add_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
echo "Creating f16_sub vectors"
$BUILD/testfloat_gen -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
$BUILD/testfloat_gen -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
$BUILD/testfloat_gen -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
$BUILD/testfloat_gen -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
echo "Creating f32_sub vectors"
$BUILD/testfloat_gen -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
$BUILD/testfloat_gen -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
$BUILD/testfloat_gen -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
$BUILD/testfloat_gen -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
echo "Creating f64_sub vectors"
$BUILD/testfloat_gen -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
$BUILD/testfloat_gen -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
$BUILD/testfloat_gen -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
$BUILD/testfloat_gen -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
echo "Creating f128_sub vectors"
$BUILD/testfloat_gen -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
$BUILD/testfloat_gen -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
$BUILD/testfloat_gen -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
$BUILD/testfloat_gen -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
echo "Creating f16_mul vectors"
$BUILD/testfloat_gen -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
$BUILD/testfloat_gen -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
$BUILD/testfloat_gen -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
$BUILD/testfloat_gen -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
echo "Creating f32_mul vectors"
$BUILD/testfloat_gen -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
$BUILD/testfloat_gen -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
$BUILD/testfloat_gen -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
$BUILD/testfloat_gen -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
echo "Creating f64_mul vectors"
$BUILD/testfloat_gen -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
$BUILD/testfloat_gen -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
$BUILD/testfloat_gen -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
$BUILD/testfloat_gen -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
echo "Creating f128_mul vectors"
$BUILD/testfloat_gen -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
$BUILD/testfloat_gen -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
$BUILD/testfloat_gen -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
$BUILD/testfloat_gen -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
echo "Creating f16_div vectors"
$BUILD/testfloat_gen -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
$BUILD/testfloat_gen -rminMag f16_div > $OUTPUT/f16_div_rz.tv
$BUILD/testfloat_gen -rmax f16_div > $OUTPUT/f16_div_ru.tv
$BUILD/testfloat_gen -rmin f16_div > $OUTPUT/f16_div_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_div > $OUTPUT/f16_div_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_div > $OUTPUT/f16_div_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_div > $OUTPUT/f16_div_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
echo "Creating f32_div vectors"
$BUILD/testfloat_gen -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
$BUILD/testfloat_gen -rminMag f32_div > $OUTPUT/f32_div_rz.tv
$BUILD/testfloat_gen -rmax f32_div > $OUTPUT/f32_div_ru.tv
$BUILD/testfloat_gen -rmin f32_div > $OUTPUT/f32_div_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_div > $OUTPUT/f32_div_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_div > $OUTPUT/f32_div_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_div > $OUTPUT/f32_div_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
echo "Creating f64_div vectors"
$BUILD/testfloat_gen -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
$BUILD/testfloat_gen -rminMag f64_div > $OUTPUT/f64_div_rz.tv
$BUILD/testfloat_gen -rmax f64_div > $OUTPUT/f64_div_ru.tv
$BUILD/testfloat_gen -rmin f64_div > $OUTPUT/f64_div_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_div > $OUTPUT/f64_div_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_div > $OUTPUT/f64_div_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_div > $OUTPUT/f64_div_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
echo "Creating f128_div vectors"
$BUILD/testfloat_gen -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
$BUILD/testfloat_gen -rminMag f128_div > $OUTPUT/f128_div_rz.tv
$BUILD/testfloat_gen -rmax f128_div > $OUTPUT/f128_div_ru.tv
$BUILD/testfloat_gen -rmin f128_div > $OUTPUT/f128_div_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_div > $OUTPUT/f128_div_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_div > $OUTPUT/f128_div_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_div > $OUTPUT/f128_div_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
echo "Creating f16_sqrt vectors"
$BUILD/testfloat_gen -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
$BUILD/testfloat_gen -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
$BUILD/testfloat_gen -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
$BUILD/testfloat_gen -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
echo "Creating f32_sqrt vectors"
$BUILD/testfloat_gen -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
$BUILD/testfloat_gen -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
$BUILD/testfloat_gen -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
$BUILD/testfloat_gen -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
echo "Creating f64_sqrt vectors"
$BUILD/testfloat_gen -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
$BUILD/testfloat_gen -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
$BUILD/testfloat_gen -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
$BUILD/testfloat_gen -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
echo "Creating f128_sqrt vectors"
$BUILD/testfloat_gen -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
$BUILD/testfloat_gen -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
$BUILD/testfloat_gen -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
$BUILD/testfloat_gen -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
echo "Creating f16_eq vectors"
$BUILD/testfloat_gen -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
$BUILD/testfloat_gen -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
$BUILD/testfloat_gen -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
$BUILD/testfloat_gen -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
echo "Creating f32_eq vectors"
$BUILD/testfloat_gen -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
$BUILD/testfloat_gen -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
$BUILD/testfloat_gen -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
$BUILD/testfloat_gen -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
echo "Creating f64_eq vectors"
$BUILD/testfloat_gen -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
$BUILD/testfloat_gen -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
$BUILD/testfloat_gen -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
$BUILD/testfloat_gen -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
echo "Creating f128_eq vectors"
$BUILD/testfloat_gen -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
$BUILD/testfloat_gen -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
$BUILD/testfloat_gen -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
$BUILD/testfloat_gen -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
echo "Creating f16_le vectors"
$BUILD/testfloat_gen -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
$BUILD/testfloat_gen -rminMag f16_le > $OUTPUT/f16_le_rz.tv
$BUILD/testfloat_gen -rmax f16_le > $OUTPUT/f16_le_ru.tv
$BUILD/testfloat_gen -rmin f16_le > $OUTPUT/f16_le_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_le > $OUTPUT/f16_le_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_le > $OUTPUT/f16_le_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_le > $OUTPUT/f16_le_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
echo "Creating f32_le vectors"
$BUILD/testfloat_gen -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
$BUILD/testfloat_gen -rminMag f32_le > $OUTPUT/f32_le_rz.tv
$BUILD/testfloat_gen -rmax f32_le > $OUTPUT/f32_le_ru.tv
$BUILD/testfloat_gen -rmin f32_le > $OUTPUT/f32_le_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_le > $OUTPUT/f32_le_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_le > $OUTPUT/f32_le_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_le > $OUTPUT/f32_le_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
echo "Creating f64_le vectors"
$BUILD/testfloat_gen -rnear_even f64_le > $OUTPUT/f64_le_rne.tv
$BUILD/testfloat_gen -rminMag f64_le > $OUTPUT/f64_le_rz.tv
$BUILD/testfloat_gen -rmax f64_le > $OUTPUT/f64_le_ru.tv
$BUILD/testfloat_gen -rmin f64_le > $OUTPUT/f64_le_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_le > $OUTPUT/f64_le_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_le > $OUTPUT/f64_le_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_le > $OUTPUT/f64_le_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_le > $OUTPUT/f64_le_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_le > $OUTPUT/f64_le_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_le > $OUTPUT/f64_le_rnm.tv
echo "Creating f128_le vectors"
$BUILD/testfloat_gen -rnear_even f128_le > $OUTPUT/f128_le_rne.tv
$BUILD/testfloat_gen -rminMag f128_le > $OUTPUT/f128_le_rz.tv
$BUILD/testfloat_gen -rmax f128_le > $OUTPUT/f128_le_ru.tv
$BUILD/testfloat_gen -rmin f128_le > $OUTPUT/f128_le_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_le > $OUTPUT/f128_le_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_le > $OUTPUT/f128_le_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_le > $OUTPUT/f128_le_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_le > $OUTPUT/f128_le_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_le > $OUTPUT/f128_le_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_le > $OUTPUT/f128_le_rnm.tv
echo "Creating f16_lt vectors"
$BUILD/testfloat_gen -rnear_even f16_lt > $OUTPUT/f16_lt_rne.tv
$BUILD/testfloat_gen -rminMag f16_lt > $OUTPUT/f16_lt_rz.tv
$BUILD/testfloat_gen -rmax f16_lt > $OUTPUT/f16_lt_ru.tv
$BUILD/testfloat_gen -rmin f16_lt > $OUTPUT/f16_lt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_lt > $OUTPUT/f16_lt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_lt > $OUTPUT/f16_lt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_lt > $OUTPUT/f16_lt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_lt > $OUTPUT/f16_lt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_lt > $OUTPUT/f16_lt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_lt > $OUTPUT/f16_lt_rnm.tv
echo "Creating f32_lt vectors"
$BUILD/testfloat_gen -rnear_even f32_lt > $OUTPUT/f32_lt_rne.tv
$BUILD/testfloat_gen -rminMag f32_lt > $OUTPUT/f32_lt_rz.tv
$BUILD/testfloat_gen -rmax f32_lt > $OUTPUT/f32_lt_ru.tv
$BUILD/testfloat_gen -rmin f32_lt > $OUTPUT/f32_lt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_lt > $OUTPUT/f32_lt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_lt > $OUTPUT/f32_lt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_lt > $OUTPUT/f32_lt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_lt > $OUTPUT/f32_lt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_lt > $OUTPUT/f32_lt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_lt > $OUTPUT/f32_lt_rnm.tv
echo "Creating f64_lt vectors"
$BUILD/testfloat_gen -rnear_even f64_lt > $OUTPUT/f64_lt_rne.tv
$BUILD/testfloat_gen -rminMag f64_lt > $OUTPUT/f64_lt_rz.tv
$BUILD/testfloat_gen -rmax f64_lt > $OUTPUT/f64_lt_ru.tv
$BUILD/testfloat_gen -rmin f64_lt > $OUTPUT/f64_lt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_lt > $OUTPUT/f64_lt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_lt > $OUTPUT/f64_lt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_lt > $OUTPUT/f64_lt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_lt > $OUTPUT/f64_lt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_lt > $OUTPUT/f64_lt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_lt > $OUTPUT/f64_lt_rnm.tv
echo "Creating f128_lt vectors"
$BUILD/testfloat_gen -rnear_even f128_lt > $OUTPUT/f128_lt_rne.tv
$BUILD/testfloat_gen -rminMag f128_lt > $OUTPUT/f128_lt_rz.tv
$BUILD/testfloat_gen -rmax f128_lt > $OUTPUT/f128_lt_ru.tv
$BUILD/testfloat_gen -rmin f128_lt > $OUTPUT/f128_lt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_lt > $OUTPUT/f128_lt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_lt > $OUTPUT/f128_lt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_lt > $OUTPUT/f128_lt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_lt > $OUTPUT/f128_lt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_lt > $OUTPUT/f128_lt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_lt > $OUTPUT/f128_lt_rnm.tv
echo "Creating f16_mulAdd vectors"
$BUILD/testfloat_gen -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
$BUILD/testfloat_gen -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
$BUILD/testfloat_gen -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
$BUILD/testfloat_gen -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
echo "Creating f32_mulAdd vectors"
$BUILD/testfloat_gen -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
$BUILD/testfloat_gen -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
$BUILD/testfloat_gen -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
$BUILD/testfloat_gen -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
echo "Creating f64_mulAdd vectors"
$BUILD/testfloat_gen -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
$BUILD/testfloat_gen -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
$BUILD/testfloat_gen -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
$BUILD/testfloat_gen -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
echo "Creating f128_mulAdd vectors"
$BUILD/testfloat_gen -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
$BUILD/testfloat_gen -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
$BUILD/testfloat_gen -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
$BUILD/testfloat_gen -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv