mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally into main
This commit is contained in:
commit
7c0f4dd954
@ -1 +1 @@
|
||||
Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
|
||||
Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
|
23
examples/verilog/fma/Makefile
Normal file
23
examples/verilog/fma/Makefile
Normal file
@ -0,0 +1,23 @@
|
||||
# Makefile
|
||||
|
||||
CC = gcc
|
||||
CFLAGS = -O3
|
||||
LIBS = -lm
|
||||
LFLAGS = -L.
|
||||
# Link against the riscv-isa-sim version of SoftFloat rather than
|
||||
# the regular version to get RISC-V NaN behavior
|
||||
IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat
|
||||
LIBS = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a
|
||||
#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
|
||||
#LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
|
||||
SRCS = $(wildcard *.c)
|
||||
|
||||
PROGS = $(patsubst %.c,%,$(SRCS))
|
||||
|
||||
all: $(PROGS)
|
||||
|
||||
%: %.c
|
||||
$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)
|
||||
|
||||
clean:
|
||||
rm -f $(PROGS)
|
4225
examples/verilog/fma/baby_torture.tv
Normal file
4225
examples/verilog/fma/baby_torture.tv
Normal file
File diff suppressed because it is too large
Load Diff
1057
examples/verilog/fma/baby_torture_rz.tv
Normal file
1057
examples/verilog/fma/baby_torture_rz.tv
Normal file
File diff suppressed because it is too large
Load Diff
23
examples/verilog/fma/fma.do
Normal file
23
examples/verilog/fma/fma.do
Normal file
@ -0,0 +1,23 @@
|
||||
# fma.do
|
||||
#
|
||||
# run with vsim -do "do fma.do"
|
||||
# add -c before -do for batch simulation
|
||||
|
||||
onbreak {resume}
|
||||
|
||||
# create library
|
||||
vlib worklib
|
||||
|
||||
vlog -lint -sv -work worklib fma16.v testbench.v
|
||||
vopt +acc worklib.testbench_fma16 -work worklib -o testbenchopt
|
||||
vsim -lib worklib testbenchopt
|
||||
|
||||
add wave sim:/testbench_fma16/clk
|
||||
add wave sim:/testbench_fma16/reset
|
||||
add wave sim:/testbench_fma16/x
|
||||
add wave sim:/testbench_fma16/y
|
||||
add wave sim:/testbench_fma16/z
|
||||
add wave sim:/testbench_fma16/result
|
||||
add wave sim:/testbench_fma16/rexpected
|
||||
|
||||
run -all
|
268
examples/verilog/fma/fma16.v
Normal file
268
examples/verilog/fma/fma16.v
Normal file
@ -0,0 +1,268 @@
|
||||
// fma16.sv
|
||||
// David_Harris@hmc.edu 26 February 2022
|
||||
// 16-bit floating-point multiply-accumulate
|
||||
|
||||
// Operation: general purpose multiply, add, fma, with optional negation
|
||||
// If mul=1, p = x * y. Else p = x.
|
||||
// If add=1, result = p + z. Else result = p.
|
||||
// If negr or negz = 1, negate result or z to handle negations and subtractions
|
||||
// fadd: mul = 0, add = 1, negr = negz = 0
|
||||
// fsub: mul = 0, add = 1, negr = 0, negz = 1
|
||||
// fmul: mul = 1, add = 0, negr = 0, negz = 0
|
||||
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
|
||||
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
|
||||
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
|
||||
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
|
||||
|
||||
`define FFLEN 16
|
||||
`define Nf 10
|
||||
`define Ne 5
|
||||
`define BIAS 15
|
||||
`define EMIN (-(2**(`Ne-1)-1))
|
||||
`define EMAX (2**(`Ne-1)-1)
|
||||
|
||||
`define NaN 16'h7E00
|
||||
`define INF 15'h7C00
|
||||
|
||||
// rounding modes *** update
|
||||
`define RZ 3'b00
|
||||
`define RNE 3'b01
|
||||
`define RM 3'b10
|
||||
`define RP 3'b11
|
||||
|
||||
module fma16(
|
||||
input logic [`FFLEN-1:0] x, y, z,
|
||||
input logic mul, add, negr, negz,
|
||||
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rp, 11: rn
|
||||
output logic [`FFLEN-1:0] result);
|
||||
|
||||
logic [`Nf:0] xm, ym, zm; // U1.Nf
|
||||
logic [`Ne-1:0] xe, ye, ze; // B_Ne
|
||||
logic xs, ys, zs;
|
||||
logic zs1; // sign before optional negation
|
||||
logic [2*`Nf+1:0] pm; // U2.2Nf
|
||||
logic [`Ne:0] pe; // B_Ne+1
|
||||
logic ps; // sign of product
|
||||
logic [22:0] rm;
|
||||
logic [`Ne+1:0] re;
|
||||
logic rs;
|
||||
logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
|
||||
logic [`Ne+1:0] re2;
|
||||
|
||||
unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan); // unpack inputs
|
||||
//signadj16 signadj(negr, negz, xs, ys, zs1, ps, zs); // handle negations
|
||||
mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps); // p = x * y
|
||||
add16 add16(add, pm, zm, pe, ze, ps, zs, negz, rm, re, re2, rs); // r = z + p
|
||||
postproc16 post(roundmode, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result); // normalize, round, pack
|
||||
endmodule
|
||||
|
||||
module mult16(
|
||||
input logic mul,
|
||||
input logic [`Nf:0] xm, ym,
|
||||
input logic [`Ne-1:0] xe, ye,
|
||||
input logic xs, ys,
|
||||
output logic [2*`Nf+1:0] pm,
|
||||
output logic [`Ne:0] pe,
|
||||
output logic ps);
|
||||
|
||||
// only multiply if mul = 1
|
||||
assign pm = mul ? xm * ym : {1'b0, xm, 10'b0}; // multiply mantiassas
|
||||
assign pe = mul ? xe + ye - `BIAS : {1'b0, xe}; // add exponents, account for bias
|
||||
assign ps = xs ^ ys; // negative if X xor Y are negative
|
||||
endmodule
|
||||
|
||||
module add16(
|
||||
input logic add,
|
||||
input logic [2*`Nf+1:0] pm, // U2.2Nf
|
||||
input logic [`Nf:0] zm, // U1.Nf
|
||||
input logic [`Ne:0] pe, // B_Ne+1
|
||||
input logic [`Ne-1:0] ze, // B_Ne
|
||||
input logic ps, zs,
|
||||
input logic negz,
|
||||
output logic [22:0] rm,
|
||||
output logic [`Ne+1:0] re, // B_Ne+2
|
||||
output logic [`Ne+1:0] re2,
|
||||
output logic rs);
|
||||
|
||||
logic [`Nf*3+7:0] paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
|
||||
logic signed [`Ne:0] ExpDiff; // Q(Ne+2).0
|
||||
logic [`Ne:0] AlignCnt; // U(Ne+3) bits to right shift Z for alignment *** check size.
|
||||
logic [`Nf-1:0] prezsticky;
|
||||
logic zsticky;
|
||||
logic effectivesub;
|
||||
logic rs0;
|
||||
logic [`Ne:0] leadingzeros, NormCnt; // *** should paramterize size
|
||||
logic [`Ne:0] re1;
|
||||
|
||||
// Alignment shift
|
||||
assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
|
||||
assign ExpDiff = pe - {1'b0, ze}; // Compute exponent difference as signed number
|
||||
always_comb // AlignCount mux; see Muller page 254
|
||||
if (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7; re = {1'b0, pe}; end
|
||||
else if (ExpDiff <= 2) begin AlignCnt = `Nf + 4 - ExpDiff; re = {1'b0, pe}; end
|
||||
else if (ExpDiff <= `Nf+3) begin AlignCnt = `Nf + 4 - ExpDiff; re = {2'b0, ze}; end
|
||||
else begin AlignCnt = 0; re = {2'b0, ze}; end
|
||||
// Shift Zm right by AlignCnt. Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
|
||||
assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; //Right shift
|
||||
assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1
|
||||
|
||||
// Effective subtraction
|
||||
assign effectivesub = ps ^ zs ^ negz; // subtract |z| from |p|
|
||||
assign zalignedaddsub = effectivesub ? ~zaligned : zaligned; // invert zaligned for subtraction
|
||||
|
||||
// Adder
|
||||
assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
|
||||
assign rs0 = r[`Nf*3+7]; // sign of the initial result
|
||||
assign r2 = rs0 ? ~r+1 : r; // invert sum if negative; could optimize with end-around carry?
|
||||
|
||||
// Sign Logic
|
||||
assign rs = ps ^ rs0; // flip the sign if necessary
|
||||
|
||||
// Leading zero counter
|
||||
lzc lzc(r2, leadingzeros); // count number of leading zeros in 2Nf+5 lower digits of r2
|
||||
assign re1 = pe +2 - leadingzeros; // *** declare, # of bits
|
||||
|
||||
// Normalization shift
|
||||
always_comb // NormCount mux
|
||||
if (ExpDiff < 3) begin
|
||||
if (re1 >= `EMIN) begin NormCnt = `Nf + 3 + leadingzeros; re2 = {1'b0, re1}; end
|
||||
else begin NormCnt = `Nf + 5 + pe - `EMIN; re2 = `EMIN; end
|
||||
end else begin NormCnt = AlignCnt; re = {2'b00, ze}; end
|
||||
assign rnormed = r2 << NormCnt; // *** update sticky
|
||||
/* temporarily comment out to start synth
|
||||
|
||||
// One-bit secondary normalization
|
||||
if (ExpDiff <= 2) begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
|
||||
else begin // *** handle sticky
|
||||
if (rnormed[***]) begin rnormed2 = rnormed >> 1; re2 = re+1; end
|
||||
else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re; end
|
||||
else begin rnormed2 = rnormed << 1; re2 = re-1; end
|
||||
end
|
||||
|
||||
// round
|
||||
assign l = rnormed2[***]; // least significant bit
|
||||
assign r = rnormed2[***-1]; // rounding bit
|
||||
assign s = ***; // sticky bit
|
||||
always_comb
|
||||
case (roundmode)
|
||||
RZ: roundup = 0;
|
||||
RP: roundup = ~rs & (r | s);
|
||||
RM: roundup = rs & (r | s);
|
||||
RNE: roundup = r & (s | l);
|
||||
default: roundup = 0;
|
||||
endcase
|
||||
assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
|
||||
*/
|
||||
|
||||
// *** need to handle rounding to MAXNUM vs. INFINITY
|
||||
|
||||
// add or pass product through
|
||||
/* assign rm = add ? arm : {1'b0, pm};
|
||||
assign re = add ? are : {1'b0, pe};
|
||||
assign rs = add ? ars : ps; */
|
||||
endmodule
|
||||
|
||||
module lzc(
|
||||
input logic [`Nf*3+7:0] r2,
|
||||
output logic [`Ne:0] leadingzeros
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
module postproc16(
|
||||
input logic [1:0] roundmode,
|
||||
input logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
|
||||
input logic [22:0] rm,
|
||||
input logic [`Nf:0] zm, // U1.Nf
|
||||
input logic [6:0] re,
|
||||
input logic [`Ne-1:0] ze, // B_Ne
|
||||
input logic rs, zs, ps,
|
||||
input logic [`Ne+1:0] re2,
|
||||
output logic [15:0] result);
|
||||
|
||||
logic [9:0] uf, uff;
|
||||
logic [6:0] ue;
|
||||
logic [6:0] ueb, uebiased;
|
||||
logic invalid;
|
||||
|
||||
// Special cases
|
||||
// *** not handling signaling NaN
|
||||
// *** also add overflow/underflow/inexact
|
||||
always_comb begin
|
||||
if (xnan | ynan | znan) begin result = `NaN; invalid = 0; end // propagate NANs
|
||||
else if ((xinf | yinf) & zinf & (ps ^ zs)) begin result = `NaN; invalid = 1; end // infinity - infinity
|
||||
else if (xzero & yinf | xinf & yzero) begin result = `NaN; invalid = 1; end // zero times infinity
|
||||
else if (xinf | yinf) begin result = {ps, `INF}; invalid = 0; end // X or Y
|
||||
else if (zinf) begin result = {zs, `INF}; invalid = 0; end // infinite Z
|
||||
else if (xzero | yzero) begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end
|
||||
else if (re2 >= `EMAX) begin result = {rs, `INF}; invalid = 0; end
|
||||
else begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end
|
||||
end
|
||||
|
||||
always_comb
|
||||
if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
|
||||
ue = re + 7'b1;
|
||||
uf = rm[20:11];
|
||||
end else begin // no normalization shift needed
|
||||
ue = re;
|
||||
uf = rm[19:10];
|
||||
end
|
||||
|
||||
// overflow
|
||||
always_comb begin
|
||||
ueb = ue-7'd15;
|
||||
if (ue >= 7'd46) begin // overflow
|
||||
/* uebiased = 7'd30;
|
||||
uff = 10'h3ff; */
|
||||
end else begin
|
||||
uebiased = ue-7'd15;
|
||||
uff = uf;
|
||||
end
|
||||
end
|
||||
|
||||
assign result = {rs, uebiased[4:0], uff};
|
||||
|
||||
// add special case handling for zeros, NaN, Infinity
|
||||
endmodule
|
||||
|
||||
module signadj16(
|
||||
input logic negr, negz,
|
||||
input logic xs, ys, zs1,
|
||||
output logic ps, zs);
|
||||
|
||||
assign ps = xs ^ ys; // sign of product
|
||||
assign zs = zs1 ^ negz; // sign of addend
|
||||
endmodule
|
||||
|
||||
module unpack16(
|
||||
input logic [15:0] x, y, z,
|
||||
output logic [10:0] xm, ym, zm,
|
||||
output logic [4:0] xe, ye, ze,
|
||||
output logic xs, ys, zs,
|
||||
output logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);
|
||||
|
||||
unpacknum16 upx(x, xm, xe, xs, xzero, xinf, xnan);
|
||||
unpacknum16 upy(y, ym, ye, ys, yzero, yinf, ynan);
|
||||
unpacknum16 upz(z, zm, ze, zs, zzero, zinf, znan);
|
||||
endmodule
|
||||
|
||||
module unpacknum16(
|
||||
input logic [15:0] num,
|
||||
output logic [10:0] m,
|
||||
output logic [4:0] e,
|
||||
output logic s,
|
||||
output logic zero, inf, nan);
|
||||
|
||||
logic [9:0] f; // fraction without leading 1
|
||||
logic [4:0] eb; // biased exponent
|
||||
|
||||
assign {s, eb, f} = num; // pull bit fields out of floating-point number
|
||||
assign m = {1'b1, f}; // prepend leading 1 to fraction
|
||||
assign e = eb; // leave bias in exponent ***
|
||||
assign zero = (e == 0 && f == 0);
|
||||
assign inf = (e == 31 && f == 0);
|
||||
assign nan = (e == 31 && f != 0);
|
||||
endmodule
|
||||
|
||||
|
24
examples/verilog/fma/fma16_template.v
Normal file
24
examples/verilog/fma/fma16_template.v
Normal file
@ -0,0 +1,24 @@
|
||||
// fma16.sv
|
||||
// David_Harris@hmc.edu 26 February 2022
|
||||
// 16-bit floating-point multiply-accumulate
|
||||
|
||||
// Operation: general purpose multiply, add, fma, with optional negation
|
||||
// If mul=1, p = x * y. Else p = x.
|
||||
// If add=1, result = p + z. Else result = p.
|
||||
// If negr or negz = 1, negate result or z to handle negations and subtractions
|
||||
// fadd: mul = 0, add = 1, negr = negz = 0
|
||||
// fsub: mul = 0, add = 1, negr = 0, negz = 1
|
||||
// fmul: mul = 1, add = 0, negr = 0, negz = 0
|
||||
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
|
||||
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
|
||||
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
|
||||
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
|
||||
|
||||
module fma16(
|
||||
input logic [15:0] x, y, z,
|
||||
input logic mul, add, negr, negz,
|
||||
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rp, 11: rn
|
||||
output logic [15:0] result);
|
||||
|
||||
endmodule
|
||||
|
240
examples/verilog/fma/fma16_testgen.c
Normal file
240
examples/verilog/fma/fma16_testgen.c
Normal file
@ -0,0 +1,240 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include "softfloat.h"
|
||||
#include "softfloat_types.h"
|
||||
|
||||
typedef union sp {
|
||||
float32_t v;
|
||||
float f;
|
||||
} sp;
|
||||
|
||||
// lists of tests, terminated with 0x8000
|
||||
uint16_t easyExponents[] = {15, 0x8000};
|
||||
uint16_t medExponents[] = {1, 14, 15, 16, 20, 30, 0x8000};
|
||||
uint16_t allExponents[] = {1, 15, 16, 30, 31, 0x8000};
|
||||
uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
|
||||
uint16_t medFracts[] = {0, 0x200, 0x001, 0x3FF, 0x8000};
|
||||
uint16_t zeros[] = {0x0000, 0x8000};
|
||||
uint16_t infs[] = {0x7C00, 0xFC00};
|
||||
uint16_t nans[] = {0x7D00, 0x7D01};
|
||||
|
||||
void softfloatInit(void) {
|
||||
softfloat_roundingMode = softfloat_round_minMag;
|
||||
softfloat_exceptionFlags = 0;
|
||||
softfloat_detectTininess = softfloat_tininess_beforeRounding;
|
||||
}
|
||||
|
||||
float convFloat(float16_t f16) {
|
||||
float32_t f32;
|
||||
float res;
|
||||
sp r;
|
||||
|
||||
f32 = f16_to_f32(f16);
|
||||
r.v = f32;
|
||||
res = r.f;
|
||||
return res;
|
||||
}
|
||||
|
||||
void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
float16_t result;
|
||||
int op, flagVals;
|
||||
char calc[80], flags[80];
|
||||
float32_t x32, y32, z32, r32;
|
||||
float xf, yf, zf, rf;
|
||||
float16_t smallest;
|
||||
|
||||
if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
|
||||
if (!add) z.v = 0x0000; // force z to 0 to avoid add
|
||||
if (negp) x.v ^= 0x8000; // flip sign of x to negate p
|
||||
if (negz) z.v ^= 0x8000; // flip sign of z to negate z
|
||||
op = roundingMode << 4 | mul<<3 | add<<2 | negp<<1 | negz;
|
||||
// printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);
|
||||
softfloat_exceptionFlags = 0; // clear exceptions
|
||||
result = f16_mulAdd(x, y, z);
|
||||
|
||||
sprintf(flags, "NV: %d OF: %d UF: %d NX: %d",
|
||||
(softfloat_exceptionFlags >> 4) % 2,
|
||||
(softfloat_exceptionFlags >> 2) % 2,
|
||||
(softfloat_exceptionFlags >> 1) % 2,
|
||||
(softfloat_exceptionFlags) % 2);
|
||||
// pack these four flags into one nibble, discarding DZ flag
|
||||
flagVals = softfloat_exceptionFlags & 0x7 | ((softfloat_exceptionFlags >> 1) & 0x8);
|
||||
|
||||
|
||||
// convert to floats for printing
|
||||
xf = convFloat(x);
|
||||
yf = convFloat(y);
|
||||
zf = convFloat(z);
|
||||
rf = convFloat(result);
|
||||
if (mul)
|
||||
if (add) sprintf(calc, "%f * %f + %f = %f", xf, yf, zf, rf);
|
||||
else sprintf(calc, "%f * %f = %f", xf, yf, rf);
|
||||
else sprintf(calc, "%f + %f = %f", xf, zf, rf);
|
||||
|
||||
// omit denorms, which aren't required for this project
|
||||
smallest.v = 0x0400;
|
||||
float16_t resultmag = result;
|
||||
resultmag.v &= 0x7FFF; // take absolute value
|
||||
if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
|
||||
if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
|
||||
if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed) fprintf(fptr, "// Skip inf: ");
|
||||
if (resultmag.v > 0x7C00 && !nanAllowed) fprintf(fptr, "// Skip NaN: ");
|
||||
fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
|
||||
}
|
||||
|
||||
void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases,
|
||||
FILE *fptr, int *numCases) {
|
||||
int i, j;
|
||||
|
||||
fprintf(fptr, desc); fprintf(fptr, "\n");
|
||||
*numCases=0;
|
||||
for (i=0; e[i] != 0x8000; i++)
|
||||
for (j=0; f[j] != 0x8000; j++) {
|
||||
cases[*numCases].v = f[j] | e[i]<<10;
|
||||
*numCases = *numCases + 1;
|
||||
}
|
||||
}
|
||||
|
||||
void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
z.v = 0x0000;
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
y.v = cases[j].v;
|
||||
for (k=0; k<=sgn; k++) {
|
||||
y.v ^= (k<<15);
|
||||
genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
void genAddTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
y.v = 0x0000;
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
z.v = cases[j].v;
|
||||
for (k=0; k<=sgn; k++) {
|
||||
z.v ^= (k<<15);
|
||||
genCase(fptr, x, y, z, 0, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
|
||||
void genFMATests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, l, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
y.v = cases[j].v;
|
||||
for (k=0; k<numCases; k++) {
|
||||
z.v = cases[k].v;
|
||||
for (l=0; l<=sgn; l++) {
|
||||
z.v ^= (l<<15);
|
||||
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
void genSpecialTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, sx, sy, sz, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
cases[numCases].v = 0x0000; // add +0 case
|
||||
cases[numCases+1].v = 0x8000; // add -0 case
|
||||
numCases += 2;
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
y.v = cases[j].v;
|
||||
for (k=0; k<numCases; k++) {
|
||||
z.v = cases[k].v;
|
||||
for (sx=0; sx<=sgn; sx++) {
|
||||
x.v ^= (sx<<15);
|
||||
for (sy=0; sy<=sgn; sy++) {
|
||||
y.v ^= (sy<<15);
|
||||
for (sz=0; sz<=sgn; sz++) {
|
||||
z.v ^= (sz<<15);
|
||||
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
softfloatInit(); // configure softfloat modes
|
||||
|
||||
// Test cases: multiplication
|
||||
genMulTests(easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
|
||||
genMulTests(medExponents, medFracts, 0, "fmul_1", "// Multiply with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
|
||||
genMulTests(medExponents, medFracts, 1, "fmul_2", "// Multiply with various exponents and signed fractions, RZ", 0, 0, 0, 0);
|
||||
|
||||
// Test cases: addition
|
||||
genAddTests(easyExponents, easyFracts, 0, "fadd_0", "// Add with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
|
||||
genAddTests(medExponents, medFracts, 0, "fadd_1", "// Add with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
|
||||
genAddTests(medExponents, medFracts, 1, "fadd_2", "// Add with various exponents and signed fractions, RZ", 0, 0, 0, 0);
|
||||
|
||||
// Test cases: FMA
|
||||
genFMATests(easyExponents, easyFracts, 0, "fma_0", "// FMA with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
|
||||
genFMATests(medExponents, medFracts, 0, "fma_1", "// FMA with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
|
||||
genFMATests(medExponents, medFracts, 1, "fma_2", "// FMA with various exponents and signed fractions, RZ", 0, 0, 0, 0);
|
||||
|
||||
// Test cases: Zero, Infinity, NaN
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rz", "// FMA with special cases, RZ", 0, 1, 1, 1);
|
||||
|
||||
// Full test cases with other rounding modes
|
||||
softfloat_roundingMode = softfloat_round_near_even;
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rne", "// FMA with special cases, RNE", 1, 1, 1, 1);
|
||||
softfloat_roundingMode = softfloat_round_min;
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rm", "// FMA with special cases, RM", 2, 1, 1, 1);
|
||||
softfloat_roundingMode = softfloat_round_max;
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rp", "// FMA with special cases, RP", 3, 1, 1, 1);
|
||||
|
||||
return 0;
|
||||
}
|
8
examples/verilog/fma/lint-fma
Executable file
8
examples/verilog/fma/lint-fma
Executable file
@ -0,0 +1,8 @@
|
||||
#!/bin/bash
|
||||
# check for warnings in Verilog code
|
||||
# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
|
||||
export PATH=$PATH:/usr/local/bin/
|
||||
verilator=`which verilator`
|
||||
|
||||
basepath=$(dirname $0)/..
|
||||
$verilator --lint-only --top-module fma16 fma16.v
|
2
examples/verilog/fma/sim-fma
Executable file
2
examples/verilog/fma/sim-fma
Executable file
@ -0,0 +1,2 @@
|
||||
vsim -do "do fma.do"
|
||||
|
1
examples/verilog/fma/sim-fma-batch
Executable file
1
examples/verilog/fma/sim-fma-batch
Executable file
@ -0,0 +1 @@
|
||||
vsim -c -do "do fma.do"
|
1
examples/verilog/fma/synth
Executable file
1
examples/verilog/fma/synth
Executable file
@ -0,0 +1 @@
|
||||
make -C ../../../synthDC synth DESIGN=fma16
|
52
examples/verilog/fma/testbench.v
Normal file
52
examples/verilog/fma/testbench.v
Normal file
@ -0,0 +1,52 @@
|
||||
/* verilator lint_off STMTDLY */
|
||||
module testbench_fma16;
|
||||
reg clk, reset;
|
||||
reg [15:0] x, y, z, rexpected;
|
||||
wire [15:0] result;
|
||||
reg [7:0] ctrl;
|
||||
reg [3:0] flagsexpected;
|
||||
reg mul, add, negp, negz;
|
||||
reg [1:0] roundmode;
|
||||
reg [31:0] vectornum, errors;
|
||||
reg [75:0] testvectors[10000:0];
|
||||
|
||||
// instantiate device under test
|
||||
fma16 dut(x, y, z, mul, add, negp, negz, roundmode, result);
|
||||
|
||||
// generate clock
|
||||
always
|
||||
begin
|
||||
clk = 1; #5; clk = 0; #5;
|
||||
end
|
||||
|
||||
// at start of test, load vectors and pulse reset
|
||||
initial
|
||||
begin
|
||||
$readmemh("work/fmul_0.tv", testvectors);
|
||||
vectornum = 0; errors = 0;
|
||||
reset = 1; #22; reset = 0;
|
||||
end
|
||||
|
||||
// apply test vectors on rising edge of clk
|
||||
always @(posedge clk)
|
||||
begin
|
||||
#1; {x, y, z, ctrl, rexpected, flagsexpected} = testvectors[vectornum];
|
||||
{roundmode, mul, add, negp, negz} = ctrl[5:0];
|
||||
end
|
||||
|
||||
// check results on falling edge of clk
|
||||
always @(negedge clk)
|
||||
if (~reset) begin // skip during reset
|
||||
if (result !== rexpected) begin // check result // *** should also add tests on flags eventually
|
||||
$display("Error: inputs %h * %h + %h", x, y, z);
|
||||
$display(" result = %h (%h expected)", result, rexpected);
|
||||
errors = errors + 1;
|
||||
end
|
||||
vectornum = vectornum + 1;
|
||||
if (testvectors[vectornum] === 'x) begin
|
||||
$display("%d tests completed with %d errors",
|
||||
vectornum, errors);
|
||||
$stop;
|
||||
end
|
||||
end
|
||||
endmodule
|
86257
examples/verilog/fma/torture.tv
Normal file
86257
examples/verilog/fma/torture.tv
Normal file
File diff suppressed because it is too large
Load Diff
130
examples/verilog/fma/torturegen.pl
Executable file
130
examples/verilog/fma/torturegen.pl
Executable file
@ -0,0 +1,130 @@
|
||||
#!/usr/bin/perl -w
|
||||
# torturegen.pl
|
||||
# David_Harris@hmc.edu 19 April 2022
|
||||
# Convert TestFloat cases into format for fma16 project torture test
|
||||
# Strip out cases involving denorms
|
||||
|
||||
use strict;
|
||||
|
||||
my @basenames = ("add", "mul", "mulAdd");
|
||||
my @roundingmodes = ("rz", "rd", "ru", "rne");
|
||||
my @names = ();
|
||||
foreach my $name (@basenames) {
|
||||
foreach my $mode (@roundingmodes) {
|
||||
push(@names, "f16_${name}_$mode.tv");
|
||||
}
|
||||
}
|
||||
|
||||
open(TORTURE, ">work/torture.tv") || die("Can't write torture.tv");
|
||||
my $datestring = localtime();
|
||||
print(TORTURE "// Torture tests generated $datestring by $0\n");
|
||||
foreach my $tv (@names) {
|
||||
open(TV, "work/$tv") || die("Can't read $tv");
|
||||
my $type = &getType($tv); # is it mul, add, mulAdd
|
||||
my $rm = &getRm($tv); # rounding mode
|
||||
# if ($rm != 0) { next; } # only do rz
|
||||
print (TORTURE "\n////////// Testcases from $tv of type $type rounding mode $rm\n");
|
||||
print ("\n////////// Testcases from $tv of type $type rounding mode $rm\n");
|
||||
my $linecount = 0;
|
||||
my $babyTorture = 0;
|
||||
while (<TV>) {
|
||||
my $line = $_;
|
||||
$linecount++;
|
||||
my $density = 10;
|
||||
if ($type eq "mulAdd") {$density = 500;}
|
||||
if ($babyTorture) {
|
||||
$density = 100;
|
||||
if ($type eq "mulAdd") {$density = 50000;}
|
||||
}
|
||||
if ((($linecount + $rm) % $density) != 0) { next }; # too many tests to use
|
||||
chomp($line); # strip off newline
|
||||
my @parts = split(/_/, $line);
|
||||
my ($x, $y, $z, $op, $w, $flags);
|
||||
$x = $parts[0];
|
||||
if ($type eq "add") { $y = "0000"; } else {$y = $parts[1]};
|
||||
if ($type eq "mul") { $z = "3CFF"; } elsif ($type eq "add") {$z = $parts[1]} else { $z = $parts[2]};
|
||||
$op = $rm << 4;
|
||||
if ($type eq "mul" || $type eq "mulAdd") { $op = $op + 8; }
|
||||
if ($type eq "add" || $type eq "mulAdd") { $op = $op + 4; }
|
||||
my $opname = sprintf("%02x", $op);
|
||||
if ($type eq "mulAdd") {$w = $parts[3];} else {$w = $parts[2]};
|
||||
if ($type eq "mulAdd") {$flags = $parts[4];} else {$flags = $parts[3]};
|
||||
$flags = substr($flags, -1); # take last character
|
||||
if (&fpval($w) eq "NaN") { $w = "7e00"; }
|
||||
my $vec = "${x}_${y}_${z}_${opname}_${w}_${flags}";
|
||||
my $skip = "";
|
||||
if (&isdenorm($x) || &isdenorm($y) || &isdenorm($z) || &isdenorm($w)) {
|
||||
$skip = "Skipped denorm";
|
||||
}
|
||||
my $summary = &summary($x, $y, $z, $w, $type);
|
||||
if ($skip ne "") {
|
||||
print TORTURE "// $skip $tv line $linecount $line $summary\n"
|
||||
}
|
||||
else { print TORTURE "$vec // $tv line $linecount $line $summary\n";}
|
||||
}
|
||||
close(TV);
|
||||
}
|
||||
close(TORTURE);
|
||||
|
||||
sub fpval {
|
||||
my $val = shift;
|
||||
$val = hex($val); # convert hex string to number
|
||||
my $frac = $val & 0x3FF;
|
||||
my $exp = ($val >> 10) & 0x1F;
|
||||
my $sign = $val >> 15;
|
||||
|
||||
my $res;
|
||||
if ($exp == 31 && $frac != 0) { return "NaN"; }
|
||||
elsif ($exp == 31) { $res = "INF"; }
|
||||
elsif ($val == 0) { $res = 0; }
|
||||
elsif ($exp == 0) { $res = "Denorm"; }
|
||||
else { $res = sprintf("1.%011b x 2^%d", $frac, $exp-15); }
|
||||
|
||||
if ($sign == 1) { $res = "-$res"; }
|
||||
return $res;
|
||||
}
|
||||
|
||||
sub summary {
|
||||
my $x = shift; my $y = shift; my $z = shift; my $w = shift; my $type = shift;
|
||||
|
||||
my $xv = &fpval($x);
|
||||
my $yv = &fpval($y);
|
||||
my $zv = &fpval($z);
|
||||
my $wv = &fpval($w);
|
||||
|
||||
if ($type eq "add") { return "$xv + $zv = $wv"; }
|
||||
elsif ($type eq "mul") { return "$xv * $yv = $wv"; }
|
||||
else {return "$xv * $yv + $zv = $wv"; }
|
||||
}
|
||||
|
||||
sub getType {
|
||||
my $tv = shift;
|
||||
|
||||
if ($tv =~ /mulAdd/) { return("mulAdd"); }
|
||||
elsif ($tv =~ /mul/) { return "mul"; }
|
||||
else { return "add"; }
|
||||
}
|
||||
|
||||
sub getRm {
|
||||
my $tv = shift;
|
||||
|
||||
if ($tv =~ /rz/) { return 0; }
|
||||
elsif ($tv =~ /rne/) { return 1; }
|
||||
elsif ($tv =~ /rd/) {return 2; }
|
||||
elsif ($tv =~ /ru/) { return 3; }
|
||||
else { return "bad"; }
|
||||
}
|
||||
|
||||
sub isdenorm {
|
||||
my $fp = shift;
|
||||
my $val = hex($fp);
|
||||
my $expv = $val >> 10;
|
||||
$expv = $expv & 0x1F;
|
||||
my $denorm = 0;
|
||||
if ($expv == 0 && $val != 0) { $denorm = 1;}
|
||||
# my $e0 = ($expv == 0);
|
||||
# my $vn0 = ($val != 0);
|
||||
# my $denorm = 0; #($exp == 0 && $val != 0); # denorm exponent but not all zero
|
||||
# print("Num $fp Exp $expv Denorm $denorm Done\n");
|
||||
return $denorm;
|
||||
}
|
62
examples/verilog/fma/wave.do
Normal file
62
examples/verilog/fma/wave.do
Normal file
@ -0,0 +1,62 @@
|
||||
onerror {resume}
|
||||
quietly WaveActivateNextPane {} 0
|
||||
add wave -noupdate /testbench_fma16/clk
|
||||
add wave -noupdate /testbench_fma16/reset
|
||||
add wave -noupdate /testbench_fma16/x
|
||||
add wave -noupdate /testbench_fma16/y
|
||||
add wave -noupdate /testbench_fma16/z
|
||||
add wave -noupdate /testbench_fma16/result
|
||||
add wave -noupdate /testbench_fma16/rexpected
|
||||
add wave -noupdate /testbench_fma16/dut/x
|
||||
add wave -noupdate /testbench_fma16/dut/y
|
||||
add wave -noupdate /testbench_fma16/dut/z
|
||||
add wave -noupdate /testbench_fma16/dut/mul
|
||||
add wave -noupdate /testbench_fma16/dut/add
|
||||
add wave -noupdate /testbench_fma16/dut/negr
|
||||
add wave -noupdate /testbench_fma16/dut/negz
|
||||
add wave -noupdate /testbench_fma16/dut/roundmode
|
||||
add wave -noupdate /testbench_fma16/dut/result
|
||||
add wave -noupdate /testbench_fma16/dut/XManE
|
||||
add wave -noupdate /testbench_fma16/dut/YManE
|
||||
add wave -noupdate /testbench_fma16/dut/ZManE
|
||||
add wave -noupdate /testbench_fma16/dut/XExpE
|
||||
add wave -noupdate /testbench_fma16/dut/YExpE
|
||||
add wave -noupdate /testbench_fma16/dut/ZExpE
|
||||
add wave -noupdate /testbench_fma16/dut/PExpE
|
||||
add wave -noupdate /testbench_fma16/dut/Ne
|
||||
add wave -noupdate /testbench_fma16/dut/upOneExt
|
||||
add wave -noupdate /testbench_fma16/dut/XSgnE
|
||||
add wave -noupdate /testbench_fma16/dut/YSgnE
|
||||
add wave -noupdate /testbench_fma16/dut/ZSgnE
|
||||
add wave -noupdate /testbench_fma16/dut/PSgnE
|
||||
add wave -noupdate /testbench_fma16/dut/ProdManE
|
||||
add wave -noupdate /testbench_fma16/dut/NfracS
|
||||
add wave -noupdate /testbench_fma16/dut/ProdManAl
|
||||
add wave -noupdate /testbench_fma16/dut/ZManExt
|
||||
add wave -noupdate /testbench_fma16/dut/ZManAl
|
||||
add wave -noupdate /testbench_fma16/dut/Nfrac
|
||||
add wave -noupdate /testbench_fma16/dut/res
|
||||
add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt
|
||||
add wave -noupdate /testbench_fma16/dut/NSamt
|
||||
add wave -noupdate /testbench_fma16/dut/ZExpGreater
|
||||
add wave -noupdate /testbench_fma16/dut/ACLess
|
||||
add wave -noupdate /testbench_fma16/dut/upOne
|
||||
add wave -noupdate /testbench_fma16/dut/KillProd
|
||||
TreeUpdate [SetDefaultTree]
|
||||
WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
|
||||
quietly wave cursor active 2
|
||||
configure wave -namecolwidth 237
|
||||
configure wave -valuecolwidth 64
|
||||
configure wave -justifyvalue left
|
||||
configure wave -signalnamewidth 0
|
||||
configure wave -snapdistance 10
|
||||
configure wave -datasetprefix 0
|
||||
configure wave -rowmargin 4
|
||||
configure wave -childrowmargin 2
|
||||
configure wave -gridoffset 0
|
||||
configure wave -gridperiod 1
|
||||
configure wave -griddelta 40
|
||||
configure wave -timeline 0
|
||||
configure wave -timelineunits ns
|
||||
update
|
||||
WaveRestoreZoom {4083 ns} {4235 ns}
|
@ -55,20 +55,22 @@
|
||||
`define Q_NE 32'd15
|
||||
`define Q_NF 32'd112
|
||||
`define Q_BIAS 32'd16383
|
||||
`define Q_FMT 2'd3
|
||||
`define D_LEN 32'd64
|
||||
`define D_NE 32'd11
|
||||
`define D_NF 32'd52
|
||||
`define D_BIAS 32'd1023
|
||||
`define D_FMT 32'd1
|
||||
`define D_FMT 2'd1
|
||||
`define S_LEN 32'd32
|
||||
`define S_NE 32'd8
|
||||
`define S_NF 32'd23
|
||||
`define S_BIAS 32'd127
|
||||
`define S_FMT 32'd1
|
||||
`define S_FMT 2'd0
|
||||
`define H_LEN 32'd16
|
||||
`define H_NE 32'd5
|
||||
`define H_NF 32'd10
|
||||
`define H_BIAS 32'd15
|
||||
`define H_FMT 2'd2
|
||||
|
||||
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
|
||||
`define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `F_SUPPORTED ? `S_LEN : `H_LEN)
|
||||
@ -91,6 +93,12 @@
|
||||
`define FMT2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 2'd0 : 2'd2)
|
||||
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
|
||||
|
||||
// largest length in IEU/FPU
|
||||
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
|
||||
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
|
||||
`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
|
||||
`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))
|
||||
|
||||
// Disable spurious Verilator warnings
|
||||
|
||||
/* verilator lint_off STMTDLY */
|
||||
|
@ -9,4 +9,4 @@
|
||||
# sqrt - test square ro
|
||||
# all - test everything
|
||||
|
||||
vsim -do "do testfloat.do rv64fpquad cmp"
|
||||
vsim -do "do testfloat.do rv64fp mul"
|
||||
|
@ -67,6 +67,7 @@ add wave -hex /testbench/dut/core/ebu/HTRANS
|
||||
add wave -hex /testbench/dut/core/ebu/HRDATA
|
||||
add wave -hex /testbench/dut/core/ebu/HWRITE
|
||||
add wave -hex /testbench/dut/core/ebu/HWDATA
|
||||
add wave -hex /testbench/dut/core/ebu/HBURST
|
||||
add wave -hex /testbench/dut/core/ebu/CaptureDataM
|
||||
add wave -divider
|
||||
|
||||
|
@ -1,102 +1,9 @@
|
||||
|
||||
add wave -noupdate /testbenchfp/clk
|
||||
add wave -noupdate -radix decimal /testbenchfp/VectorNum
|
||||
add wave -group Other -noupdate /testbenchfp/FrmNum
|
||||
add wave -group Other -noupdate /testbenchfp/X
|
||||
add wave -group Other -noupdate /testbenchfp/Y
|
||||
add wave -group Other -noupdate /testbenchfp/Z
|
||||
add wave -group Other -noupdate /testbenchfp/Res
|
||||
add wave -group Other -noupdate /testbenchfp/Ans
|
||||
|
||||
add wave -group Rne -noupdate /testbenchfp/FmaRneX
|
||||
add wave -group Rne -noupdate /testbenchfp/FmaRneY
|
||||
add wave -group Rne -noupdate /testbenchfp/FmaRneZ
|
||||
add wave -group Rne -noupdate /testbenchfp/FmaRneRes
|
||||
add wave -group Rne -noupdate /testbenchfp/FmaRneAns
|
||||
add wave -group Rz -noupdate /testbenchfp/FmaRzX
|
||||
add wave -group Rz -noupdate /testbenchfp/FmaRzY
|
||||
add wave -group Rz -noupdate /testbenchfp/FmaRzZ
|
||||
add wave -group Rz -noupdate /testbenchfp/FmaRzRes
|
||||
add wave -group Rz -noupdate /testbenchfp/FmaRzAns
|
||||
add wave -group Ru -noupdate /testbenchfp/FmaRuX
|
||||
add wave -group Ru -noupdate /testbenchfp/FmaRuY
|
||||
add wave -group Ru -noupdate /testbenchfp/FmaRuZ
|
||||
add wave -group Ru -noupdate /testbenchfp/FmaRuRes
|
||||
add wave -group Ru -noupdate /testbenchfp/FmaRuAns
|
||||
add wave -group Rd -noupdate /testbenchfp/FmaRdX
|
||||
add wave -group Rd -noupdate /testbenchfp/FmaRdY
|
||||
add wave -group Rd -noupdate /testbenchfp/FmaRdZ
|
||||
add wave -group Rd -noupdate /testbenchfp/FmaRdRes
|
||||
add wave -group Rd -noupdate /testbenchfp/FmaRdAns
|
||||
add wave -group Rnm -noupdate /testbenchfp/FmaRnmX
|
||||
add wave -group Rnm -noupdate /testbenchfp/FmaRnmY
|
||||
add wave -group Rnm -noupdate /testbenchfp/FmaRnmZ
|
||||
add wave -group Rnm -noupdate /testbenchfp/FmaRnmRes
|
||||
add wave -group Rnm -noupdate /testbenchfp/FmaRnmAns
|
||||
add wave -group AllSignals -noupdate /*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/expadd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/mult/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/align/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/sign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/add/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/loa/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/normalize/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaround/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultsign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaflags/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultselect/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/expadd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/mult/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/align/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/sign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/add/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/loa/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/normalize/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaround/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultsign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaflags/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultselect/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/expadd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/mult/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/align/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/sign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/add/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/loa/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/normalize/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaround/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultsign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaflags/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultselect/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/expadd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/mult/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/align/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/sign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/add/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/loa/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/normalize/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaround/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultsign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaflags/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultselect/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/expadd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/mult/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/align/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/sign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/add/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/loa/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/normalize/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaround/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultsign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaflags/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultselect/*
|
||||
add wave -noupdate /testbenchfp/FrmNum
|
||||
add wave -noupdate /testbenchfp/X
|
||||
add wave -noupdate /testbenchfp/Y
|
||||
add wave -noupdate /testbenchfp/Z
|
||||
add wave -noupdate /testbenchfp/Res
|
||||
add wave -noupdate /testbenchfp/Ans
|
||||
|
@ -473,6 +473,7 @@ add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusRead
|
||||
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAdr
|
||||
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAck
|
||||
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusHRDATA
|
||||
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUTransComplete
|
||||
add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillF
|
||||
add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/CurrState
|
||||
add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillDataLine0
|
||||
|
@ -45,6 +45,10 @@ module ahblite (
|
||||
input logic IFUBusRead,
|
||||
output logic [`XLEN-1:0] IFUBusHRDATA,
|
||||
output logic IFUBusAck,
|
||||
output logic IFUBusInit,
|
||||
input logic [2:0] IFUBurstType,
|
||||
input logic [1:0] IFUTransType,
|
||||
input logic IFUTransComplete,
|
||||
// Signals from Data Cache
|
||||
input logic [`PA_BITS-1:0] LSUBusAdr,
|
||||
input logic LSUBusRead,
|
||||
@ -52,7 +56,11 @@ module ahblite (
|
||||
input logic [`XLEN-1:0] LSUBusHWDATA,
|
||||
output logic [`XLEN-1:0] LSUBusHRDATA,
|
||||
input logic [2:0] LSUBusSize,
|
||||
input logic [2:0] LSUBurstType,
|
||||
input logic [1:0] LSUTransType,
|
||||
input logic LSUTransComplete,
|
||||
output logic LSUBusAck,
|
||||
output logic LSUBusInit,
|
||||
// AHB-Lite external signals
|
||||
(* mark_debug = "true" *) input logic [`AHBW-1:0] HRDATA,
|
||||
(* mark_debug = "true" *) input logic HREADY, HRESP,
|
||||
@ -87,6 +95,9 @@ module ahblite (
|
||||
// Data accesses have priority over instructions. However, if a data access comes
|
||||
// while an instruction read is occuring, the instruction read finishes before
|
||||
// the data access can take place.
|
||||
// *** This is no longer true when adding burst mode. We need to finish the current
|
||||
// read before doing another read. Need to work this out, but preliminarily we can
|
||||
// store the current read type in a flop and use that to figure out what burst type to use.
|
||||
|
||||
flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextBusState, IDLE, BusState);
|
||||
|
||||
@ -100,19 +111,21 @@ module ahblite (
|
||||
// interface that might be used in place of the ahblite.
|
||||
always_comb
|
||||
case (BusState)
|
||||
IDLE: if (LSUBusRead) NextBusState = MEMREAD; // Memory has priority over instructions
|
||||
else if (LSUBusWrite)NextBusState = MEMWRITE;
|
||||
else if (IFUBusRead) NextBusState = INSTRREAD;
|
||||
else NextBusState = IDLE;
|
||||
MEMREAD: if (~HREADY) NextBusState = MEMREAD;
|
||||
else if (IFUBusRead) NextBusState = INSTRREAD;
|
||||
else NextBusState = IDLE;
|
||||
MEMWRITE: if (~HREADY) NextBusState = MEMWRITE;
|
||||
else if (IFUBusRead) NextBusState = INSTRREAD;
|
||||
else NextBusState = IDLE;
|
||||
INSTRREAD: if (~HREADY) NextBusState = INSTRREAD;
|
||||
else NextBusState = IDLE; // if (IFUBusRead still high) *** need to wait?
|
||||
default: NextBusState = IDLE;
|
||||
IDLE: if (LSUBusRead) NextBusState = MEMREAD; // Memory has priority over instructions
|
||||
else if (LSUBusWrite) NextBusState = MEMWRITE;
|
||||
else if (IFUBusRead) NextBusState = INSTRREAD;
|
||||
else NextBusState = IDLE;
|
||||
MEMREAD: if (LSUTransComplete & IFUBusRead) NextBusState = INSTRREAD;
|
||||
else if (LSUTransComplete) NextBusState = IDLE;
|
||||
else NextBusState = MEMREAD;
|
||||
MEMWRITE: if (LSUTransComplete & IFUBusRead) NextBusState = INSTRREAD;
|
||||
else if (LSUTransComplete) NextBusState = IDLE;
|
||||
else NextBusState = MEMWRITE;
|
||||
INSTRREAD: if (IFUTransComplete & LSUBusRead) NextBusState = MEMREAD;
|
||||
else if (IFUTransComplete & LSUBusWrite) NextBusState = MEMWRITE;
|
||||
else if (IFUTransComplete) NextBusState = IDLE;
|
||||
else NextBusState = INSTRREAD;
|
||||
default: NextBusState = IDLE;
|
||||
endcase
|
||||
|
||||
|
||||
@ -122,7 +135,7 @@ module ahblite (
|
||||
assign #1 HADDR = AccessAddress;
|
||||
assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway
|
||||
assign HSIZE = (GrantData) ? {1'b0, LSUBusSize[1:0]} : ISize;
|
||||
assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfH
|
||||
assign HBURST = (GrantData) ? LSUBurstType : IFUBurstType; // If doing memory accesses, use LSUburst, else use Instruction burst.
|
||||
|
||||
/* Cache burst read/writes case statement (hopefully) WRAPS only have access to 4 wraps. X changes position based on HSIZE.
|
||||
000: Single (SINGLE)
|
||||
@ -133,15 +146,16 @@ module ahblite (
|
||||
101: 8-beat incrementing burst (INCR8)
|
||||
110: 16-beat wrapping burst (WRAP16) [wraps if X in 0X000000]
|
||||
111: 16-beat incrementing burst (INCR16)
|
||||
*/
|
||||
*** Remove if not necessary
|
||||
*/
|
||||
|
||||
|
||||
assign HPROT = 4'b0011; // not used; see Section 3.7
|
||||
assign HTRANS = (NextBusState != IDLE) ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise
|
||||
assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise
|
||||
assign HMASTLOCK = 0; // no locking supported
|
||||
assign HWRITE = NextBusState == MEMWRITE;
|
||||
assign HWRITE = (NextBusState == MEMWRITE);
|
||||
// delay write data by one cycle for
|
||||
flop #(`XLEN) wdreg(HCLK, LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
|
||||
flopen #(`XLEN) wdreg(HCLK, (LSUBusAck | LSUBusInit), LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
|
||||
// delay signals for subword writes
|
||||
flop #(3) adrreg(HCLK, HADDR[2:0], HADDRD);
|
||||
flop #(4) sizereg(HCLK, {UnsignedLoadM, HSIZE}, HSIZED);
|
||||
@ -153,7 +167,9 @@ module ahblite (
|
||||
|
||||
assign IFUBusHRDATA = HRDATA;
|
||||
assign LSUBusHRDATA = HRDATA;
|
||||
assign IFUBusAck = (BusState == INSTRREAD) & (NextBusState != INSTRREAD);
|
||||
assign LSUBusAck = (BusState == MEMREAD) & (NextBusState != MEMREAD) | (BusState == MEMWRITE) & (NextBusState != MEMWRITE);
|
||||
assign IFUBusInit = (BusState != INSTRREAD) & (NextBusState == INSTRREAD);
|
||||
assign LSUBusInit = (((BusState != MEMREAD) & (NextBusState == MEMREAD)) | (BusState != MEMWRITE) & (NextBusState == MEMWRITE));
|
||||
assign IFUBusAck = HREADY & (BusState == INSTRREAD);
|
||||
assign LSUBusAck = HREADY & ((BusState == MEMREAD) | (BusState == MEMWRITE));
|
||||
|
||||
endmodule
|
||||
|
69
pipelined/src/fpu/cvtshiftcalc.sv
Normal file
69
pipelined/src/fpu/cvtshiftcalc.sv
Normal file
@ -0,0 +1,69 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module cvtshiftcalc(
|
||||
input logic XZeroM,
|
||||
input logic ToInt,
|
||||
input logic IntToFp,
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
input logic [`NF:0] XManM, // input mantissas
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
|
||||
input logic CvtResDenormUfM,
|
||||
output logic CvtResUf,
|
||||
output logic [`LGLEN+`NF:0] CvtShiftIn // number to be shifted
|
||||
);
|
||||
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// shifter
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// seclect the input to the shifter
|
||||
// fp -> int:
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary |
|
||||
// Other problems:
|
||||
// - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
|
||||
// - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
|
||||
// - ex: for the case 0010000.... (double)
|
||||
// ??? -> fp:
|
||||
// - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
|
||||
// | `NF-1 zeros | Mantissa | 0's if nessisary |
|
||||
// - otherwise:
|
||||
// | LzcInM | 0's if nessisary |
|
||||
assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
|
||||
CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} :
|
||||
{CvtLzcInM, {`NF+1{1'b0}}};
|
||||
|
||||
|
||||
// choose the negative of the fraction size
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
|
||||
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
|
||||
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
|
||||
default: ResNegNF = 1'bx;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
|
||||
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
|
||||
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
|
||||
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
|
||||
endcase
|
||||
end
|
||||
// determine if the result underflows ??? -> fp
|
||||
// - if the first 1 is shifted out of the result then the result underflows
|
||||
// - can't underflow an integer to fp conversions
|
||||
assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroM&~IntToFp;
|
||||
|
||||
endmodule
|
@ -2,13 +2,12 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
// FOpCtrlE values
|
||||
// 111 min
|
||||
// 110 min
|
||||
// 101 max
|
||||
// 010 equal
|
||||
// 001 less than
|
||||
// 011 less than or equal
|
||||
|
||||
|
||||
module fcmp (
|
||||
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
|
||||
input logic [2:0] FOpCtrlE, // see above table
|
||||
@ -20,12 +19,13 @@ module fcmp (
|
||||
input logic XSNaNE, YSNaNE, // is signaling NaN
|
||||
input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs
|
||||
output logic CmpNVE, // invalid flag
|
||||
output logic [`FLEN-1:0] CmpResE // compare resilt
|
||||
output logic [`FLEN-1:0] CmpFpResE, // compare resilt
|
||||
output logic [`XLEN-1:0] CmpIntResE // compare resilt
|
||||
);
|
||||
|
||||
logic LTabs, LT, EQ; // is X < or > or = Y
|
||||
logic [`FLEN-1:0] NaNRes;
|
||||
logic BothZeroE, EitherNaNE, EitherSNaNE;
|
||||
logic BothZero, EitherNaN, EitherSNaN;
|
||||
|
||||
assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
|
||||
assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
|
||||
@ -36,9 +36,9 @@ module fcmp (
|
||||
// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
|
||||
assign EQ = (FSrcXE == FSrcYE);
|
||||
|
||||
assign BothZeroE = XZeroE&YZeroE;
|
||||
assign EitherNaNE = XNaNE|YNaNE;
|
||||
assign EitherSNaNE = XSNaNE|YSNaNE;
|
||||
assign BothZero = XZeroE&YZeroE;
|
||||
assign EitherNaN = XNaNE|YNaNE;
|
||||
assign EitherSNaN = XSNaNE|YSNaNE;
|
||||
|
||||
|
||||
// flags
|
||||
@ -47,11 +47,11 @@ module fcmp (
|
||||
// EQ - quiet - sets invalid if signaling NaN input
|
||||
always_comb begin
|
||||
case (FOpCtrlE[2:0])
|
||||
3'b111: CmpNVE = EitherSNaNE;//min
|
||||
3'b101: CmpNVE = EitherSNaNE;//max
|
||||
3'b010: CmpNVE = EitherSNaNE;//equal
|
||||
3'b001: CmpNVE = EitherNaNE;//less than
|
||||
3'b011: CmpNVE = EitherNaNE;//less than or equal
|
||||
3'b110: CmpNVE = EitherSNaN;//min
|
||||
3'b101: CmpNVE = EitherSNaN;//max
|
||||
3'b010: CmpNVE = EitherSNaN;//equal
|
||||
3'b001: CmpNVE = EitherNaN;//less than
|
||||
3'b011: CmpNVE = EitherNaN;//less than or equal
|
||||
default: CmpNVE = 1'b0;
|
||||
endcase
|
||||
end
|
||||
@ -112,16 +112,12 @@ module fcmp (
|
||||
endcase
|
||||
|
||||
// when one input is a NaN -output the non-NaN
|
||||
always_comb
|
||||
case (FOpCtrlE[2:0])
|
||||
3'b111: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
|
||||
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
|
||||
3'b101: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
|
||||
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE;
|
||||
3'b010: CmpResE = {(`FLEN-1)'(0), (EQ|BothZeroE) & ~EitherNaNE}; // Equal
|
||||
3'b001: CmpResE = {(`FLEN-1)'(0), LT & ~BothZeroE & ~EitherNaNE}; // Less than
|
||||
3'b011: CmpResE = {(`FLEN-1)'(0), (LT|EQ|BothZeroE) & ~EitherNaNE}; // Less than or equal
|
||||
default: CmpResE = (`FLEN)'(0);
|
||||
endcase
|
||||
assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
|
||||
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE :
|
||||
XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
|
||||
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
|
||||
|
||||
|
||||
assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN};
|
||||
|
||||
endmodule
|
||||
|
@ -10,99 +10,99 @@ module fctrl (
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
output logic FRegWriteD, // FP register write enable
|
||||
output logic FDivStartD, // Start division or squareroot
|
||||
output logic [1:0] FResultSelD, // select result to be written to fp register
|
||||
output logic [1:0] FResSelD, // select result to be written to fp register
|
||||
output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit
|
||||
output logic [1:0] FResSelD, // select one of the results done in the memory stage
|
||||
output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
|
||||
output logic [1:0] PostProcSelD,
|
||||
output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1
|
||||
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
output logic FWriteIntD // is the result written to the integer register
|
||||
);
|
||||
|
||||
`define FCTRLW 13
|
||||
`define FCTRLW 11
|
||||
logic [`FCTRLW-1:0] ControlsD;
|
||||
//*** will putting x for don't cares reduce area in synthisis???
|
||||
// FPU Instruction Decoder
|
||||
always_comb
|
||||
if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
|
||||
ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1;
|
||||
ControlsD = `FCTRLW'b0_0_00_00_000_0_1;
|
||||
else case(OpD)
|
||||
// FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
|
||||
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr
|
||||
7'b0000111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_00_000_00_00_0_0; // flw
|
||||
3'b011: ControlsD = `FCTRLW'b1_0_00_001_00_00_0_0; // fld
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_10_00_000_0_0; // flw
|
||||
3'b011: ControlsD = `FCTRLW'b1_0_10_00_000_0_0; // fld
|
||||
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b0100111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_00_010_00_00_0_0; // fsw
|
||||
3'b011: ControlsD = `FCTRLW'b0_0_00_011_00_00_0_0; // fsd
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_00_00_000_0_0; // fsw
|
||||
3'b011: ControlsD = `FCTRLW'b0_0_00_00_000_0_0; // fsd
|
||||
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_01_000_00_00_0_0; // fmadd
|
||||
7'b1000111: ControlsD = `FCTRLW'b1_0_01_001_00_00_0_0; // fmsub
|
||||
7'b1001011: ControlsD = `FCTRLW'b1_0_01_010_00_00_0_0; // fnmsub
|
||||
7'b1001111: ControlsD = `FCTRLW'b1_0_01_011_00_00_0_0; // fnmadd
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd
|
||||
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0; // fmsub
|
||||
7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0; // fnmsub
|
||||
7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0; // fnmadd
|
||||
7'b1010011: casez(Funct7D)
|
||||
7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_00_00_0_0; // fadd
|
||||
7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_00_00_0_0; // fsub
|
||||
7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_00_00_0_0; // fmul
|
||||
7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_00_00_1_0; // fdiv
|
||||
7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_00_00_1_0; // fsqrt
|
||||
7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0; // fadd
|
||||
7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0; // fsub
|
||||
7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0; // fmul
|
||||
7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_000_1_0; // fdiv
|
||||
7'b01011??: ControlsD = `FCTRLW'b1_0_01_01_001_1_0; // fsqrt
|
||||
7'b00100??: case(Funct3D)
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_11_000_01_00_0_0; // fsgnj
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_11_001_01_00_0_0; // fsgnjn
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_11_010_01_00_0_0; // fsgnjx
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0; // fsgnj
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0; // fsgnjn
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0; // fsgnjx
|
||||
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b00101??: case(Funct3D)
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_11_111_10_00_0_0; // fmin
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_11_101_10_00_0_0; // fmax
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0; // fmin
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0; // fmax
|
||||
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b10100??: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_1_11_010_10_00_0_0; // feq
|
||||
3'b001: ControlsD = `FCTRLW'b0_1_11_001_10_00_0_0; // flt
|
||||
3'b000: ControlsD = `FCTRLW'b0_1_11_011_10_00_0_0; // fle
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0; // feq
|
||||
3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0; // flt
|
||||
3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0; // fle
|
||||
default: ControlsD = `FCTRLW'b0_0_00_00_000__0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_00_10_0_0; // fclass
|
||||
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_00_01_0_0; // fmv.x.w
|
||||
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_00_01_0_0; // fmv.x.d
|
||||
else ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
7'b1101000: case(Rs2D[1:0])//***reduce resSel
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.s.w w->s
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.s.wu wu->s
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.s.l l->s
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.s.lu lu->s
|
||||
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_10_00_000_0_0; // fclass
|
||||
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_00_000_0_0; // fmv.x.w to int reg
|
||||
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_00_000_0_0; // fmv.x.d to int reg
|
||||
else ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
|
||||
7'b1101000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.s.w w->s
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.s.wu wu->s
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.s.l l->s
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.s.lu lu->s
|
||||
endcase
|
||||
7'b1100000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.s s->w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.s s->wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.s s->l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.s s->lu
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.s s->w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.s s->wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.s s->l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.s s->lu
|
||||
endcase
|
||||
7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_00_00_0_0; // fmv.w.x
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_11_000_11_00_0_0; // fcvt.s.d
|
||||
7'b1111000: ControlsD = `FCTRLW'b1_0_00_00_011_0_0; // fmv.w.x to fp reg
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_01_00_000_0_0; // fcvt.s.d
|
||||
7'b1101001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.d.w w->d
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.d.wu wu->d
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.d.l l->d
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.d.lu lu->d
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.d.w w->d
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.d.wu wu->d
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.d.l l->d
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.d.lu lu->d
|
||||
endcase
|
||||
7'b1100001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.d d->w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.d d->wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.d d->l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.d d->lu
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.d d->w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.d d->wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.d d->l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.d d->lu
|
||||
endcase
|
||||
7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_00_00_0_0; // fmv.d.x
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_11_001_11_00_0_0; // fcvt.d.s
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
7'b1111001: ControlsD = `FCTRLW'b1_0_00_00_011_0_0; // fmv.d.x to fp reg
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_01_00_001_0_0; // fcvt.d.s
|
||||
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
|
||||
endcase
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
|
||||
endcase
|
||||
|
||||
// unswizzle control bits
|
||||
assign {FRegWriteD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
|
||||
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD;
|
||||
|
||||
// rounding modes:
|
||||
// 000 - round to nearest, ties to even
|
||||
@ -121,82 +121,61 @@ module fctrl (
|
||||
assign FmtD = 0;
|
||||
else if (`FPSIZES == 2)begin
|
||||
logic [1:0] FmtTmp;
|
||||
assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
assign FmtTmp = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
assign FmtD = (`FMT == FmtTmp);
|
||||
end
|
||||
else if (`FPSIZES == 3|`FPSIZES == 4)
|
||||
assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
assign FmtD = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
|
||||
// assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
|
||||
// FResultSel:
|
||||
// 000 - ReadRes - load
|
||||
// 001 - FMARes - FMA and multiply
|
||||
// 010 - FAddRes - add and fp to fp
|
||||
// 011 - FDivRes - divide and squareroot
|
||||
// 100 - FRes - anything that is written to the fp register and is ready in the memory stage
|
||||
// FResSel:
|
||||
// 00 - SrcA - move to fp register
|
||||
// 01 - SgnRes - sign injection
|
||||
// 10 - CmpRes - min/max
|
||||
// 11 - CvtRes - convert to fp
|
||||
|
||||
// FIntResSel:
|
||||
// 00 - CmpRes - less than, equal, or less than or equal
|
||||
// 01 - FSrcX - move to int register
|
||||
// 10 - ClassRes - classify
|
||||
// 11 - CvtRes - convert to signed/unsigned int
|
||||
// Final Res Sel:
|
||||
// fp int
|
||||
// 00 other cmp
|
||||
// 01 postproc cvt
|
||||
// 10 store class
|
||||
// 11 mv
|
||||
|
||||
// OpCtrl values:
|
||||
// div/sqrt
|
||||
// fdiv = ???0
|
||||
// fsqrt = ???1
|
||||
// post processing Sel:
|
||||
// 00 cvt
|
||||
// 01 div
|
||||
// 10 fma
|
||||
|
||||
// cmp
|
||||
// fmin = ?111
|
||||
// fmax = ?101
|
||||
// feq = ?010
|
||||
// flt = ?001
|
||||
// fle = ?011
|
||||
// {?, is min or max, is eq or le, is lt or le}
|
||||
// Other Sel:
|
||||
// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]}
|
||||
// 000 - sign 00
|
||||
// 001 - negate sign 00
|
||||
// 010 - xor sign 00
|
||||
// 011 - mv to fp 01
|
||||
// 110 - min 10
|
||||
// 101 - max 10
|
||||
|
||||
//fma/mult
|
||||
// fmadd = ?000
|
||||
// fmsub = ?001
|
||||
// fnmsub = ?010 -(a*b)+c
|
||||
// fnmadd = ?011 -(a*b)-c
|
||||
// fmul = ?100
|
||||
// {?, is mul, negate product, negate addend}
|
||||
|
||||
// sgn inj
|
||||
// fsgnj = ??00
|
||||
// fsgnjn = ??01
|
||||
// fsgnjx = ??10
|
||||
|
||||
// add/sub/cnvt
|
||||
// fadd = 0000
|
||||
// fsub = 0001
|
||||
// fcvt.s.d = 0111
|
||||
// fcvt.d.s = 0111
|
||||
// Fmt controls the output for fp -> fp
|
||||
|
||||
// convert
|
||||
// fcvt.w.s = 0010
|
||||
// fcvt.wu.s = 0110
|
||||
// fcvt.s.w = 0001
|
||||
// fcvt.s.wu = 0101
|
||||
// fcvt.l.s = 1010
|
||||
// fcvt.lu.s = 1110
|
||||
// fcvt.s.l = 1001
|
||||
// fcvt.s.lu = 1101
|
||||
// fcvt.w.d = 0010
|
||||
// fcvt.wu.d = 0110
|
||||
// fcvt.d.w = 0001
|
||||
// fcvt.d.wu = 0101
|
||||
// fcvt.l.d = 1010
|
||||
// fcvt.lu.d = 1110
|
||||
// fcvt.d.l = 1001
|
||||
// fcvt.d.lu = 1101
|
||||
// {long, unsigned, to int, from int}
|
||||
// OpCtrl:
|
||||
// Fma: {not multiply-add?, negate prod?, negate Z?}
|
||||
// 000 - fmadd
|
||||
// 001 - fmsub
|
||||
// 010 - fnmsub
|
||||
// 011 - fnmadd
|
||||
// 100 - mul
|
||||
// 110 - add
|
||||
// 111 - sub
|
||||
// Div:
|
||||
// 0 - ???
|
||||
// 1 - ???
|
||||
// Cvt Int: {Int to Fp?, 64 bit int?, signed int?}
|
||||
// Cvt Fp: output format
|
||||
// 10 - to half
|
||||
// 00 - to single
|
||||
// 01 - to double
|
||||
// 11 - to quad
|
||||
// Cmp: {equal?, less than?}
|
||||
// 010 - eq
|
||||
// 001 - lt
|
||||
// 011 - le
|
||||
// 110 - min
|
||||
// 101 - max
|
||||
// Sgn:
|
||||
// 00 - sign
|
||||
// 01 - negate sign
|
||||
// 10 - xor sign
|
||||
|
||||
|
||||
endmodule
|
||||
|
@ -1,8 +1,5 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
// largest length in IEU/FPU
|
||||
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
|
||||
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
|
||||
|
||||
module fcvt (
|
||||
input logic XSgnE, // input's sign
|
||||
@ -13,14 +10,13 @@ module fcvt (
|
||||
input logic FWriteIntE, // is fp->int (since it's writting to the integer register)
|
||||
input logic XZeroE, // is the input zero
|
||||
input logic XDenormE, // is the input denormalized
|
||||
input logic XInfE, // is the input infinity
|
||||
input logic XNaNE, // is the input a NaN
|
||||
input logic XSNaNE, // is the input a signaling NaN
|
||||
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half)
|
||||
output logic [`FLEN-1:0] CvtResE, // the fp conversion result
|
||||
output logic [`XLEN-1:0] CvtIntResE, // the int conversion result
|
||||
output logic [4:0] CvtFlgE // the conversion's flags
|
||||
output logic [`NE:0] CvtCalcExpE, // the calculated expoent
|
||||
output logic [`LOGLGLEN-1:0] CvtShiftAmtE, // how much to shift by
|
||||
output logic CvtResDenormUfE,// does the result underflow or is denormalized
|
||||
output logic CvtResSgnE, // the result's sign
|
||||
output logic IntZeroE, // is the integer zero?
|
||||
output logic [`LGLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder)
|
||||
);
|
||||
|
||||
// OpCtrls:
|
||||
@ -41,34 +37,8 @@ module fcvt (
|
||||
logic [`FMTBITS-1:0] OutFmt; // format of the output
|
||||
logic [`XLEN-1:0] PosInt; // the positive integer input
|
||||
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
|
||||
logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`NE:0] CalcExp; // the calculated expoent
|
||||
logic [`LOGLGLEN-1:0] ShiftAmt; // how much to shift by
|
||||
logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted
|
||||
logic ResDenormUf;// does the result underflow or is denormalized
|
||||
logic ResUf; // does the result underflow
|
||||
logic [`LGLEN+`NF:0] Shifted; // the shifted result
|
||||
logic [`NE-2:0] NewBias; // the bias of the final result
|
||||
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
|
||||
logic [`NE-1:0] OldExp; // the old exponent
|
||||
logic ResSgn; // the result's sign
|
||||
logic Sticky; // sticky bit - for rounding
|
||||
logic Round; // round bit - for rounding
|
||||
logic LSBFrac; // the least significant bit of the fraction - for rounding
|
||||
logic CalcPlus1; // the calculated plus 1
|
||||
logic Plus1; // add one to the final result?
|
||||
logic [`FLEN-1:0] ShiftedPlus1; // plus one shifted to the proper position
|
||||
logic [`NE:0] FullResExp; // the full result exponent (with the overflow bit)
|
||||
logic [`NE-1:0] ResExp; // the result's exponent (trimmed to the correct size)
|
||||
logic [`NF-1:0] ResFrac; // the result's fraction
|
||||
logic [`XLEN+1:0] NegRes; // the negation of the result
|
||||
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
|
||||
logic Overflow, Underflow, Inexact, Invalid; // flags
|
||||
logic IntInexact, FpInexact, IntInvalid, FpInvalid; // flags for FP and int outputs
|
||||
logic [`NE-1:0] MaxExp; // the maximum exponent before overflow
|
||||
logic [1:0] NegResMSBS; // the negitive integer result's most significant bits
|
||||
logic [`FLEN-1:0] NaNRes, InfRes, Res, UfRes; //various special results
|
||||
logic KillRes; // kill the result?
|
||||
logic Signed; // is the opperation with a signed integer?
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
@ -97,8 +67,9 @@ module fcvt (
|
||||
// 1) negate the input if the input is a negitive singed integer
|
||||
// 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
|
||||
|
||||
assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE;
|
||||
assign PosInt = CvtResSgnE ? -ForwardedSrcAE : ForwardedSrcAE;
|
||||
assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
|
||||
assign IntZeroE = ~|TrimInt;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// lzc
|
||||
@ -107,32 +78,16 @@ module fcvt (
|
||||
// choose the input to the leading zero counter i.e. priority encoder
|
||||
// int -> fp : | positive integer | 00000... (if needed) |
|
||||
// fp -> fp : | fraction | 00000... (if needed) |
|
||||
assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
|
||||
assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
|
||||
{XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
|
||||
|
||||
lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt);
|
||||
|
||||
lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// shifter
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// seclect the input to the shifter
|
||||
// fp -> int:
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary |
|
||||
// Other problems:
|
||||
// - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
|
||||
// - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
|
||||
// - ex: for the case 0010000.... (double)
|
||||
// ??? -> fp:
|
||||
// - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
|
||||
// | `NF-1 zeros | Mantissa | 0's if nessisary |
|
||||
// - otherwise:
|
||||
// | lzcIn | 0's if nessisary |
|
||||
assign ShiftIn = ToInt ? {{`XLEN{1'b0}}, XManE[`NF]&~CalcExp[`NE], XManE[`NF-1]|(CalcExp[`NE]&XManE[`NF]), XManE[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
|
||||
ResDenormUf ? {{`NF-1{1'b0}}, XManE, {`LGLEN-`NF+1{1'b0}}} :
|
||||
{LzcIn, {`NF+1{1'b0}}};
|
||||
// kill the shift if it's negitive
|
||||
// kill the shift if it's negitive
|
||||
// select the amount to shift by
|
||||
// fp -> int:
|
||||
// - shift left by CalcExp - essentially shifting until the unbiased exponent = 0
|
||||
@ -144,47 +99,10 @@ module fcvt (
|
||||
// - only shift fp -> fp if the intital value is denormalized
|
||||
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
|
||||
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
|
||||
assign ShiftAmt = ToInt ? CalcExp[`LOGLGLEN-1:0]&{`LOGLGLEN{~CalcExp[`NE]}} :
|
||||
ResDenormUf&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CalcExp[`LOGLGLEN-1:0] :
|
||||
assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} :
|
||||
CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] :
|
||||
(ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
|
||||
|
||||
// shift
|
||||
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
|
||||
// process:
|
||||
// - start - CalcExp = 1 + XExp - Largest Bias
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary |
|
||||
//
|
||||
// - shift left 1 (1)
|
||||
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
|
||||
// . <- binary point
|
||||
//
|
||||
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
|
||||
// | 0's | Mantissa | 0's if nessisary |
|
||||
// | keep |
|
||||
//
|
||||
// fp -> fp:
|
||||
// - if result is denormalized or underflowed:
|
||||
// | `NF-1 zeros | Mantissa | 0's if nessisary | << NF+CalcExp-1
|
||||
// process:
|
||||
// - start
|
||||
// | mantissa | 0's |
|
||||
//
|
||||
// - shift right by NF-1 (NF-1)
|
||||
// | `NF-1 zeros | mantissa | 0's |
|
||||
//
|
||||
// - shift left by CalcExp = XExp - Largest bias + new bias
|
||||
// | 0's | mantissa | 0's |
|
||||
// | keep |
|
||||
//
|
||||
// - if the input is denormalized:
|
||||
// | lzcIn | 0's if nessisary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
//
|
||||
// int -> fp: | lzcIn | 0's if nessisary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
|
||||
assign Shifted = ShiftIn << ShiftAmt;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// exp calculations
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@ -262,40 +180,11 @@ module fcvt (
|
||||
// - shift left to normilize (-1-ZeroCnt)
|
||||
// - newBias to make the biased exponent
|
||||
//
|
||||
assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
|
||||
assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
|
||||
// find if the result is dnormal or underflows
|
||||
// - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
|
||||
// - can't underflow an integer to Fp conversion
|
||||
assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp;
|
||||
// choose the negative of the fraction size
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
|
||||
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
|
||||
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
|
||||
default: ResNegNF = 1'bx;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
|
||||
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
|
||||
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
|
||||
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
|
||||
endcase
|
||||
end
|
||||
// determine if the result underflows ??? -> fp
|
||||
// - if the first 1 is shifted out of the result then the result underflows
|
||||
// - can't underflow an integer to fp conversions
|
||||
assign ResUf = ($signed(CalcExp) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroE&~IntToFp;
|
||||
assign CvtResDenormUfE = (~|CvtCalcExpE | CvtCalcExpE[`NE])&~XZeroE&~IntToFp;
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@ -307,498 +196,7 @@ module fcvt (
|
||||
// - if 64-bit : check the msb of the 64-bit integer input and if it's signed
|
||||
// - if 32-bit : check the msb of the 32-bit integer input and if it's signed
|
||||
// - otherwise: the floating point input's sign
|
||||
assign ResSgn = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
|
||||
assign CvtResSgnE = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// rounding
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
endmodule
|
||||
|
||||
// round to nearest even
|
||||
// {Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
|
||||
// 11 - Plus1
|
||||
|
||||
// round to zero - do nothing
|
||||
|
||||
// round to -infinity - Plus1 if negative
|
||||
|
||||
// round to infinity - Plus1 if positive
|
||||
|
||||
// round to nearest max magnitude
|
||||
// {Guard, Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 1x - Plus1
|
||||
// ResUf is used when a fp->fp result underflows but all the bits get shifted out, which leaves nothing for the sticky bit
|
||||
if (`FPSIZES == 1) begin
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : |Shifted[`LGLEN+`NF-`NF-1:0]|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : Shifted[`LGLEN+`NF-`NF];
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : Shifted[`LGLEN+`NF-`NF+1];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] :
|
||||
(OutFmt ? |Shifted[`LGLEN+`NF-`NF-1:0] : |Shifted[`LGLEN+`NF-`NF1-1:0])|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] :
|
||||
OutFmt ? Shifted[`LGLEN+`NF-`NF] : Shifted[`LGLEN+`NF-`NF1];
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] :
|
||||
OutFmt ? Shifted[`LGLEN+`NF-`NF+1] : Shifted[`LGLEN+`NF-`NF1+1];
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`NF-`NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`NF-`NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF+1];
|
||||
end
|
||||
`FMT1: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`NF-`NF1-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`NF-`NF1];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF1+1];
|
||||
end
|
||||
`FMT2: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`NF-`NF2-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`NF-`NF2];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF2+1];
|
||||
end
|
||||
default: begin
|
||||
ToFpSticky = 1'bx;
|
||||
ToFpRound = 1'bx;
|
||||
ToFpLSBFrac = 1'bx;
|
||||
end
|
||||
endcase
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`Q_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`Q_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`Q_NF+1];
|
||||
end
|
||||
2'h1: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`D_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`D_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`D_NF+1];
|
||||
end
|
||||
2'h0: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`S_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`S_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`S_NF+1];
|
||||
end
|
||||
2'h2: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`H_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`H_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`H_NF+1];
|
||||
end
|
||||
endcase
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
|
||||
end
|
||||
|
||||
always_comb
|
||||
// Determine if you add 1
|
||||
case (FrmE)
|
||||
3'b000: CalcPlus1 = Round & (Sticky | LSBFrac);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = ResSgn;//round down
|
||||
3'b011: CalcPlus1 = ~ResSgn;//round up
|
||||
3'b100: CalcPlus1 = Round;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
// dont round if exact
|
||||
assign Plus1 = CalcPlus1&(Round|Sticky);
|
||||
|
||||
// shift the 1 to the propper position for rounding
|
||||
// - dont round it converting to integer
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ShiftedPlus1 = OutFmt ? {{`FLEN-1{1'b0}},Plus1&~ToInt} : {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
|
||||
`FMT1: ShiftedPlus1 = {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
|
||||
`FMT2: ShiftedPlus1 = {{`NE+`NF2{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF2-1{1'b0}}};
|
||||
default: ShiftedPlus1 = 0;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: ShiftedPlus1 = {{`Q_LEN-1{1'b0}},Plus1&~ToInt};
|
||||
2'h1: ShiftedPlus1 = {{`Q_NE+`D_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`D_NF-1{1'b0}}};
|
||||
2'h0: ShiftedPlus1 = {{`Q_NE+`S_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`S_NF-1{1'b0}}};
|
||||
2'h2: ShiftedPlus1 = {{`Q_NE+`H_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`H_NF-1{1'b0}}};
|
||||
endcase
|
||||
end
|
||||
// kill calcExp if the result is denormalized
|
||||
assign {FullResExp, ResFrac} = {CalcExp&{`NE+1{~ResDenormUf}}, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`NF]} + ShiftedPlus1;
|
||||
// trim the result's expoent to size
|
||||
assign ResExp = FullResExp[`NE-1:0];
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// flags
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate the flags
|
||||
|
||||
// find the maximum exponent (the exponent and larger overflows)
|
||||
if (`FPSIZES == 1) begin
|
||||
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign MaxExp = ToInt ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
|
||||
OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic [`NE-1:0] MaxExpFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
MaxExpFp = {`NE{1'b1}};
|
||||
end
|
||||
`FMT1: begin
|
||||
MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
|
||||
end
|
||||
`FMT2: begin
|
||||
MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
|
||||
end
|
||||
default: begin
|
||||
MaxExpFp = 1'bx;
|
||||
end
|
||||
endcase
|
||||
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic [`NE-1:0] MaxExpFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
MaxExpFp = {`Q_NE{1'b1}};
|
||||
end
|
||||
2'h1: begin
|
||||
MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
|
||||
end
|
||||
2'h0: begin
|
||||
MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
|
||||
end
|
||||
2'h2: begin
|
||||
MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
|
||||
end
|
||||
endcase
|
||||
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
|
||||
end
|
||||
|
||||
// if the result exponent is larger then the maximum possible exponent
|
||||
// | and the exponent is positive
|
||||
// | | and the input is not NaN or Infinity
|
||||
// | | |
|
||||
assign Overflow = ((ResExp >= MaxExp)&~CalcExp[`NE]&(~(XNaNE|XInfE)|IntToFp));
|
||||
|
||||
// if the result is denormalized or underflowed
|
||||
// | and the result did not round into normal values
|
||||
// | | and the result is not exact
|
||||
// | | | and the result isn't NaN
|
||||
// | | | |
|
||||
assign Underflow = ResDenormUf & ~(ResExp==1 & CalcExp == 0) & (Sticky|Round)&~(XNaNE);
|
||||
|
||||
// we are using the IEEE convertToIntegerExact opperations (rather then the exact ones) which do singal the inexact flag
|
||||
// if there were bits thrown away
|
||||
// | if overflowed or underflowed
|
||||
// | | and if not a NaN
|
||||
// | | |
|
||||
assign FpInexact = (Sticky|Round|Underflow|Overflow)&(~XNaNE|IntToFp);
|
||||
|
||||
// if the result is too small to be represented and not 0
|
||||
// | and if the result is not invalid (outside the integer bounds)
|
||||
// | |
|
||||
assign IntInexact = ((CalcExp[`NE]&~XZeroE)|Sticky|Round)&~Invalid;
|
||||
|
||||
// select the inexact flag to output
|
||||
assign Inexact = ToInt ? IntInexact : FpInexact;
|
||||
|
||||
// if an input was a singaling NaN(and we're using a FP input)
|
||||
// |
|
||||
assign FpInvalid = (XSNaNE&~IntToFp);
|
||||
|
||||
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
|
||||
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
|
||||
// if the input is NaN or infinity
|
||||
// | if the integer result overflows (out of range)
|
||||
// | | if the input was negitive but ouputing to a unsigned number
|
||||
// | | | the result doesn't round to zero
|
||||
// | | | | or the result rounds up out of bounds
|
||||
// | | | | and the result didn't underflow
|
||||
// | | | | |
|
||||
assign IntInvalid = XNaNE|XInfE|Overflow|((XSgnE&~Signed)&(~((CalcExp[`NE]|(~|CalcExp))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
|
||||
// |
|
||||
// or when the positive result rounds up out of range
|
||||
// select the inexact flag to output
|
||||
assign Invalid = ToInt ? IntInvalid : FpInvalid;
|
||||
// pack the flags together
|
||||
// - fp -> int does not set the overflow or underflow flags
|
||||
assign CvtFlgE = {Invalid, 1'b0, Overflow&~ToInt, Underflow&~ToInt, Inexact};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// result selection
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// determine if you shoould kill the result
|
||||
// - do so if the result underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
|
||||
// - dont set to zero if fp input is zero but not using the fp input
|
||||
// - dont set to zero if int input is zero but not using the int input
|
||||
assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp));
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
assign NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
|
||||
end else begin
|
||||
assign NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
assign InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
assign UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
assign Res = {ResSgn, ResExp, ResFrac};
|
||||
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
|
||||
end else begin
|
||||
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
assign InfRes = OutFmt ? (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
|
||||
(~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
assign Res = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
|
||||
end else begin
|
||||
NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
`FMT1: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
|
||||
end else begin
|
||||
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
end
|
||||
`FMT2: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, XManE[`NF-2:`NF-`NF2]};
|
||||
end else begin
|
||||
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, {`NF2-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
|
||||
end
|
||||
default: begin
|
||||
NaNRes = 1'bx;
|
||||
InfRes = 1'bx;
|
||||
UfRes = 1'bx;
|
||||
Res = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, XManE[`Q_NF-2:0]};
|
||||
end else begin
|
||||
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`Q_NE-1{1'b1}}, 1'b0, {`Q_NF{1'b1}}} : {ResSgn, {`Q_NE{1'b1}}, {`Q_NF{1'b0}}};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {ResSgn, (`Q_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
2'h1: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`D_NF]};
|
||||
end else begin
|
||||
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`D_NF]};
|
||||
end
|
||||
2'h0: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`S_NF]};
|
||||
end else begin
|
||||
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`S_NF]};
|
||||
end
|
||||
2'h2: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`H_NF]};
|
||||
end else begin
|
||||
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`H_NF]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
// choose the floating point result
|
||||
// - if the input is NaN (and using the NaN input) output the NaN result
|
||||
// - if the input is infinity or the output overflows
|
||||
// - kill the InfE signal if the input isn't a floating point value
|
||||
// - if killing the result output the underflow result
|
||||
// - otherwise output the normal result
|
||||
assign CvtResE = XNaNE&~IntToFp ? NaNRes :
|
||||
(XInfE&~IntToFp)|Overflow ? InfRes :
|
||||
KillRes ? UfRes :
|
||||
Res;
|
||||
// *** probably can optimize the negation
|
||||
// select the overflow integer result
|
||||
// - negitive infinity and out of range negitive input
|
||||
// | int | long |
|
||||
// signed | -2^31 | -2^63 |
|
||||
// unsigned | 0 | 0 |
|
||||
//
|
||||
// - positive infinity and out of range negitive input and NaNs
|
||||
// | int | long |
|
||||
// signed | 2^31-1 | 2^63-1 |
|
||||
// unsigned | 2^32-1 | 2^64-1 |
|
||||
//
|
||||
// other: 32 bit unsinged result should be sign extended as if it were a signed number
|
||||
assign OfIntRes = Signed ? XSgnE&~XNaNE ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
|
||||
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
|
||||
XSgnE&~XNaNE ? {`XLEN{1'b0}} : // unsigned negitive
|
||||
{`XLEN{1'b1}};// unsigned positive
|
||||
|
||||
// round and negate the positive result if needed
|
||||
assign NegRes = XSgnE ? -({2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
|
||||
// select the integer output
|
||||
// - if the input is invalid (out of bounds NaN or Inf) then output overflow result
|
||||
// - if the input underflows
|
||||
// - if rounding and signed opperation and negitive input, output -1
|
||||
// - otherwise output a rounded 0
|
||||
// - otherwise output the normal result (trmined and sign extended if nessisary)
|
||||
assign CvtIntResE = Invalid ? OfIntRes :
|
||||
CalcExp[`NE] ? XSgnE&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
|
||||
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
|
||||
|
||||
endmodule
|
@ -34,7 +34,7 @@ module fhazard(
|
||||
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
|
||||
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
|
||||
input logic [4:0] RdM, RdW, // the adress being written to
|
||||
input logic [1:0] FResultSelM, // the result being selected
|
||||
input logic [1:0] FResSelM, // the result being selected
|
||||
output logic FStallD, // stall the decode stage
|
||||
output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value
|
||||
);
|
||||
@ -47,10 +47,12 @@ module fhazard(
|
||||
FForwardZE = 2'b00; // choose FRD3E
|
||||
FStallD = 0;
|
||||
|
||||
//*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait
|
||||
|
||||
// if the needed value is in the memory stage - input 1
|
||||
if ((Adr1E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM (can be taken from the memory stage)
|
||||
if(FResultSelM == 2'b11) FForwardXE = 2'b10; // choose FResM
|
||||
if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // otherwise stall
|
||||
// if the needed value is in the writeback stage
|
||||
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
|
||||
@ -59,7 +61,7 @@ module fhazard(
|
||||
// if the needed value is in the memory stage - input 2
|
||||
if ((Adr2E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM (can be taken from the memory stage)
|
||||
if(FResultSelM == 2'b11) FForwardYE = 2'b10; // choose FResM
|
||||
if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // otherwise stall
|
||||
// if the needed value is in the writeback stage
|
||||
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
|
||||
@ -68,7 +70,7 @@ module fhazard(
|
||||
// if the needed value is in the memory stage - input 3
|
||||
if ((Adr3E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM (can be taken from the memory stage)
|
||||
if(FResultSelM == 2'b11) FForwardZE = 2'b10; // choose FResM
|
||||
if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // otherwise stall
|
||||
// if the needed value is in the writeback stage
|
||||
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
|
||||
|
152
pipelined/src/fpu/flags.sv
Normal file
152
pipelined/src/fpu/flags.sv
Normal file
@ -0,0 +1,152 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module flags(
|
||||
input logic XSgnM,
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic Plus1,
|
||||
input logic InfIn, // is a Inf input being used
|
||||
input logic XZeroM, YZeroM, // inputs are zero
|
||||
input logic XNaNM, YNaNM, // inputs are NaN
|
||||
input logic NaNIn, // is a NaN input being used
|
||||
input logic Sqrt, // Sqrt?
|
||||
input logic ToInt, // convert to integer
|
||||
input logic IntToFp, // convert integer to floating point
|
||||
input logic Int64, // convert to 64 bit integer
|
||||
input logic Signed, // convert to a signed integer
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent - Cvt
|
||||
input logic CvtOp, // conversion opperation?
|
||||
input logic DivOp, // conversion opperation?
|
||||
input logic FmaOp, // Fma opperation?
|
||||
input logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
|
||||
input logic [`NE+1:0] RoundExp, // exponent of the normalized sum
|
||||
input logic [1:0] NegResMSBS, // the negitive integer result's most significant bits
|
||||
input logic ZSgnEffM, PSgnM, // the product and modified Z signs
|
||||
input logic Round, UfLSBRes, Sticky, UfPlus1, // bits used to determine rounding
|
||||
output logic Invalid, Overflow, Underflow, // flags used to select the res
|
||||
output logic [4:0] PostProcFlgM // flags
|
||||
);
|
||||
logic SigNaN; // is an input a signaling NaN
|
||||
logic Inexact; // inexact flag
|
||||
logic FpInexact; // floating point inexact flag
|
||||
logic IntInexact; // integer inexact flag
|
||||
logic IntInvalid; // integer invalid flag
|
||||
logic FmaInvalid; // integer invalid flag
|
||||
logic DivInvalid; // integer invalid flag
|
||||
logic DivByZero;
|
||||
logic [`NE-1:0] MaxExp; // the maximum exponent before overflow
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
|
||||
OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic [`NE-1:0] MaxExpFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
MaxExpFp = {`NE{1'b1}};
|
||||
end
|
||||
`FMT1: begin
|
||||
MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
|
||||
end
|
||||
`FMT2: begin
|
||||
MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
|
||||
end
|
||||
default: begin
|
||||
MaxExpFp = 1'bx;
|
||||
end
|
||||
endcase
|
||||
assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic [`NE-1:0] MaxExpFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
MaxExpFp = {`Q_NE{1'b1}};
|
||||
end
|
||||
2'h1: begin
|
||||
MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
|
||||
end
|
||||
2'h0: begin
|
||||
MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
|
||||
end
|
||||
2'h2: begin
|
||||
MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
|
||||
end
|
||||
endcase
|
||||
assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
|
||||
end
|
||||
|
||||
// if the result is greater than or equal to the max exponent
|
||||
// | and the exponent isn't negitive
|
||||
// | | if the input isnt infinity or NaN
|
||||
// | | |
|
||||
assign Overflow = (FullResExp>={2'b0, MaxExp}) & ~FullResExp[`NE+1]&~(InfIn|NaNIn);
|
||||
|
||||
// detecting tininess after rounding
|
||||
// the exponent is negitive
|
||||
// | the result is denormalized
|
||||
// | | the result is normal and rounded from a denorm
|
||||
// | | | and if given an unbounded exponent the result does not round
|
||||
// | | | | and if the result is not exact
|
||||
// | | | | | and if the input isnt infinity or NaN
|
||||
// | | | | | |
|
||||
assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn);
|
||||
|
||||
// Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed res isn't outputed
|
||||
assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn);
|
||||
|
||||
// if the res is too small to be represented and not 0
|
||||
// | and if the res is not invalid (outside the integer bounds)
|
||||
// | |
|
||||
assign IntInexact = ((CvtCalcExpM[`NE]&~XZeroM)|Sticky|Round)&~Invalid;
|
||||
|
||||
// select the inexact flag to output
|
||||
assign Inexact = ToInt ? IntInexact : FpInexact;
|
||||
|
||||
// Set Invalid flag for following cases:
|
||||
// 1) any input is a signaling NaN
|
||||
// 2) Inf - Inf (unless x or y is NaN)
|
||||
// 3) 0 * Inf
|
||||
|
||||
// if the input is NaN or infinity
|
||||
// | if the integer res overflows (out of range)
|
||||
// | | if the input was negitive but ouputing to a unsigned number
|
||||
// | | | the res doesn't round to zero
|
||||
// | | | | or the res rounds up out of bounds
|
||||
// | | | | and the res didn't underflow
|
||||
// | | | | |
|
||||
assign IntInvalid = XNaNM|XInfM|Overflow|((XSgnM&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
|
||||
// |
|
||||
// or when the positive res rounds up out of range
|
||||
assign SigNaN = (XSNaNM&~(IntToFp&CvtOp)) | (YSNaNM&~CvtOp) | (ZSNaNM&FmaOp);
|
||||
assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
assign DivInvalid = ((XInfM & YInfM) | (XZeroM & YZeroM))&~Sqrt | (XSgnM&Sqrt);
|
||||
|
||||
assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp) | (IntInvalid&CvtOp&ToInt);
|
||||
|
||||
|
||||
assign DivByZero = YZeroM&DivOp;
|
||||
|
||||
// Combine flags
|
||||
// - to integer results do not set the underflow or overflow flags
|
||||
assign PostProcFlgM = {Invalid, DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact};
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
@ -30,73 +30,6 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fma(
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic FlushM, // flush the memory stage
|
||||
input logic StallM, // stall memory stage
|
||||
input logic [`FMTBITS-1:0] FmtE, FmtM, // precision 1 = double 0 = single
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic XSgnE, YSgnE, ZSgnE, // input signs - execute stage
|
||||
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // input exponents - execute stage
|
||||
input logic [`NF:0] XManE, YManE, ZManE, // input mantissa - execute stage
|
||||
input logic XSgnM, YSgnM, // input signs - memory stage
|
||||
input logic [`NE-1:0] ZExpM, // input exponents - memory stage
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage
|
||||
input logic ZDenormE, // is denorm
|
||||
input logic XZeroE, YZeroE, ZZeroE, // is zero - execute stage
|
||||
input logic XNaNM, YNaNM, ZNaNM, // is NaN
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // is signaling NaN
|
||||
input logic XZeroM, YZeroM, ZZeroM, // is zero - memory stage
|
||||
input logic XInfM, YInfM, ZInfM, // is infinity
|
||||
output logic [`FLEN-1:0] FMAResM, // FMA result
|
||||
output logic [4:0] FMAFlgM); // FMA flags
|
||||
|
||||
//fma/mult/add
|
||||
// fmadd = 000
|
||||
// fmsub = 001
|
||||
// fnmsub = 010 -(a*b)+c
|
||||
// fnmadd = 011 -(a*b)-c
|
||||
// fmul = 100
|
||||
// fadd = 110
|
||||
// fsub = 111
|
||||
|
||||
// signals transfered between pipeline stages
|
||||
logic [3*`NF+5:0] SumE, SumM;
|
||||
logic [`NE+1:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
logic KillProdE, KillProdM;
|
||||
logic InvZE, InvZM;
|
||||
logic NegSumE, NegSumM;
|
||||
logic ZSgnEffE, ZSgnEffM;
|
||||
logic PSgnE, PSgnM;
|
||||
logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM;
|
||||
logic Mult;
|
||||
logic ZDenormM;
|
||||
|
||||
fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XZeroE, .YZeroE, .ZZeroE,
|
||||
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE);
|
||||
|
||||
// E/M pipeline registers
|
||||
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
|
||||
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE},
|
||||
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM});
|
||||
|
||||
fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
|
||||
.FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
|
||||
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult,
|
||||
.FMAResM, .FMAFlgM);
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
//*** in al units before putting into : ? put in a seperate signal
|
||||
|
||||
module fma1(
|
||||
input logic XSgnE, YSgnE, ZSgnE, // input's signs
|
||||
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
|
||||
input logic [`NF:0] XManE, YManE, ZManE, // fractions in U(0.NF) format
|
||||
@ -111,7 +44,7 @@ module fma1(
|
||||
output logic InvZE, // intert Z
|
||||
output logic ZSgnEffE, // the modified Z sign
|
||||
output logic PSgnE, // the product's sign
|
||||
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift cnt
|
||||
output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift cnt
|
||||
);
|
||||
|
||||
logic [2*`NF+1:0] ProdManE; // 1.X frac * 1.Y frac in U(2.2Nf) format
|
||||
@ -151,7 +84,7 @@ module fma1(
|
||||
|
||||
add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE);
|
||||
|
||||
loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .NormCntE);
|
||||
loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .FmaNormCntE);
|
||||
|
||||
// Choose the positive sum and accompanying LZA result.
|
||||
assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
|
||||
@ -332,7 +265,7 @@ endmodule
|
||||
module loa( //https://ieeexplore.ieee.org/abstract/document/930098
|
||||
input logic [3*`NF+6:0] A, // addend
|
||||
input logic [2*`NF+1:0] P, // product
|
||||
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift count for the positive result
|
||||
output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift count for the positive result
|
||||
);
|
||||
|
||||
logic [3*`NF+6:0] T;
|
||||
@ -360,861 +293,6 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
|
||||
|
||||
|
||||
|
||||
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE));
|
||||
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(FmaNormCntE));
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
module fma2(
|
||||
|
||||
input logic XSgnM, YSgnM, // input signs
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
|
||||
input logic [3*`NF+5:0] SumM, // the positive sum
|
||||
input logic NegSumM, // was the sum negitive
|
||||
input logic InvZM, // do you invert Z
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZSgnEffM, // the modified Z sign - depends on instruction
|
||||
input logic PSgnM, // the product's sign
|
||||
input logic Mult, // multiply opperation
|
||||
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // the normalization shift count
|
||||
output logic [`FLEN-1:0] FMAResM, // FMA final result
|
||||
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
|
||||
|
||||
|
||||
logic [`NF-1:0] ResultFrac; // Result fraction
|
||||
logic [`NE-1:0] ResultExp; // Result exponent
|
||||
logic ResultSgn, ResultSgnTmp; // Result sign
|
||||
logic [`NE+1:0] SumExp; // exponent of the normalized sum
|
||||
logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow
|
||||
logic [`NF+1:0] NormSum; // normalized sum
|
||||
logic NormSumSticky; // sticky bit calulated from the normalized sum
|
||||
logic SumZero; // is the sum zero
|
||||
logic ResultDenorm; // is the result denormalized
|
||||
logic Sticky, UfSticky; // Sticky bit
|
||||
logic CalcPlus1; // do you add or subtract one for rounding
|
||||
logic UfPlus1; // do you add one (for determining underflow flag)
|
||||
logic Invalid,Underflow,Overflow; // flags
|
||||
logic Guard, Round; // bits needed to determine rounding
|
||||
logic UfLSBNormSum; // bits needed to determine rounding for underflow flag
|
||||
logic [`FLEN:0] RoundAdd; // how much to add to the result
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
|
||||
.ZDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// round to zero
|
||||
// round to -infinity
|
||||
// round to infinity
|
||||
// round to nearest max magnitude
|
||||
|
||||
fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgnTmp, .SumExp,
|
||||
.CalcPlus1, .UfPlus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .RoundAdd, .UfLSBNormSum);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Sign calculation
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .Underflow, .InvZM, .NegSumM, .SumZero, .Mult, .ResultSgnTmp, .ResultSgn);
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
|
||||
.XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfLSBNormSum, .Sticky, .UfPlus1,
|
||||
.FmtM, .Invalid, .Overflow, .Underflow, .FMAFlgM);
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM,
|
||||
.FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
|
||||
.ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow,
|
||||
.ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
|
||||
|
||||
// *** use NF where needed
|
||||
|
||||
endmodule
|
||||
|
||||
module resultsign(
|
||||
input logic [2:0] FrmM,
|
||||
input logic PSgnM, ZSgnEffM,
|
||||
input logic Underflow,
|
||||
input logic InvZM,
|
||||
input logic NegSumM,
|
||||
input logic SumZero,
|
||||
input logic Mult,
|
||||
output logic ResultSgnTmp,
|
||||
output logic ResultSgn
|
||||
);
|
||||
|
||||
logic ZeroSgn;
|
||||
// logic ResultSgnTmp;
|
||||
|
||||
// Determine the sign if the sum is zero
|
||||
// if cancelation then 0 unless round to -infinity
|
||||
// if multiply then Psgn
|
||||
// otherwise psign
|
||||
assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM;
|
||||
|
||||
// is the result negitive
|
||||
// if p - z is the Sum negitive
|
||||
// if -p + z is the Sum positive
|
||||
// if -p - z then the Sum is negitive
|
||||
assign ResultSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | ((ZSgnEffM)&PSgnM);
|
||||
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
module normalize(
|
||||
input logic [3*`NF+5:0] SumM, // the positive sum
|
||||
input logic [`NE-1:0] ZExpM, // exponent of Z
|
||||
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // normalization shift count
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic KillProdM, // is the product set to zero
|
||||
input logic ZDenormM,
|
||||
input logic AddendStickyM, // the sticky bit caclulated from the aligned addend
|
||||
output logic [`NF+1:0] NormSum, // normalized sum
|
||||
output logic SumZero, // is the sum zero
|
||||
output logic NormSumSticky, UfSticky, // sticky bits
|
||||
output logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
output logic ResultDenorm // is the result denormalized
|
||||
);
|
||||
logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later
|
||||
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
|
||||
logic [3*`NF+8:0] SumShifted; // the shifted sum before LZA correction
|
||||
logic [`NE+1:0] SumExpTmpTmp; // the exponent of the normalized sum with the `FLEN bias
|
||||
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
|
||||
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//*** insert bias-bias simplification in fcvt.sv/phone pictures
|
||||
// Determine if the sum is zero
|
||||
assign SumZero = ~(|SumM);
|
||||
|
||||
// calculate the sum's exponent
|
||||
assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, NormCntM} + 1 - (`NE+2)'(`NF+4));
|
||||
|
||||
//convert the sum's exponent into the propper percision
|
||||
if (`FPSIZES == 1) begin
|
||||
assign SumExpTmp = SumExpTmpTmp;
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: SumExpTmp = SumExpTmpTmp;
|
||||
`FMT1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
|
||||
`FMT2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|SumExpTmpTmp}};
|
||||
default: SumExpTmp = `NE+2'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: SumExpTmp = SumExpTmpTmp;
|
||||
2'h1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|SumExpTmpTmp}};
|
||||
2'h0: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|SumExpTmpTmp}};
|
||||
2'h2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|SumExpTmpTmp}};
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
// determine if the result is denormalized
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
logic Sum0LEZ, Sum0GEFL;
|
||||
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
|
||||
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
|
||||
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
|
||||
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
|
||||
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
|
||||
assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
|
||||
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
|
||||
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
|
||||
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
|
||||
assign Sum2LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
|
||||
assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|SumExpTmpTmp;
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
|
||||
`FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
|
||||
`FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
|
||||
default: PreResultDenorm = 1'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
|
||||
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
|
||||
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
|
||||
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|SumExpTmpTmp;
|
||||
assign Sum2LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
|
||||
assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|SumExpTmpTmp;
|
||||
assign Sum3LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
|
||||
assign Sum3GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|SumExpTmpTmp;
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
|
||||
2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
|
||||
2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
|
||||
2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
// 010. when should be 001.
|
||||
// - shift left one
|
||||
// - add one from exp
|
||||
// - if kill prod dont add to exp
|
||||
|
||||
// Determine if the result is denormal
|
||||
// assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
|
||||
|
||||
// Determine the shift needed for denormal results
|
||||
// - if not denorm add 1 to shift out the leading 1
|
||||
assign DenormShift = PreResultDenorm ? SumExpTmp[$clog2(3*`NF+7)-1:0] : 1;
|
||||
// Normalize the sum
|
||||
assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift;
|
||||
// LZA correction
|
||||
assign LZAPlus1 = SumShifted[3*`NF+7];
|
||||
assign LZAPlus2 = SumShifted[3*`NF+8];
|
||||
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
|
||||
assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
|
||||
assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4];
|
||||
|
||||
// Calculate the sticky bit
|
||||
if (`FPSIZES == 1) begin
|
||||
assign NormSumSticky = |CorrSumShifted[2*`NF+3:0];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// 3*NF+5 - NF1 - 3
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
|
||||
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
|
||||
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) |
|
||||
(|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2));
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
|
||||
(|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) |
|
||||
(|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) |
|
||||
(|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2));
|
||||
|
||||
end
|
||||
|
||||
assign UfSticky = AddendStickyM | NormSumSticky;
|
||||
|
||||
// Determine sum's exponent
|
||||
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
|
||||
assign SumExp = (SumExpTmp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResultDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
|
||||
// recalculate if the result is denormalized
|
||||
assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
|
||||
|
||||
endmodule
|
||||
|
||||
module fmaround(
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [2:0] FrmM, // rounding mode
|
||||
input logic UfSticky, // sticky bit for underlow calculation
|
||||
input logic [`NF+1:0] NormSum, // normalized sum
|
||||
input logic AddendStickyM, // addend's sticky bit
|
||||
input logic NormSumSticky, // normalized sum's sticky bit
|
||||
input logic ZZeroM, // is Z zero
|
||||
input logic InvZM, // invert Z
|
||||
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
input logic ResultSgnTmp, // the result's sign
|
||||
output logic CalcPlus1, UfPlus1, // do you add or subtract on from the result
|
||||
output logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow
|
||||
output logic [`NF-1:0] ResultFrac, // Result fraction
|
||||
output logic [`NE-1:0] ResultExp, // Result exponent
|
||||
output logic Sticky, // sticky bit
|
||||
output logic [`FLEN:0] RoundAdd, // how much to add to the result
|
||||
output logic Round, Guard, UfLSBNormSum // bits needed to calculate rounding
|
||||
);
|
||||
logic LSBNormSum; // bit used for rounding - least significant bit of the normalized sum
|
||||
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
|
||||
logic UfGuard; // guard bit used to caluculate underflow
|
||||
logic UfCalcPlus1, CalcMinus1, Plus1, Minus1; // do you add or subtract on from the result
|
||||
logic [`NF-1:0] NormSumTruncated; // the normalized sum trimed to fit the mantissa
|
||||
logic UfRound;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// {Guard, Round, Sticky}
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - Plus1 if result is odd (LSBNormSum = 1)
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
|
||||
// 110/111 - Plus1
|
||||
|
||||
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to -infinity
|
||||
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to infinity
|
||||
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
|
||||
|
||||
// round to nearest max magnitude
|
||||
// {Guard, Round, Sticky}
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - Plus1
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
|
||||
// 110/111 - Plus1
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
assign Round = NormSum[1];
|
||||
assign LSBNormSum = NormSum[2];
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfRound = NormSum[0];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// \/-------------NF---------------,
|
||||
// | NF1 | 2 | |
|
||||
// '-------NF1------^
|
||||
|
||||
// determine guard, round, and least significant bit of the result
|
||||
assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
|
||||
assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
|
||||
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[1];
|
||||
LSBNormSum = NormSum[2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[0];
|
||||
end
|
||||
`FMT1: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[`NF-`NF1+1];
|
||||
LSBNormSum = NormSum[`NF-`NF1+2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[`NF-`NF1];
|
||||
end
|
||||
`FMT2: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[`NF-`NF2+1];
|
||||
LSBNormSum = NormSum[`NF-`NF2+2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[`NF-`NF2];
|
||||
end
|
||||
default: begin
|
||||
Round = 1'bx;
|
||||
LSBNormSum = 1'bx;
|
||||
UfRound = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[1];
|
||||
LSBNormSum = NormSum[2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[0];
|
||||
end
|
||||
2'h1: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[`NF-`D_NF+1];
|
||||
LSBNormSum = NormSum[`NF-`D_NF+2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[`NF-`D_NF];
|
||||
end
|
||||
2'h0: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[`NF-`S_NF+1];
|
||||
LSBNormSum = NormSum[`NF-`S_NF+2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[`NF-`S_NF];
|
||||
end
|
||||
2'h2: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[`NF-`H_NF+1];
|
||||
LSBNormSum = NormSum[`NF-`H_NF+2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[`NF-`H_NF];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
// used to determine underflow flag
|
||||
assign UfLSBNormSum = Round;
|
||||
// determine sticky
|
||||
assign Sticky = UfSticky | UfRound;
|
||||
|
||||
|
||||
// Deterimine if a small number was supposed to be subtrated
|
||||
assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
|
||||
assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here
|
||||
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (FrmM)
|
||||
3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down
|
||||
3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up
|
||||
3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you add 1 (for underflow flag)
|
||||
case (FrmM)
|
||||
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even
|
||||
3'b001: UfCalcPlus1 = 0;//round to zero
|
||||
3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down
|
||||
3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up
|
||||
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
|
||||
default: UfCalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you subtract 1
|
||||
case (FrmM)
|
||||
3'b000: CalcMinus1 = 0;//round to nearest even
|
||||
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
|
||||
3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down
|
||||
3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up
|
||||
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
|
||||
default: CalcMinus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (Sticky | Round);
|
||||
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky
|
||||
assign Minus1 = CalcMinus1 & (Sticky | Round);
|
||||
|
||||
// Compute rounded result
|
||||
if (`FPSIZES == 1) begin
|
||||
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, Plus1};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// \/FLEN+1
|
||||
// | NE+2 | NF |
|
||||
// '-NE+2-^----NF1----^
|
||||
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
|
||||
assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
|
||||
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
|
||||
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
|
||||
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), Plus1, (`FLEN-1-`NE-`NF2)'(0)};
|
||||
default: RoundAdd = (`FLEN+1)'(0);
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
|
||||
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), Plus1, (`FLEN-1-`NE-`D_NF)'(0)};
|
||||
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), Plus1, (`FLEN-1-`NE-`S_NF)'(0)};
|
||||
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), Plus1, (`FLEN-1-`NE-`H_NF)'(0)};
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
assign NormSumTruncated = NormSum[`NF+1:2];
|
||||
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
|
||||
assign ResultExp = FullResultExp[`NE-1:0];
|
||||
|
||||
|
||||
endmodule
|
||||
|
||||
module fmaflags(
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XZeroM, YZeroM, // inputs are zero
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow
|
||||
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
input logic ZSgnEffM, PSgnM, // the product and modified Z signs
|
||||
input logic Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
output logic Invalid, Overflow, Underflow, // flags used to select the result
|
||||
output logic [4:0] FMAFlgM // FMA flags
|
||||
);
|
||||
logic SigNaN; // is an input a signaling NaN
|
||||
logic GtMaxExp; // is exponent greater than the maximum
|
||||
logic UnderflowFlag, Inexact; // flags
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
// Set Invalid flag for following cases:
|
||||
// 1) any input is a signaling NaN
|
||||
// 2) Inf - Inf (unless x or y is NaN)
|
||||
// 3) 0 * Inf
|
||||
|
||||
assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
|
||||
assign Invalid = SigNaN | ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
|
||||
// Set Overflow flag if the number is too big to be represented
|
||||
// - Don't set the overflow flag if an overflowed result isn't outputed
|
||||
if (`FPSIZES == 1) begin
|
||||
assign GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
|
||||
`FMT1: GtMaxExp = &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
|
||||
`FMT2: GtMaxExp = &FullResultExp[`NE2-1:0] | FullResultExp[`NE2];
|
||||
default: GtMaxExp = 1'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
|
||||
2'h1: GtMaxExp = &FullResultExp[`D_NE-1:0] | FullResultExp[`D_NE];
|
||||
2'h0: GtMaxExp = &FullResultExp[`S_NE-1:0] | FullResultExp[`S_NE];
|
||||
2'h2: GtMaxExp = &FullResultExp[`H_NE-1:0] | FullResultExp[`H_NE];
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
assign Overflow = GtMaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
|
||||
// Set Underflow flag if the number is too small to be represented in normal numbers
|
||||
// - Don't set the underflow flag if the result is exact
|
||||
|
||||
assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
// exp is negitive result is denorm exp was denorm but rounded to norm and if given an unbounded exponent it would stay denormal
|
||||
assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
// Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed result isn't outputed
|
||||
assign Inexact = (Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
|
||||
// Combine flags
|
||||
// - FMA can't set the Divide by zero flag
|
||||
// - Don't set the underflow flag if the result was rounded up to a normal number
|
||||
assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
module resultselect(
|
||||
input logic XSgnM, YSgnM, // input signs
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZZeroM,
|
||||
input logic ZSgnEffM, // the modified Z sign - depends on instruction
|
||||
input logic PSgnM, // the product's sign
|
||||
input logic ResultSgn, // the result's sign
|
||||
input logic CalcPlus1, // rounding bits
|
||||
input logic [`FLEN:0] RoundAdd, // how much to add to the result
|
||||
input logic Invalid, Overflow, Underflow, // flags
|
||||
input logic ResultDenorm, // is the result denormalized
|
||||
input logic [`NE-1:0] ResultExp, // Result exponent
|
||||
input logic [`NF-1:0] ResultFrac, // Result fraction
|
||||
output logic [`FLEN-1:0] FMAResM // FMA final result
|
||||
);
|
||||
logic InfSgn;
|
||||
logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InfResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult, NormResult; // possible results
|
||||
assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
|
||||
if (`FPSIZES == 1) begin
|
||||
if(`IEEE754) begin
|
||||
assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
|
||||
assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
|
||||
assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
|
||||
assign InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end else begin
|
||||
assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end
|
||||
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
|
||||
assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
|
||||
assign NormResult = {ResultSgn, ResultExp, ResultFrac};
|
||||
|
||||
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
|
||||
if(`IEEE754) begin
|
||||
assign XNaNResult = FmtM ? {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
|
||||
assign YNaNResult = FmtM ? {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
|
||||
assign ZNaNResult = FmtM ? {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
|
||||
assign InvalidResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end else begin
|
||||
assign XNaNResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end
|
||||
|
||||
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
|
||||
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
|
||||
YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
|
||||
ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
|
||||
InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end else begin
|
||||
XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
|
||||
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
|
||||
NormResult = {ResultSgn, ResultExp, ResultFrac};
|
||||
end
|
||||
`FMT1: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
|
||||
YNaNResult = {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
|
||||
ZNaNResult = {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
|
||||
InvalidResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end else begin
|
||||
XNaNResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
|
||||
end
|
||||
`FMT2: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {{`FLEN-`LEN2{1'b1}}, XSgnM, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
|
||||
YNaNResult = {{`FLEN-`LEN2{1'b1}}, YSgnM, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
|
||||
ZNaNResult = {{`FLEN-`LEN2{1'b1}}, ZSgnEffM, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
|
||||
InvalidResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
end else begin
|
||||
XNaNResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
end
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
|
||||
{{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
|
||||
end
|
||||
default: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = (`FLEN)'(0);
|
||||
YNaNResult = (`FLEN)'(0);
|
||||
ZNaNResult = (`FLEN)'(0);
|
||||
InvalidResult = (`FLEN)'(0);
|
||||
end else begin
|
||||
XNaNResult = (`FLEN)'(0);
|
||||
end
|
||||
OverflowResult = (`FLEN)'(0);
|
||||
KillProdResult = (`FLEN)'(0);
|
||||
UnderflowResult = (`FLEN)'(0);
|
||||
InfResult = (`FLEN)'(0);
|
||||
NormResult = (`FLEN)'(0);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
|
||||
YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
|
||||
ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
|
||||
InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end else begin
|
||||
XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
|
||||
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
|
||||
NormResult = {ResultSgn, ResultExp, ResultFrac};
|
||||
end
|
||||
2'h1: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {{`FLEN-`D_LEN{1'b1}}, XSgnM, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
|
||||
YNaNResult = {{`FLEN-`D_LEN{1'b1}}, YSgnM, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
|
||||
ZNaNResult = {{`FLEN-`D_LEN{1'b1}}, ZSgnEffM, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
|
||||
InvalidResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
end else begin
|
||||
XNaNResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
end
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
|
||||
{{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
|
||||
end
|
||||
2'h0: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {{`FLEN-`S_LEN{1'b1}}, XSgnM, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
|
||||
YNaNResult = {{`FLEN-`S_LEN{1'b1}}, YSgnM, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
|
||||
ZNaNResult = {{`FLEN-`S_LEN{1'b1}}, ZSgnEffM, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
|
||||
InvalidResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
|
||||
end else begin
|
||||
XNaNResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
|
||||
{{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
|
||||
end
|
||||
2'h2: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {{`FLEN-`H_LEN{1'b1}}, XSgnM, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
|
||||
YNaNResult = {{`FLEN-`H_LEN{1'b1}}, YSgnM, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
|
||||
ZNaNResult = {{`FLEN-`H_LEN{1'b1}}, ZSgnEffM, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
|
||||
InvalidResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
|
||||
end else begin
|
||||
XNaNResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
|
||||
{{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
|
||||
KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
if(`IEEE754) begin
|
||||
assign FMAResM = XNaNM ? XNaNResult :
|
||||
YNaNM ? YNaNResult :
|
||||
ZNaNM ? ZNaNResult :
|
||||
Invalid ? InvalidResult :
|
||||
XInfM|YInfM|ZInfM ? InfResult :
|
||||
KillProdM ? KillProdResult :
|
||||
Overflow ? OverflowResult :
|
||||
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
|
||||
NormResult;
|
||||
end else begin
|
||||
assign FMAResM = XNaNM|YNaNM|ZNaNM|Invalid ? XNaNResult :
|
||||
XInfM|YInfM|ZInfM ? InfResult :
|
||||
KillProdM ? KillProdResult :
|
||||
Overflow ? OverflowResult :
|
||||
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
|
||||
NormResult;
|
||||
end
|
||||
|
||||
endmodule
|
127
pipelined/src/fpu/fmashiftcalc.sv
Normal file
127
pipelined/src/fpu/fmashiftcalc.sv
Normal file
@ -0,0 +1,127 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fmashiftcalc(
|
||||
input logic [3*`NF+5:0] SumM, // the positive sum
|
||||
input logic [`NE-1:0] ZExpM, // exponent of Z
|
||||
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // normalization shift count
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic KillProdM, // is the product set to zero
|
||||
input logic ZDenormM,
|
||||
output logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
|
||||
output logic SumZero, // is the result denormalized - calculated before LZA corection
|
||||
output logic PreResultDenorm, // is the result denormalized - calculated before LZA corection
|
||||
output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count
|
||||
output logic [3*`NF+8:0] FmaShiftIn // is the sum zero
|
||||
);
|
||||
logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later
|
||||
logic [`NE+1:0] NormSumExp; // the exponent of the normalized sum with the `FLEN bias
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//*** insert bias-bias simplification in fcvt.sv/phone pictures
|
||||
// Determine if the sum is zero
|
||||
assign SumZero = ~(|SumM);
|
||||
|
||||
// calculate the sum's exponent
|
||||
assign NormSumExp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} + 1 - (`NE+2)'(`NF+4));
|
||||
|
||||
//convert the sum's exponent into the propper percision
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ConvNormSumExp = NormSumExp;
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ConvNormSumExp = FmtM ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: ConvNormSumExp = NormSumExp;
|
||||
`FMT1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
|
||||
`FMT2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
|
||||
default: ConvNormSumExp = `NE+2'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: ConvNormSumExp = NormSumExp;
|
||||
2'h1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
|
||||
2'h0: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
|
||||
2'h2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
// determine if the result is denormalized
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
logic Sum0LEZ, Sum0GEFL;
|
||||
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
|
||||
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
|
||||
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
|
||||
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
|
||||
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
|
||||
assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
|
||||
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
|
||||
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
|
||||
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
|
||||
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
|
||||
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
|
||||
`FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
|
||||
`FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
|
||||
default: PreResultDenorm = 1'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
|
||||
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
|
||||
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
|
||||
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|NormSumExp;
|
||||
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
|
||||
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|NormSumExp;
|
||||
assign Sum3LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
|
||||
assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
|
||||
2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
|
||||
2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
|
||||
2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
// 010. when should be 001.
|
||||
// - shift left one
|
||||
// - add one from exp
|
||||
// - if kill prod dont add to exp
|
||||
|
||||
// Determine if the result is denormal
|
||||
// assign PreResultDenorm = $signed(ConvNormSumExp)<=0 & ($signed(ConvNormSumExp)>=$signed(-FracLen)) & ~SumZero;
|
||||
|
||||
// Determine the shift needed for denormal results
|
||||
// - if not denorm add 1 to shift out the leading 1
|
||||
assign DenormShift = PreResultDenorm ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
|
||||
// set and calculate the shift input and amount
|
||||
assign FmaShiftIn = {3'b0, SumM};
|
||||
assign FmaShiftAmt = FmaNormCntM+DenormShift;
|
||||
endmodule
|
@ -45,6 +45,8 @@ module fpu (
|
||||
output logic FWriteIntE, // integer register write enables
|
||||
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
|
||||
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
|
||||
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
|
||||
output logic [1:0] FResSelW,
|
||||
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
|
||||
@ -68,24 +70,24 @@ module fpu (
|
||||
logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
|
||||
logic FDivStartD, FDivStartE; // Start division or squareroot
|
||||
logic FWriteIntD; // Write to integer register
|
||||
logic FWriteIntM; // Write to integer register
|
||||
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
|
||||
logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register
|
||||
logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register
|
||||
logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister
|
||||
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
|
||||
|
||||
// regfile signals
|
||||
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
|
||||
|
||||
// unpacking signals
|
||||
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
|
||||
logic XSgnM, YSgnM; // input's sign - memory stage
|
||||
logic XSgnM; // input's sign - memory stage
|
||||
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
|
||||
logic [`NE-1:0] ZExpM; // input's exponent - memory stage
|
||||
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage
|
||||
@ -95,7 +97,7 @@ module fpu (
|
||||
logic XNaNQ, YNaNQ; // is the input a NaN - divide
|
||||
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
|
||||
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
|
||||
logic XDenormE, ZDenormE; // is the input denormalized
|
||||
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
|
||||
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
|
||||
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
|
||||
logic XZeroQ, YZeroQ; // is the input zero - divide
|
||||
@ -104,24 +106,43 @@ module fpu (
|
||||
logic XInfQ, YInfQ; // is the input infinity - divide
|
||||
logic XExpMaxE; // is the exponent all ones (max value)
|
||||
logic FmtQ;
|
||||
logic FOpCtrlQ;
|
||||
logic FOpCtrlQ;
|
||||
|
||||
// Fma Signals
|
||||
logic [3*`NF+5:0] SumE, SumM;
|
||||
logic [`NE+1:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
logic KillProdE, KillProdM;
|
||||
logic InvZE, InvZM;
|
||||
logic NegSumE, NegSumM;
|
||||
logic ZSgnEffE, ZSgnEffM;
|
||||
logic PSgnE, PSgnM;
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
|
||||
|
||||
// Cvt Signals
|
||||
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent
|
||||
logic [`LOGLGLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
|
||||
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
|
||||
logic CvtResSgnE, CvtResSgnM; // the result's sign
|
||||
logic IntZeroE, IntZeroM; // is the integer zero?
|
||||
logic [`LGLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
|
||||
// result and flag signals
|
||||
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
|
||||
logic [4:0] FDivFlgM; // divide/squareroot flags
|
||||
logic [`FLEN-1:0] FMAResM, FMAResW; // FMA/multiply result
|
||||
logic [4:0] FMAFlgM; // FMA/multiply result
|
||||
logic [`FLEN-1:0] ReadResW; // read result (load instruction)
|
||||
logic [`FLEN-1:0] CvtResE; // FP <-> int convert result
|
||||
logic [`XLEN-1:0] CvtIntResE; // FP <-> int convert result
|
||||
logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this
|
||||
logic [`XLEN-1:0] ClassResE; // classify result
|
||||
logic [`FLEN-1:0] CmpResE; // compare result
|
||||
logic CmpNVE; // compare invalid flag (Not Valid)
|
||||
logic [`XLEN-1:0] FIntResE; // classify result
|
||||
logic [`FLEN-1:0] FpResM, FpResW; // classify result
|
||||
logic [`FLEN-1:0] PostProcResM; // classify result
|
||||
logic [4:0] PostProcFlgM; // classify result
|
||||
logic [`XLEN-1:0] FCvtIntResM;
|
||||
logic [`FLEN-1:0] CmpFpResE; // compare result
|
||||
logic [`XLEN-1:0] CmpIntResE; // compare result
|
||||
logic CmpNVE; // compare invalid flag (Not Valid)
|
||||
logic [`FLEN-1:0] SgnResE; // sign injection result
|
||||
logic [`FLEN-1:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
|
||||
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
|
||||
logic [`XLEN-1:0] FIntResE;
|
||||
logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage
|
||||
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
|
||||
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
|
||||
// other signals
|
||||
logic FDivSqrtDoneE; // is divide done
|
||||
@ -133,10 +154,20 @@ module fpu (
|
||||
|
||||
// DECODE STAGE
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// |||||||||||
|
||||
// ||| |||
|
||||
// ||| |||
|
||||
// ||| |||
|
||||
// ||| |||
|
||||
// ||| |||
|
||||
// |||||||||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate FP control signals
|
||||
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
|
||||
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
|
||||
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
|
||||
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD,
|
||||
.FmtD, .FrmD, .FWriteIntD);
|
||||
|
||||
// FP register file
|
||||
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
|
||||
@ -150,20 +181,31 @@ module fpu (
|
||||
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
{Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(16+int'(`FMTBITS-1)) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
|
||||
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
|
||||
flopenrc #(13+int'(`FMTBITS)) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
|
||||
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
|
||||
|
||||
// EXECUTION STAGE
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ||||||||||||
|
||||
// |||
|
||||
// |||
|
||||
// |||||||||
|
||||
// |||
|
||||
// |||
|
||||
// ||||||||||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Hazard unit for FPU
|
||||
// - determines if any forwarding or stalls are needed
|
||||
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
|
||||
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM,
|
||||
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
|
||||
|
||||
// forwarding muxs
|
||||
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
|
||||
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
|
||||
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
|
||||
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, FpResM, FForwardXE, FSrcXE);
|
||||
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, FpResM, FForwardYE, FPreSrcYE);
|
||||
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, FpResM, FForwardZE, FPreSrcZE);
|
||||
|
||||
|
||||
generate
|
||||
@ -178,7 +220,7 @@ module fpu (
|
||||
endgenerate
|
||||
|
||||
|
||||
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), FSrcYE); // Force Z to be 0 for multiply instructions
|
||||
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions
|
||||
|
||||
// Force Z to be 0 for multiply instructions
|
||||
generate
|
||||
@ -201,21 +243,12 @@ module fpu (
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE,
|
||||
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
|
||||
|
||||
// FMA
|
||||
// - two stage FMA
|
||||
// - execute stage - multiplication and addend shifting
|
||||
// - memory stage - addition and rounding
|
||||
// - handles FMA and multiply instructions
|
||||
fma fma (.clk, .reset, .FlushM, .StallM,
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
|
||||
.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM,
|
||||
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
|
||||
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
|
||||
.FOpCtrlE,
|
||||
.FmtE, .FmtM, .FrmM,
|
||||
.FMAFlgM, .FMAResM);
|
||||
|
||||
// fma - does multiply, add, and multiply-add instructions
|
||||
fma fma (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
|
||||
.XManE, .YManE, .ZManE, .XZeroE, .YZeroE, .ZZeroE,
|
||||
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .FmaNormCntE,
|
||||
.ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE);
|
||||
|
||||
// fpdivsqrt using Goldschmidt's iteration
|
||||
if(`FLEN == 64) begin
|
||||
@ -245,11 +278,14 @@ module fpu (
|
||||
|
||||
// other FP execution units
|
||||
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
|
||||
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE);
|
||||
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
|
||||
fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
|
||||
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
|
||||
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE,
|
||||
.XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE);
|
||||
|
||||
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE,
|
||||
.FWriteIntE, .XZeroE, .XDenormE, .FmtE, .CvtCalcExpE,
|
||||
.CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .IntZeroE,
|
||||
.CvtLzcInE);
|
||||
|
||||
// data to be stored in memory - to IEU
|
||||
// - FP uses NaN-blocking format
|
||||
@ -269,16 +305,16 @@ module fpu (
|
||||
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
|
||||
endgenerate
|
||||
// select a result that may be written to the FP register
|
||||
mux4 #(`FLEN) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
|
||||
mux4 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
|
||||
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE);
|
||||
assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
|
||||
|
||||
// select the result that may be written to the integer register - to IEU
|
||||
if (`FLEN>`XLEN)
|
||||
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE,
|
||||
CvtIntResE, FIntResSelE, FIntResE);
|
||||
assign IntSrcXE = FSrcXE[`XLEN-1:0];
|
||||
else
|
||||
mux4 #(`XLEN) IntResMux({{`XLEN-`FLEN{CmpResE[`FLEN-1:0]}}, CmpResE}, {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}, ClassResE,
|
||||
CvtIntResE, FIntResSelE, FIntResE);
|
||||
assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
|
||||
|
||||
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
|
||||
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
|
||||
// *** make sure the fpu matches the chapter diagram
|
||||
|
||||
@ -286,33 +322,68 @@ module fpu (
|
||||
|
||||
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
|
||||
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
|
||||
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
|
||||
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM);
|
||||
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
|
||||
flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM,
|
||||
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
|
||||
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
|
||||
flopenrc #(`FLEN) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM);
|
||||
flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
|
||||
flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
|
||||
flopenrc #(7+int'(`FMTBITS-1)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResultSelE, FrmE, FmtE},
|
||||
{FRegWriteM, FResultSelM, FrmM, FmtM});
|
||||
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
|
||||
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
|
||||
flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM,
|
||||
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
|
||||
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});
|
||||
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
|
||||
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
|
||||
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
|
||||
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
|
||||
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
|
||||
{AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
|
||||
flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
|
||||
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
|
||||
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
|
||||
|
||||
// BEGIN MEMORY STAGE
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ||| |||
|
||||
// |||||| ||||||
|
||||
// ||| ||| ||| |||
|
||||
// ||| ||||| |||
|
||||
// ||| ||| |||
|
||||
// ||| |||
|
||||
// ||| |||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
|
||||
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM,
|
||||
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM,
|
||||
.NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM,
|
||||
.CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM,
|
||||
.CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);
|
||||
|
||||
// FPU flag selection - to privileged
|
||||
mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM);
|
||||
mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
|
||||
mux2 #(`FLEN) FPUResMux (PreFpResM, PostProcResM, FResSelM[0], FpResM);
|
||||
|
||||
// M/W pipe registers
|
||||
flopenrc #(`FLEN) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
|
||||
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
|
||||
flopenrc #(`FLEN) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
|
||||
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
|
||||
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
|
||||
flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FRegWriteM, FResultSelM, FmtM},
|
||||
{FRegWriteW, FResultSelW, FmtW});
|
||||
{FRegWriteM, FResSelM, FmtM},
|
||||
{FRegWriteW, FResSelW, FmtW});
|
||||
|
||||
// BEGIN WRITEBACK STAGE
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ||| |||
|
||||
// ||| |||
|
||||
// ||| ||| |||
|
||||
// ||| ||||| |||
|
||||
// ||| ||| ||| |||
|
||||
// |||||| ||||||
|
||||
// ||| |||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// put ReadData into NaN-blocking format
|
||||
// - if there are any unsused bits the most significant bits are filled with 1s
|
||||
// - for load instruction
|
||||
@ -328,6 +399,6 @@ module fpu (
|
||||
endgenerate
|
||||
|
||||
// select the result to be written to the FP register
|
||||
if(`FLEN>=64)
|
||||
mux4 #(`FLEN) FPUResultMux (ReadResW, FMAResW, {{`FLEN-64{1'b0}},FDivResW}, FResW, FResultSelW, FPUResultW);
|
||||
mux2 #(`FLEN) FPUResultMux (FpResW, ReadResW, FResSelW[1], FPUResultW);
|
||||
|
||||
endmodule // fpu
|
||||
|
29
pipelined/src/fpu/lzacorrection.sv
Normal file
29
pipelined/src/fpu/lzacorrection.sv
Normal file
@ -0,0 +1,29 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module lzacorrection(
|
||||
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
|
||||
input logic FmaOp,
|
||||
input logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
|
||||
input logic PreResultDenorm, // is the result denormalized - calculated before LZA corection
|
||||
input logic KillProdM, // is the product set to zero
|
||||
input logic SumZero,
|
||||
output logic [`CORRSHIFTSZ-1:0] CorrShifted, // the shifted sum before LZA correction
|
||||
output logic [`NE+1:0] SumExp // exponent of the normalized sum
|
||||
);
|
||||
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
|
||||
logic ResDenorm; // is the result denormalized
|
||||
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
|
||||
|
||||
// LZA correction
|
||||
assign LZAPlus1 = Shifted[`NORMSHIFTSZ-2];
|
||||
assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1];
|
||||
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
|
||||
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
|
||||
assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
|
||||
// Determine sum's exponent
|
||||
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
|
||||
assign SumExp = (ConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &ConvNormSumExp&Shifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResDenorm)}};
|
||||
// recalculate if the result is denormalized
|
||||
assign ResDenorm = PreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
|
||||
|
||||
endmodule
|
46
pipelined/src/fpu/normshift.sv
Normal file
46
pipelined/src/fpu/normshift.sv
Normal file
@ -0,0 +1,46 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
|
||||
// convert shift
|
||||
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
|
||||
// process:
|
||||
// - start - CalcExp = 1 + XExp - Largest Bias
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary |
|
||||
//
|
||||
// - shift left 1 (1)
|
||||
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
|
||||
// . <- binary point
|
||||
//
|
||||
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
|
||||
// | 0's | Mantissa | 0's if nessisary |
|
||||
// | keep |
|
||||
//
|
||||
// fp -> fp:
|
||||
// - if result is denormalized or underflowed:
|
||||
// | `NF-1 zeros | Mantissa | 0's if nessisary | << NF+CalcExp-1
|
||||
// process:
|
||||
// - start
|
||||
// | mantissa | 0's |
|
||||
//
|
||||
// - shift right by NF-1 (NF-1)
|
||||
// | `NF-1 zeros | mantissa | 0's |
|
||||
//
|
||||
// - shift left by CalcExp = XExp - Largest bias + new bias
|
||||
// | 0's | mantissa | 0's |
|
||||
// | keep |
|
||||
//
|
||||
// - if the input is denormalized:
|
||||
// | lzcIn | 0's if nessisary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
//
|
||||
// int -> fp: | lzcIn | 0's if nessisary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
|
||||
module normshift(
|
||||
input logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt, // normalization shift count
|
||||
input logic [`NORMSHIFTSZ-1:0] ShiftIn, // is the sum zero
|
||||
output logic [`NORMSHIFTSZ-1:0] Shifted // is the sum zero
|
||||
);
|
||||
assign Shifted = ShiftIn << ShiftAmt;
|
||||
|
||||
endmodule
|
203
pipelined/src/fpu/postprocess.sv
Normal file
203
pipelined/src/fpu/postprocess.sv
Normal file
@ -0,0 +1,203 @@
|
||||
///////////////////////////////////////////
|
||||
//
|
||||
// Written: Katherine Parry, David Harris
|
||||
// Modified: 6/23/2021
|
||||
//
|
||||
// Purpose: Floating point multiply-accumulate of configurable size
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module postprocess(
|
||||
input logic XSgnM, // input signs
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
|
||||
input logic [3*`NF+5:0] SumM, // the positive sum
|
||||
input logic NegSumM, // was the sum negitive
|
||||
input logic InvZM, // do you invert Z
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZSgnEffM, // the modified Z sign - depends on instruction
|
||||
input logic PSgnM, // the product's sign
|
||||
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
input logic CvtResDenormUfM,
|
||||
input logic [`LOGLGLEN-1:0] CvtShiftAmtM, // how much to shift by
|
||||
input logic CvtResSgnM, // the result's sign
|
||||
input logic FWriteIntM, // is fp->int (since it's writting to the integer register)
|
||||
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
|
||||
input logic IntZeroM, // is the input zero
|
||||
input logic [1:0] PostProcSelM, // select result to be written to fp register
|
||||
output logic [`FLEN-1:0] PostProcResM, // FMA final result
|
||||
output logic [4:0] PostProcFlgM,
|
||||
output logic [`XLEN-1:0] FCvtIntResM // the int conversion result
|
||||
);
|
||||
|
||||
|
||||
|
||||
logic [`NF-1:0] ResFrac; // Result fraction
|
||||
logic [`NE-1:0] ResExp; // Result exponent
|
||||
logic [`CORRSHIFTSZ-1:0] CorrShifted; // the shifted sum before LZA correction
|
||||
logic [`NE+1:0] SumExp; // exponent of the normalized sum
|
||||
logic [`NE+1:0] FullResExp; // ResExp with bits to determine sign and overflow
|
||||
logic SumZero; // is the sum zero
|
||||
logic Sticky; // Sticky bit
|
||||
logic [3*`NF+8:0] FmaShiftIn; // is the sum zero
|
||||
logic UfPlus1; // do you add one (for determining underflow flag)
|
||||
logic Round; // bits needed to determine rounding
|
||||
logic [`LGLEN+`NF:0] CvtShiftIn; // number to be shifted
|
||||
logic Mult; // multiply opperation
|
||||
logic [`FLEN:0] RoundAdd; // how much to add to the result
|
||||
logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
|
||||
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
|
||||
logic [3*`NF+8:0] ShiftIn; // is the sum zero
|
||||
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
|
||||
logic Plus1; // add one to the final result?
|
||||
logic Overflow, Underflow, Invalid; // flags
|
||||
logic Signed; // is the opperation with a signed integer?
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
logic ToInt; // is the opperation an fp->int conversion?
|
||||
logic [`NE+1:0] RoundExp;
|
||||
logic [1:0] NegResMSBS;
|
||||
logic CvtOp;
|
||||
logic FmaOp;
|
||||
logic CvtResUf;
|
||||
logic DivOp;
|
||||
logic InfIn;
|
||||
logic ResSgn;
|
||||
logic NaNIn;
|
||||
logic UfLSBRes;
|
||||
logic Sqrt;
|
||||
logic [`FMTBITS-1:0] OutFmt;
|
||||
|
||||
// signals to help readability
|
||||
assign Signed = FOpCtrlM[0];
|
||||
assign Int64 = FOpCtrlM[1];
|
||||
assign IntToFp = FOpCtrlM[2];
|
||||
assign ToInt = FWriteIntM;
|
||||
assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0];
|
||||
assign CvtOp = (PostProcSelM == 2'b00);
|
||||
assign FmaOp = (PostProcSelM == 2'b10);
|
||||
assign DivOp = (PostProcSelM == 2'b01);
|
||||
assign Sqrt = FOpCtrlM[0];
|
||||
|
||||
// is there an input of infinity or NaN being used
|
||||
assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp);
|
||||
assign NaNIn = (XNaNM&~(IntToFp&CvtOp))|(YNaNM&~CvtOp)|(ZNaNM&FmaOp);
|
||||
|
||||
// choose the ouptut format depending on the opperation
|
||||
// - fp -> fp: OpCtrl contains the percision of the output
|
||||
// - otherwise: FmtM contains the percision of the output
|
||||
if (`FPSIZES == 2)
|
||||
assign OutFmt = IntToFp|~CvtOp ? FmtM : (FOpCtrlM[1:0] == `FMT);
|
||||
else if (`FPSIZES == 3 | `FPSIZES == 4)
|
||||
assign OutFmt = IntToFp|~CvtOp ? FmtM : FOpCtrlM[1:0];
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .XManM, .CvtLzcInM,
|
||||
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
|
||||
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
|
||||
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
|
||||
|
||||
always_comb
|
||||
case(PostProcSelM)
|
||||
2'b10: begin // fma
|
||||
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(3*`NF+7){1'b0}}, FmaShiftAmt};
|
||||
ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
|
||||
end
|
||||
2'b00: begin // cvt
|
||||
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM};
|
||||
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}};
|
||||
end
|
||||
2'b01: begin //div
|
||||
ShiftAmt = 0;//{DivShiftAmt};
|
||||
ShiftIn = 0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn};
|
||||
end
|
||||
default: begin
|
||||
ShiftAmt = 0;
|
||||
ShiftIn = 0;
|
||||
end
|
||||
endcase
|
||||
|
||||
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
|
||||
|
||||
lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp,
|
||||
.SumZero, .Shifted, .SumExp, .CorrShifted);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// round to zero
|
||||
// round to -infinity
|
||||
// round to infinity
|
||||
// round to nearest max magnitude
|
||||
|
||||
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM,
|
||||
.InvZM, .ResSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf,
|
||||
.UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Sign calculation
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
resultsign resultsign(.FrmM, .PSgnM, .PostProcSelM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky,
|
||||
.ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, .CvtResSgnM, .ResSgn);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZeroM, .YZeroM,
|
||||
.XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
|
||||
.XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round,
|
||||
.UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
|
||||
.RoundExp, .NegResMSBS, .Invalid, .Overflow, .Underflow, .PostProcFlgM);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
resultselect resultselect(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM, .XZeroM,
|
||||
.IntZeroM, .FrmM, .OutFmt, .AddendStickyM, .KillProdM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .CvtResUf,
|
||||
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegResMSBS,
|
||||
.FullResExp, .Shifted, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM);
|
||||
|
||||
endmodule
|
282
pipelined/src/fpu/resultselect.sv
Normal file
282
pipelined/src/fpu/resultselect.sv
Normal file
@ -0,0 +1,282 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module resultselect(
|
||||
input logic XSgnM, // input signs
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic InfIn,
|
||||
input logic XZeroM,
|
||||
input logic IntZeroM,
|
||||
input logic NaNIn,
|
||||
input logic IntToFp,
|
||||
input logic Int64,
|
||||
input logic Signed,
|
||||
input logic CvtOp,
|
||||
input logic [`NORMSHIFTSZ-1:0] Shifted, // is the sum zero
|
||||
input logic FmaOp,
|
||||
input logic Plus1,
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZZeroM,
|
||||
input logic ResSgn, // the res's sign
|
||||
input logic [`FLEN:0] RoundAdd, // how much to add to the res
|
||||
input logic Invalid, Overflow, // flags
|
||||
input logic CvtResUf,
|
||||
input logic [`NE-1:0] ResExp, // Res exponent
|
||||
input logic [`NE+1:0] FullResExp, // Res exponent
|
||||
input logic [`NF-1:0] ResFrac, // Res fraction
|
||||
output logic [`FLEN-1:0] PostProcResM, // final res
|
||||
output logic [1:0] NegResMSBS,
|
||||
output logic [`XLEN-1:0] FCvtIntResM // final res
|
||||
);
|
||||
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, KillProdRes, UfRes, NormRes; // possible results
|
||||
logic OfResMax;
|
||||
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
|
||||
logic [`XLEN+1:0] NegRes; // the negation of the result
|
||||
logic KillRes;
|
||||
|
||||
|
||||
// does the overflow result output the maximum normalized floating point number
|
||||
// output infinity if the input is infinity
|
||||
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
|
||||
//NaN res selection depending on standard
|
||||
if(`IEEE754) begin
|
||||
assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
|
||||
assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
|
||||
assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
|
||||
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end else begin
|
||||
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
assign OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
|
||||
assign NormRes = {ResSgn, ResExp, ResFrac};
|
||||
|
||||
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
|
||||
if(`IEEE754) begin
|
||||
assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
|
||||
assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
|
||||
assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
|
||||
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end else begin
|
||||
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end
|
||||
|
||||
assign OfRes = OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
|
||||
OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
|
||||
assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
|
||||
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
|
||||
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
|
||||
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end else begin
|
||||
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
|
||||
NormRes = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
`FMT1: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
|
||||
YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
|
||||
ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
|
||||
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end
|
||||
OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
|
||||
NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
end
|
||||
`FMT2: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
|
||||
YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
|
||||
ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
|
||||
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
|
||||
NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
|
||||
end
|
||||
default: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = (`FLEN)'(0);
|
||||
YNaNRes = (`FLEN)'(0);
|
||||
ZNaNRes = (`FLEN)'(0);
|
||||
InvalidRes = (`FLEN)'(0);
|
||||
end else begin
|
||||
InvalidRes = (`FLEN)'(0);
|
||||
end
|
||||
OfRes = (`FLEN)'(0);
|
||||
KillProdRes = (`FLEN)'(0);
|
||||
UfRes = (`FLEN)'(0);
|
||||
NormRes = (`FLEN)'(0);
|
||||
end
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
|
||||
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
|
||||
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
|
||||
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end else begin
|
||||
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
|
||||
NormRes = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
2'h1: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
|
||||
YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
|
||||
ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
|
||||
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
end
|
||||
OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
|
||||
NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
|
||||
end
|
||||
2'h0: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
|
||||
YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
|
||||
ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
|
||||
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
|
||||
NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
|
||||
end
|
||||
2'h2: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
|
||||
YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
|
||||
ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
|
||||
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
|
||||
KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
|
||||
NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
|
||||
end
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// determine if you shoould kill the res - Cvt
|
||||
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
|
||||
// - dont set to zero if fp input is zero but not using the fp input
|
||||
// - dont set to zero if int input is zero but not using the int input
|
||||
assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1];//Underflow & ~ResDenorm & (ResExp!=1);
|
||||
|
||||
if(`IEEE754) begin
|
||||
assign PostProcResM = XNaNM&~(IntToFp&CvtOp) ? XNaNRes :
|
||||
YNaNM&~CvtOp ? YNaNRes :
|
||||
ZNaNM&FmaOp ? ZNaNRes :
|
||||
Invalid ? InvalidRes :
|
||||
Overflow|InfIn ? OfRes :
|
||||
KillProdM&FmaOp ? KillProdRes :
|
||||
KillRes ? UfRes :
|
||||
NormRes;
|
||||
end else begin
|
||||
assign PostProcResM = NaNIn|Invalid ? InvalidRes :
|
||||
Overflow|InfIn ? OfRes :
|
||||
KillProdM&FmaOp ? KillProdRes :
|
||||
KillRes ? UfRes :
|
||||
NormRes;
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// ||||||||||| ||| ||| |||||||||||||
|
||||
// ||| |||||| ||| |||
|
||||
// ||| ||| ||| ||| |||
|
||||
// ||| ||| |||||| |||
|
||||
// ||||||||||| ||| ||| |||
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// *** probably can optimize the negation
|
||||
// select the overflow integer res
|
||||
// - negitive infinity and out of range negitive input
|
||||
// | int | long |
|
||||
// signed | -2^31 | -2^63 |
|
||||
// unsigned | 0 | 0 |
|
||||
//
|
||||
// - positive infinity and out of range negitive input and NaNs
|
||||
// | int | long |
|
||||
// signed | 2^31-1 | 2^63-1 |
|
||||
// unsigned | 2^32-1 | 2^64-1 |
|
||||
//
|
||||
// other: 32 bit unsinged res should be sign extended as if it were a signed number
|
||||
assign OfIntRes = Signed ? XSgnM&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
|
||||
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
|
||||
XSgnM&~XNaNM ? {`XLEN{1'b0}} : // unsigned negitive
|
||||
{`XLEN{1'b1}};// unsigned positive
|
||||
|
||||
// round and negate the positive res if needed
|
||||
assign NegRes = XSgnM ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
|
||||
|
||||
//*** false critical path probably
|
||||
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
|
||||
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
|
||||
|
||||
// select the integer output
|
||||
// - if the input is invalid (out of bounds NaN or Inf) then output overflow res
|
||||
// - if the input underflows
|
||||
// - if rounding and signed opperation and negitive input, output -1
|
||||
// - otherwise output a rounded 0
|
||||
// - otherwise output the normal res (trmined and sign extended if nessisary)
|
||||
assign FCvtIntResM = Invalid ? OfIntRes :
|
||||
CvtCalcExpM[`NE] ? XSgnM&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
|
||||
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
|
||||
endmodule
|
50
pipelined/src/fpu/resultsign.sv
Normal file
50
pipelined/src/fpu/resultsign.sv
Normal file
@ -0,0 +1,50 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module resultsign(
|
||||
input logic [2:0] FrmM,
|
||||
input logic PSgnM, ZSgnEffM,
|
||||
input logic InvZM,
|
||||
input logic ZInfM,
|
||||
input logic InfIn,
|
||||
input logic NegSumM,
|
||||
input logic [1:0] PostProcSelM,
|
||||
input logic [`NE+1:0] SumExp,
|
||||
input logic SumZero,
|
||||
input logic Mult,
|
||||
input logic Round,
|
||||
input logic Sticky,
|
||||
input logic CvtResSgnM,
|
||||
output logic ResSgn
|
||||
);
|
||||
|
||||
logic ZeroSgn;
|
||||
logic InfSgn;
|
||||
logic FmaResSgn;
|
||||
logic FmaResSgnTmp;
|
||||
logic Underflow;
|
||||
// logic ResultSgnTmp;
|
||||
|
||||
// Determine the sign if the sum is zero
|
||||
// if cancelation then 0 unless round to -infinity
|
||||
// if multiply then Psgn
|
||||
// otherwise psign
|
||||
assign Underflow = SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky));
|
||||
assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM;
|
||||
|
||||
|
||||
// is the result negitive
|
||||
// if p - z is the Sum negitive
|
||||
// if -p + z is the Sum positive
|
||||
// if -p - z then the Sum is negitive
|
||||
assign FmaResSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | (ZSgnEffM&PSgnM);
|
||||
assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
|
||||
assign FmaResSgn = InfIn ? InfSgn : SumZero ? ZeroSgn : FmaResSgnTmp;
|
||||
|
||||
always_comb
|
||||
case(PostProcSelM)
|
||||
2'b10: ResSgn = FmaResSgn; // fma
|
||||
2'b00: ResSgn = CvtResSgnM; // cvt
|
||||
2'b01: ResSgn = 0; // divide
|
||||
default: ResSgn = 1'bx;
|
||||
endcase
|
||||
endmodule
|
316
pipelined/src/fpu/round.sv
Normal file
316
pipelined/src/fpu/round.sv
Normal file
@ -0,0 +1,316 @@
|
||||
`include "wally-config.vh"
|
||||
// what position is XLEN in?
|
||||
// options:
|
||||
// 1: XLEN > NF > NF1
|
||||
// 2: NF > XLEN > NF1
|
||||
// 3: NF > NF1 > XLEN
|
||||
// single and double will always be smaller than XLEN
|
||||
`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
|
||||
|
||||
module round(
|
||||
input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single
|
||||
input logic [2:0] FrmM, // rounding mode
|
||||
input logic FmaOp,
|
||||
input logic [1:0] PostProcSelM,
|
||||
input logic CvtResDenormUfM,
|
||||
input logic ToInt,
|
||||
input logic CvtOp,
|
||||
input logic CvtResUf,
|
||||
input logic [`CORRSHIFTSZ-1:0] CorrShifted,
|
||||
input logic AddendStickyM, // addend's sticky bit
|
||||
input logic ZZeroM, // is Z zero
|
||||
input logic InvZM, // invert Z
|
||||
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
input logic ResSgn, // the result's sign
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
output logic UfPlus1, // do you add or subtract on from the result
|
||||
output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
|
||||
output logic [`NF-1:0] ResFrac, // Result fraction
|
||||
output logic [`NE-1:0] ResExp, // Result exponent
|
||||
output logic Sticky, // sticky bit
|
||||
output logic [`NE+1:0] RoundExp,
|
||||
output logic Plus1,
|
||||
output logic [`FLEN:0] RoundAdd, // how much to add to the result
|
||||
output logic Round, UfLSBRes // bits needed to calculate rounding
|
||||
);
|
||||
logic LSBRes; // bit used for rounding - least significant bit of the normalized sum
|
||||
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
|
||||
logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
|
||||
logic NormSumSticky; // normalized sum's sticky bit
|
||||
logic UfSticky; // sticky bit for underlow calculation
|
||||
logic [`NF-1:0] RoundFrac;
|
||||
logic FpRes, IntRes;
|
||||
logic UfRound;
|
||||
logic FpRound, FpLSBRes, FpUfRound;
|
||||
logic CalcPlus1, FpPlus1;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// {Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
|
||||
// - plus 1 otherwise
|
||||
|
||||
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to -infinity
|
||||
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to infinity
|
||||
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
|
||||
|
||||
// round to nearest max magnitude
|
||||
// {Guard, Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 10 - tie - Plus1
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
|
||||
// - Plus 1 otherwise
|
||||
|
||||
assign IntRes = CvtOp & ToInt;
|
||||
assign FpRes = ~IntRes;
|
||||
|
||||
// sticky bit calculation
|
||||
if (`FPSIZES == 1) begin
|
||||
|
||||
// 1: XLEN > NF
|
||||
// | XLEN |
|
||||
// | NF |1|1|
|
||||
// ^ ^ if floating point result
|
||||
// ^ if not an FMA result
|
||||
if (`XLENPOS == 1)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
// 2: NF > XLEN
|
||||
if (`XLENPOS == 2)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// XLEN is either 64 or 32
|
||||
// so half and single are always smaller then XLEN
|
||||
|
||||
// 1: XLEN > NF > NF1
|
||||
if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
// 2: NF > XLEN > NF1
|
||||
if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
// 1: XLEN > NF > NF1
|
||||
if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
// 2: NF > XLEN > NF1
|
||||
if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
// Quad precision will always be greater than XLEN
|
||||
// 2: NF > XLEN > NF1
|
||||
if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
|
||||
if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
// only add the Addend sticky if doing an FMA opperation
|
||||
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
|
||||
assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp;
|
||||
|
||||
// determine round and LSB of the rounded value
|
||||
// - underflow round bit is used to determint the underflow flag
|
||||
if (`FPSIZES == 1) begin
|
||||
assign FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
|
||||
assign FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
|
||||
assign FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign FpRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-1] : CorrShifted[`CORRSHIFTSZ-`NF1-1];
|
||||
assign FpLSBRes = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF] : CorrShifted[`CORRSHIFTSZ-`NF1];
|
||||
assign FpUfRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-2] : CorrShifted[`CORRSHIFTSZ-`NF1-2];
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
|
||||
end
|
||||
`FMT1: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`NF1-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF1];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF1-2];
|
||||
end
|
||||
`FMT2: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`NF2-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF2];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF2-2];
|
||||
end
|
||||
default: begin
|
||||
FpRound = 1'bx;
|
||||
FpLSBRes = 1'bx;
|
||||
FpUfRound = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`Q_NF];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-2];
|
||||
end
|
||||
2'h1: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`D_NF-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`D_NF];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`D_NF-2];
|
||||
end
|
||||
2'h0: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`S_NF-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`S_NF];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`S_NF-2];
|
||||
end
|
||||
2'h2: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`H_NF-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`H_NF];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`H_NF-2];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign Round = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-1] : FpRound;
|
||||
assign LSBRes = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
|
||||
assign UfRound = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfLSBRes = FpRound;
|
||||
// determine sticky
|
||||
assign Sticky = UfSticky | UfRound;
|
||||
|
||||
|
||||
// Deterimine if a small number was supposed to be subtrated - For Fma calculation only
|
||||
assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM & FmaOp;
|
||||
assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM & FmaOp;
|
||||
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (FrmM)
|
||||
3'b000: CalcPlus1 = Round & ((Sticky| LSBRes)&~SubBySmallNum);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = ResSgn & ~(SubBySmallNum & ~Round);//round down
|
||||
3'b011: CalcPlus1 = ~ResSgn & ~(SubBySmallNum & ~Round);//round up
|
||||
3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you add 1 (for underflow flag)
|
||||
case (FrmM)
|
||||
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
|
||||
3'b001: UfCalcPlus1 = 0;//round to zero
|
||||
3'b010: UfCalcPlus1 = ResSgn & ~(UfSubBySmallNum & ~UfRound);//round down
|
||||
3'b011: UfCalcPlus1 = ~ResSgn & ~(UfSubBySmallNum & ~UfRound);//round up
|
||||
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
|
||||
default: UfCalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you subtract 1
|
||||
case (FrmM)
|
||||
3'b000: CalcMinus1 = 0;//round to nearest even
|
||||
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
|
||||
3'b010: CalcMinus1 = ~ResSgn & ~Round & SubBySmallNum;//round down
|
||||
3'b011: CalcMinus1 = ResSgn & ~Round & SubBySmallNum;//round up
|
||||
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
|
||||
default: CalcMinus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (Sticky | Round);
|
||||
assign FpPlus1 = Plus1&~(ToInt&CvtOp);
|
||||
assign UfPlus1 = UfCalcPlus1 & Sticky; // UfRound is part of sticky
|
||||
assign Minus1 = CalcMinus1 & (Sticky | Round);
|
||||
|
||||
// Compute rounded result
|
||||
if (`FPSIZES == 1) begin
|
||||
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// \/FLEN+1
|
||||
// | NE+2 | NF |
|
||||
// '-NE+2-^----NF1----^
|
||||
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
|
||||
assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} :
|
||||
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (OutFmt)
|
||||
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
|
||||
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
|
||||
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
|
||||
default: RoundAdd = (`FLEN+1)'(0);
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (OutFmt)
|
||||
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
|
||||
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
|
||||
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
|
||||
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
// determine the result to be roundned
|
||||
assign RoundFrac = CorrShifted[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
|
||||
|
||||
always_comb
|
||||
case(PostProcSelM)
|
||||
2'b10: RoundExp = SumExp; // fma
|
||||
2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
|
||||
2'b01: RoundExp = 0; // divide
|
||||
default: RoundExp = 0;
|
||||
endcase
|
||||
|
||||
// round the result
|
||||
// - if the fraction overflows one should be added to the exponent
|
||||
assign {FullResExp, ResFrac} = {RoundExp, RoundFrac} + RoundAdd;
|
||||
assign ResExp = FullResExp[`NE-1:0];
|
||||
|
||||
|
||||
endmodule
|
@ -61,6 +61,8 @@ module datapath (
|
||||
(* mark_debug = "true" *) input logic RegWriteW,
|
||||
input logic SquashSCW,
|
||||
input logic [2:0] ResultSrcW,
|
||||
input logic [`XLEN-1:0] FCvtIntResW,
|
||||
input logic [1:0] FResSelW,
|
||||
output logic [`XLEN-1:0] ReadDataW,
|
||||
// input logic [`XLEN-1:0] PCLinkW,
|
||||
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
|
||||
@ -120,14 +122,17 @@ module datapath (
|
||||
flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
|
||||
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
|
||||
flopen #(`XLEN) ReadDataWReg(clk, ~StallW, ReadDataM, ReadDataW);
|
||||
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
|
||||
|
||||
// floating point interactions: fcvt, fp stores
|
||||
if (`F_SUPPORTED) begin:fpmux
|
||||
logic [`XLEN-1:0] IFCvtResultW;
|
||||
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
|
||||
mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
|
||||
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
|
||||
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
|
||||
end else begin:fpmux
|
||||
assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE;
|
||||
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
|
||||
end
|
||||
|
||||
// handle Store Conditional result if atomic extension supported
|
||||
|
@ -61,6 +61,8 @@ module ieu (
|
||||
|
||||
// Writeback stage
|
||||
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
|
||||
input logic [1:0] FResSelW,
|
||||
input logic [`XLEN-1:0] FCvtIntResW,
|
||||
output logic [4:0] RdW,
|
||||
output logic [`XLEN-1:0] ReadDataW,
|
||||
// input logic [`XLEN-1:0] PCLinkW,
|
||||
@ -105,8 +107,8 @@ module ieu (
|
||||
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
|
||||
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .IllegalFPUInstrE,
|
||||
.FWriteDataE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
|
||||
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE,
|
||||
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW,
|
||||
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE, .FResSelW,
|
||||
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
|
||||
.CSRReadValW, .ReadDataM, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
|
||||
|
||||
forward fw(
|
||||
|
@ -38,9 +38,13 @@ module ifu (
|
||||
// Bus interface
|
||||
(* mark_debug = "true" *) input logic [`XLEN-1:0] IFUBusHRDATA,
|
||||
(* mark_debug = "true" *) input logic IFUBusAck,
|
||||
(* mark_debug = "true" *) input logic IFUBusInit,
|
||||
(* mark_debug = "true" *) output logic [`PA_BITS-1:0] IFUBusAdr,
|
||||
(* mark_debug = "true" *) output logic IFUBusRead,
|
||||
(* mark_debug = "true" *) output logic IFUStallF,
|
||||
(* mark_debug = "true" *) output logic [2:0] IFUBurstType,
|
||||
(* mark_debug = "true" *) output logic [1:0] IFUTransType,
|
||||
(* mark_debug = "true" *) output logic IFUTransComplete,
|
||||
(* mark_debug = "true" *) output logic [`XLEN-1:0] PCF,
|
||||
// Execute
|
||||
output logic [`XLEN-1:0] PCLinkE,
|
||||
@ -201,8 +205,8 @@ module ifu (
|
||||
|
||||
busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
|
||||
busdp(.clk, .reset,
|
||||
.LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusWrite(), .LSUBusWriteCrit(),
|
||||
.LSUBusRead(IFUBusRead), .LSUBusSize(),
|
||||
.LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusInit(IFUBusInit), .LSUBusWrite(), .LSUBusWriteCrit(),
|
||||
.LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUBurstType(IFUBurstType), .LSUTransType(IFUTransType), .LSUTransComplete(IFUTransComplete),
|
||||
.LSUFunct3M(3'b010), .LSUBusAdr(IFUBusAdr), .DCacheBusAdr(ICacheBusAdr),
|
||||
.WordCount(),
|
||||
.DCacheFetchLine(ICacheFetchLine),
|
||||
|
@ -40,9 +40,13 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
|
||||
// bus interface
|
||||
input logic [`XLEN-1:0] LSUBusHRDATA,
|
||||
input logic LSUBusAck,
|
||||
input logic LSUBusInit,
|
||||
output logic LSUBusWrite,
|
||||
output logic LSUBusRead,
|
||||
output logic [2:0] LSUBusSize,
|
||||
output logic [2:0] LSUBusSize,
|
||||
output logic [2:0] LSUBurstType,
|
||||
output logic [1:0] LSUTransType, // For AHBLite
|
||||
output logic LSUTransComplete,
|
||||
input logic [2:0] LSUFunct3M,
|
||||
output logic [`PA_BITS-1:0] LSUBusAdr, // ** change name to HADDR to make ahb lite.
|
||||
output logic [LOGWPL-1:0] WordCount,
|
||||
@ -66,13 +70,15 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
|
||||
|
||||
localparam integer WordCountThreshold = CACHE_ENABLED ? WORDSPERLINE - 1 : 0;
|
||||
logic [`PA_BITS-1:0] LocalLSUBusAdr;
|
||||
logic [LOGWPL-1:0] WordCountDelayed;
|
||||
|
||||
|
||||
// *** implement flops as an array if feasbile; DCacheBusWriteData might be a problem
|
||||
// *** better name than DCacheBusWriteData
|
||||
genvar index;
|
||||
for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer
|
||||
logic [WORDSPERLINE-1:0] CaptureWord;
|
||||
assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCount);
|
||||
assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCountDelayed);
|
||||
flopen #(`XLEN) fb(.clk, .en(CaptureWord[index]), .d(LSUBusHRDATA),
|
||||
.q(DCacheBusWriteData[(index+1)*`XLEN-1:index*`XLEN]));
|
||||
end
|
||||
@ -83,6 +89,6 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
|
||||
|
||||
busfsm #(WordCountThreshold, LOGWPL, CACHE_ENABLED) busfsm(
|
||||
.clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine,
|
||||
.LSUBusAck, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead,
|
||||
.DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount);
|
||||
.LSUBusAck, .LSUBusInit, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead,
|
||||
.LSUBurstType, .LSUTransType, .LSUTransComplete, .DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount, .WordCountDelayed);
|
||||
endmodule
|
||||
|
@ -41,6 +41,7 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
input logic DCacheFetchLine,
|
||||
input logic DCacheWriteLine,
|
||||
input logic LSUBusAck,
|
||||
input logic LSUBusInit, // This might be better as LSUBusLock, or to send this using LSUBusAck.
|
||||
input logic CPUBusy,
|
||||
input logic CacheableM,
|
||||
|
||||
@ -48,10 +49,13 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
output logic LSUBusWrite,
|
||||
output logic LSUBusWriteCrit,
|
||||
output logic LSUBusRead,
|
||||
output logic [2:0] LSUBurstType,
|
||||
output logic LSUTransComplete,
|
||||
output logic [1:0] LSUTransType,
|
||||
output logic DCacheBusAck,
|
||||
output logic BusCommittedM,
|
||||
output logic SelUncachedAdr,
|
||||
output logic [LOGWPL-1:0] WordCount);
|
||||
output logic [LOGWPL-1:0] WordCount, WordCountDelayed);
|
||||
|
||||
|
||||
|
||||
@ -61,7 +65,8 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
logic CntReset;
|
||||
logic WordCountFlag;
|
||||
logic [LOGWPL-1:0] NextWordCount;
|
||||
logic UnCachedAccess;
|
||||
logic UnCachedAccess, UnCachedRW;
|
||||
logic [2:0] LocalBurstType;
|
||||
|
||||
|
||||
typedef enum logic [2:0] {STATE_BUS_READY,
|
||||
@ -75,18 +80,27 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
|
||||
(* mark_debug = "true" *) busstatetype BusCurrState, BusNextState;
|
||||
|
||||
|
||||
// Used to send address for address stage of AHB.
|
||||
flopenr #(LOGWPL)
|
||||
WordCountReg(.clk(clk),
|
||||
.reset(reset | CntReset),
|
||||
.en(CntEn),
|
||||
.d(NextWordCount),
|
||||
.q(WordCount));
|
||||
.q(WordCount));
|
||||
|
||||
// Used to store data from data phase of AHB.
|
||||
flopenr #(LOGWPL)
|
||||
WordCountDelayedReg(.clk(clk),
|
||||
.reset(reset | CntReset),
|
||||
.en(CntEn),
|
||||
.d(WordCount),
|
||||
.q(WordCountDelayed));
|
||||
|
||||
assign NextWordCount = WordCount + 1'b1;
|
||||
|
||||
assign WordCountFlag = (WordCount == WordCountThreshold[LOGWPL-1:0]);
|
||||
assign CntEn = PreCntEn & LSUBusAck;
|
||||
assign PreCntEn = (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_WRITE);
|
||||
assign WordCountFlag = (WordCountDelayed == WordCountThreshold[LOGWPL-1:0]); // Detect when we are waiting on the final access.
|
||||
assign CntEn = (PreCntEn & LSUBusAck | (LSUBusInit)) & ~WordCountFlag & ~UnCachedRW; // Want to count when doing cache accesses and we aren't wrapping up.
|
||||
|
||||
assign UnCachedAccess = ~CACHE_ENABLED | ~CacheableM;
|
||||
|
||||
@ -120,14 +134,29 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
case(WordCountThreshold)
|
||||
0: LocalBurstType = 3'b000;
|
||||
3: LocalBurstType = 3'b011; // INCR4
|
||||
7: LocalBurstType = 3'b101; // INCR8
|
||||
15: LocalBurstType = 3'b111; // INCR16
|
||||
default: LocalBurstType = 3'b001; // INCR without end.
|
||||
endcase
|
||||
end
|
||||
|
||||
assign CntReset = BusCurrState == STATE_BUS_READY;
|
||||
// Would these be better as always_comb statements or muxes?
|
||||
assign LSUBurstType = (UnCachedRW) ? 3'b0 : LocalBurstType; // Don't want to use burst when doing an Uncached Access.
|
||||
assign LSUTransComplete = (UnCachedRW) ? LSUBusAck : WordCountFlag & LSUBusAck;
|
||||
// Use SEQ if not doing first word, NONSEQ if doing the first read/write, and IDLE if finishing up.
|
||||
assign LSUTransType = (|WordCount) & ~UnCachedRW ? 2'b11 : (LSUBusRead | LSUBusWrite) & (~LSUTransComplete) ? 2'b10 : 2'b00;
|
||||
// Reset if we aren't initiating a transaction or if we are finishing a transaction.
|
||||
assign CntReset = BusCurrState == STATE_BUS_READY & ~(DCacheFetchLine | DCacheWriteLine) | LSUTransComplete;
|
||||
|
||||
assign BusStall = (BusCurrState == STATE_BUS_READY & ~IgnoreRequest & ((UnCachedAccess & (|LSURWM)) | DCacheFetchLine | DCacheWriteLine)) |
|
||||
(BusCurrState == STATE_BUS_UNCACHED_WRITE) |
|
||||
(BusCurrState == STATE_BUS_UNCACHED_READ) |
|
||||
(BusCurrState == STATE_BUS_FETCH) |
|
||||
(BusCurrState == STATE_BUS_WRITE);
|
||||
assign PreCntEn = BusCurrState == STATE_BUS_FETCH | BusCurrState == STATE_BUS_WRITE;
|
||||
assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0] & ~IgnoreRequest) |
|
||||
(BusCurrState == STATE_BUS_UNCACHED_WRITE);
|
||||
assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE);
|
||||
@ -139,6 +168,10 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
(BusCurrState == STATE_BUS_UNCACHED_READ);
|
||||
assign LSUBusRead = UnCachedLSUBusRead | (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_READY & DCacheFetchLine);
|
||||
|
||||
|
||||
// Makes bus only do uncached reads/writes when we actually do uncached reads/writes. Needed because CacheableM is 0 when flushing cache.
|
||||
assign UnCachedRW = UnCachedLSUBusWrite | UnCachedLSUBusRead;
|
||||
|
||||
assign DCacheBusAck = (BusCurrState == STATE_BUS_FETCH & WordCountFlag & LSUBusAck) |
|
||||
(BusCurrState == STATE_BUS_WRITE & WordCountFlag & LSUBusAck);
|
||||
assign BusCommittedM = BusCurrState != STATE_BUS_READY;
|
||||
|
@ -66,9 +66,13 @@ module lsu (
|
||||
(* mark_debug = "true" *) output logic LSUBusRead,
|
||||
(* mark_debug = "true" *) output logic LSUBusWrite,
|
||||
(* mark_debug = "true" *) input logic LSUBusAck,
|
||||
(* mark_debug = "true" *) input logic LSUBusInit,
|
||||
(* mark_debug = "true" *) input logic [`XLEN-1:0] LSUBusHRDATA,
|
||||
(* mark_debug = "true" *) output logic [`XLEN-1:0] LSUBusHWDATA,
|
||||
(* mark_debug = "true" *) output logic [2:0] LSUBusSize,
|
||||
(* mark_debug = "true" *) output logic [2:0] LSUBurstType,
|
||||
(* mark_debug = "true" *) output logic [1:0] LSUTransType,
|
||||
(* mark_debug = "true" *) output logic LSUTransComplete,
|
||||
// page table walker
|
||||
input logic [`XLEN-1:0] SATP_REGW, // from csr
|
||||
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
|
||||
@ -211,7 +215,7 @@ module lsu (
|
||||
|
||||
busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) busdp(
|
||||
.clk, .reset,
|
||||
.LSUBusHRDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusSize,
|
||||
.LSUBusHRDATA, .LSUBusAck, .LSUBusInit, .LSUBusWrite, .LSUBusRead, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
|
||||
.WordCount, .LSUBusWriteCrit,
|
||||
.LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .DCacheFetchLine,
|
||||
.DCacheWriteLine, .DCacheBusAck, .DCacheBusWriteData, .LSUPAdrM,
|
||||
|
@ -525,7 +525,7 @@ module ppa_decoder #(parameter WIDTH = 8) (
|
||||
end
|
||||
endmodule
|
||||
|
||||
module ppa_mux2_1 #(parameter WIDTH = 1) (
|
||||
module ppa_mux2d_1 #(parameter WIDTH = 1) (
|
||||
input logic [WIDTH-1:0] d0, d1,
|
||||
input logic s,
|
||||
output logic [WIDTH-1:0] y);
|
||||
@ -533,7 +533,7 @@ module ppa_mux2_1 #(parameter WIDTH = 1) (
|
||||
assign y = s ? d1 : d0;
|
||||
endmodule
|
||||
|
||||
module ppa_mux4_1 #(parameter WIDTH = 1) (
|
||||
module ppa_mux4d_1 #(parameter WIDTH = 1) (
|
||||
input logic [WIDTH-1:0] d0, d1, d2, d3,
|
||||
input logic [1:0] s,
|
||||
output logic [WIDTH-1:0] y);
|
||||
@ -541,7 +541,7 @@ module ppa_mux4_1 #(parameter WIDTH = 1) (
|
||||
assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0);
|
||||
endmodule
|
||||
|
||||
module ppa_mux8_1 #(parameter WIDTH = 1) (
|
||||
module ppa_mux8d_1 #(parameter WIDTH = 1) (
|
||||
input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5, d6, d7,
|
||||
input logic [2:0] s,
|
||||
output logic [WIDTH-1:0] y);
|
||||
|
@ -48,7 +48,7 @@ module gpio (
|
||||
|
||||
logic [31:0] input0d, input1d, input2d, input3d;
|
||||
logic [31:0] input_val, input_en, output_en, output_val;
|
||||
logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip;
|
||||
logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip, out_xor;
|
||||
|
||||
logic initTrans, memwrite;
|
||||
logic [7:0] entry, entryd;
|
||||
@ -91,6 +91,7 @@ module gpio (
|
||||
high_ip <= #1 0;
|
||||
low_ie <= #1 0;
|
||||
low_ip <= #1 0;
|
||||
out_xor <= #1 0;
|
||||
end else begin
|
||||
// writes
|
||||
if (memwrite)
|
||||
@ -104,7 +105,7 @@ module gpio (
|
||||
8'h20: fall_ie <= #1 Din;
|
||||
8'h28: high_ie <= #1 Din;
|
||||
8'h30: low_ie <= #1 Din;
|
||||
8'h40: output_val <= #1 output_val ^ Din; // OUT_XOR
|
||||
8'h40: out_xor <= #1 Din;
|
||||
endcase
|
||||
/* verilator lint_on CASEINCOMPLETE */
|
||||
// reads
|
||||
@ -121,7 +122,7 @@ module gpio (
|
||||
8'h2C: Dout <= #1 high_ip;
|
||||
8'h30: Dout <= #1 low_ie;
|
||||
8'h34: Dout <= #1 low_ip;
|
||||
8'h40: Dout <= #1 0; // OUT_XOR reads as 0
|
||||
8'h40: Dout <= #1 out_xor;
|
||||
default: Dout <= #1 0;
|
||||
endcase
|
||||
// interrupts
|
||||
@ -152,7 +153,7 @@ module gpio (
|
||||
flop #(32) sync2(HCLK,input1d,input2d);
|
||||
flop #(32) sync3(HCLK,input2d,input3d);
|
||||
assign input_val = input3d;
|
||||
assign GPIOPinsOut = output_val;
|
||||
assign GPIOPinsOut = output_val ^ out_xor;
|
||||
assign GPIOPinsEn = output_en;
|
||||
|
||||
assign GPIOIntr = |{(rise_ip & rise_ie),(fall_ip & fall_ie),(high_ip & high_ie),(low_ip & low_ie)};
|
||||
|
@ -43,77 +43,37 @@ module ram #(parameter BASE=0, RANGE = 65535) (
|
||||
output logic HRESPRam, HREADYRam
|
||||
);
|
||||
|
||||
// Desired changes.
|
||||
// 1. find a way to merge read and write address into 1 port.
|
||||
// 2. remove all unnecessary latencies. (HREADY needs to be able to constant high.)
|
||||
// 3. implement burst.
|
||||
// 4. remove the configurable latency.
|
||||
localparam ADDR_WIDTH = $clog2(RANGE/8);
|
||||
localparam OFFSET = $clog2(`XLEN/8);
|
||||
|
||||
logic [`XLEN/8-1:0] ByteMask;
|
||||
logic [31:0] HADDRD, RamAddr;
|
||||
//logic prevHREADYRam, risingHREADYRam;
|
||||
logic initTrans;
|
||||
logic memwrite, memwriteD, memread;
|
||||
logic nextHREADYRam;
|
||||
//logic [3:0] busycount;
|
||||
|
||||
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));
|
||||
|
||||
// a new AHB transactions starts when HTRANS requests a transaction,
|
||||
// the peripheral is selected, and the previous transaction is completing
|
||||
assign initTrans = HREADY & HSELRam & (HTRANS[1]);
|
||||
assign memwrite = initTrans & HWRITE; // *** why is initTrans needed? See CLINT interface
|
||||
assign memwrite = initTrans & HWRITE;
|
||||
assign memread = initTrans & ~HWRITE;
|
||||
|
||||
flopenr #(1) memwritereg(HCLK, ~HRESETn, HREADY, memwrite, memwriteD);
|
||||
flopenr #(32) haddrreg(HCLK, ~HRESETn, HREADY, HADDR, HADDRD);
|
||||
|
||||
/* // busy FSM to extend READY signal
|
||||
always @(posedge HCLK, negedge HRESETn)
|
||||
if (~HRESETn) begin
|
||||
busycount <= 0;
|
||||
HREADYRam <= #1 0;
|
||||
end else begin
|
||||
if (initTrans) begin
|
||||
busycount <= 0;
|
||||
HREADYRam <= #1 0;
|
||||
end else if (~HREADYRam) begin
|
||||
if (busycount == 0) begin // Ram latency, for testing purposes. *** test with different values such as 2
|
||||
HREADYRam <= #1 1;
|
||||
end else begin
|
||||
busycount <= busycount + 1;
|
||||
end
|
||||
end
|
||||
end */
|
||||
|
||||
|
||||
// Stall on a read after a write because the RAM can't take both adddresses on the same cycle
|
||||
assign nextHREADYRam = ~(memwriteD & memread);
|
||||
// assign nextHREADYRam = ~(memwriteD & ~memwrite);
|
||||
flopr #(1) readyreg(HCLK, ~HRESETn, nextHREADYRam, HREADYRam);
|
||||
// assign HREADYRam = ~(memwriteD & ~memwrite);
|
||||
assign HRESPRam = 0; // OK
|
||||
|
||||
localparam ADDR_WIDTH = $clog2(RANGE/8);
|
||||
localparam OFFSET = $clog2(`XLEN/8);
|
||||
|
||||
/* // Rising HREADY edge detector
|
||||
// Indicates when ram is finishing up
|
||||
// Needed because HREADY may go high for other reasons,
|
||||
// and we only want to write data when finishing up.
|
||||
flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam);
|
||||
assign risingHREADYRam = HREADYRam & ~prevHREADYRam;*/
|
||||
|
||||
/*
|
||||
bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA)
|
||||
memory(.clk(HCLK), .reA(1'b1),
|
||||
.addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam),
|
||||
.weB(memwrite & risingHREADYRam), .bweB(ByteMaskM),
|
||||
.addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); */
|
||||
|
||||
|
||||
|
||||
// On writes or during a wait state, use address delayed by one cycle to sync RamAddr with HWDATA or hold stalled address
|
||||
mux2 #(32) adrmux(HADDR, HADDRD, memwriteD | ~HREADY, RamAddr);
|
||||
|
||||
// Byte mask for subword writes
|
||||
// ***the CLINT and other peripherals duplicate this hardware
|
||||
// *** it shoudl be centralized and sent over HWSTRB
|
||||
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));
|
||||
|
||||
// single-ported RAM
|
||||
bram1p1rw #(`XLEN/8, 8, ADDR_WIDTH)
|
||||
memory(.clk(HCLK), .we(memwriteD), .bwe(ByteMask), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA));
|
||||
|
@ -93,10 +93,12 @@ module wallypipelinedcore (
|
||||
logic FWriteIntE;
|
||||
logic [`XLEN-1:0] FWriteDataE;
|
||||
logic [`XLEN-1:0] FIntResM;
|
||||
logic [`XLEN-1:0] FCvtIntResW;
|
||||
logic FDivBusyE;
|
||||
logic IllegalFPUInstrD, IllegalFPUInstrE;
|
||||
logic FRegWriteM;
|
||||
logic FPUStallD;
|
||||
logic [1:0] FResSelW;
|
||||
logic [4:0] SetFflagsM;
|
||||
|
||||
// memory management unit signals
|
||||
@ -134,13 +136,16 @@ module wallypipelinedcore (
|
||||
logic [`PA_BITS-1:0] IFUBusAdr;
|
||||
logic [`XLEN-1:0] IFUBusHRDATA;
|
||||
logic IFUBusRead;
|
||||
logic IFUBusAck;
|
||||
logic IFUBusAck, IFUBusInit;
|
||||
logic [2:0] IFUBurstType;
|
||||
logic [1:0] IFUTransType;
|
||||
logic IFUTransComplete;
|
||||
|
||||
// AHB LSU interface
|
||||
logic [`PA_BITS-1:0] LSUBusAdr;
|
||||
logic LSUBusRead;
|
||||
logic LSUBusWrite;
|
||||
logic LSUBusAck;
|
||||
logic LSUBusAck, LSUBusInit;
|
||||
logic [`XLEN-1:0] LSUBusHRDATA;
|
||||
logic [`XLEN-1:0] LSUBusHWDATA;
|
||||
|
||||
@ -152,6 +157,9 @@ module wallypipelinedcore (
|
||||
logic [4:0] InstrClassM;
|
||||
logic InstrAccessFaultF;
|
||||
logic [2:0] LSUBusSize;
|
||||
logic [2:0] LSUBurstType;
|
||||
logic [1:0] LSUTransType;
|
||||
logic LSUTransComplete;
|
||||
|
||||
logic DCacheMiss;
|
||||
logic DCacheAccess;
|
||||
@ -166,8 +174,8 @@ module wallypipelinedcore (
|
||||
.StallF, .StallD, .StallE, .StallM,
|
||||
.FlushF, .FlushD, .FlushE, .FlushM,
|
||||
// Fetch
|
||||
.IFUBusHRDATA, .IFUBusAck, .PCF, .IFUBusAdr,
|
||||
.IFUBusRead, .IFUStallF,
|
||||
.IFUBusHRDATA, .IFUBusAck, .IFUBusInit, .PCF, .IFUBusAdr,
|
||||
.IFUBusRead, .IFUStallF, .IFUBurstType, .IFUTransType, .IFUTransComplete,
|
||||
.ICacheAccess, .ICacheMiss,
|
||||
|
||||
// Execute
|
||||
@ -224,6 +232,8 @@ module wallypipelinedcore (
|
||||
.CSRReadValW, .ReadDataM, .MDUResultW,
|
||||
.RdW, .ReadDataW,
|
||||
.InstrValidM,
|
||||
.FCvtIntResW,
|
||||
.FResSelW,
|
||||
|
||||
// hazards
|
||||
.StallD, .StallE, .StallM, .StallW,
|
||||
@ -247,8 +257,8 @@ module wallypipelinedcore (
|
||||
.IEUAdrE, .IEUAdrM, .WriteDataE,
|
||||
.ReadDataM, .FlushDCacheM,
|
||||
// connected to ahb (all stay the same)
|
||||
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck,
|
||||
.LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize,
|
||||
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit,
|
||||
.LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
|
||||
|
||||
// connect to csr or privilege and stay the same.
|
||||
.PrivilegeModeW, .BigEndianM, // connects to csr
|
||||
@ -279,13 +289,22 @@ module wallypipelinedcore (
|
||||
ahblite ebu(// IFU connections
|
||||
.clk, .reset,
|
||||
.UnsignedLoadM(1'b0), .AtomicMaskedM(2'b00),
|
||||
.IFUBusAdr,
|
||||
.IFUBusRead, .IFUBusHRDATA, .IFUBusAck,
|
||||
.IFUBusAdr, .IFUBusRead,
|
||||
.IFUBusHRDATA,
|
||||
.IFUBurstType,
|
||||
.IFUTransType,
|
||||
.IFUTransComplete,
|
||||
.IFUBusAck,
|
||||
.IFUBusInit,
|
||||
// Signals from Data Cache
|
||||
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusHWDATA,
|
||||
.LSUBusHRDATA,
|
||||
.LSUBusSize,
|
||||
.LSUBurstType,
|
||||
.LSUTransType,
|
||||
.LSUTransComplete,
|
||||
.LSUBusAck,
|
||||
.LSUBusInit,
|
||||
|
||||
.HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn,
|
||||
.HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST,
|
||||
@ -375,6 +394,8 @@ module wallypipelinedcore (
|
||||
.FWriteIntE, // integer register write enable
|
||||
.FWriteDataE, // Data to be written to memory
|
||||
.FIntResM, // data to be written to integer register
|
||||
.FCvtIntResW, // fp -> int conversion result to be stored in int register
|
||||
.FResSelW, // fpu result selection
|
||||
.FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
|
||||
.IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
.SetFflagsM // FPU flags (to privileged unit)
|
||||
|
@ -1,289 +0,0 @@
|
||||
4000000000000000_4000000000000000_3ff0000000000000
|
||||
c018000000000000_4000000000000000_c008000000000000
|
||||
4024000000000000_4000000000000000_4014000000000000
|
||||
c032000000000000_4000000000000000_c022000000000000
|
||||
4041000000000000_4000000000000000_4031000000000000
|
||||
c05c000000000000_4000000000000000_c04c000000000000
|
||||
406e000000000000_4000000000000000_405e000000000000
|
||||
c07ffff583a53b8e_4000000000000000_c06ffff583a53b8e
|
||||
408199999999999a_4000000000000000_407199999999999a
|
||||
c093333333333333_4000000000000000_c083333333333333
|
||||
40a028f5c28f5c29_4000000000000000_409028f5c28f5c29
|
||||
c0b004189374bc6a_4000000000000000_c0a004189374bc6a
|
||||
40c00068db8bac71_4000000000000000_40b00068db8bac71
|
||||
c0dd1745d1745d17_4000000000000000_c0cd1745d1745d17
|
||||
40e5555555555555_4000000000000000_40d5555555555555
|
||||
c0f999999999999a_4000000000000000_c0e999999999999a
|
||||
410c71c71c71c71c_4000000000000000_40fc71c71c71c71c
|
||||
4000000000000000_c018000000000000_bfe5555555555555
|
||||
c018000000000000_c018000000000000_3ff0000000000000
|
||||
4024000000000000_c018000000000000_c00aaaaaaaaaaaab
|
||||
c032000000000000_c018000000000000_4018000000000000
|
||||
4041000000000000_c018000000000000_c026aaaaaaaaaaab
|
||||
c05c000000000000_c018000000000000_4032aaaaaaaaaaab
|
||||
406e000000000000_c018000000000000_c044000000000000
|
||||
c07ffff583a53b8e_c018000000000000_4055554e57c37d09
|
||||
408199999999999a_c018000000000000_c067777777777778
|
||||
c093333333333333_c018000000000000_4079999999999999
|
||||
40a028f5c28f5c29_c018000000000000_c0858bf258bf258c
|
||||
c0b004189374bc6a_c018000000000000_40955acb6f46508d
|
||||
40c00068db8bac71_c018000000000000_c0a555e124ba3b41
|
||||
c0dd1745d1745d17_c018000000000000_40b364d9364d9365
|
||||
40e5555555555555_c018000000000000_c0cc71c71c71c71c
|
||||
c0f999999999999a_c018000000000000_40d1111111111111
|
||||
410c71c71c71c71c_c018000000000000_c0e2f684bda12f68
|
||||
4000000000000000_4024000000000000_3fd999999999999a
|
||||
c018000000000000_4024000000000000_bfe3333333333333
|
||||
4024000000000000_4024000000000000_3ff0000000000000
|
||||
c032000000000000_4024000000000000_c00ccccccccccccd
|
||||
4041000000000000_4024000000000000_401b333333333333
|
||||
c05c000000000000_4024000000000000_c026666666666666
|
||||
406e000000000000_4024000000000000_4038000000000000
|
||||
c07ffff583a53b8e_4024000000000000_c0499991361dc93e
|
||||
408199999999999a_4024000000000000_405c28f5c28f5c2a
|
||||
c093333333333333_4024000000000000_c06eb851eb851eb8
|
||||
40a028f5c28f5c29_4024000000000000_4079db22d0e56042
|
||||
c0b004189374bc6a_4024000000000000_c089a027525460aa
|
||||
40c00068db8bac71_4024000000000000_40999a415f45e0b5
|
||||
c0dd1745d1745d17_4024000000000000_c0a745d1745d1746
|
||||
40e5555555555555_4024000000000000_40b1111111111111
|
||||
c0f999999999999a_4024000000000000_c0c47ae147ae147b
|
||||
410c71c71c71c71c_4024000000000000_40d6c16c16c16c16
|
||||
4000000000000000_c032000000000000_bfcc71c71c71c71c
|
||||
c018000000000000_c032000000000000_3fd5555555555555
|
||||
4024000000000000_c032000000000000_bfe1c71c71c71c72
|
||||
c032000000000000_c032000000000000_3ff0000000000000
|
||||
4041000000000000_c032000000000000_c00e38e38e38e38e
|
||||
c05c000000000000_c032000000000000_4018e38e38e38e39
|
||||
406e000000000000_c032000000000000_c02aaaaaaaaaaaab
|
||||
c07ffff583a53b8e_c032000000000000_403c71bdca59fc0c
|
||||
408199999999999a_c032000000000000_c04f49f49f49f4a0
|
||||
c093333333333333_c032000000000000_4051111111111111
|
||||
40a028f5c28f5c29_c032000000000000_c06cba9876543210
|
||||
c0b004189374bc6a_c032000000000000_407c790f3f086b67
|
||||
40c00068db8bac71_c032000000000000_c08c7281864da457
|
||||
c0dd1745d1745d17_c032000000000000_4099dbcc48676f31
|
||||
40e5555555555555_c032000000000000_c0a2f684bda12f68
|
||||
c0f999999999999a_c032000000000000_40b6c16c16c16c17
|
||||
410c71c71c71c71c_c032000000000000_c0c948b0fcd6e9e0
|
||||
4000000000000000_4041000000000000_3fbe1e1e1e1e1e1e
|
||||
c018000000000000_4041000000000000_bfc6969696969697
|
||||
4024000000000000_4041000000000000_3fd2d2d2d2d2d2d3
|
||||
c032000000000000_4041000000000000_bfe0f0f0f0f0f0f1
|
||||
4041000000000000_4041000000000000_3ff0000000000000
|
||||
c05c000000000000_4041000000000000_c00a5a5a5a5a5a5a
|
||||
406e000000000000_4041000000000000_401c3c3c3c3c3c3c
|
||||
c07ffff583a53b8e_4041000000000000_c02e1e143faa9268
|
||||
408199999999999a_4041000000000000_4030909090909091
|
||||
c093333333333333_4041000000000000_c042121212121212
|
||||
40a028f5c28f5c29_4041000000000000_405e6b3804d19e6b
|
||||
c0b004189374bc6a_4041000000000000_c06e25d3e863448b
|
||||
40c00068db8bac71_4041000000000000_407e1ee37f25085c
|
||||
c0dd1745d1745d17_4041000000000000_c08b6132a7041b61
|
||||
40e5555555555555_4041000000000000_4094141414141414
|
||||
c0f999999999999a_4041000000000000_c0a8181818181818
|
||||
410c71c71c71c71c_4041000000000000_40bac5701ac5701a
|
||||
4000000000000000_c05c000000000000_bfa2492492492492
|
||||
c018000000000000_c05c000000000000_3fbb6db6db6db6db
|
||||
4024000000000000_c05c000000000000_bfc6db6db6db6db7
|
||||
c032000000000000_c05c000000000000_3fd4924924924925
|
||||
4041000000000000_c05c000000000000_bfe36db6db6db6db
|
||||
c05c000000000000_c05c000000000000_3ff0000000000000
|
||||
406e000000000000_c05c000000000000_c001249249249249
|
||||
c07ffff583a53b8e_c05c000000000000_4012491e945e6b2d
|
||||
408199999999999a_c05c000000000000_c0241d41d41d41d5
|
||||
c093333333333333_c05c000000000000_4035f15f15f15f16
|
||||
40a028f5c28f5c29_c05c000000000000_c04277f44c118de6
|
||||
c0b004189374bc6a_c05c000000000000_40524dd2f1a9fbe7
|
||||
40c00068db8bac71_c05c000000000000_c062499c689fa081
|
||||
c0dd1745d1745d17_c05c000000000000_40709f959c427e56
|
||||
40e5555555555555_c05c000000000000_c088618618618618
|
||||
c0f999999999999a_c05c000000000000_409d41d41d41d41e
|
||||
410c71c71c71c71c_c05c000000000000_c0a0410410410410
|
||||
4000000000000000_406e000000000000_3f91111111111111
|
||||
c018000000000000_406e000000000000_bfa999999999999a
|
||||
4024000000000000_406e000000000000_3fb5555555555555
|
||||
c032000000000000_406e000000000000_bfc3333333333333
|
||||
4041000000000000_406e000000000000_3fd2222222222222
|
||||
c05c000000000000_406e000000000000_bfedddddddddddde
|
||||
406e000000000000_406e000000000000_3ff0000000000000
|
||||
c07ffff583a53b8e_406e000000000000_c001110b796930d4
|
||||
408199999999999a_406e000000000000_4012c5f92c5f92c6
|
||||
c093333333333333_406e000000000000_c0247ae147ae147b
|
||||
40a028f5c28f5c29_406e000000000000_40313cc1e098ead6
|
||||
c0b004189374bc6a_406e000000000000_c041156f8c384071
|
||||
40c00068db8bac71_406e000000000000_40511180ea2e95ce
|
||||
c0dd1745d1745d17_406e000000000000_c06f07c1f07c1f07
|
||||
40e5555555555555_406e000000000000_4076c16c16c16c16
|
||||
c0f999999999999a_406e000000000000_c08b4e81b4e81b4f
|
||||
410c71c71c71c71c_406e000000000000_409e573ac901e573
|
||||
4000000000000000_c07ffff583a53b8e_bf8000053e2f1a08
|
||||
c018000000000000_c07ffff583a53b8e_3f980007dd46a70b
|
||||
4024000000000000_c07ffff583a53b8e_bfa400068dbae089
|
||||
c032000000000000_c07ffff583a53b8e_3fb20005e5f4fd48
|
||||
4041000000000000_c07ffff583a53b8e_bfc1000592120ba8
|
||||
c05c000000000000_c07ffff583a53b8e_3fdc00092cd26d8d
|
||||
406e000000000000_c07ffff583a53b8e_bfee0009d49850ce
|
||||
c07ffff583a53b8e_c07ffff583a53b8e_3ff0000000000000
|
||||
408199999999999a_c07ffff583a53b8e_c001999f5e009ca2
|
||||
c093333333333333_c07ffff583a53b8e_401333397dd21f3c
|
||||
40a028f5c28f5c29_c07ffff583a53b8e_c02028fb0e2a73e4
|
||||
c0b004189374bc6a_c07ffff583a53b8e_4030041dd2fb6fd0
|
||||
40c00068db8bac71_c07ffff583a53b8e_c040006e19dd229c
|
||||
c0dd1745d1745d17_c07ffff583a53b8e_405d174f59ca00c8
|
||||
40e5555555555555_c07ffff583a53b8e_c065555c52e9780a
|
||||
c0f999999999999a_c07ffff583a53b8e_407999a1fd1829a6
|
||||
410c71c71c71c71c_c07ffff583a53b8e_c08c71d06e8ca00d
|
||||
4000000000000000_408199999999999a_3f7d1745d1745d17
|
||||
c018000000000000_408199999999999a_bf85d1745d1745d1
|
||||
4024000000000000_408199999999999a_3f922e8ba2e8ba2e
|
||||
c032000000000000_408199999999999a_bfa05d1745d1745d
|
||||
4041000000000000_408199999999999a_3fbee8ba2e8ba2e8
|
||||
c05c000000000000_408199999999999a_bfc9745d1745d174
|
||||
406e000000000000_408199999999999a_3fdb45d1745d1745
|
||||
c07ffff583a53b8e_408199999999999a_bfed173c4921d90c
|
||||
408199999999999a_408199999999999a_3ff0000000000000
|
||||
c093333333333333_408199999999999a_c001745d1745d174
|
||||
40a028f5c28f5c29_408199999999999a_401d61bed61bed61
|
||||
c0b004189374bc6a_408199999999999a_c02d1eb851eb851d
|
||||
40c00068db8bac71_408199999999999a_403d180477e6ade4
|
||||
c0dd1745d1745d17_408199999999999a_c04a723f789854a0
|
||||
40e5555555555555_408199999999999a_405364d9364d9364
|
||||
c0f999999999999a_408199999999999a_c06745d1745d1746
|
||||
410c71c71c71c71c_408199999999999a_4079dbcc48676f30
|
||||
4000000000000000_c093333333333333_bf6aaaaaaaaaaaab
|
||||
c018000000000000_c093333333333333_3f74000000000000
|
||||
4024000000000000_c093333333333333_bf80aaaaaaaaaaab
|
||||
c032000000000000_c093333333333333_3f9e000000000000
|
||||
4041000000000000_c093333333333333_bfac555555555556
|
||||
c05c000000000000_c093333333333333_3fb7555555555556
|
||||
406e000000000000_c093333333333333_bfc9000000000000
|
||||
c07ffff583a53b8e_c093333333333333_3fdaaaa1edb45c4c
|
||||
408199999999999a_c093333333333333_bfed555555555556
|
||||
c093333333333333_c093333333333333_3ff0000000000000
|
||||
40a028f5c28f5c29_c093333333333333_c00aeeeeeeeeeeef
|
||||
c0b004189374bc6a_c093333333333333_401ab17e4b17e4b1
|
||||
40c00068db8bac71_c093333333333333_c02aab596de8ca12
|
||||
c0dd1745d1745d17_c093333333333333_40383e0f83e0f83e
|
||||
40e5555555555555_c093333333333333_c041c71c71c71c72
|
||||
c0f999999999999a_c093333333333333_4055555555555556
|
||||
410c71c71c71c71c_c093333333333333_c067b425ed097b42
|
||||
4000000000000000_40a028f5c28f5c29_3f5faee41e6a7498
|
||||
c018000000000000_40a028f5c28f5c29_bf67c32b16cfd772
|
||||
4024000000000000_40a028f5c28f5c29_3f73cd4e930288df
|
||||
c032000000000000_40a028f5c28f5c29_bf81d260511be196
|
||||
4041000000000000_40a028f5c28f5c29_3f90d4e930288df1
|
||||
c05c000000000000_40a028f5c28f5c29_bfabb9079a9d2605
|
||||
406e000000000000_40a028f5c28f5c29_3fbdb3f5dc83cd4f
|
||||
c07ffff583a53b8e_40a028f5c28f5c29_bfcfaed9bca398bf
|
||||
408199999999999a_40a028f5c28f5c29_3fd16cfd7720f354
|
||||
c093333333333333_40a028f5c28f5c29_bfe30288df0cac5b
|
||||
40a028f5c28f5c29_40a028f5c28f5c29_3ff0000000000000
|
||||
c0b004189374bc6a_40a028f5c28f5c29_c00fb70081c635bb
|
||||
40c00068db8bac71_40a028f5c28f5c29_401fafb3c1f3a182
|
||||
c0dd1745d1745d17_40a028f5c28f5c29_c02ccd899003afd0
|
||||
40e5555555555555_40a028f5c28f5c29_40351f42bef1a310
|
||||
c0f999999999999a_40a028f5c28f5c29_c04958b67ebb907a
|
||||
410c71c71c71c71c_40a028f5c28f5c29_405c29ae53ecd96a
|
||||
4000000000000000_c0b004189374bc6a_bf4ff7d0f16c2e0a
|
||||
c018000000000000_c0b004189374bc6a_3f57f9dcb5112287
|
||||
4024000000000000_c0b004189374bc6a_bf63fae296e39cc6
|
||||
c032000000000000_c0b004189374bc6a_3f71fb6587ccd9e5
|
||||
4041000000000000_c0b004189374bc6a_bf80fba700417875
|
||||
c05c000000000000_c0b004189374bc6a_3f9bf8d6d33ea848
|
||||
406e000000000000_c0b004189374bc6a_bfadf853e2556b29
|
||||
c07ffff583a53b8e_c0b004189374bc6a_3fbff7c677bfebb5
|
||||
408199999999999a_c0b004189374bc6a_bfc1951951951953
|
||||
c093333333333333_c0b004189374bc6a_3fd32e4a2a741b9f
|
||||
40a028f5c28f5c29_c0b004189374bc6a_bfe024d3c19930d9
|
||||
c0b004189374bc6a_c0b004189374bc6a_3ff0000000000000
|
||||
40c00068db8bac71_c0b004189374bc6a_c00ff8a272e15ca2
|
||||
c0dd1745d1745d17_c0b004189374bc6a_401d0fd53890e409
|
||||
40e5555555555555_c0b004189374bc6a_c0254fe0a0f2c95b
|
||||
c0f999999999999a_c0b004189374bc6a_4039930d8df024d5
|
||||
410c71c71c71c71c_c0b004189374bc6a_c04c6a80d6990c7a
|
||||
4000000000000000_40c00068db8bac71_3f3fff2e4e46e7a8
|
||||
c018000000000000_40c00068db8bac71_bf47ff62bab52dbe
|
||||
4024000000000000_40c00068db8bac71_3f53ff7cf0ec50c9
|
||||
c032000000000000_40c00068db8bac71_bf61ff8a0c07e24f
|
||||
4041000000000000_40c00068db8bac71_3f70ff909995ab11
|
||||
c05c000000000000_40c00068db8bac71_bf8bff48847e0ab3
|
||||
406e000000000000_40c00068db8bac71_3f9dff3b6962792e
|
||||
c07ffff583a53b8e_40c00068db8bac71_bfafff23d230d9a4
|
||||
408199999999999a_40c00068db8bac71_3fb1992644a6ff6a
|
||||
c093333333333333_40c00068db8bac71_bfc332b5622a8afe
|
||||
40a028f5c28f5c29_40c00068db8bac71_3fd0288bdd4a34fd
|
||||
c0b004189374bc6a_40c00068db8bac71_bfe003af9fc0ed8b
|
||||
40c00068db8bac71_40c00068db8bac71_3ff0000000000000
|
||||
c0dd1745d1745d17_40c00068db8bac71_c00d16872fe35e3c
|
||||
40e5555555555555_40c00068db8bac71_401554c989849a70
|
||||
c0f999999999999a_40c00068db8bac71_c02998f1d838b954
|
||||
410c71c71c71c71c_40c00068db8bac71_403c710cb75b7895
|
||||
4000000000000000_c0dd1745d1745d17_bf2199999999999a
|
||||
c018000000000000_c0dd1745d1745d17_3f3a666666666667
|
||||
4024000000000000_c0dd1745d1745d17_bf46000000000000
|
||||
c032000000000000_c0dd1745d1745d17_3f53cccccccccccd
|
||||
4041000000000000_c0dd1745d1745d17_bf62b33333333333
|
||||
c05c000000000000_c0dd1745d1745d17_3f7ecccccccccccd
|
||||
406e000000000000_c0dd1745d1745d17_bf80800000000000
|
||||
c07ffff583a53b8e_c0dd1745d1745d17_3f919993d5347a5b
|
||||
408199999999999a_c0dd1745d1745d17_bfa35c28f5c28f5d
|
||||
c093333333333333_c0dd1745d1745d17_3fb51eb851eb851f
|
||||
40a028f5c28f5c29_c0dd1745d1745d17_bfc1c6a7ef9db22d
|
||||
c0b004189374bc6a_c0dd1745d1745d17_3fd19e1b089a0275
|
||||
40c00068db8bac71_c0dd1745d1745d17_bfe19a0cf1800a7c
|
||||
c0dd1745d1745d17_c0dd1745d1745d17_3ff0000000000000
|
||||
40e5555555555555_c0dd1745d1745d17_c007777777777777
|
||||
c0f999999999999a_c0dd1745d1745d17_401c28f5c28f5c2a
|
||||
410c71c71c71c71c_c0dd1745d1745d17_c02f49f49f49f49f
|
||||
4000000000000000_40e5555555555555_3f18000000000000
|
||||
c018000000000000_40e5555555555555_bf22000000000000
|
||||
4024000000000000_40e5555555555555_3f3e000000000000
|
||||
c032000000000000_40e5555555555555_bf4b000000000000
|
||||
4041000000000000_40e5555555555555_3f59800000000000
|
||||
c05c000000000000_40e5555555555555_bf65000000000000
|
||||
406e000000000000_40e5555555555555_3f76800000000000
|
||||
c07ffff583a53b8e_40e5555555555555_bf87fff822bbecab
|
||||
408199999999999a_40e5555555555555_3f9a666666666667
|
||||
c093333333333333_40e5555555555555_bfaccccccccccccd
|
||||
40a028f5c28f5c29_40e5555555555555_3fb83d70a3d70a3e
|
||||
c0b004189374bc6a_40e5555555555555_bfc80624dd2f1a9f
|
||||
40c00068db8bac71_40e5555555555555_3fd8009d495182aa
|
||||
c0dd1745d1745d17_40e5555555555555_bfe5d1745d1745d2
|
||||
40e5555555555555_40e5555555555555_3ff0000000000000
|
||||
c0f999999999999a_40e5555555555555_c003333333333334
|
||||
410c71c71c71c71c_40e5555555555555_4015555555555555
|
||||
4000000000000000_c0f999999999999a_bf04000000000000
|
||||
c018000000000000_c0f999999999999a_3f1e000000000000
|
||||
4024000000000000_c0f999999999999a_bf29000000000000
|
||||
c032000000000000_c0f999999999999a_3f36800000000000
|
||||
4041000000000000_c0f999999999999a_bf45400000000000
|
||||
c05c000000000000_c0f999999999999a_3f51800000000000
|
||||
406e000000000000_c0f999999999999a_bf62c00000000000
|
||||
c07ffff583a53b8e_c0f999999999999a_3f73fff972474538
|
||||
408199999999999a_c0f999999999999a_bf86000000000000
|
||||
c093333333333333_c0f999999999999a_3f97ffffffffffff
|
||||
40a028f5c28f5c29_c0f999999999999a_bfa4333333333333
|
||||
c0b004189374bc6a_c0f999999999999a_3fb4051eb851eb84
|
||||
40c00068db8bac71_c0f999999999999a_bfc40083126e978d
|
||||
c0dd1745d1745d17_c0f999999999999a_3fd22e8ba2e8ba2e
|
||||
40e5555555555555_c0f999999999999a_bfeaaaaaaaaaaaaa
|
||||
c0f999999999999a_c0f999999999999a_3ff0000000000000
|
||||
410c71c71c71c71c_c0f999999999999a_c001c71c71c71c71
|
||||
4000000000000000_410c71c71c71c71c_3ef2000000000000
|
||||
c018000000000000_410c71c71c71c71c_bf0b000000000000
|
||||
4024000000000000_410c71c71c71c71c_3f16800000000000
|
||||
c032000000000000_410c71c71c71c71c_bf24400000000000
|
||||
4041000000000000_410c71c71c71c71c_3f33200000000000
|
||||
c05c000000000000_410c71c71c71c71c_bf4f800000000000
|
||||
406e000000000000_410c71c71c71c71c_3f50e00000000000
|
||||
c07ffff583a53b8e_410c71c71c71c71c_bf61fffa1a0cf180
|
||||
408199999999999a_410c71c71c71c71c_3f73ccccccccccce
|
||||
c093333333333333_410c71c71c71c71c_bf8599999999999a
|
||||
40a028f5c28f5c29_410c71c71c71c71c_3f922e147ae147ae
|
||||
c0b004189374bc6a_410c71c71c71c71c_bfa2049ba5e353f8
|
||||
40c00068db8bac71_410c71c71c71c71c_3fb20075f6fd21ff
|
||||
c0dd1745d1745d17_410c71c71c71c71c_bfc05d1745d1745d
|
||||
40e5555555555555_410c71c71c71c71c_3fd8000000000000
|
||||
c0f999999999999a_410c71c71c71c71c_bfecccccccccccce
|
||||
410c71c71c71c71c_410c71c71c71c71c_3ff0000000000000
|
@ -10,120 +10,64 @@ module testbenchfp;
|
||||
parameter TEST="none";
|
||||
|
||||
string Tests[]; // list of tests to be run
|
||||
string FmaRneTests[]; // list of FMA round to nearest even tests to run
|
||||
string FmaRuTests[]; // list of FMA round up tests to run
|
||||
string FmaRdTests[]; // list of FMA round down tests to run
|
||||
string FmaRzTests[]; // list of FMA round twords zero
|
||||
string FmaRnmTests[]; // list of FMA round to nearest max magnitude
|
||||
logic [2:0] OpCtrl[]; // list of op controls
|
||||
logic [2:0] Unit[]; // list of units being tested
|
||||
logic WriteInt[]; // Is being written to integer resgiter
|
||||
logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
|
||||
logic [1:0] Fmt[]; // list of formats for the other units
|
||||
logic [1:0] FmaFmt[]; // list of formats for the FMA
|
||||
|
||||
|
||||
logic clk=0;
|
||||
logic [31:0] TestNum=0; // index for the test
|
||||
logic [31:0] FmaTestNum=0; // index for the test
|
||||
logic [31:0] OpCtrlNum=0; // index for OpCtrl
|
||||
logic [31:0] errors=0; // how many errors
|
||||
logic [31:0] VectorNum=0; // index for test vector
|
||||
logic [31:0] FmaVectorNum=0; // index for test vector
|
||||
logic [31:0] FrmNum=0; // index for rounding mode
|
||||
logic [`FLEN*4+7:0] TestVectors[46464:0]; // list of test vectors
|
||||
logic [`FLEN*4+7:0] FmaRneVectors[6133248:0]; // list of fma rne test vectors
|
||||
logic [`FLEN*4+7:0] FmaRuVectors[6133248:0]; // list of fma ru test vectors
|
||||
logic [`FLEN*4+7:0] FmaRdVectors[6133248:0]; // list of fma rd test vectors
|
||||
logic [`FLEN*4+7:0] FmaRzVectors[6133248:0]; // list of fma rz test vectors
|
||||
logic [`FLEN*4+7:0] FmaRnmVectors[6133248:0]; // list of fma rnm test vectors
|
||||
logic [`FLEN*4+7:0] TestVectors[6133248:0]; // list of test vectors
|
||||
|
||||
logic [1:0] FmaFmtVal, FmtVal; // value of the current Fmt
|
||||
logic [1:0] FmtVal; // value of the current Fmt
|
||||
logic [2:0] UnitVal, OpCtrlVal, FrmVal; // vlaue of the currnet Unit/OpCtrl/FrmVal
|
||||
logic WriteIntVal; // value of the current WriteInt
|
||||
logic [`FLEN-1:0] X, Y, Z; // inputs read from TestFloat
|
||||
logic [`FLEN-1:0] FmaRneX, FmaRneY, FmaRneZ; // inputs read from TestFloat
|
||||
logic [`FLEN-1:0] FmaRzX, FmaRzY, FmaRzZ; // inputs read from TestFloat
|
||||
logic [`FLEN-1:0] FmaRuX, FmaRuY, FmaRuZ; // inputs read from TestFloat
|
||||
logic [`FLEN-1:0] FmaRdX, FmaRdY, FmaRdZ; // inputs read from TestFloat
|
||||
logic [`FLEN-1:0] FmaRnmX, FmaRnmY, FmaRnmZ; // inputs read from TestFloat
|
||||
logic [`XLEN-1:0] SrcA; // integer input
|
||||
logic [`FLEN-1:0] Ans; // correct answer from TestFloat
|
||||
logic [`FLEN-1:0] FmaRneAns, FmaRzAns, FmaRuAns, FmaRdAns, FmaRnmAns; // flags read form testfloat
|
||||
logic [`FLEN-1:0] Res; // result from other units
|
||||
logic [`FLEN-1:0] FmaRneRes, FmaRzRes, FmaRuRes, FmaRdRes, FmaRnmRes; // results from FMA
|
||||
logic [4:0] AnsFlg; // correct flags read from testfloat
|
||||
logic [4:0] FmaRneAnsFlg, FmaRzAnsFlg, FmaRuAnsFlg, FmaRdAnsFlg, FmaRnmAnsFlg; // flags read form testfloat
|
||||
logic [4:0] ResFlg; // Result flags
|
||||
logic [4:0] FmaRneResFlg, FmaRzResFlg, FmaRuResFlg, FmaRdResFlg, FmaRnmResFlg; // flags read form testfloat
|
||||
logic [`FMTBITS-1:0] ModFmt, FmaModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad
|
||||
logic [`FLEN-1:0] FmaRes, DivRes, CmpRes, CvtRes; // Results from each unit
|
||||
logic [`XLEN-1:0] CvtIntRes; // Results from each unit
|
||||
logic [4:0] ResFlg, Flg; // Result flags
|
||||
logic [`FMTBITS-1:0] ModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad
|
||||
logic [`FLEN-1:0] FpRes, FpCmpRes; // Results from each unit
|
||||
logic [`XLEN-1:0] IntRes, CmpRes; // Results from each unit
|
||||
logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags
|
||||
logic ResNaN, FmaRneResNaN, FmaRzResNaN, FmaRuResNaN, FmaRdResNaN, FmaRnmResNaN; // is the outputed result NaN
|
||||
logic AnsNaN, FmaRneAnsNaN, FmaRzAnsNaN, FmaRuAnsNaN, FmaRdAnsNaN, FmaRnmAnsNaN; // is the correct answer NaN
|
||||
logic NaNGood, FmaRneNaNGood, FmaRzNaNGood, FmaRuNaNGood, FmaRdNaNGood, FmaRnmNaNGood; // is the NaN answer correct
|
||||
logic AnsNaN, ResNaN, NaNGood;
|
||||
logic XSgn, YSgn, ZSgn; // sign of the inputs
|
||||
logic FmaRneXSgn, FmaRneYSgn, FmaRneZSgn;
|
||||
logic FmaRzXSgn, FmaRzYSgn, FmaRzZSgn;
|
||||
logic FmaRuXSgn, FmaRuYSgn, FmaRuZSgn;
|
||||
logic FmaRdXSgn, FmaRdYSgn, FmaRdZSgn;
|
||||
logic FmaRnmXSgn, FmaRnmYSgn, FmaRnmZSgn;
|
||||
logic [`NE-1:0] XExp, YExp, ZExp; // exponent of the inputs
|
||||
logic [`NE-1:0] FmaRneXExp, FmaRneYExp, FmaRneZExp;
|
||||
logic [`NE-1:0] FmaRzXExp, FmaRzYExp, FmaRzZExp;
|
||||
logic [`NE-1:0] FmaRuXExp, FmaRuYExp, FmaRuZExp;
|
||||
logic [`NE-1:0] FmaRdXExp, FmaRdYExp, FmaRdZExp;
|
||||
logic [`NE-1:0] FmaRnmXExp, FmaRnmYExp, FmaRnmZExp;
|
||||
logic [`NF:0] XMan, YMan, ZMan; // mantissas of the inputs
|
||||
logic [`NF:0] FmaRneXMan, FmaRneYMan, FmaRneZMan;
|
||||
logic [`NF:0] FmaRzXMan, FmaRzYMan, FmaRzZMan;
|
||||
logic [`NF:0] FmaRuXMan, FmaRuYMan, FmaRuZMan;
|
||||
logic [`NF:0] FmaRdXMan, FmaRdYMan, FmaRdZMan;
|
||||
logic [`NF:0] FmaRnmXMan, FmaRnmYMan, FmaRnmZMan;
|
||||
logic XNaN, YNaN, ZNaN; // is the input NaN
|
||||
logic FmaRneXNaN, FmaRneYNaN, FmaRneZNaN;
|
||||
logic FmaRzXNaN, FmaRzYNaN, FmaRzZNaN;
|
||||
logic FmaRuXNaN, FmaRuYNaN, FmaRuZNaN;
|
||||
logic FmaRdXNaN, FmaRdYNaN, FmaRdZNaN;
|
||||
logic FmaRnmXNaN, FmaRnmYNaN, FmaRnmZNaN;
|
||||
logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN
|
||||
logic FmaRneXSNaN, FmaRneYSNaN, FmaRneZSNaN;
|
||||
logic FmaRzXSNaN, FmaRzYSNaN, FmaRzZSNaN;
|
||||
logic FmaRuXSNaN, FmaRuYSNaN, FmaRuZSNaN;
|
||||
logic FmaRdXSNaN, FmaRdYSNaN, FmaRdZSNaN;
|
||||
logic FmaRnmXSNaN, FmaRnmYSNaN, FmaRnmZSNaN;
|
||||
logic XDenorm, ZDenorm; // is the input denormalized
|
||||
logic FmaRneXDenorm, FmaRneZDenorm;
|
||||
logic FmaRzXDenorm, FmaRzZDenorm;
|
||||
logic FmaRuXDenorm, FmaRuZDenorm;
|
||||
logic FmaRdXDenorm, FmaRdZDenorm;
|
||||
logic FmaRnmXDenorm, FmaRnmZDenorm;
|
||||
logic XInf, YInf, ZInf; // is the input infinity
|
||||
logic FmaRneXInf, FmaRneYInf, FmaRneZInf;
|
||||
logic FmaRzXInf, FmaRzYInf, FmaRzZInf;
|
||||
logic FmaRuXInf, FmaRuYInf, FmaRuZInf;
|
||||
logic FmaRdXInf, FmaRdYInf, FmaRdZInf;
|
||||
logic FmaRnmXInf, FmaRnmYInf, FmaRnmZInf;
|
||||
logic XZero, YZero, ZZero; // is the input zero
|
||||
logic FmaRneXZero, FmaRneYZero, FmaRneZZero;
|
||||
logic FmaRzXZero, FmaRzYZero, FmaRzZZero;
|
||||
logic FmaRuXZero, FmaRuYZero, FmaRuZZero;
|
||||
logic FmaRdXZero, FmaRdYZero, FmaRdZZero;
|
||||
logic FmaRnmXZero, FmaRnmYZero, FmaRnmZZero;
|
||||
logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones
|
||||
logic [`LGLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder)
|
||||
logic IntZeroE;
|
||||
logic CvtResSgnE;
|
||||
logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5;
|
||||
logic [`NE:0] CvtCalcExpE; // the calculated expoent
|
||||
logic [`LOGLGLEN-1:0] CvtShiftAmtE; // how much to shift by
|
||||
logic CvtResDenormUfE;
|
||||
|
||||
|
||||
// in-between FMA signals
|
||||
logic Mult;
|
||||
logic [`NE+1:0] ProdExpE, FmaRneProdExp, FmaRzProdExp, FmaRuProdExp, FmaRdProdExp, FmaRnmProdExp;
|
||||
logic AddendStickyE, FmaRneAddendSticky, FmaRzAddendSticky, FmaRuAddendSticky, FmaRdAddendSticky, FmaRnmAddendSticky;
|
||||
logic KillProdE, FmaRneKillProd, FmaRzKillProd, FmaRuKillProd, FmaRdKillProd, FmaRnmKillProd;
|
||||
logic [$clog2(3*`NF+7)-1:0] NormCntE, FmaRneNormCnt, FmaRzNormCnt, FmaRuNormCnt, FmaRdNormCnt, FmaRnmNormCnt;
|
||||
logic [3*`NF+5:0] SumE, FmaRneSum, FmaRzSum, FmaRuSum, FmaRdSum, FmaRnmSum;
|
||||
logic InvZE, FmaRneInvZ, FmaRzInvZ, FmaRuInvZ, FmaRdInvZ, FmaRnmInvZ;
|
||||
logic NegSumE, FmaRneNegSum, FmaRzNegSum, FmaRuNegSum, FmaRdNegSum, FmaRnmNegSum;
|
||||
logic ZSgnEffE, FmaRneZSgnEff, FmaRzZSgnEff, FmaRuZSgnEff, FmaRdZSgnEff, FmaRnmZSgnEff;
|
||||
logic PSgnE, FmaRnePSgn, FmaRzPSgn, FmaRuPSgn, FmaRdPSgn, FmaRnmPSgn;
|
||||
logic [`NE+1:0] ProdExpE;
|
||||
logic AddendStickyE;
|
||||
logic KillProdE;
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE;
|
||||
logic [3*`NF+5:0] SumE;
|
||||
logic InvZE;
|
||||
logic NegSumE;
|
||||
logic ZSgnEffE;
|
||||
logic PSgnE;
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -282,15 +226,13 @@ module testbenchfp;
|
||||
// end
|
||||
// end
|
||||
if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested
|
||||
// add each rounding mode to it's own list of tests
|
||||
// - fma tests are very long, so run all rounding modes in parallel
|
||||
FmaRneTests = {FmaRneTests, "f128_mulAdd_rne.tv"};
|
||||
FmaRzTests = {FmaRzTests, "f128_mulAdd_rz.tv"};
|
||||
FmaRuTests = {FmaRuTests, "f128_mulAdd_ru.tv"};
|
||||
FmaRdTests = {FmaRdTests, "f128_mulAdd_rd.tv"};
|
||||
FmaRnmTests = {FmaRnmTests, "f128_mulAdd_rnm.tv"};
|
||||
// add the format for the Fma
|
||||
FmaFmt = {FmaFmt, 2'b11};
|
||||
Tests = {Tests, f128fma};
|
||||
OpCtrl = {OpCtrl, `FMA_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b11};
|
||||
end
|
||||
end
|
||||
end
|
||||
if (`D_SUPPORTED) begin // if double precision is supported
|
||||
@ -411,14 +353,13 @@ module testbenchfp;
|
||||
// end
|
||||
// end
|
||||
if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
|
||||
// add each rounding mode to it's own list of tests
|
||||
// - fma tests are very long, so run all rounding modes in parallel
|
||||
FmaRneTests = {FmaRneTests, "f64_mulAdd_rne.tv"};
|
||||
FmaRzTests = {FmaRzTests, "f64_mulAdd_rz.tv"};
|
||||
FmaRuTests = {FmaRuTests, "f64_mulAdd_ru.tv"};
|
||||
FmaRdTests = {FmaRdTests, "f64_mulAdd_rd.tv"};
|
||||
FmaRnmTests = {FmaRnmTests, "f64_mulAdd_rnm.tv"};
|
||||
FmaFmt = {FmaFmt, 2'b01};
|
||||
Tests = {Tests, f64fma};
|
||||
OpCtrl = {OpCtrl, `FMA_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
end
|
||||
end
|
||||
end
|
||||
if (`F_SUPPORTED) begin // if single precision being supported
|
||||
@ -523,14 +464,13 @@ module testbenchfp;
|
||||
// end
|
||||
// end
|
||||
if (TEST === "fma" | TEST === "all") begin // if fma is being tested
|
||||
// add each rounding mode to it's own list of tests
|
||||
// - fma tests are very long, so run all rounding modes in parallel
|
||||
FmaRneTests = {FmaRneTests, "f32_mulAdd_rne.tv"};
|
||||
FmaRzTests = {FmaRzTests, "f32_mulAdd_rz.tv"};
|
||||
FmaRuTests = {FmaRuTests, "f32_mulAdd_ru.tv"};
|
||||
FmaRdTests = {FmaRdTests, "f32_mulAdd_rd.tv"};
|
||||
FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
|
||||
FmaFmt = {FmaFmt, 2'b00};
|
||||
Tests = {Tests, f32fma};
|
||||
OpCtrl = {OpCtrl, `FMA_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
end
|
||||
end
|
||||
end
|
||||
if (`ZFH_SUPPORTED) begin // if half precision supported
|
||||
@ -617,19 +557,18 @@ module testbenchfp;
|
||||
// end
|
||||
// end
|
||||
if (TEST === "fma" | TEST === "all") begin // if fma is being tested
|
||||
// add each rounding mode to it's own list of tests
|
||||
// - fma tests are very long, so run all rounding modes in parallel
|
||||
FmaRneTests = {FmaRneTests, "f16_mulAdd_rne.tv"};
|
||||
FmaRzTests = {FmaRzTests, "f16_mulAdd_rz.tv"};
|
||||
FmaRuTests = {FmaRuTests, "f16_mulAdd_ru.tv"};
|
||||
FmaRdTests = {FmaRdTests, "f16_mulAdd_rd.tv"};
|
||||
FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
|
||||
FmaFmt = {FmaFmt, 2'b10};
|
||||
Tests = {Tests, f16fma};
|
||||
OpCtrl = {OpCtrl, `FMA_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// check if nothing is being tested
|
||||
if (Tests.size() == 0 & FmaRneTests.size() == 0 & FmaRuTests.size() == 0 & FmaRdTests.size() == 0 & FmaRzTests.size() == 0 & FmaRnmTests.size() == 0) begin
|
||||
if (Tests.size() == 0) begin
|
||||
$display("TEST %s not supported in this configuration", TEST);
|
||||
$stop;
|
||||
end
|
||||
@ -648,26 +587,17 @@ module testbenchfp;
|
||||
// Read the first test
|
||||
initial begin
|
||||
$display("\n\nRunning %s vectors", Tests[TestNum]);
|
||||
$display("Running FMA precision %d", FmaTestNum);
|
||||
$readmemh({`PATH, Tests[TestNum]}, TestVectors);
|
||||
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
|
||||
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
|
||||
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
|
||||
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
|
||||
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
|
||||
// set the test index to 0
|
||||
TestNum = 0;
|
||||
FmaTestNum = 0;
|
||||
end
|
||||
|
||||
// set a the signals for all tests
|
||||
always_comb FmaFmtVal = FmaFmt[FmaTestNum];
|
||||
always_comb UnitVal = Unit[TestNum];
|
||||
always_comb FmtVal = Fmt[TestNum];
|
||||
always_comb OpCtrlVal = OpCtrl[OpCtrlNum];
|
||||
always_comb WriteIntVal = WriteInt[OpCtrlNum];
|
||||
always_comb FrmVal = Frm[FrmNum];
|
||||
assign Mult = OpCtrlVal === 3'b100;
|
||||
|
||||
// modify the format signal if only 2 percisions supported
|
||||
// - 1 for the larger precision
|
||||
@ -675,61 +605,9 @@ module testbenchfp;
|
||||
always_comb begin
|
||||
if(`FMTBITS == 1) ModFmt = FmtVal == `FMT;
|
||||
else ModFmt = FmtVal;
|
||||
if(`FMTBITS == 1) FmaModFmt = FmaFmtVal == `FMT;
|
||||
else FmaModFmt = FmaFmtVal;
|
||||
end
|
||||
|
||||
// extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
|
||||
readfmavectors readfmarnevectors (.clk, .TestVector(FmaRneVectors[FmaVectorNum]), .Ans(FmaRneAns), .AnsFlg(FmaRneAnsFlg),
|
||||
.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
|
||||
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
|
||||
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
|
||||
.XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
|
||||
.XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN),
|
||||
.XDenormE(FmaRneXDenorm), .ZDenormE(FmaRneZDenorm),
|
||||
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
|
||||
.XInfE(FmaRneXInf), .YInfE(FmaRneYInf), .ZInfE(FmaRneZInf), .FmaModFmt, .FmaFmt(FmaFmtVal),
|
||||
.X(FmaRneX), .Y(FmaRneY), .Z(FmaRneZ));
|
||||
readfmavectors readfmarzvectors (.clk, .TestVector(FmaRzVectors[FmaVectorNum]), .Ans(FmaRzAns), .AnsFlg(FmaRzAnsFlg),
|
||||
.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .FmaModFmt,
|
||||
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
|
||||
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
|
||||
.XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
|
||||
.XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN),
|
||||
.XDenormE(FmaRzXDenorm), .ZDenormE(FmaRzZDenorm),
|
||||
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
|
||||
.XInfE(FmaRzXInf), .YInfE(FmaRzYInf), .ZInfE(FmaRzZInf), .FmaFmt(FmaFmtVal),
|
||||
.X(FmaRzX), .Y(FmaRzY), .Z(FmaRzZ));
|
||||
readfmavectors readfmaruvectors (.clk, .TestVector(FmaRuVectors[FmaVectorNum]), .Ans(FmaRuAns), .AnsFlg(FmaRuAnsFlg),
|
||||
.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .FmaModFmt,
|
||||
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
|
||||
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
|
||||
.XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
|
||||
.XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN),
|
||||
.XDenormE(FmaRuXDenorm), .ZDenormE(FmaRuZDenorm),
|
||||
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
|
||||
.XInfE(FmaRuXInf), .YInfE(FmaRuYInf), .ZInfE(FmaRuZInf), .FmaFmt(FmaFmtVal),
|
||||
.X(FmaRuX), .Y(FmaRuY), .Z(FmaRuZ));
|
||||
readfmavectors readfmardvectors (.clk, .TestVector(FmaRdVectors[FmaVectorNum]), .Ans(FmaRdAns), .AnsFlg(FmaRdAnsFlg),
|
||||
.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .FmaModFmt,
|
||||
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
|
||||
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
|
||||
.XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
|
||||
.XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN),
|
||||
.XDenormE(FmaRdXDenorm), .ZDenormE(FmaRdZDenorm),
|
||||
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
|
||||
.XInfE(FmaRdXInf), .YInfE(FmaRdYInf), .ZInfE(FmaRdZInf), .FmaFmt(FmaFmtVal),
|
||||
.X(FmaRdX), .Y(FmaRdY), .Z(FmaRdZ));
|
||||
readfmavectors readfmarnmvectors (.clk, .TestVector(FmaRnmVectors[FmaVectorNum]), .Ans(FmaRnmAns), .AnsFlg(FmaRnmAnsFlg),
|
||||
.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .FmaModFmt,
|
||||
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
|
||||
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
|
||||
.XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
|
||||
.XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN),
|
||||
.XDenormE(FmaRnmXDenorm), .ZDenormE(FmaRnmZDenorm),
|
||||
.XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
|
||||
.XInfE(FmaRnmXInf), .YInfE(FmaRnmYInf), .ZInfE(FmaRnmZInf), .FmaFmt(FmaFmtVal),
|
||||
.X(FmaRnmX), .Y(FmaRnmY), .Z(FmaRnmZ));
|
||||
readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA,
|
||||
.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
|
||||
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
|
||||
@ -754,124 +632,30 @@ module testbenchfp;
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// instantiate devices under test
|
||||
// - one fma for each precison
|
||||
// - all the units for the other tests (including fma for add/sub/mul)
|
||||
fma1 fma1rne(.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
|
||||
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
|
||||
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
|
||||
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRneSum), .NegSumE(FmaRneNegSum), .InvZE(FmaRneInvZ),
|
||||
.NormCntE(FmaRneNormCnt), .ZSgnEffE(FmaRneZSgnEff), .PSgnE(FmaRnePSgn),
|
||||
.ProdExpE(FmaRneProdExp), .AddendStickyE(FmaRneAddendSticky), .KillProdE(FmaRneSumKillProd));
|
||||
fma2 fma2rne(.XSgnM(FmaRneXSgn), .YSgnM(FmaRneYSgn),
|
||||
.ZExpM(FmaRneZExp), .ZDenormM(FmaRneZDenorm),
|
||||
.XManM(FmaRneXMan), .YManM(FmaRneYMan), .ZManM(FmaRneZMan),
|
||||
.XNaNM(FmaRneXNaN), .YNaNM(FmaRneYNaN), .ZNaNM(FmaRneZNaN),
|
||||
.XZeroM(FmaRneXZero), .YZeroM(FmaRneYZero), .ZZeroM(FmaRneZZero),
|
||||
.XInfM(FmaRneXInf), .YInfM(FmaRneYInf), .ZInfM(FmaRneZInf),
|
||||
.XSNaNM(FmaRneXSNaN), .YSNaNM(FmaRneYSNaN), .ZSNaNM(FmaRneZSNaN),
|
||||
.KillProdM(FmaRneSumKillProd), .AddendStickyM(FmaRneAddendSticky), .ProdExpM(FmaRneProdExp),
|
||||
.SumM((FmaRneSum)), .NegSumM(FmaRneNegSum), .InvZM(FmaRneInvZ), .NormCntM(FmaRneNormCnt), .ZSgnEffM(FmaRneZSgnEff),
|
||||
.PSgnM(FmaRnePSgn), .FmtM(FmaModFmt), .FrmM(`RNE),
|
||||
.FMAFlgM(FmaRneResFlg), .FMAResM(FmaRneRes), .Mult(1'b0));
|
||||
fma1 fma1rz(.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn),
|
||||
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
|
||||
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
|
||||
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRzSum), .NegSumE(FmaRzNegSum), .InvZE(FmaRzInvZ),
|
||||
.NormCntE(FmaRzNormCnt), .ZSgnEffE(FmaRzZSgnEff), .PSgnE(FmaRzPSgn),
|
||||
.ProdExpE(FmaRzProdExp), .AddendStickyE(FmaRzAddendSticky), .KillProdE(FmaRzSumKillProd));
|
||||
fma2 fma2rz(.XSgnM(FmaRzXSgn), .YSgnM(FmaRzYSgn),
|
||||
.ZExpM(FmaRzZExp), .ZDenormM(FmaRzZDenorm),
|
||||
.XManM(FmaRzXMan), .YManM(FmaRzYMan), .ZManM(FmaRzZMan),
|
||||
.XNaNM(FmaRzXNaN), .YNaNM(FmaRzYNaN), .ZNaNM(FmaRzZNaN),
|
||||
.XZeroM(FmaRzXZero), .YZeroM(FmaRzYZero), .ZZeroM(FmaRzZZero),
|
||||
.XInfM(FmaRzXInf), .YInfM(FmaRzYInf), .ZInfM(FmaRzZInf),
|
||||
.XSNaNM(FmaRzXSNaN), .YSNaNM(FmaRzYSNaN), .ZSNaNM(FmaRzZSNaN),
|
||||
.KillProdM(FmaRzSumKillProd), .AddendStickyM(FmaRzAddendSticky), .ProdExpM(FmaRzProdExp),
|
||||
.SumM((FmaRzSum)), .NegSumM(FmaRzNegSum), .InvZM(FmaRzInvZ), .NormCntM(FmaRzNormCnt), .ZSgnEffM(FmaRzZSgnEff),
|
||||
.PSgnM(FmaRzPSgn), .FmtM(FmaModFmt), .FrmM(`RZ),
|
||||
.FMAFlgM(FmaRzResFlg), .FMAResM(FmaRzRes), .Mult(1'b0));
|
||||
fma1 fma1ru(.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn),
|
||||
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
|
||||
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
|
||||
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRuSum), .NegSumE(FmaRuNegSum), .InvZE(FmaRuInvZ),
|
||||
.NormCntE(FmaRuNormCnt), .ZSgnEffE(FmaRuZSgnEff), .PSgnE(FmaRuPSgn),
|
||||
.ProdExpE(FmaRuProdExp), .AddendStickyE(FmaRuAddendSticky), .KillProdE(FmaRuSumKillProd));
|
||||
fma2 fma2ru(.XSgnM(FmaRuXSgn), .YSgnM(FmaRuYSgn),
|
||||
.ZExpM(FmaRuZExp), .ZDenormM(FmaRuZDenorm),
|
||||
.XManM(FmaRuXMan), .YManM(FmaRuYMan), .ZManM(FmaRuZMan),
|
||||
.XNaNM(FmaRuXNaN), .YNaNM(FmaRuYNaN), .ZNaNM(FmaRuZNaN),
|
||||
.XZeroM(FmaRuXZero), .YZeroM(FmaRuYZero), .ZZeroM(FmaRuZZero),
|
||||
.XInfM(FmaRuXInf), .YInfM(FmaRuYInf), .ZInfM(FmaRuZInf),
|
||||
.XSNaNM(FmaRuXSNaN), .YSNaNM(FmaRuYSNaN), .ZSNaNM(FmaRuZSNaN),
|
||||
.KillProdM(FmaRuSumKillProd), .AddendStickyM(FmaRuAddendSticky), .ProdExpM(FmaRuProdExp),
|
||||
.SumM((FmaRuSum)), .NegSumM(FmaRuNegSum), .InvZM(FmaRuInvZ), .NormCntM(FmaRuNormCnt), .ZSgnEffM(FmaRuZSgnEff),
|
||||
.PSgnM(FmaRuPSgn), .FmtM(FmaModFmt), .FrmM(`RU),
|
||||
.FMAFlgM(FmaRuResFlg), .FMAResM(FmaRuRes), .Mult(1'b0));
|
||||
fma1 fma1rd(.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn),
|
||||
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
|
||||
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
|
||||
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRdSum), .NegSumE(FmaRdNegSum), .InvZE(FmaRdInvZ),
|
||||
.NormCntE(FmaRdNormCnt), .ZSgnEffE(FmaRdZSgnEff), .PSgnE(FmaRdPSgn),
|
||||
.ProdExpE(FmaRdProdExp), .AddendStickyE(FmaRdAddendSticky), .KillProdE(FmaRdSumKillProd));
|
||||
fma2 fma2rd(.XSgnM(FmaRdXSgn), .YSgnM(FmaRdYSgn),
|
||||
.ZExpM(FmaRdZExp), .ZDenormM(FmaRdZDenorm),
|
||||
.XManM(FmaRdXMan), .YManM(FmaRdYMan), .ZManM(FmaRdZMan),
|
||||
.XNaNM(FmaRdXNaN), .YNaNM(FmaRdYNaN), .ZNaNM(FmaRdZNaN),
|
||||
.XZeroM(FmaRdXZero), .YZeroM(FmaRdYZero), .ZZeroM(FmaRdZZero),
|
||||
.XInfM(FmaRdXInf), .YInfM(FmaRdYInf), .ZInfM(FmaRdZInf),
|
||||
.XSNaNM(FmaRdXSNaN), .YSNaNM(FmaRdYSNaN), .ZSNaNM(FmaRdZSNaN),
|
||||
.KillProdM(FmaRdSumKillProd), .AddendStickyM(FmaRdAddendSticky), .ProdExpM(FmaRdProdExp),
|
||||
.SumM((FmaRdSum)), .NegSumM(FmaRdNegSum), .InvZM(FmaRdInvZ), .NormCntM(FmaRdNormCnt), .ZSgnEffM(FmaRdZSgnEff),
|
||||
.PSgnM(FmaRdPSgn), .FmtM(FmaModFmt), .FrmM(`RD),
|
||||
.FMAFlgM(FmaRdResFlg), .FMAResM(FmaRdRes), .Mult(1'b0));
|
||||
fma1 fma1rnm(.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn),
|
||||
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
|
||||
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
|
||||
.XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRnmSum), .NegSumE(FmaRnmNegSum), .InvZE(FmaRnmInvZ),
|
||||
.NormCntE(FmaRnmNormCnt), .ZSgnEffE(FmaRnmZSgnEff), .PSgnE(FmaRnmPSgn),
|
||||
.ProdExpE(FmaRnmProdExp), .AddendStickyE(FmaRnmAddendSticky), .KillProdE(FmaRnmSumKillProd));
|
||||
fma2 fma2rnm(.XSgnM(FmaRnmXSgn), .YSgnM(FmaRnmYSgn),
|
||||
.ZExpM(FmaRnmZExp), .ZDenormM(FmaRnmZDenorm),
|
||||
.XManM(FmaRnmXMan), .YManM(FmaRnmYMan), .ZManM(FmaRnmZMan),
|
||||
.XNaNM(FmaRnmXNaN), .YNaNM(FmaRnmYNaN), .ZNaNM(FmaRnmZNaN),
|
||||
.XZeroM(FmaRnmXZero), .YZeroM(FmaRnmYZero), .ZZeroM(FmaRnmZZero),
|
||||
.XInfM(FmaRnmXInf), .YInfM(FmaRnmYInf), .ZInfM(FmaRnmZInf),
|
||||
.XSNaNM(FmaRnmXSNaN), .YSNaNM(FmaRnmYSNaN), .ZSNaNM(FmaRnmZSNaN),
|
||||
.KillProdM(FmaRnmSumKillProd), .AddendStickyM(FmaRnmAddendSticky), .ProdExpM(FmaRnmProdExp),
|
||||
.SumM((FmaRnmSum)), .NegSumM(FmaRnmNegSum), .InvZM(FmaRnmInvZ), .NormCntM(FmaRnmNormCnt), .ZSgnEffM(FmaRnmZSgnEff),
|
||||
.PSgnM(FmaRnmPSgn), .FmtM(FmaModFmt), .FrmM(`RNM),
|
||||
.FMAFlgM(FmaRnmResFlg), .FMAResM(FmaRnmRes), .Mult(1'b0));
|
||||
fma1 fma1(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn),
|
||||
fma fma(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn),
|
||||
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp),
|
||||
.XManE(XMan), .YManE(YMan), .ZManE(ZMan),
|
||||
.XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
|
||||
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
|
||||
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE);
|
||||
fma2 fma2(.XSgnM(XSgn), .YSgnM(YSgn),
|
||||
.ZExpM(ZExp), .ZDenormM(ZDenorm),
|
||||
.XManM(XMan), .YManM(YMan), .ZManM(ZMan),
|
||||
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN),
|
||||
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero),
|
||||
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf),
|
||||
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN),
|
||||
|
||||
postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]),
|
||||
.ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal),
|
||||
.XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
|
||||
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
|
||||
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
|
||||
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
|
||||
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
|
||||
.KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE),
|
||||
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
|
||||
.FMAFlgM(FmaFlg), .FMAResM(FmaRes), .Mult);
|
||||
// fcvtfp fcvtfp (.XExpE(XExp), .XManE(XMan), .XSgnE(XSgn), .XZeroE(XZero), .XDenormE(XDenorm), .XInfE(XInf),
|
||||
// .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), .CvtFpResE(CvtFpRes), .CvtFpFlgE(CvtFpFlg));
|
||||
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
|
||||
.PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
|
||||
|
||||
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
|
||||
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal),
|
||||
.XInfE(XInf), .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt),
|
||||
.CvtResE(CvtRes), .CvtIntResE(CvtIntRes), .CvtFlgE(CvtFlg));
|
||||
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE,
|
||||
.FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE);
|
||||
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
|
||||
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero),
|
||||
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpResE(CmpRes));
|
||||
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
|
||||
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
|
||||
// fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf),
|
||||
// .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal),
|
||||
// .CvtRes, .CvtFlgE);
|
||||
@ -900,60 +684,6 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//Check if the correct answer and result is a NaN
|
||||
always_comb begin
|
||||
case (FmaFmtVal)
|
||||
4'b11: begin // quad
|
||||
FmaRneAnsNaN = &FmaRneAns[`Q_LEN-2:`Q_NF]&(|FmaRneAns[`Q_NF-1:0]);
|
||||
FmaRneResNaN = &FmaRneRes[`Q_LEN-2:`Q_NF]&(|FmaRneRes[`Q_NF-1:0]);
|
||||
FmaRzAnsNaN = &FmaRzAns[`Q_LEN-2:`Q_NF]&(|FmaRzAns[`Q_NF-1:0]);
|
||||
FmaRzResNaN = &FmaRzRes[`Q_LEN-2:`Q_NF]&(|FmaRzRes[`Q_NF-1:0]);
|
||||
FmaRuAnsNaN = &FmaRuAns[`Q_LEN-2:`Q_NF]&(|FmaRuAns[`Q_NF-1:0]);
|
||||
FmaRuResNaN = &FmaRuRes[`Q_LEN-2:`Q_NF]&(|FmaRuRes[`Q_NF-1:0]);
|
||||
FmaRdAnsNaN = &FmaRdAns[`Q_LEN-2:`Q_NF]&(|FmaRdAns[`Q_NF-1:0]);
|
||||
FmaRdResNaN = &FmaRdRes[`Q_LEN-2:`Q_NF]&(|FmaRdRes[`Q_NF-1:0]);
|
||||
FmaRnmAnsNaN = &FmaRnmAns[`Q_LEN-2:`Q_NF]&(|FmaRnmAns[`Q_NF-1:0]);
|
||||
FmaRnmResNaN = &FmaRnmRes[`Q_LEN-2:`Q_NF]&(|FmaRnmRes[`Q_NF-1:0]);
|
||||
end
|
||||
4'b01: begin // double
|
||||
FmaRneAnsNaN = &FmaRneAns[`D_LEN-2:`D_NF]&(|FmaRneAns[`D_NF-1:0]);
|
||||
FmaRneResNaN = &FmaRneRes[`D_LEN-2:`D_NF]&(|FmaRneRes[`D_NF-1:0]);
|
||||
FmaRzAnsNaN = &FmaRzAns[`D_LEN-2:`D_NF]&(|FmaRzAns[`D_NF-1:0]);
|
||||
FmaRzResNaN = &FmaRzRes[`D_LEN-2:`D_NF]&(|FmaRzRes[`D_NF-1:0]);
|
||||
FmaRuAnsNaN = &FmaRuAns[`D_LEN-2:`D_NF]&(|FmaRuAns[`D_NF-1:0]);
|
||||
FmaRuResNaN = &FmaRuRes[`D_LEN-2:`D_NF]&(|FmaRuRes[`D_NF-1:0]);
|
||||
FmaRdAnsNaN = &FmaRdAns[`D_LEN-2:`D_NF]&(|FmaRdAns[`D_NF-1:0]);
|
||||
FmaRdResNaN = &FmaRdRes[`D_LEN-2:`D_NF]&(|FmaRdRes[`D_NF-1:0]);
|
||||
FmaRnmAnsNaN = &FmaRnmAns[`D_LEN-2:`D_NF]&(|FmaRnmAns[`D_NF-1:0]);
|
||||
FmaRnmResNaN = &FmaRnmRes[`D_LEN-2:`D_NF]&(|FmaRnmRes[`D_NF-1:0]);
|
||||
end
|
||||
4'b00: begin // single
|
||||
FmaRneAnsNaN = &FmaRneAns[`S_LEN-2:`S_NF]&(|FmaRneAns[`S_NF-1:0]);
|
||||
FmaRneResNaN = &FmaRneRes[`S_LEN-2:`S_NF]&(|FmaRneRes[`S_NF-1:0]);
|
||||
FmaRzAnsNaN = &FmaRzAns[`S_LEN-2:`S_NF]&(|FmaRzAns[`S_NF-1:0]);
|
||||
FmaRzResNaN = &FmaRzRes[`S_LEN-2:`S_NF]&(|FmaRzRes[`S_NF-1:0]);
|
||||
FmaRuAnsNaN = &FmaRuAns[`S_LEN-2:`S_NF]&(|FmaRuAns[`S_NF-1:0]);
|
||||
FmaRuResNaN = &FmaRuRes[`S_LEN-2:`S_NF]&(|FmaRuRes[`S_NF-1:0]);
|
||||
FmaRdAnsNaN = &FmaRdAns[`S_LEN-2:`S_NF]&(|FmaRdAns[`S_NF-1:0]);
|
||||
FmaRdResNaN = &FmaRdRes[`S_LEN-2:`S_NF]&(|FmaRdRes[`S_NF-1:0]);
|
||||
FmaRnmAnsNaN = &FmaRnmAns[`S_LEN-2:`S_NF]&(|FmaRnmAns[`S_NF-1:0]);
|
||||
FmaRnmResNaN = &FmaRnmRes[`S_LEN-2:`S_NF]&(|FmaRnmRes[`S_NF-1:0]);
|
||||
end
|
||||
4'b10: begin // half
|
||||
FmaRneAnsNaN = &FmaRneAns[`H_LEN-2:`H_NF]&(|FmaRneAns[`H_NF-1:0]);
|
||||
FmaRneResNaN = &FmaRneRes[`H_LEN-2:`H_NF]&(|FmaRneRes[`H_NF-1:0]);
|
||||
FmaRzAnsNaN = &FmaRzAns[`H_LEN-2:`H_NF]&(|FmaRzAns[`H_NF-1:0]);
|
||||
FmaRzResNaN = &FmaRzRes[`H_LEN-2:`H_NF]&(|FmaRzRes[`H_NF-1:0]);
|
||||
FmaRuAnsNaN = &FmaRuAns[`H_LEN-2:`H_NF]&(|FmaRuAns[`H_NF-1:0]);
|
||||
FmaRuResNaN = &FmaRuRes[`H_LEN-2:`H_NF]&(|FmaRuRes[`H_NF-1:0]);
|
||||
FmaRdAnsNaN = &FmaRdAns[`H_LEN-2:`H_NF]&(|FmaRdAns[`H_NF-1:0]);
|
||||
FmaRdResNaN = &FmaRdRes[`H_LEN-2:`H_NF]&(|FmaRdRes[`H_NF-1:0]);
|
||||
FmaRnmAnsNaN = &FmaRnmAns[`H_LEN-2:`H_NF]&(|FmaRnmAns[`H_NF-1:0]);
|
||||
FmaRnmResNaN = &FmaRnmRes[`H_LEN-2:`H_NF]&(|FmaRnmRes[`H_NF-1:0]);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
always_comb begin
|
||||
if(UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin
|
||||
// an integer output can't be a NaN
|
||||
@ -1004,20 +734,20 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
|
||||
always_comb begin
|
||||
// select the result to check
|
||||
case (UnitVal)
|
||||
`FMAUNIT: Res = FmaRes;
|
||||
`DIVUNIT: Res = DivRes;
|
||||
`FMAUNIT: Res = FpRes;
|
||||
`DIVUNIT: Res = FpRes;
|
||||
`CMPUNIT: Res = CmpRes;
|
||||
`CVTINTUNIT: if(WriteIntVal) Res = CvtIntRes; else Res = CvtRes;
|
||||
`CVTFPUNIT: Res = CvtRes;
|
||||
`CVTINTUNIT: if(WriteIntVal) Res = IntRes; else Res = FpRes;
|
||||
`CVTFPUNIT: Res = FpRes;
|
||||
endcase
|
||||
|
||||
// select the flag to check
|
||||
case (UnitVal)
|
||||
`FMAUNIT: ResFlg = FmaFlg;
|
||||
`DIVUNIT: ResFlg = DivFlg;
|
||||
`FMAUNIT: ResFlg = Flg;
|
||||
`DIVUNIT: ResFlg = Flg;
|
||||
`CMPUNIT: ResFlg = CmpFlg;
|
||||
`CVTINTUNIT: ResFlg = CvtFlg;
|
||||
`CVTFPUNIT: ResFlg = CvtFlg;
|
||||
`CVTINTUNIT: ResFlg = Flg;
|
||||
`CVTFPUNIT: ResFlg = Flg;
|
||||
endcase
|
||||
end
|
||||
// check results on falling edge of clk
|
||||
@ -1027,117 +757,6 @@ end
|
||||
// check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify:
|
||||
// - the sign of the NaN does not matter for the opperations being tested
|
||||
// - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter
|
||||
case (FmaFmtVal)
|
||||
4'b11: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRneAnsFlg[4]&(FmaRneRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRneXNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneX[`Q_LEN-2:`Q_NF],1'b1,FmaRneX[`Q_NF-2:0]})) |
|
||||
(FmaRneYNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneY[`Q_LEN-2:`Q_NF],1'b1,FmaRneY[`Q_NF-2:0]})) |
|
||||
(FmaRneZNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneZ[`Q_LEN-2:`Q_NF],1'b1,FmaRneZ[`Q_NF-2:0]})));
|
||||
4'b01: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRneAnsFlg[4]&(FmaRneRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRneXNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneX[`D_LEN-2:`D_NF],1'b1,FmaRneX[`D_NF-2:0]})) |
|
||||
(FmaRneYNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneY[`D_LEN-2:`D_NF],1'b1,FmaRneY[`D_NF-2:0]})) |
|
||||
(FmaRneZNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneZ[`D_LEN-2:`D_NF],1'b1,FmaRneZ[`D_NF-2:0]})));
|
||||
4'b00: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRneAnsFlg[4]&(FmaRneRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRneXNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneX[`S_LEN-2:`S_NF],1'b1,FmaRneX[`S_NF-2:0]})) |
|
||||
(FmaRneYNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneY[`S_LEN-2:`S_NF],1'b1,FmaRneY[`S_NF-2:0]})) |
|
||||
(FmaRneZNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneZ[`S_LEN-2:`S_NF],1'b1,FmaRneZ[`S_NF-2:0]})));
|
||||
4'b10: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRneAnsFlg[4]&(FmaRneRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRneXNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneX[`H_LEN-2:`H_NF],1'b1,FmaRneX[`H_NF-2:0]})) |
|
||||
(FmaRneYNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneY[`H_LEN-2:`H_NF],1'b1,FmaRneY[`H_NF-2:0]})) |
|
||||
(FmaRneZNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneZ[`H_LEN-2:`H_NF],1'b1,FmaRneZ[`H_NF-2:0]})));
|
||||
endcase
|
||||
case (FmaFmtVal)
|
||||
4'b11: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRzAnsFlg[4]&(FmaRzRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRzXNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzX[`Q_LEN-2:`Q_NF],1'b1,FmaRzX[`Q_NF-2:0]})) |
|
||||
(FmaRzYNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzY[`Q_LEN-2:`Q_NF],1'b1,FmaRzY[`Q_NF-2:0]})) |
|
||||
(FmaRzZNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzZ[`Q_LEN-2:`Q_NF],1'b1,FmaRzZ[`Q_NF-2:0]})));
|
||||
4'b01: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRzAnsFlg[4]&(FmaRzRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRzXNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzX[`D_LEN-2:`D_NF],1'b1,FmaRzX[`D_NF-2:0]})) |
|
||||
(FmaRzYNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzY[`D_LEN-2:`D_NF],1'b1,FmaRzY[`D_NF-2:0]})) |
|
||||
(FmaRzZNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzZ[`D_LEN-2:`D_NF],1'b1,FmaRzZ[`D_NF-2:0]})));
|
||||
4'b00: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRzAnsFlg[4]&(FmaRzRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRzXNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzX[`S_LEN-2:`S_NF],1'b1,FmaRzX[`S_NF-2:0]})) |
|
||||
(FmaRzYNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzY[`S_LEN-2:`S_NF],1'b1,FmaRzY[`S_NF-2:0]})) |
|
||||
(FmaRzZNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzZ[`S_LEN-2:`S_NF],1'b1,FmaRzZ[`S_NF-2:0]})));
|
||||
4'b10: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRzAnsFlg[4]&(FmaRzRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRzXNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzX[`H_LEN-2:`H_NF],1'b1,FmaRzX[`H_NF-2:0]})) |
|
||||
(FmaRzYNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzY[`H_LEN-2:`H_NF],1'b1,FmaRzY[`H_NF-2:0]})) |
|
||||
(FmaRzZNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzZ[`H_LEN-2:`H_NF],1'b1,FmaRzZ[`H_NF-2:0]})));
|
||||
endcase
|
||||
case (FmaFmtVal)
|
||||
4'b11: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRuXNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuX[`Q_LEN-2:`Q_NF],1'b1,FmaRuX[`Q_NF-2:0]})) |
|
||||
(FmaRuYNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuY[`Q_LEN-2:`Q_NF],1'b1,FmaRuY[`Q_NF-2:0]})) |
|
||||
(FmaRuZNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuZ[`Q_LEN-2:`Q_NF],1'b1,FmaRuZ[`Q_NF-2:0]})));
|
||||
4'b01: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRuAnsFlg[4]&(FmaRuRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF{1'b0}}})) |
|
||||
(FmaRuXNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuX[`D_LEN-2:`D_NF],1'b1,FmaRuX[`D_NF-2:0]})) |
|
||||
(FmaRuYNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuY[`D_LEN-2:`D_NF],1'b1,FmaRuY[`D_NF-2:0]})) |
|
||||
(FmaRuZNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuZ[`D_LEN-2:`D_NF],1'b1,FmaRuZ[`D_NF-2:0]})));
|
||||
4'b00: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRuAnsFlg[4]&(FmaRuRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRuXNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuX[`S_LEN-2:`S_NF],1'b1,FmaRuX[`S_NF-2:0]})) |
|
||||
(FmaRuYNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuY[`S_LEN-2:`S_NF],1'b1,FmaRuY[`S_NF-2:0]})) |
|
||||
(FmaRuZNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuZ[`S_LEN-2:`S_NF],1'b1,FmaRuZ[`S_NF-2:0]})));
|
||||
4'b10: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRuAnsFlg[4]&(FmaRuRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRuXNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuX[`H_LEN-2:`H_NF],1'b1,FmaRuX[`H_NF-2:0]})) |
|
||||
(FmaRuYNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuY[`H_LEN-2:`H_NF],1'b1,FmaRuY[`H_NF-2:0]})) |
|
||||
(FmaRuZNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuZ[`H_LEN-2:`H_NF],1'b1,FmaRuZ[`H_NF-2:0]})));
|
||||
endcase
|
||||
case (FmaFmtVal)
|
||||
4'b11: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRdAnsFlg[4]&(FmaRdRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRdXNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdX[`Q_LEN-2:`Q_NF],1'b1,FmaRdX[`Q_NF-2:0]})) |
|
||||
(FmaRdYNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdY[`Q_LEN-2:`Q_NF],1'b1,FmaRdY[`Q_NF-2:0]})) |
|
||||
(FmaRdZNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdZ[`Q_LEN-2:`Q_NF],1'b1,FmaRdZ[`Q_NF-2:0]})));
|
||||
4'b01: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRdAnsFlg[4]&(FmaRdRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRdXNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdX[`D_LEN-2:`D_NF],1'b1,FmaRdX[`D_NF-2:0]})) |
|
||||
(FmaRdYNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdY[`D_LEN-2:`D_NF],1'b1,FmaRdY[`D_NF-2:0]})) |
|
||||
(FmaRdZNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdZ[`D_LEN-2:`D_NF],1'b1,FmaRdZ[`D_NF-2:0]})));
|
||||
4'b00: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRdAnsFlg[4]&(FmaRdRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRdXNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdX[`S_LEN-2:`S_NF],1'b1,FmaRdX[`S_NF-2:0]})) |
|
||||
(FmaRdYNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdY[`S_LEN-2:`S_NF],1'b1,FmaRdY[`S_NF-2:0]})) |
|
||||
(FmaRdZNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdZ[`S_LEN-2:`S_NF],1'b1,FmaRdZ[`S_NF-2:0]})));
|
||||
4'b10: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRdAnsFlg[4]&(FmaRdRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRdXNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdX[`H_LEN-2:`H_NF],1'b1,FmaRdX[`H_NF-2:0]})) |
|
||||
(FmaRdYNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdY[`H_LEN-2:`H_NF],1'b1,FmaRdY[`H_NF-2:0]})) |
|
||||
(FmaRdZNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdZ[`H_LEN-2:`H_NF],1'b1,FmaRdZ[`H_NF-2:0]})));
|
||||
endcase
|
||||
case (FmaFmtVal)
|
||||
4'b11: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRnmAnsFlg[4]&(FmaRnmRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRnmXNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmX[`Q_LEN-2:`Q_NF],1'b1,FmaRnmX[`Q_NF-2:0]})) |
|
||||
(FmaRnmYNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmY[`Q_LEN-2:`Q_NF],1'b1,FmaRnmY[`Q_NF-2:0]})) |
|
||||
(FmaRnmZNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmZ[`Q_LEN-2:`Q_NF],1'b1,FmaRnmZ[`Q_NF-2:0]})));
|
||||
4'b01: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRnmAnsFlg[4]&(FmaRnmRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRnmXNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmX[`D_LEN-2:`D_NF],1'b1,FmaRnmX[`D_NF-2:0]})) |
|
||||
(FmaRnmYNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmY[`D_LEN-2:`D_NF],1'b1,FmaRnmY[`D_NF-2:0]})) |
|
||||
(FmaRnmZNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmZ[`D_LEN-2:`D_NF],1'b1,FmaRnmZ[`D_NF-2:0]})));
|
||||
4'b00: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRnmAnsFlg[4]&(FmaRnmRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRnmXNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmX[`S_LEN-2:`S_NF],1'b1,FmaRnmX[`S_NF-2:0]})) |
|
||||
(FmaRnmYNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmY[`S_LEN-2:`S_NF],1'b1,FmaRnmY[`S_NF-2:0]})) |
|
||||
(FmaRnmZNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmZ[`S_LEN-2:`S_NF],1'b1,FmaRnmZ[`S_NF-2:0]})));
|
||||
4'b10: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRnmAnsFlg[4]&(FmaRnmRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRnmXNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmX[`H_LEN-2:`H_NF],1'b1,FmaRnmX[`H_NF-2:0]})) |
|
||||
(FmaRnmYNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmY[`H_LEN-2:`H_NF],1'b1,FmaRnmY[`H_NF-2:0]})) |
|
||||
(FmaRnmZNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmZ[`H_LEN-2:`H_NF],1'b1,FmaRnmZ[`H_NF-2:0]})));
|
||||
endcase
|
||||
if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT)
|
||||
case (FmtVal)
|
||||
4'b11: NaNGood = (((`IEEE754==0)&AnsNaN&(Res === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
@ -1221,77 +840,8 @@ end
|
||||
$stop;
|
||||
end
|
||||
|
||||
// check if the fma tests are correct
|
||||
if(~((FmaRneRes === FmaRneAns | FmaRneNaNGood | FmaRneNaNGood === 1'bx) & (FmaRneResFlg === FmaRneAnsFlg | FmaRneAnsFlg === 5'bx))) begin
|
||||
errors += 1;
|
||||
$display("There is an error in FMA - RNE");
|
||||
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRneX, FmaRneY, FmaRneZ, FmaRneRes, FmaRneResFlg, FmaRneAns, FmaRneAnsFlg);
|
||||
$stop;
|
||||
end
|
||||
if(~((FmaRzRes === FmaRzAns | FmaRzNaNGood | FmaRzNaNGood === 1'bx) & (FmaRzResFlg === FmaRzAnsFlg | FmaRzAnsFlg === 5'bx))) begin
|
||||
errors += 1;
|
||||
$display("There is an error in FMA - RZ");
|
||||
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRzX, FmaRzY, FmaRzZ, FmaRzRes, FmaRzResFlg, FmaRzAns, FmaRzAnsFlg);
|
||||
$stop;
|
||||
end
|
||||
if(~((FmaRuRes === FmaRuAns | FmaRuNaNGood | FmaRuNaNGood === 1'bx) & (FmaRuResFlg === FmaRuAnsFlg | FmaRuAnsFlg === 5'bx))) begin
|
||||
errors += 1;
|
||||
$display("There is an error in FMA - RU");
|
||||
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRuX, FmaRuY, FmaRuZ, FmaRuRes, FmaRuResFlg, FmaRuAns, FmaRuAnsFlg);
|
||||
$stop;
|
||||
end
|
||||
if(~((FmaRdRes === FmaRdAns | FmaRdNaNGood | FmaRdNaNGood === 1'bx) & (FmaRdResFlg === FmaRdAnsFlg | FmaRdAnsFlg === 5'bx))) begin
|
||||
errors += 1;
|
||||
$display("There is an error in FMA - RD");
|
||||
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRdX, FmaRdY, FmaRdZ, FmaRdRes, FmaRdResFlg, FmaRdAns, FmaRdAnsFlg);
|
||||
$stop;
|
||||
end
|
||||
if(~((FmaRnmRes === FmaRnmAns | FmaRnmNaNGood | FmaRnmNaNGood === 1'bx) & (FmaRnmResFlg === FmaRnmAnsFlg | FmaRnmAnsFlg === 5'bx))) begin
|
||||
errors += 1;
|
||||
$display("There is an error in FMA - RNM");
|
||||
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRnmX, FmaRnmY, FmaRnmZ, FmaRnmRes, FmaRnmResFlg, FmaRnmAns, FmaRnmAnsFlg);
|
||||
$stop;
|
||||
end
|
||||
|
||||
VectorNum += 1; // increment the vector
|
||||
FmaVectorNum += 1; // increment the vector
|
||||
|
||||
// check to see if there more vectors in this test
|
||||
// *** fix this so that fma and other run sepratly - re-add fma num
|
||||
if ((FmaRneVectors[FmaVectorNum][0] === 1'bx &
|
||||
FmaRzVectors[FmaVectorNum][0] === 1'bx &
|
||||
FmaRuVectors[FmaVectorNum][0] === 1'bx &
|
||||
FmaRdVectors[FmaVectorNum][0] === 1'bx &
|
||||
FmaRnmVectors[FmaVectorNum][0] === 1'bx & FmaRneTests[FmaTestNum] !== "" )) begin // if reached the end of file
|
||||
|
||||
// increment the test
|
||||
FmaTestNum += 1;
|
||||
|
||||
// clear the vectors
|
||||
for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
|
||||
// read next files
|
||||
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
|
||||
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
|
||||
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
|
||||
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
|
||||
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
|
||||
|
||||
// set the vector index back to 0
|
||||
FmaVectorNum = 0;
|
||||
|
||||
// if no more Tests - finish
|
||||
if(Tests[TestNum] === "" &
|
||||
FmaRneTests[FmaTestNum] === "" &
|
||||
FmaRzTests[FmaTestNum] === "" &
|
||||
FmaRuTests[FmaTestNum] === "" &
|
||||
FmaRdTests[FmaTestNum] === "" &
|
||||
FmaRnmTests[FmaTestNum] === "") begin
|
||||
$display("\nAll Tests completed with %d errors\n", errors);
|
||||
$stop;
|
||||
end
|
||||
|
||||
$display("Running FMA precision %d", FmaTestNum);
|
||||
end
|
||||
|
||||
if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file
|
||||
|
||||
@ -1299,14 +849,9 @@ end
|
||||
TestNum += 1;
|
||||
|
||||
// clear the vectors
|
||||
for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
|
||||
for(int i=0; i<6133248; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
|
||||
// read next files
|
||||
$readmemh({`PATH, Tests[TestNum]}, TestVectors);
|
||||
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
|
||||
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
|
||||
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
|
||||
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
|
||||
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
|
||||
|
||||
// set the vector index back to 0
|
||||
VectorNum = 0;
|
||||
@ -1317,12 +862,7 @@ end
|
||||
else FrmNum = 0;
|
||||
|
||||
// if no more Tests - finish
|
||||
if(Tests[TestNum] === "" &
|
||||
FmaRneTests[FmaTestNum] === "" &
|
||||
FmaRzTests[FmaTestNum] === "" &
|
||||
FmaRuTests[FmaTestNum] === "" &
|
||||
FmaRdTests[FmaTestNum] === "" &
|
||||
FmaRnmTests[FmaTestNum] === "") begin
|
||||
if(Tests[TestNum] === "") begin
|
||||
$display("\nAll Tests completed with %d errors\n", errors);
|
||||
$stop;
|
||||
end
|
||||
@ -1335,89 +875,6 @@ endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
module readfmavectors (
|
||||
input logic clk,
|
||||
input logic [`FMTBITS-1:0] FmaModFmt, // the modified format
|
||||
input logic [1:0] FmaFmt, // the format of the FMA inputs
|
||||
input logic [`FLEN*4+7:0] TestVector, // the test vector
|
||||
output logic [`FLEN-1:0] Ans, // the correct answer
|
||||
output logic [4:0] AnsFlg, // the correct flag
|
||||
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
|
||||
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
|
||||
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision)
|
||||
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN
|
||||
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
|
||||
output logic XDenormE, ZDenormE, // is XYZ denormalized
|
||||
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
|
||||
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
|
||||
output logic [`FLEN-1:0] X, Y, Z // inputs
|
||||
);
|
||||
|
||||
logic XExpMaxE; // signals the unpacker outputs but isn't used in FMA
|
||||
// apply test vectors on rising edge of clk
|
||||
// Format of vectors Inputs(1/2/3)_AnsFlg
|
||||
always @(posedge clk) begin
|
||||
#1;
|
||||
AnsFlg = TestVector[4:0];
|
||||
case (FmaFmt)
|
||||
2'b11: begin // quad
|
||||
X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
|
||||
Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
|
||||
Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
|
||||
Ans = TestVector[8+(`Q_LEN-1):8];
|
||||
end
|
||||
2'b01: begin // double
|
||||
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
|
||||
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
|
||||
Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
|
||||
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
|
||||
end
|
||||
2'b00: begin // single
|
||||
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
|
||||
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
|
||||
Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
|
||||
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
|
||||
end
|
||||
2'b10: begin // half
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
|
||||
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
|
||||
Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
|
||||
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
|
||||
.XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
|
||||
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
|
||||
.XExpMaxE, .ZDenormE);
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
module readvectors (
|
||||
input logic clk,
|
||||
input logic [`FLEN*4+7:0] TestVector,
|
||||
@ -1451,33 +908,61 @@ module readvectors (
|
||||
`FMAUNIT:
|
||||
case (Fmt)
|
||||
2'b11: begin // quad
|
||||
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
|
||||
if(OpCtrl === `FMA_OPCTRL) begin
|
||||
X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
|
||||
Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
|
||||
Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
|
||||
end
|
||||
else begin
|
||||
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
|
||||
end
|
||||
Ans = TestVector[8+(`Q_LEN-1):8];
|
||||
end
|
||||
2'b01: begin // double
|
||||
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
|
||||
else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}};
|
||||
else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
|
||||
if(OpCtrl === `FMA_OPCTRL) begin
|
||||
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
|
||||
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
|
||||
Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
|
||||
end
|
||||
else begin
|
||||
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
|
||||
else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}};
|
||||
else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
|
||||
end
|
||||
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
|
||||
end
|
||||
2'b00: begin // single
|
||||
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
|
||||
else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}};
|
||||
else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
|
||||
if(OpCtrl === `FMA_OPCTRL) begin
|
||||
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
|
||||
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
|
||||
Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
|
||||
end
|
||||
else begin
|
||||
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
|
||||
else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}};
|
||||
else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
|
||||
end
|
||||
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
|
||||
end
|
||||
2'b10: begin // half
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
|
||||
else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}};
|
||||
else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
|
||||
if(OpCtrl === `FMA_OPCTRL) begin
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
|
||||
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
|
||||
Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
|
||||
end
|
||||
else begin
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
|
||||
else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}};
|
||||
else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
|
||||
end
|
||||
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
|
||||
end
|
||||
endcase
|
||||
@ -1532,19 +1017,19 @@ module readvectors (
|
||||
2'b11: begin // quad
|
||||
case (OpCtrl[1:0])
|
||||
2'b11: begin // quad
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
|
||||
X = {TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
|
||||
Ans = TestVector[8+(`Q_LEN-1):8];
|
||||
end
|
||||
2'b01: begin // double
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
|
||||
X = {TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
|
||||
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
|
||||
end
|
||||
2'b00: begin // single
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
|
||||
X = {TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
|
||||
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
|
||||
end
|
||||
2'b10: begin // half
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
|
||||
X = {TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
|
||||
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
|
||||
end
|
||||
endcase
|
||||
@ -1628,12 +1113,12 @@ module readvectors (
|
||||
Ans = TestVector[8+(`Q_LEN-1):8];
|
||||
end
|
||||
2'b01: begin // quad -> long
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
|
||||
X = {TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
|
||||
SrcA = {`XLEN{1'bx}};
|
||||
Ans = {TestVector[8+(`XLEN-1):8]};
|
||||
end
|
||||
2'b00: begin // quad -> int
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+32+`Q_LEN-1:8+(32)]};
|
||||
X = {TestVector[8+32+`Q_LEN-1:8+(32)]};
|
||||
SrcA = {`XLEN{1'bx}};
|
||||
Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
|
||||
end
|
||||
|
@ -396,6 +396,7 @@ module riscvassertions;
|
||||
assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
|
||||
assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
|
||||
assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
|
||||
assert (`FLEN<=`XLEN | `DMEM == `MEM_CACHE) else $error("Wally does not support FLEN > XLEN unleses data cache is supported");
|
||||
assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
|
||||
assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
|
||||
assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");
|
||||
@ -418,6 +419,7 @@ module riscvassertions;
|
||||
//assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS.");
|
||||
//assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS.");
|
||||
assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
|
||||
assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words");
|
||||
end
|
||||
endmodule
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
`define ADD_OPCTRL 3'b110
|
||||
`define MUL_OPCTRL 3'b100
|
||||
`define SUB_OPCTRL 3'b111
|
||||
`define FADD_OPCTRL 3'b000
|
||||
`define FMA_OPCTRL 3'b000
|
||||
`define DIV_OPCTRL 3'b000
|
||||
`define SQRT_OPCTRL 3'b001
|
||||
`define LE_OPCTRL 3'b011
|
||||
@ -21,11 +21,11 @@
|
||||
`define RU 3'b011
|
||||
`define RD 3'b010
|
||||
`define RNM 3'b100
|
||||
`define FMAUNIT 0
|
||||
`define FMAUNIT 2
|
||||
`define DIVUNIT 1
|
||||
`define CVTINTUNIT 2
|
||||
`define CVTFPUNIT 3
|
||||
`define CMPUNIT 4
|
||||
`define CVTINTUNIT 0
|
||||
`define CVTFPUNIT 4
|
||||
`define CMPUNIT 3
|
||||
|
||||
string f16rv32cvtint[] = '{
|
||||
"ui32_to_f16_rne.tv",
|
||||
|
@ -1102,11 +1102,11 @@ string imperas32f[] = '{
|
||||
// "rv64i_m/D/d_fdiv_b20-01", // looks like flags
|
||||
// "rv64i_m/D/d_fdiv_b2-01", // also flags
|
||||
// "rv64i_m/D/d_fdiv_b21-01", // positive NaNs again
|
||||
"rv64i_m/D/d_fdiv_b3-01",
|
||||
// "rv64i_m/D/d_fdiv_b3-01",
|
||||
// "rv64i_m/D/d_fdiv_b4-01", // flags
|
||||
"rv64i_m/D/d_fdiv_b5-01",
|
||||
// "rv64i_m/D/d_fdiv_b5-01",
|
||||
// "rv64i_m/D/d_fdiv_b6-01", // flags
|
||||
"rv64i_m/D/d_fdiv_b7-01",
|
||||
// "rv64i_m/D/d_fdiv_b7-01",
|
||||
// "rv64i_m/D/d_fdiv_b8-01", // flags
|
||||
// "rv64i_m/D/d_fdiv_b9-01", might be a flag too
|
||||
"rv64i_m/D/d_feq_b1-01",
|
||||
|
@ -106,7 +106,7 @@ def getVals(tech, module, var, freq=None):
|
||||
|
||||
if (freq != None):
|
||||
for oneSynth in allSynths:
|
||||
if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module):
|
||||
if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module) & (oneSynth.width != 1):
|
||||
widthL += [oneSynth.width]
|
||||
osdict = oneSynth._asdict()
|
||||
metric += [osdict[var]]
|
||||
@ -151,33 +151,37 @@ def csvOfBest():
|
||||
file.close()
|
||||
return bestSynths
|
||||
|
||||
def genLegend(fits, coefs, r2, spec, ale=False):
|
||||
''' generates a list of two legend elements
|
||||
labels line with fit equation and dots with tech and r squared of the fit
|
||||
def genLegend(fits, coefs, r2=None, spec=None, ale=False):
|
||||
''' generates a list of two legend elements (or just an equation if no r2 or spec)
|
||||
labels line with fit equation and dots with r squared of the fit
|
||||
'''
|
||||
|
||||
coefsr = [str(round(c, 3)) for c in coefs]
|
||||
|
||||
eq = ''
|
||||
ind = 0
|
||||
|
||||
eqDict = {'c': '', 'l': 'N', 's': '$N^2$', 'g': '$log_2$(N)', 'n': 'N$log_2$(N)'}
|
||||
coefsr = [str(sigfig(c, 2)) for c in coefs]
|
||||
if ale:
|
||||
if (normAddWidth == 32):
|
||||
eqDict = {'c': '', 'l': '(N/32)', 's': '$(N/32)^2$', 'g': '$log_2$(N/32)', 'n': '(N/32)$log_2$(N/32)'}
|
||||
sub = 'S'
|
||||
elif normAddWidth != 1:
|
||||
print('Legend equations are wrong')
|
||||
print('Equations are wrong, check normAddWidth')
|
||||
else:
|
||||
sub = 'N'
|
||||
|
||||
eqDict = {'c': '', 'l': sub, 's': '$'+sub+'^2$', 'g': '$log_2$('+sub+')', 'n': ''+sub+'$log_2$('+sub+')'}
|
||||
eq = ''
|
||||
ind = 0
|
||||
|
||||
for k in eqDict.keys():
|
||||
if k in fits:
|
||||
if str(coefsr[ind]) != '0.0': eq += " + " + coefsr[ind] + eqDict[k]
|
||||
if str(coefsr[ind]) != '0': eq += " + " + coefsr[ind] + eqDict[k]
|
||||
ind += 1
|
||||
|
||||
eq = eq[3:] # chop off leading ' + '
|
||||
|
||||
legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq)]
|
||||
legend_elements += [lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label=spec.tech +' $R^2$='+ str(round(r2, 4)))]
|
||||
return legend_elements
|
||||
if (r2==None) or (spec==None):
|
||||
return eq
|
||||
else:
|
||||
legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq)]
|
||||
legend_elements += [lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label='$R^2$='+ str(round(r2, 4)))]
|
||||
return legend_elements
|
||||
|
||||
def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, color=None):
|
||||
''' module: string module name
|
||||
@ -197,9 +201,14 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo
|
||||
allMetrics = []
|
||||
|
||||
ale = (var != 'delay') # if not delay, must be area, leakage, or energy
|
||||
modFit = fitDict[mod]
|
||||
modFit = fitDict[module]
|
||||
fits = modFit[ale]
|
||||
|
||||
if freq:
|
||||
ls = '--'
|
||||
else:
|
||||
ls = '-'
|
||||
|
||||
for spec in techSpecs:
|
||||
metric = getVals(spec.tech, module, var, freq=freq)
|
||||
|
||||
@ -209,26 +218,26 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo
|
||||
metric = [m/norm for m in metric]
|
||||
|
||||
if len(metric) == 5: # don't include the spec if we don't have points for all widths
|
||||
xp, pred, coefs, r2 = regress(widths, metric, fits)
|
||||
xp, pred, coefs, r2 = regress(widths, metric, fits, ale)
|
||||
fullLeg += genLegend(fits, coefs, r2, spec, ale=ale)
|
||||
c = color if color else spec.color
|
||||
ax.scatter(widths, metric, color=c, marker=spec.shape)
|
||||
ax.plot(xp, pred, color=c)
|
||||
ax.plot(xp, pred, color=c, linestyle=ls)
|
||||
allWidths += widths
|
||||
allMetrics += metric
|
||||
|
||||
combined = TechSpec('combined', 'red', '_', 0, 0, 0, 0)
|
||||
xp, pred, coefs, r2 = regress(allWidths, allMetrics, fits)
|
||||
leg = genLegend(fits, coefs, r2, combined, ale=ale)
|
||||
fullLeg += leg
|
||||
ax.plot(xp, pred, color='red')
|
||||
ax.plot(xp, pred, color='red', linestyle=ls)
|
||||
|
||||
if norm:
|
||||
ylabeldic = {"lpower": "Leakage Power (add32)", "denergy": "Energy/Op (add32)", "area": "Area (add32)", "delay": "Delay (FO4)"}
|
||||
else:
|
||||
ylabeldic = {"lpower": "Leakage Power (nW)", "denergy": "Dynamic Energy (fJ)", "area": "Area (sq microns)", "delay": "Delay (ns)"}
|
||||
|
||||
ax.legend(handles=fullLeg)
|
||||
# fullLeg += genLegend(fits, coefs, r2, combined, ale=ale)
|
||||
# legLoc = 'upper left' if ale else 'center right'
|
||||
# ax.add_artist(ax.legend(handles=fullLeg, loc=legLoc))
|
||||
|
||||
ax.set_xticks(widths)
|
||||
ax.set_xlabel("Width (bits)")
|
||||
ax.set_ylabel(ylabeldic[var])
|
||||
@ -243,15 +252,20 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo
|
||||
ax.set_title(module + titleStr)
|
||||
plt.savefig('./plots/PPA/'+ module + '_' + var + '.png')
|
||||
# plt.show()
|
||||
return fullLeg
|
||||
return r2
|
||||
|
||||
def regress(widths, var, fits='clsgn'):
|
||||
def regress(widths, var, fits='clsgn', ale=False):
|
||||
''' fits a curve to the given points
|
||||
returns lists of x and y values to plot that curve and legend elements with the equation
|
||||
returns lists of x and y values to plot that curve and coefs for the eq with r2
|
||||
'''
|
||||
|
||||
funcArr = genFuncs(fits)
|
||||
widths = [w/normAddWidth for w in widths]
|
||||
xp = np.linspace(4, 140, 200)
|
||||
xpToCalc = xp
|
||||
|
||||
if ale:
|
||||
widths = [w/normAddWidth for w in widths]
|
||||
xpToCalc = [x/normAddWidth for x in xp]
|
||||
|
||||
mat = []
|
||||
for w in widths:
|
||||
@ -262,53 +276,91 @@ def regress(widths, var, fits='clsgn'):
|
||||
|
||||
y = np.array(var, dtype=np.float)
|
||||
coefs = opt.nnls(mat, y)[0]
|
||||
|
||||
yp = []
|
||||
for w in widths:
|
||||
n = [func(w) for func in funcArr]
|
||||
yp += [sum(np.multiply(coefs, n))]
|
||||
r2 = skm.r2_score(y, yp)
|
||||
|
||||
xp = np.linspace(4, 140, 200)
|
||||
pred = []
|
||||
for x in xp:
|
||||
n = [func(x/normAddWidth) for func in funcArr]
|
||||
for x in xpToCalc:
|
||||
n = [func(x) for func in funcArr]
|
||||
pred += [sum(np.multiply(coefs, n))]
|
||||
|
||||
return xp, pred, coefs, r2
|
||||
|
||||
def makeCoefTable():
|
||||
'''
|
||||
writes CSV with each line containing the coefficients for a regression fit
|
||||
''' writes CSV with each line containing the coefficients for a regression fit
|
||||
to a particular combination of module, metric (including both techs, normalized)
|
||||
'''
|
||||
file = open("ppaFitting.csv", "w")
|
||||
writer = csv.writer(file)
|
||||
writer.writerow(['Module', 'Metric', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2'])
|
||||
writer.writerow(['Module', 'Metric', 'Target', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2'])
|
||||
|
||||
for module in modules:
|
||||
for var in ['delay', 'area', 'lpower', 'denergy']:
|
||||
ale = (var != 'delay')
|
||||
metL = []
|
||||
modFit = fitDict[module]
|
||||
fits = modFit[ale]
|
||||
for freq in [10, None]:
|
||||
target = 'easy' if freq else 'hard'
|
||||
for var in ['delay', 'area', 'lpower', 'denergy']:
|
||||
ale = (var != 'delay')
|
||||
metL = []
|
||||
modFit = fitDict[module]
|
||||
fits = modFit[ale]
|
||||
|
||||
for spec in techSpecs:
|
||||
metric = getVals(spec.tech, module, var)
|
||||
techdict = spec._asdict()
|
||||
norm = techdict[var]
|
||||
metL += [m/norm for m in metric]
|
||||
for spec in techSpecs:
|
||||
metric = getVals(spec.tech, module, var, freq=freq)
|
||||
techdict = spec._asdict()
|
||||
norm = techdict[var]
|
||||
metL += [m/norm for m in metric]
|
||||
|
||||
xp, pred, coefs, r2 = regress(widths*2, metL, fits)
|
||||
coefs = np.ndarray.tolist(coefs)
|
||||
coefsToWrite = [None]*5
|
||||
fitTerms = 'clsgn'
|
||||
ind = 0
|
||||
for i in range(len(fitTerms)):
|
||||
if fitTerms[i] in fits:
|
||||
coefsToWrite[i] = coefs[ind]
|
||||
ind += 1
|
||||
row = [module, var] + coefsToWrite + [r2]
|
||||
writer.writerow(row)
|
||||
xp, pred, coefs, r2 = regress(widths*2, metL, fits, ale)
|
||||
coefs = np.ndarray.tolist(coefs)
|
||||
coefsToWrite = [None]*5
|
||||
fitTerms = 'clsgn'
|
||||
ind = 0
|
||||
for i in range(len(fitTerms)):
|
||||
if fitTerms[i] in fits:
|
||||
coefsToWrite[i] = coefs[ind]
|
||||
ind += 1
|
||||
row = [module, var, target] + coefsToWrite + [r2]
|
||||
writer.writerow(row)
|
||||
|
||||
file.close()
|
||||
|
||||
def sigfig(num, figs):
|
||||
return '{:g}'.format(float('{:.{p}g}'.format(num, p=figs)))
|
||||
|
||||
def makeEqTable():
|
||||
''' writes CSV with each line containing the equations for fits for each metric
|
||||
to a particular module (including both techs, normalized)
|
||||
'''
|
||||
file = open("ppaEquations.csv", "w")
|
||||
writer = csv.writer(file)
|
||||
writer.writerow(['Element', 'Best delay', 'Fast area', 'Fast leakage', 'Fast energy', 'Small area', 'Small leakage', 'Small energy'])
|
||||
|
||||
for module in modules:
|
||||
eqs = []
|
||||
for freq in [None, 10]:
|
||||
for var in ['delay', 'area', 'lpower', 'denergy']:
|
||||
if (var == 'delay') and (freq == 10):
|
||||
pass
|
||||
else:
|
||||
ale = (var != 'delay')
|
||||
metL = []
|
||||
modFit = fitDict[module]
|
||||
fits = modFit[ale]
|
||||
|
||||
for spec in techSpecs:
|
||||
metric = getVals(spec.tech, module, var, freq=freq)
|
||||
techdict = spec._asdict()
|
||||
norm = techdict[var]
|
||||
metL += [m/norm for m in metric]
|
||||
|
||||
xp, pred, coefs, r2 = regress(widths*2, metL, fits, ale)
|
||||
coefs = np.ndarray.tolist(coefs)
|
||||
eqs += [genLegend(fits, coefs, ale=ale)]
|
||||
row = [module] + eqs
|
||||
writer.writerow(row)
|
||||
|
||||
file.close()
|
||||
|
||||
@ -369,7 +421,7 @@ def freqPlot(tech, mod, width):
|
||||
delays = delaysL[ind]
|
||||
freqs = freqsL[ind]
|
||||
|
||||
freqs, delays, areas = noOutliers(median, freqs, delays, areas) # comment out to see all syntheses
|
||||
# freqs, delays, areas = noOutliers(median, freqs, delays, areas) # comment out to see all syntheses
|
||||
|
||||
c = 'blue' if ind else 'green'
|
||||
# adprod = adprodpow(areas, delays, 1)
|
||||
@ -383,14 +435,18 @@ def freqPlot(tech, mod, width):
|
||||
lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')]
|
||||
|
||||
ax1.legend(handles=legend_elements)
|
||||
width = str(width)
|
||||
|
||||
ax2.set_xlabel("Target Freq (MHz)")
|
||||
ax1.set_ylabel('Delay (ns)')
|
||||
ax2.set_ylabel('Area (sq microns)')
|
||||
# ax3.set_ylabel('Area * Delay')
|
||||
# ax4.set_ylabel('Area * $Delay^2$')
|
||||
ax1.set_title(mod + '_' + str(width))
|
||||
plt.savefig('./plots/freqBuckshot/' + tech + '/' + mod + '/' + str(width) + '.png')
|
||||
ax1.set_title(mod + '_' + width)
|
||||
if ('mux' in mod) & ('d' in mod):
|
||||
width = mod
|
||||
mod = 'muxd'
|
||||
plt.savefig('./plots/freqBuckshot/' + tech + '/' + mod + '/' + width + '.png')
|
||||
# plt.show()
|
||||
|
||||
def squareAreaDelay(tech, mod, width):
|
||||
@ -485,30 +541,35 @@ def plotPPA(mod, freq=None, norm=True, aleOpt=False):
|
||||
'''
|
||||
plt.rcParams["figure.figsize"] = (10,7)
|
||||
fig, axs = plt.subplots(2, 2)
|
||||
# fig, axs = plt.subplots(4, 1)
|
||||
|
||||
# oneMetricPlot(mod, 'delay', ax=axs[0], fits=modFit[0], freq=freq, norm=norm)
|
||||
# oneMetricPlot(mod, 'area', ax=axs[1], fits=modFit[1], freq=freq, norm=norm)
|
||||
# oneMetricPlot(mod, 'lpower', ax=axs[2], fits=modFit[1], freq=freq, norm=norm)
|
||||
# oneMetricPlot(mod, 'denergy', ax=axs[3], fits=modFit[1], freq=freq, norm=norm)
|
||||
oneMetricPlot(mod, 'delay', ax=axs[0,0], freq=freq, norm=norm)
|
||||
oneMetricPlot(mod, 'area', ax=axs[0,1], freq=freq, norm=norm)
|
||||
oneMetricPlot(mod, 'lpower', ax=axs[1,0], freq=freq, norm=norm)
|
||||
fullLeg = oneMetricPlot(mod, 'denergy', ax=axs[1,1], freq=freq, norm=norm)
|
||||
arr = [['delay', 'area'], ['lpower', 'denergy']]
|
||||
|
||||
freqs = [freq]
|
||||
if aleOpt: freqs += [10]
|
||||
|
||||
for i in [0, 1]:
|
||||
for j in [0, 1]:
|
||||
leg = []
|
||||
for f in freqs:
|
||||
if (arr[i][j]=='delay') and (f==10):
|
||||
pass
|
||||
else:
|
||||
r2 = oneMetricPlot(mod, arr[i][j], ax=axs[i, j], freq=f, norm=norm)
|
||||
ls = '--' if f else '-'
|
||||
leg += [lines.Line2D([0], [0], color='red', label='$R^2$='+str(round(r2, 4)), linestyle=ls)]
|
||||
axs[i, j].legend(handles=leg)
|
||||
|
||||
if aleOpt:
|
||||
oneMetricPlot(mod, 'area', ax=axs[0,1], freq=10, norm=norm, color='black')
|
||||
oneMetricPlot(mod, 'lpower', ax=axs[1,0], freq=10, norm=norm, color='black')
|
||||
oneMetricPlot(mod, 'denergy', ax=axs[1,1], freq=10, norm=norm, color='black')
|
||||
|
||||
titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)"
|
||||
n = 'normalized' if norm else 'unnormalized'
|
||||
saveStr = './plots/PPA/'+ n + '/' + mod + '.png'
|
||||
titleStr = " (target " + str(freq)+ "MHz)" if freq != None else ""
|
||||
plt.suptitle(mod + titleStr)
|
||||
|
||||
# fig.legend(handles=fullLeg, ncol=3, loc='center', bbox_to_anchor=(0.3, 0.82, 0.4, 0.2))
|
||||
fullLeg = [lines.Line2D([0], [0], color='black', label='fastest', linestyle='-')]
|
||||
fullLeg += [lines.Line2D([0], [0], color='black', label='smallest', linestyle='--')]
|
||||
fig.legend(handles=fullLeg, ncol=3, loc='center', bbox_to_anchor=(0.3, 0.82, 0.4, 0.2))
|
||||
|
||||
if freq != 10: plt.savefig(saveStr)
|
||||
if freq != 10:
|
||||
n = 'normalized' if norm else 'unnormalized'
|
||||
saveStr = './plots/PPA/'+ n + '/' + mod + '.png'
|
||||
plt.savefig(saveStr)
|
||||
# plt.show()
|
||||
|
||||
def plotBestAreas(mod):
|
||||
@ -533,16 +594,17 @@ if __name__ == '__main__':
|
||||
##############################
|
||||
# set up stuff, global variables
|
||||
widths = [8, 16, 32, 64, 128]
|
||||
modules = ['priorityencoder', 'add', 'csa', 'shiftleft', 'comparator', 'flop', 'mux2', 'mux4', 'mux8', 'mult']
|
||||
modules = ['priorityencoder', 'add', 'csa', 'shiftleft', 'comparator', 'flop', 'mux2', 'mux4', 'mux8', 'mult'] #, 'mux2d', 'mux4d', 'mux8d',]
|
||||
normAddWidth = 32 # divisor to use with N since normalizing to add_32
|
||||
|
||||
fitDict = {'add': ['cg', 'l', 'l'], 'mult': ['cg', 's', 'ls'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shiftleft': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'priorityencoder': ['cg', 'l', 'l']}
|
||||
fitDict = {'add': ['cg', 'l', 'l'], 'mult': ['cg', 'ls', 'ls'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shiftleft': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'priorityencoder': ['cg', 'l', 'l']}
|
||||
fitDict.update(dict.fromkeys(['mux2', 'mux4', 'mux8'], ['cg', 'l', 'l']))
|
||||
leftblue = [['mux2', 'sky90', 32], ['mux2', 'sky90', 64], ['mux2', 'sky90', 128], ['mux8', 'sky90', 32], ['mux2', 'tsmc28', 8], ['mux2', 'tsmc28', 64]]
|
||||
|
||||
TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy")
|
||||
techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1330.84, 582.81, 520.66], ['tsmc28', 'blue', '^', 12.2e-3, 209.29, 1060, 81.43]]
|
||||
techSpecs = [TechSpec(*t) for t in techSpecs]
|
||||
combined = TechSpec('combined fit', 'red', '_', 0, 0, 0, 0)
|
||||
# invz1arealeakage = [['sky90', 1.96, 1.98], ['gf32', .351, .3116], ['tsmc28', .252, 1.09]] #['gf32', 'purple', 's', 15e-3]
|
||||
##############################
|
||||
|
||||
@ -556,13 +618,14 @@ if __name__ == '__main__':
|
||||
# squareAreaDelay('sky90', 'add', 32)
|
||||
# oneMetricPlot('add', 'delay')
|
||||
# freqPlot('sky90', 'mux4', 16)
|
||||
# plotBestAreas('add')
|
||||
# makeCoefTable()
|
||||
# makeEqTable()
|
||||
|
||||
for mod in ['mux2']: #modules:
|
||||
for mod in modules:
|
||||
plotPPA(mod, norm=False)
|
||||
plotPPA(mod) #, aleOpt=True)
|
||||
# plotBestAreas(mod)
|
||||
# for w in [8, 16, 32, 64, 128]:
|
||||
# freqPlot('sky90', mod, w)
|
||||
# freqPlot('tsmc28', mod, w)
|
||||
plotPPA(mod, aleOpt=True)
|
||||
for w in [8, 16, 32, 64, 128]:
|
||||
freqPlot('sky90', mod, w)
|
||||
freqPlot('tsmc28', mod, w)
|
||||
plt.close('all')
|
11
synthDC/ppaEquations.csv
Normal file
11
synthDC/ppaEquations.csv
Normal file
@ -0,0 +1,11 @@
|
||||
Element,Best delay,Fast area,Fast leakage,Fast energy,Small area,Small leakage,Small energy
|
||||
priorityencoder,0.98$log_2$(N),0.33S,0.25S,0.093S,0.15S,0.046S,0.00046S
|
||||
add,1.8 + 1.4$log_2$(N),1.1S,0.95S,1S,0.34S,0.16S,0.025S
|
||||
csa,3.6,0.93S,1.5S,1.1S,0.34S,0.16S,0.00055S
|
||||
shiftleft,0.48 + 1.6$log_2$(N),1.9S,2.3S,1.5S,0.8S,0.29S,0.0059S
|
||||
comparator,2 + 0.94$log_2$(N),0.6S,0.47S,0.31S,0.34S,0.16S,0.00089S
|
||||
flop,3.3,0.34S,0.37S,0.0012S,0.34S,0.37S,0.0012S
|
||||
mux2,2.8 + 0.38$log_2$(N),0.2S,0.18S,0.16S,0.15S,0.12S,0.0011S
|
||||
mux4,3.1 + 0.51$log_2$(N),0.36S,0.32S,0.28S,0.28S,0.11S,0.0021S
|
||||
mux8,5 + 0.45$log_2$(N),0.76S,0.66S,0.45S,0.55S,0.24S,0.0029S
|
||||
mult,6$log_2$(N),13S + 10$S^2$,26S + 7.3$S^2$,42S + 25$S^2$,1.1S + 7.9$S^2$,1S + 3.4$S^2$,2.1$S^2$
|
|
@ -74,7 +74,7 @@ if { $saifpower == 1 } {
|
||||
if {$drive != "INV"} {
|
||||
set_false_path -from [get_ports reset]
|
||||
}
|
||||
if {(($::env(DESIGN) == "ppa_mux2_1") || ($::env(DESIGN) == "ppa_mux4_1") || ($::env(DESIGN) == "ppa_mux8_1"))} {
|
||||
if {(($::env(DESIGN) == "ppa_mux2d_1") || ($::env(DESIGN) == "ppa_mux4d_1") || ($::env(DESIGN) == "ppa_mux8d_1"))} {
|
||||
set_false_path -from {s}
|
||||
}
|
||||
|
||||
|
@ -2,482 +2,482 @@
|
||||
BUILD="../../addins/TestFloat-3e/build/Linux-x86_64-GCC"
|
||||
OUTPUT="./vectors"
|
||||
echo "Creating ui32_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
|
||||
echo "Creating ui32_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
|
||||
echo "Creating ui32_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
|
||||
echo "Creating ui32_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
|
||||
echo "Creating ui64_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
|
||||
echo "Creating ui64_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
|
||||
echo "Creating ui64_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
|
||||
echo "Creating ui64_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
|
||||
echo "Creating i32_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
|
||||
echo "Creating i32_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
|
||||
echo "Creating i32_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
|
||||
echo "Creating i32_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
|
||||
echo "Creating i64_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
|
||||
echo "Creating i64_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
|
||||
echo "Creating i64_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
|
||||
echo "Creating i64_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
|
||||
echo "Creating f16_to_ui32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
|
||||
echo "Creating f32_to_ui32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
|
||||
echo "Creating f64_to_ui32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
|
||||
echo "Creating f128_to_ui32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
|
||||
echo "Creating f16_to_ui64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
|
||||
echo "Creating f32_to_ui64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
|
||||
echo "Creating f64_to_ui64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
|
||||
echo "Creating f128_to_ui64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
|
||||
echo "Creating f16_to_i32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
|
||||
echo "Creating f32_to_i32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
|
||||
echo "Creating f64_to_i32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
|
||||
echo "Creating f128_to_i32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
|
||||
echo "Creating f16_to_i64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
|
||||
echo "Creating f32_to_i64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
|
||||
echo "Creating f64_to_i64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
|
||||
echo "Creating f128_to_i64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
|
||||
echo "Creating f16_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
|
||||
echo "Creating f16_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
|
||||
echo "Creating f16_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
|
||||
echo "Creating f32_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
|
||||
echo "Creating f32_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
|
||||
echo "Creating f32_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
|
||||
echo "Creating f64_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
|
||||
echo "Creating f64_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
|
||||
echo "Creating f64_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
|
||||
echo "Creating f128_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
|
||||
echo "Creating f128_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
|
||||
echo "Creating f128_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
|
||||
echo "Creating f16_add vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_add > $OUTPUT/f16_add_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_add > $OUTPUT/f16_add_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_add > $OUTPUT/f16_add_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_add > $OUTPUT/f16_add_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_add > $OUTPUT/f16_add_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_add > $OUTPUT/f16_add_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
|
||||
echo "Creating f32_add vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_add > $OUTPUT/f32_add_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_add > $OUTPUT/f32_add_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_add > $OUTPUT/f32_add_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_add > $OUTPUT/f32_add_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_add > $OUTPUT/f32_add_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_add > $OUTPUT/f32_add_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
|
||||
echo "Creating f64_add vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_add > $OUTPUT/f64_add_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_add > $OUTPUT/f64_add_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_add > $OUTPUT/f64_add_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_add > $OUTPUT/f64_add_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_add > $OUTPUT/f64_add_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_add > $OUTPUT/f64_add_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
|
||||
echo "Creating f128_add vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_add > $OUTPUT/f128_add_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_add > $OUTPUT/f128_add_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_add > $OUTPUT/f128_add_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_add > $OUTPUT/f128_add_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_add > $OUTPUT/f128_add_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_add > $OUTPUT/f128_add_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
|
||||
echo "Creating f16_sub vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
|
||||
echo "Creating f32_sub vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
|
||||
echo "Creating f64_sub vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
|
||||
echo "Creating f128_sub vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
|
||||
echo "Creating f16_mul vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
|
||||
echo "Creating f32_mul vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
|
||||
echo "Creating f64_mul vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
|
||||
echo "Creating f128_mul vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
|
||||
echo "Creating f16_div vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_div > $OUTPUT/f16_div_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_div > $OUTPUT/f16_div_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_div > $OUTPUT/f16_div_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_div > $OUTPUT/f16_div_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_div > $OUTPUT/f16_div_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_div > $OUTPUT/f16_div_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
|
||||
echo "Creating f32_div vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_div > $OUTPUT/f32_div_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_div > $OUTPUT/f32_div_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_div > $OUTPUT/f32_div_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_div > $OUTPUT/f32_div_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_div > $OUTPUT/f32_div_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_div > $OUTPUT/f32_div_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
|
||||
echo "Creating f64_div vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_div > $OUTPUT/f64_div_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_div > $OUTPUT/f64_div_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_div > $OUTPUT/f64_div_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_div > $OUTPUT/f64_div_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_div > $OUTPUT/f64_div_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_div > $OUTPUT/f64_div_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
|
||||
echo "Creating f128_div vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_div > $OUTPUT/f128_div_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_div > $OUTPUT/f128_div_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_div > $OUTPUT/f128_div_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_div > $OUTPUT/f128_div_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_div > $OUTPUT/f128_div_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_div > $OUTPUT/f128_div_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
|
||||
echo "Creating f16_sqrt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
|
||||
echo "Creating f32_sqrt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
|
||||
echo "Creating f64_sqrt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
|
||||
echo "Creating f128_sqrt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
|
||||
echo "Creating f16_eq vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
|
||||
echo "Creating f32_eq vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
|
||||
echo "Creating f64_eq vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
|
||||
echo "Creating f128_eq vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
|
||||
echo "Creating f16_le vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_le > $OUTPUT/f16_le_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_le > $OUTPUT/f16_le_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_le > $OUTPUT/f16_le_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_le > $OUTPUT/f16_le_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_le > $OUTPUT/f16_le_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_le > $OUTPUT/f16_le_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
|
||||
echo "Creating f32_le vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_le > $OUTPUT/f32_le_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_le > $OUTPUT/f32_le_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_le > $OUTPUT/f32_le_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_le > $OUTPUT/f32_le_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_le > $OUTPUT/f32_le_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_le > $OUTPUT/f32_le_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
|
||||
echo "Creating f64_le vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_le > $OUTPUT/f64_le_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_le > $OUTPUT/f64_le_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_le > $OUTPUT/f64_le_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_le > $OUTPUT/f64_le_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_le > $OUTPUT/f64_le_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_le > $OUTPUT/f64_le_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_le > $OUTPUT/f64_le_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_le > $OUTPUT/f64_le_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_le > $OUTPUT/f64_le_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_le > $OUTPUT/f64_le_rnm.tv
|
||||
echo "Creating f128_le vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_le > $OUTPUT/f128_le_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_le > $OUTPUT/f128_le_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_le > $OUTPUT/f128_le_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_le > $OUTPUT/f128_le_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_le > $OUTPUT/f128_le_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_le > $OUTPUT/f128_le_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_le > $OUTPUT/f128_le_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_le > $OUTPUT/f128_le_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_le > $OUTPUT/f128_le_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_le > $OUTPUT/f128_le_rnm.tv
|
||||
echo "Creating f16_lt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_lt > $OUTPUT/f16_lt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_lt > $OUTPUT/f16_lt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_lt > $OUTPUT/f16_lt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_lt > $OUTPUT/f16_lt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_lt > $OUTPUT/f16_lt_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_lt > $OUTPUT/f16_lt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_lt > $OUTPUT/f16_lt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_lt > $OUTPUT/f16_lt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_lt > $OUTPUT/f16_lt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_lt > $OUTPUT/f16_lt_rnm.tv
|
||||
echo "Creating f32_lt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_lt > $OUTPUT/f32_lt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_lt > $OUTPUT/f32_lt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_lt > $OUTPUT/f32_lt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_lt > $OUTPUT/f32_lt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_lt > $OUTPUT/f32_lt_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_lt > $OUTPUT/f32_lt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_lt > $OUTPUT/f32_lt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_lt > $OUTPUT/f32_lt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_lt > $OUTPUT/f32_lt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_lt > $OUTPUT/f32_lt_rnm.tv
|
||||
echo "Creating f64_lt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_lt > $OUTPUT/f64_lt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_lt > $OUTPUT/f64_lt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_lt > $OUTPUT/f64_lt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_lt > $OUTPUT/f64_lt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_lt > $OUTPUT/f64_lt_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_lt > $OUTPUT/f64_lt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_lt > $OUTPUT/f64_lt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_lt > $OUTPUT/f64_lt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_lt > $OUTPUT/f64_lt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_lt > $OUTPUT/f64_lt_rnm.tv
|
||||
echo "Creating f128_lt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_lt > $OUTPUT/f128_lt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_lt > $OUTPUT/f128_lt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_lt > $OUTPUT/f128_lt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_lt > $OUTPUT/f128_lt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_lt > $OUTPUT/f128_lt_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_lt > $OUTPUT/f128_lt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_lt > $OUTPUT/f128_lt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_lt > $OUTPUT/f128_lt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_lt > $OUTPUT/f128_lt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_lt > $OUTPUT/f128_lt_rnm.tv
|
||||
echo "Creating f16_mulAdd vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
|
||||
echo "Creating f32_mulAdd vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
|
||||
echo "Creating f64_mulAdd vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
|
||||
echo "Creating f128_mulAdd vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv
|
||||
|
Loading…
Reference in New Issue
Block a user