Merge pull request #799 from davidharrishmc/dev

Parameterized FMA, fpcalc supports quad
This commit is contained in:
Rose Thompson 2024-05-14 13:17:32 -05:00 committed by GitHub
commit 1874226b60
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 273 additions and 146 deletions

View File

@ -115,6 +115,9 @@ localparam CVTLEN = (ZFA_SUPPORTED & D_SUPPORTED) ? `max(BASECVTLEN, 32'd84) : B
localparam LLEN = `max($unsigned(FLEN), $unsigned(XLEN)); localparam LLEN = `max($unsigned(FLEN), $unsigned(XLEN));
localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1)); localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
// size of FMA output
localparam FMALEN = 3*NF + 6;
// NORMSHIFTSIZE is the bits out of the normalization shifter // NORMSHIFTSIZE is the bits out of the normalization shifter
// RV32F: max(32+23+1, 2(23)+4, 3(23)+6) = 3*23+6 = 75 // RV32F: max(32+23+1, 2(23)+4, 3(23)+6) = 3*23+6 = 75
// RV64F: max(64+23+1, 64 + 23 + 2, 3*23+6) = 89 // RV64F: max(64+23+1, 64 + 23 + 2, 3*23+6) = 89
@ -125,12 +128,10 @@ localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
// because NORMSHIFTSZ becomes limited by convert rather than divider // because NORMSHIFTSZ becomes limited by convert rather than divider
// The two extra bits are necessary because shiftcorrection dropped them for fcvt. // The two extra bits are necessary because shiftcorrection dropped them for fcvt.
// May be possible to remove these two bits by modifying shiftcorrection // May be possible to remove these two bits by modifying shiftcorrection
localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (3*NF+8)); //localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (FMALEN + 2));
//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (3*NF+8)); localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN + 2));
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ) localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ)
localparam CORRSHIFTSZ = NORMSHIFTSZ-2; // Drop leading 2 integer bits
// Disable spurious Verilator warnings // Disable spurious Verilator warnings

View File

@ -193,9 +193,9 @@ localparam cvw_t P = '{
CVTLEN : CVTLEN, CVTLEN : CVTLEN,
LLEN : LLEN, LLEN : LLEN,
LOGCVTLEN : LOGCVTLEN, LOGCVTLEN : LOGCVTLEN,
FMALEN : FMALEN,
NORMSHIFTSZ : NORMSHIFTSZ, NORMSHIFTSZ : NORMSHIFTSZ,
LOGNORMSHIFTSZ : LOGNORMSHIFTSZ, LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
CORRSHIFTSZ : CORRSHIFTSZ,
LOGR : LOGR, LOGR : LOGR,
RK : RK, RK : RK,
FPDUR : FPDUR, FPDUR : FPDUR,

View File

@ -2,14 +2,12 @@
CC = gcc CC = gcc
CFLAGS = -O3 -Wno-format-overflow CFLAGS = -O3 -Wno-format-overflow
LIBS = -lm
LFLAGS = -L.
# Link against the riscv-isa-sim version of SoftFloat rather than # Link against the riscv-isa-sim version of SoftFloat rather than
# the regular version to get RISC-V NaN behavior # the regular version to get RISC-V NaN behavior
IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat
LIBS = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a LIBS = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a -lm -lquadmath
#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/ #IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
#LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a #LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a -lm -lquadmath
SRCS = $(wildcard *.c) SRCS = $(wildcard *.c)
PROGS = $(patsubst %.c,%,$(SRCS)) PROGS = $(patsubst %.c,%,$(SRCS))
@ -17,7 +15,7 @@ PROGS = $(patsubst %.c,%,$(SRCS))
all: $(PROGS) all: $(PROGS)
%: %.c %: %.c
$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS) $(CC) $(CFLAGS) -DSOFTFLOAT_FAST_INT64 $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)
clean: clean:
rm -f $(PROGS) rm -f $(PROGS)

View File

@ -7,6 +7,8 @@
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <inttypes.h>
#include <quadmath.h> // GCC Quad-Math Library
#include "softfloat.h" #include "softfloat.h"
#include "softfloat_types.h" #include "softfloat_types.h"
@ -26,6 +28,12 @@ typedef union dp {
double d; double d;
} dp; } dp;
typedef union qp {
uint64_t v64[2];
__uint128_t v;
__float128 q;
} qp;
int opSize = 0; int opSize = 0;
@ -140,6 +148,45 @@ void printF64(char *msg, float64_t f) {
// msg, conv.v, conv.d, sci, exp, fract); // msg, conv.v, conv.d, sci, exp, fract);
} }
void printF128 (char *msg, float128_t q) {
qp conv;
//__int128_t v128;
int i, j;
char buf[64];
//v128 = q.v[1];
//v128 = v128 << 64 | q.v[0]; // use union to convert between hexadecimal and floating-point views
//conv.v = v128;
conv.v64[0] = q.v[0]; // use union to convert between hexadecimal and floating-point views
conv.v64[1] = q.v[1]; // use union to convert between hexadecimal and floating-point views
printf("%s: ", msg); // print out nicely
// Some compilers can understand %Q for printf on quad precision instead of the
// API call of quadmath_snprintf
// printf("0x%016" PRIx64 "_%016" PRIx64 " = %1.15Qe\n", q.v[1], q.v[0], conv.q);
quadmath_snprintf (buf, sizeof buf, "%1.15Qe", conv.q);
//printf("0x%032" PRIx12 " = %s\n", q.v, buf);
printf("0x%016" PRIx64 "_%016" PRIx64 " = %s\n", q.v[1], q.v[0], buf);
}
void printF128val(float128_t q) {
qp conv;
//__int128_t v128;
int i, j;
char buf[64];
//v128 = q.v[1];
//v128 = v128 << 64 | q.v[0]; // use union to convert between hexadecimal and floating-point views
//conv.v = v128;
conv.v64[0] = q.v[0]; // use union to convert between hexadecimal and floating-point views
conv.v64[1] = q.v[1]; // use union to convert between hexadecimal and floating-point views
// Some compilers can understand %Q for printf on quad precision instead of the
// API call of quadmath_snprintf
// printf("0x%016" PRIx64 "_%016" PRIx64 " = %1.15Qe\n", q.v[1], q.v[0], conv.q);
//quadmath_snprintf (buf, sizeof buf, "%1.15Qe", conv.q);
printf("%016" PRIx64 "%016" PRIx64 "\n", q.v[1], q.v[0]);
}
void printFlags(void) { void printFlags(void) {
int NX = softfloat_exceptionFlags % 2; int NX = softfloat_exceptionFlags % 2;
int UF = (softfloat_exceptionFlags >> 1) % 2; int UF = (softfloat_exceptionFlags >> 1) % 2;
@ -160,14 +207,32 @@ void softfloatInit(void) {
softfloat_detectTininess = softfloat_tininess_afterRounding; // RISC-V behavior for tininess softfloat_detectTininess = softfloat_tininess_afterRounding; // RISC-V behavior for tininess
} }
uint64_t parseNum(char *num) { __uint128_t strtoul128(char *num, int base) {
uint64_t result; __uint128_t result = 0;
int i;
for (i=0; i<strlen(num); i++) {
result = result * base;
if (num[i] >= '0' && num[i] <= '9') result += num[i] - '0';
else if (num[i] >= 'a' && num[i] <= 'f') result += num[i] - 'a' + 10;
else if (num[i] >= 'A' && num[i] <= 'F') result += num[i] - 'A' + 10;
else {
printf("Error: bad character %c in number %s\n", num[i], num);
exit(1);
}
}
return result;
}
__uint128_t parseNum(char *num) {
// uint64_t result;
__uint128_t result;
int size; // size of operands in bytes (2= half, 4=single, 8 = double) int size; // size of operands in bytes (2= half, 4=single, 8 = double)
if (strlen(num) < 8) size = 2; if (strlen(num) < 8) size = 2;
else if (strlen(num) < 16) size = 4; else if (strlen(num) < 16) size = 4;
else if (strlen(num) < 19) size = 8; else if (strlen(num) < 32) size = 8;
else if (strlen(num) < 35) size = 16; // *** will need to increase
else { else {
printf("Error: only half, single, and double precision supported"); printf("Error: only half, single, double, or quad precision supported");
exit(1); exit(1);
} }
if (opSize != 0) { if (opSize != 0) {
@ -179,7 +244,7 @@ uint64_t parseNum(char *num) {
opSize = size; opSize = size;
//printf ("Operand size is %d\n", opSize); //printf ("Operand size is %d\n", opSize);
} }
result = (uint64_t)strtoul(num, NULL, 16); result = (__uint128_t)strtoul128(num, 16);
//printf("Parsed %s as 0x%lx\n", num, result); //printf("Parsed %s as 0x%lx\n", num, result);
return result; return result;
} }
@ -206,7 +271,8 @@ char parseRound(char *rnd) {
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
uint64_t xn, yn, zn; //uint64_t xn, yn, zn;
__uint128_t xn, yn, zn;
char op1, op2; char op1, op2;
char cmd[200]; char cmd[200];
@ -217,6 +283,7 @@ int main(int argc, char *argv[])
exit(1); exit(1);
} else { } else {
softfloat_roundingMode = softfloat_round_near_even; softfloat_roundingMode = softfloat_round_near_even;
//printf("argv[0] = %s arvg[1] = %s argv[2] = %s argv[3] = %s\n", argv[0], argv[1], argv[2], argv[3]);
xn = parseNum(argv[1]); xn = parseNum(argv[1]);
yn = parseNum(argv[3]); yn = parseNum(argv[3]);
op1 = parseOp(argv[2]); op1 = parseOp(argv[2]);
@ -241,12 +308,22 @@ int main(int argc, char *argv[])
r = f32_mulAdd(x, y, z); r = f32_mulAdd(x, y, z);
printF32("X", x); printF32("Y", y); printF32("Z", z); printF32("X", x); printF32("Y", y); printF32("Z", z);
printF32("result = X*Y+Z", r); printFlags(); printF32("result = X*Y+Z", r); printFlags();
} else { // opSize = 8 } else if (opSize == 8) {
float64_t x, y, z, r; float64_t x, y, z, r;
x.v = xn; y.v = yn; z.v = zn; x.v = xn; y.v = yn; z.v = zn;
r = f64_mulAdd(x, y, z); r = f64_mulAdd(x, y, z);
printF64("X", x); printF64("Y", y); printF64("Z", z); printF64("X", x); printF64("Y", y); printF64("Z", z);
printF64("result = X*Y+Z", r); printFlags(); printF64("result = X*Y+Z", r); printFlags();
} else { // opSize = 16
float128_t x, y, z, r;
qp xc, yc, zc;
xc.v = xn; yc.v = yn; zc.v = zn;
x.v[0] = xc.v64[0]; x.v[1] = xc.v64[1];
y.v[0] = yc.v64[0]; y.v[1] = yc.v64[1];
z.v[0] = zc.v64[0]; z.v[1] = zc.v64[1];
r = f128_mulAdd(x, y, z);
printF128("X", x); printF128("Y", y); printF128("Z", z);
printF128("result = X*Y+Z", r); printFlags();
} }
} }
} else { } else {
@ -279,7 +356,7 @@ int main(int argc, char *argv[])
sprintf(cmd, "0x%08x %c 0x%08x", x.v, op1, y.v); sprintf(cmd, "0x%08x %c 0x%08x", x.v, op1, y.v);
printF32(cmd, r); printFlags(); printF32(cmd, r); printFlags();
} else { // opSize = 8 } else if (opSize == 8) { // opSize = 8
float64_t x, y, r; float64_t x, y, r;
x.v = xn; y.v = yn; x.v = xn; y.v = yn;
switch (op1) { switch (op1) {
@ -293,7 +370,25 @@ int main(int argc, char *argv[])
printF64("X", x); printF64("Y", y); printF64("X", x); printF64("Y", y);
sprintf(cmd, "0x%016lx %c 0x%016lx", x.v, op1, y.v); sprintf(cmd, "0x%016lx %c 0x%016lx", x.v, op1, y.v);
printF64(cmd, r); printFlags(); printF64(cmd, r); printFlags();
} else { // opSize = 16
float128_t x, y, r;
qp xc, yc;
xc.v = xn; yc.v = yn;
x.v[0] = xc.v64[0]; x.v[1] = xc.v64[1];
y.v[0] = yc.v64[0]; y.v[1] = yc.v64[1];
//x.v = xn; y.v = yn;
switch (op1) {
case 'x': r = f128_mul(x, y); break;
case '+': r = f128_add(x, y); break;
case '-': r = f128_sub(x, y); break;
case '/': r = f128_div(x, y); break;
case '%': r = f128_rem(x, y); break;
default: printf("Unknown op %c\n", op1); exit(1);
}
printF128("X", x); printF128("Y", y);
//sprintf(cmd, "0x%016lx %c 0x%016lx", x.v, op1, y.v);
printF128(cmd, r); printFlags();
printF128val(r);
} }
} }
} }

View File

@ -287,7 +287,7 @@ typedef struct packed {
int LOGCVTLEN; int LOGCVTLEN;
int NORMSHIFTSZ; int NORMSHIFTSZ;
int LOGNORMSHIFTSZ; int LOGNORMSHIFTSZ;
int CORRSHIFTSZ; int FMALEN;
// division constants // division constants
int LOGR ; int LOGR ;

View File

@ -34,13 +34,13 @@ module fma import cvw::*; #(parameter cvw_t P) (
input logic XZero, YZero, ZZero, // is the input zero input logic XZero, YZero, ZZero, // is the input zero
input logic [2:0] OpCtrl, // operation control input logic [2:0] OpCtrl, // operation control
output logic ASticky, // sticky bit that is calculated during alignment output logic ASticky, // sticky bit that is calculated during alignment
output logic [3*P.NF+5:0] Sm, // the positive sum's significand output logic [P.FMALEN-1:0] Sm, // the positive sum's significand
output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A)
output logic As, // the aligned addend's sign (modified Z sign for other operations) output logic As, // the aligned addend's sign (modified Z sign for other operations)
output logic Ps, // the product's sign output logic Ps, // the product's sign
output logic Ss, // the sum's sign output logic Ss, // the sum's sign
output logic [P.NE+1:0] Se, // the sum's exponent output logic [P.NE+1:0] Se, // the sum's exponent
output logic [$clog2(3*P.NF+7)-1:0] SCnt // normalization shift count output logic [$clog2(P.FMALEN+1)-1:0] SCnt // normalization shift count
); );
// OpCtrl: // OpCtrl:
@ -54,8 +54,8 @@ module fma import cvw::*; #(parameter cvw_t P) (
// 111 - sub // 111 - sub
logic [2*P.NF+1:0] Pm; // the product's significand in U(2.2Nf) format logic [2*P.NF+1:0] Pm; // the product's significand in U(2.2Nf) format
logic [3*P.NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF) logic [P.FMALEN-1:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF)
logic [3*P.NF+5:0] AmInv; // aligned addend's mantissa possibly inverted logic [P.FMALEN-1:0] AmInv; // aligned addend's mantissa possibly inverted
logic [2*P.NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf) logic [2*P.NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf)
logic KillProd; // set the product to zero before addition if the product is too small to matter logic KillProd; // set the product to zero before addition if the product is too small to matter
logic [P.NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign logic [P.NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
@ -89,6 +89,6 @@ module fma import cvw::*; #(parameter cvw_t P) (
fmaadd #(P) add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss); fmaadd #(P) add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);
fmalza #(3*P.NF+6, P.NF) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt); fmalza #(P.FMALEN, P.NF) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt);
endmodule endmodule

View File

@ -31,14 +31,14 @@ module fmaalign import cvw::*; #(parameter cvw_t P) (
input logic [P.NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format input logic [P.NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
input logic [P.NF:0] Zm, // significand in U(0.NF) format] input logic [P.NF:0] Zm, // significand in U(0.NF) format]
input logic XZero, YZero, ZZero, // is the input zero input logic XZero, YZero, ZZero, // is the input zero
output logic [3*P.NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1) output logic [P.FMALEN-1:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
output logic ASticky, // Sticky bit calculated from the aliged addend output logic ASticky, // Sticky bit calculated from the aliged addend
output logic KillProd // should the product be set to zero output logic KillProd // should the product be set to zero
); );
logic [P.NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format logic [P.NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*P.NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) logic [P.FMALEN+P.NF-1:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*P.NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) logic [P.FMALEN+P.NF-1:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
logic KillZ; // should the addend be killed logic KillZ; // should the addend be killed
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -56,7 +56,7 @@ module fmaalign import cvw::*; #(parameter cvw_t P) (
// | 54'b0 | 106'b(product) | 2'b0 | // | 54'b0 | 106'b(product) | 2'b0 |
// | addnend | // | addnend |
assign ZmPreshifted = {Zm,(3*P.NF+5)'(0)}; assign ZmPreshifted = {Zm,(P.FMALEN-1)'(0)};
assign KillProd = (ACnt[P.NE+1]&~ZZero)|XZero|YZero; assign KillProd = (ACnt[P.NE+1]&~ZZero)|XZero|YZero;
assign KillZ = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(5)); assign KillZ = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(5));
@ -87,6 +87,6 @@ module fmaalign import cvw::*; #(parameter cvw_t P) (
end end
end end
assign Am = ZmShifted[4*P.NF+5:P.NF]; assign Am = ZmShifted[P.FMALEN+P.NF-1:P.NF];
endmodule endmodule

View File

@ -119,14 +119,14 @@ module fpu import cvw::*; #(parameter cvw_t P) (
// Fma Signals // Fma Signals
logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting
logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying
logic [3*P.NF+5:0] SmE, SmM; // Sum significand logic [P.FMALEN-1:0] SmE, SmM; // Sum significand
logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output
logic [P.NE+1:0] SeE,SeM; // Sum exponent logic [P.NE+1:0] SeE,SeM; // Sum exponent
logic InvAE, InvAM; // Invert addend logic InvAE, InvAM; // Invert addend
logic AsE, AsM; // Addend sign logic AsE, AsM; // Addend sign
logic PsE, PsM; // Product sign logic PsE, PsM; // Product sign
logic SsE, SsM; // Sum sign logic SsE, SsM; // Sum sign
logic [$clog2(3*P.NF+7)-1:0] SCntE, SCntM; // LZA sum leading zero count logic [$clog2(P.FMALEN+1)-1:0] SCntE, SCntM; // LZA sum leading zero count
// Cvt Signals // Cvt Signals
logic [P.NE:0] CeE, CeM; // convert intermediate expoent logic [P.NE:0] CeE, CeM; // convert intermediate expoent
@ -358,8 +358,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
{XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, {XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); {XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(2) EMRegCmpFlg (clk, reset, FlushM, ~StallM, {PreNVE, PreNXE}, {PreNVM, PreNXM}); flopenrc #(2) EMRegCmpFlg (clk, reset, FlushM, ~StallM, {PreNVE, PreNXE}, {PreNVM, PreNXM});
flopenrc #(3*P.NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); flopenrc #(P.FMALEN) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
flopenrc #($clog2(3*P.NF+7)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM, flopenrc #($clog2(P.FMALEN+1)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
{FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE}, {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
{FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM}); {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM});
flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,

View File

@ -51,7 +51,7 @@ module fround import cvw::*; #(parameter cvw_t P) (
// Unbiased exponent // Unbiased exponent
assign E = Xe - P.BIAS[P.NE-1:0]; assign E = Xe - P.BIAS[P.NE-1:0];
assign Xep1 = Xe + 1; assign Xep1 = Xe + 1'b1;
////////////////////////////////////////// //////////////////////////////////////////
// Compute LSB L', rounding bit R' and Sticky bit T' // Compute LSB L', rounding bit R' and Sticky bit T'
@ -85,7 +85,7 @@ module fround import cvw::*; #(parameter cvw_t P) (
assign Lnonneg = |(Xm & HotE); assign Lnonneg = |(Xm & HotE);
assign Rnonneg = |(Xm & HotEP1); assign Rnonneg = |(Xm & HotEP1);
assign Trunc = Xm & IMask; assign Trunc = Xm & IMask;
assign {Two, Rnd} = Trunc + HotE; // Two means result is 10.000000 = 2.0 assign {Two, Rnd} = Trunc + HotE; // Two means result overflowed to 10.000000 = 2.0
// mux and AND-OR logic to select final rounding bits // mux and AND-OR logic to select final rounding bits
mux2 #(1) Lmux(Lnonneg, 1'b0, Elt0, Lp); mux2 #(1) Lmux(Lnonneg, 1'b0, Elt0, Lp);

View File

@ -30,13 +30,13 @@
module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( module fmashiftcalc import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [P.NE+1:0] FmaSe, // sum's exponent input logic [P.NE+1:0] FmaSe, // sum's exponent
input logic [3*P.NF+5:0] FmaSm, // the positive sum input logic [P.FMALEN-1:0] FmaSm, // the positive sum
input logic [$clog2(3*P.NF+7)-1:0] FmaSCnt, // normalization shift count input logic [$clog2(P.FMALEN+1)-1:0] FmaSCnt, // normalization shift count
output logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results output logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
output logic FmaSZero, // is the result subnormal - calculated before LZA corection output logic FmaSZero, // is the sum zero
output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
output logic [$clog2(3*P.NF+7)-1:0] FmaShiftAmt, // normalization shift count output logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt, // normalization shift count
output logic [3*P.NF+7:0] FmaShiftIn // is the sum zero output logic [P.FMALEN+1:0] FmaShiftIn
); );
logic [P.NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the P.FLEN bias logic [P.NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the P.FLEN bias
logic [P.NE+1:0] BiasCorr; // correction for bias logic [P.NE+1:0] BiasCorr; // correction for bias
@ -49,7 +49,7 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) (
assign FmaSZero = ~(|FmaSm); assign FmaSZero = ~(|FmaSm);
// calculate the sum's exponent FmaSe-FmaSCnt+NF+2 // calculate the sum's exponent FmaSe-FmaSCnt+NF+2
assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+7)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+4); assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(P.FMALEN+1)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+4);
//convert the sum's exponent into the proper precision //convert the sum's exponent into the proper precision
if (P.FPSIZES == 1) begin if (P.FPSIZES == 1) begin
@ -131,6 +131,6 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) (
// set and calculate the shift input and amount // set and calculate the shift input and amount
// - shift once if killing a product and the result is subnormal // - shift once if killing a product and the result is subnormal
assign FmaShiftIn = {2'b0, FmaSm}; assign FmaShiftIn = {2'b0, FmaSm};
if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+3): FmaSCnt+1; if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3): FmaSCnt+1;
else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+3)+BiasCorr[$clog2(3*P.NF+5)-1:0]: FmaSCnt+1; else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3)+BiasCorr[$clog2(P.FMALEN-1)-1:0]: FmaSCnt+1;
endmodule endmodule

View File

@ -44,9 +44,9 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
input logic FmaPs, // the product's sign input logic FmaPs, // the product's sign
input logic FmaSs, // Sum sign input logic FmaSs, // Sum sign
input logic [P.NE+1:0] FmaSe, // the sum's exponent input logic [P.NE+1:0] FmaSe, // the sum's exponent
input logic [3*P.NF+5:0] FmaSm, // the positive sum input logic [P.FMALEN-1:0] FmaSm, // the positive sum
input logic FmaASticky, // sticky bit that is calculated during alignment input logic FmaASticky, // sticky bit that is calculated during alignment
input logic [$clog2(3*P.NF+7)-1:0] FmaSCnt, // the normalization shift count input logic [$clog2(P.FMALEN+1)-1:0] FmaSCnt, // the normalization shift count
//divide signals //divide signals
input logic DivSticky, // divider sticky bit input logic DivSticky, // divider sticky bit
input logic [P.NE+1:0] DivUe, // divsqrt exponent input logic [P.NE+1:0] DivUe, // divsqrt exponent
@ -70,8 +70,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
logic Rs; // result sign logic Rs; // result sign
logic [P.NF-1:0] Rf; // Result fraction logic [P.NF-1:0] Rf; // Result fraction
logic [P.NE-1:0] Re; // Result exponent logic [P.NE-1:0] Re; // Result exponent
logic Ms; // norMalized sign logic Ms; // normalized sign
logic [P.CORRSHIFTSZ-1:0] Mf; // norMalized fraction logic [P.NORMSHIFTSZ-1:0] Mf; // normalized fraction
logic [P.NE+1:0] Me; // normalized exponent logic [P.NE+1:0] Me; // normalized exponent
logic [P.NE+1:0] FullRe; // Re with bits to determine sign and overflow logic [P.NE+1:0] FullRe; // Re with bits to determine sign and overflow
logic UfPlus1; // do you add one (for determining underflow flag) logic UfPlus1; // do you add one (for determining underflow flag)
@ -86,10 +86,10 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
// fma signals // fma signals
logic [P.NE+1:0] FmaMe; // exponent of the normalized sum logic [P.NE+1:0] FmaMe; // exponent of the normalized sum
logic FmaSZero; // is the sum zero logic FmaSZero; // is the sum zero
logic [3*P.NF+7:0] FmaShiftIn; // fma shift input logic [P.FMALEN+1:0] FmaShiftIn; // fma shift input
logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results
logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection
logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt; // normalization shift amount for fma logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt; // normalization shift amount for fma
// division signals // division signals
logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount
logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
@ -154,8 +154,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
always_comb always_comb
case(PostProcSel) case(PostProcSel)
2'b10: begin // fma 2'b10: begin // fma
ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(3*P.NF+5){1'b0}}, FmaShiftAmt}; ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.FMALEN-1){1'b0}}, FmaShiftAmt};
ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+8){1'b0}}}; ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(P.FMALEN+2){1'b0}}};
end end
2'b00: begin // cvt 2'b00: begin // cvt
ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt}; ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt};

View File

@ -32,7 +32,7 @@ module round import cvw::*; #(parameter cvw_t P) (
input logic [2:0] Frm, // rounding mode input logic [2:0] Frm, // rounding mode
input logic [1:0] PostProcSel, // select the postprocessor output input logic [1:0] PostProcSel, // select the postprocessor output
input logic Ms, // normalized sign input logic Ms, // normalized sign
input logic [P.CORRSHIFTSZ-1:0] Mf, // normalized fraction input logic [P.NORMSHIFTSZ-1:0] Mf, // normalized fraction
// fma // fma
input logic FmaOp, // is an fma operation being done? input logic FmaOp, // is an fma operation being done?
input logic [P.NE+1:0] FmaMe, // exponent of the normalized sum for fma input logic [P.NE+1:0] FmaMe, // exponent of the normalized sum for fma
@ -123,61 +123,61 @@ module round import cvw::*; #(parameter cvw_t P) (
// | NF |1|1| // | NF |1|1|
// ^ ^ if floating point result // ^ ^ if floating point result
// ^ if not an FMA result // ^ if not an FMA result
if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | if (XLENPOS == 1)assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); (|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]);
// 2: NF > XLEN // 2: NF > XLEN
if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&IntRes) | if (XLENPOS == 2)assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&IntRes) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]); (|Mf[P.NORMSHIFTSZ-P.NF-2:0]);
end else if (P.FPSIZES == 2) begin end else if (P.FPSIZES == 2) begin
// XLEN is either 64 or 32 // XLEN is either 64 or 32
// so half and single are always smaller then XLEN // so half and single are always smaller then XLEN
// 1: XLEN > NF > NF1 // 1: XLEN > NF > NF1
if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~OutFmt) | if (XLENPOS == 1) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&FpRes&~OutFmt) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); (|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]);
// 2: NF > XLEN > NF1 // 2: NF > XLEN > NF1
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) | if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) | (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]); (|Mf[P.NORMSHIFTSZ-P.NF-2:0]);
// 3: NF > NF1 > XLEN // 3: NF > NF1 > XLEN
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&IntRes) | if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF1-1]&IntRes) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) | (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]); (|Mf[P.NORMSHIFTSZ-P.NF-2:0]);
end else if (P.FPSIZES == 3) begin end else if (P.FPSIZES == 3) begin
// 1: XLEN > NF > NF1 // 1: XLEN > NF > NF1
if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | if (XLENPOS == 1) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) | (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); (|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]);
// 2: NF > XLEN > NF1 // 2: NF > XLEN > NF1
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) | (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) | (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]); (|Mf[P.NORMSHIFTSZ-P.NF-2:0]);
// 3: NF > NF1 > XLEN // 3: NF > NF1 > XLEN
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT2)) | if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT2)) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT2)|IntRes)) | (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT2)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) | (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]); (|Mf[P.NORMSHIFTSZ-P.NF-2:0]);
end else if (P.FPSIZES == 4) begin end else if (P.FPSIZES == 4) begin
// Quad precision will always be greater than XLEN // Quad precision will always be greater than XLEN
// 2: NF > XLEN > NF1 // 2: NF > XLEN > NF1
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) | if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.H_NF-2:P.NORMSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
(|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | (|Mf[P.NORMSHIFTSZ-P.S_NF-2:P.NORMSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
(|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) | (|Mf[P.NORMSHIFTSZ-P.D_NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) | (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]); (|Mf[P.NORMSHIFTSZ-P.Q_NF-2:0]);
// 3: NF > NF1 > XLEN // 3: NF > NF1 > XLEN
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) | if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.H_NF-2:P.NORMSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
(|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | (|Mf[P.NORMSHIFTSZ-P.S_NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) | (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) | (|Mf[P.NORMSHIFTSZ-P.D_NF-2:P.NORMSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]); (|Mf[P.NORMSHIFTSZ-P.Q_NF-2:0]);
end end
@ -188,32 +188,32 @@ module round import cvw::*; #(parameter cvw_t P) (
// determine round and LSB of the rounded value // determine round and LSB of the rounded value
// - underflow round bit is used to determint the underflow flag // - underflow round bit is used to determint the underflow flag
if (P.FPSIZES == 1) begin if (P.FPSIZES == 1) begin
assign FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1]; assign FpGuard = Mf[P.NORMSHIFTSZ-P.NF-1];
assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF]; assign FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF];
assign FpRound = Mf[P.CORRSHIFTSZ-P.NF-2]; assign FpRound = Mf[P.NORMSHIFTSZ-P.NF-2];
end else if (P.FPSIZES == 2) begin end else if (P.FPSIZES == 2) begin
assign FpGuard = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1]; assign FpGuard = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF-1] : Mf[P.NORMSHIFTSZ-P.NF1-1];
assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1]; assign FpLsbRes = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF] : Mf[P.NORMSHIFTSZ-P.NF1];
assign FpRound = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2]; assign FpRound = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF-2] : Mf[P.NORMSHIFTSZ-P.NF1-2];
end else if (P.FPSIZES == 3) begin end else if (P.FPSIZES == 3) begin
always_comb always_comb
case (OutFmt) case (OutFmt)
P.FMT: begin P.FMT: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1]; FpGuard = Mf[P.NORMSHIFTSZ-P.NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF]; FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF];
FpRound = Mf[P.CORRSHIFTSZ-P.NF-2]; FpRound = Mf[P.NORMSHIFTSZ-P.NF-2];
end end
P.FMT1: begin P.FMT1: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.NF1-1]; FpGuard = Mf[P.NORMSHIFTSZ-P.NF1-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1]; FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF1];
FpRound = Mf[P.CORRSHIFTSZ-P.NF1-2]; FpRound = Mf[P.NORMSHIFTSZ-P.NF1-2];
end end
P.FMT2: begin P.FMT2: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.NF2-1]; FpGuard = Mf[P.NORMSHIFTSZ-P.NF2-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2]; FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF2];
FpRound = Mf[P.CORRSHIFTSZ-P.NF2-2]; FpRound = Mf[P.NORMSHIFTSZ-P.NF2-2];
end end
default: begin default: begin
FpGuard = 1'bx; FpGuard = 1'bx;
@ -225,31 +225,31 @@ module round import cvw::*; #(parameter cvw_t P) (
always_comb always_comb
case (OutFmt) case (OutFmt)
2'h3: begin 2'h3: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.Q_NF-1]; FpGuard = Mf[P.NORMSHIFTSZ-P.Q_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF]; FpLsbRes = Mf[P.NORMSHIFTSZ-P.Q_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.Q_NF-2]; FpRound = Mf[P.NORMSHIFTSZ-P.Q_NF-2];
end end
2'h1: begin 2'h1: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.D_NF-1]; FpGuard = Mf[P.NORMSHIFTSZ-P.D_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF]; FpLsbRes = Mf[P.NORMSHIFTSZ-P.D_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.D_NF-2]; FpRound = Mf[P.NORMSHIFTSZ-P.D_NF-2];
end end
2'h0: begin 2'h0: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.S_NF-1]; FpGuard = Mf[P.NORMSHIFTSZ-P.S_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF]; FpLsbRes = Mf[P.NORMSHIFTSZ-P.S_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.S_NF-2]; FpRound = Mf[P.NORMSHIFTSZ-P.S_NF-2];
end end
2'h2: begin 2'h2: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.H_NF-1]; FpGuard = Mf[P.NORMSHIFTSZ-P.H_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF]; FpLsbRes = Mf[P.NORMSHIFTSZ-P.H_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.H_NF-2]; FpRound = Mf[P.NORMSHIFTSZ-P.H_NF-2];
end end
endcase endcase
end end
assign Guard = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN-1] : FpGuard; assign Guard = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN-1] : FpGuard;
assign LsbRes = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN] : FpLsbRes; assign LsbRes = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN] : FpLsbRes;
assign Round = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN-2] : FpRound; assign Round = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN-2] : FpRound;
always_comb begin always_comb begin
// Determine if you add 1 // Determine if you add 1
@ -296,7 +296,7 @@ module round import cvw::*; #(parameter cvw_t P) (
assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)}; assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
// trim unneeded bits from fraction // trim unneeded bits from fraction
assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF]; assign RoundFrac = Mf[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.NF];
// select the exponent // select the exponent
always_comb always_comb

View File

@ -41,11 +41,11 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
input logic FmaSZero, input logic FmaSZero,
// output // output
output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum
output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum after correction output logic [P.NORMSHIFTSZ-1:0] Mf, // the shifted sum after correction
output logic [P.NE+1:0] Ue // corrected exponent for divider output logic [P.NE+1:0] Ue // corrected exponent for divider
); );
logic [P.CORRSHIFTSZ-1:0] CorrShifted; // the shifted sum after LZA correction logic [P.NORMSHIFTSZ-1:0] CorrShifted; // the shifted sum after LZA correction
logic ResSubnorm; // is the result Subnormal logic ResSubnorm; // is the result Subnormal
logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction
logic LeftShiftQm; // should the divsqrt result be shifted one to the left logic LeftShiftQm; // should the divsqrt result be shifted one to the left
@ -69,12 +69,12 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
assign RightShift = FmaOp ? LZAPlus1 : LeftShiftQm; assign RightShift = FmaOp ? LZAPlus1 : LeftShiftQm;
// one bit right shift for FMA or division // one bit right shift for FMA or division
mux2 #(P.NORMSHIFTSZ-2) corrmux(Shifted[P.NORMSHIFTSZ-3:0], Shifted[P.NORMSHIFTSZ-2:1], RightShift, CorrShifted); mux2 #(P.NORMSHIFTSZ) corrmux({Shifted[P.NORMSHIFTSZ-3:0], 2'b00}, {Shifted[P.NORMSHIFTSZ-2:1], 2'b00}, RightShift, CorrShifted);
// if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
always_comb always_comb
if (FmaOp | (DivOp & ~DivResSubnorm)) Mf = CorrShifted; if (FmaOp | (DivOp & ~DivResSubnorm)) Mf = CorrShifted;
else Mf = Shifted[P.NORMSHIFTSZ-1:2]; else Mf = Shifted[P.NORMSHIFTSZ-1:0];
// Determine sum's exponent // Determine sum's exponent
// main exponent issues: // main exponent issues:

View File

@ -98,8 +98,8 @@ module testbench_fp;
logic [P.NE+1:0] Se; logic [P.NE+1:0] Se;
logic ASticky; logic ASticky;
logic KillProd; logic KillProd;
logic [$clog2(3*P.NF+7)-1:0] SCnt; logic [$clog2(P.FMALEN+1)-1:0] SCnt;
logic [3*P.NF+5:0] Sm; logic [P.FMALEN-1:0] Sm;
logic InvA; logic InvA;
logic NegSum; logic NegSum;
logic As; logic As;

69
tests/fp/quad/fp_dataset.py Normal file → Executable file
View File

@ -5,6 +5,7 @@ import random
import sys import sys
import math import math
from decimal import * from decimal import *
import os
sys.set_int_max_str_digits(10000) sys.set_int_max_str_digits(10000)
fzero = ['0x00000000', '0x80000000'] fzero = ['0x00000000', '0x80000000']
@ -347,6 +348,35 @@ def comments_parser(coverpoints):
cvpts.append((cvpt+ " #nosat",comment)) cvpts.append((cvpt+ " #nosat",comment))
return cvpts return cvpts
def softfloat_sub(a, b):
cmd = "$WALLY/examples/fp/fpcalc/fpcalc " + a + " - " + b
result = os.system(cmd)
print("cmd = ", cmd, "returns result = ", result)
return result
# rs1, rs3, result are hexadecimal strings
def gen_rs2(iflen,opcode,rs1,rs3,result):
if opcode in 'fadd':
rs2 = softfloat_sub(result, rs1)
elif opcode in 'fsub':
rs2 = rs1 - fields_dec_converter(iflen,result[i][0])
elif opcode in 'fmul':
rs2 = fields_dec_converter(iflen,result[i][0])/rs1
elif opcode in 'fdiv':
if fields_dec_converter(iflen,result[i][0]) != 0:
rs2 = rs1/fields_dec_converter(iflen,result[i][0])
elif opcode in 'fsqrt':
rs2 = fields_dec_converter(iflen,result[i][0])*fields_dec_converter(iflen,result[i][0])
elif opcode in 'fmadd':
rs2 = (fields_dec_converter(iflen,result[i][0]) - rs3)/rs1
elif opcode in 'fnmadd':
rs2 = (rs3 - fields_dec_converter(iflen,result[i][0]))/rs1
elif opcode in 'fmsub':
rs2 = (fields_dec_converter(iflen,result[i][0]) + rs3)/rs1
elif opcode in 'fnmsub':
rs2 = -1*(rs3 + fields_dec_converter(iflen,result[i][0]))/rs1
return rs2
def ibm_b1(flen, iflen, opcode, ops): def ibm_b1(flen, iflen, opcode, ops):
''' '''
IBM Model B1 Definition: IBM Model B1 Definition:
@ -525,25 +555,26 @@ def ibm_b2(flen, iflen, opcode, ops, int_val = 100, seed = -1): #***Quad support
rs1 = fields_dec_converter(iflen,'0x'+hex(int('1'+rs1_bin[2:],2))[3:]) rs1 = fields_dec_converter(iflen,'0x'+hex(int('1'+rs1_bin[2:],2))[3:])
#print(rs1) #print(rs1)
rs3 = fields_dec_converter(iflen,'0x'+hex(int('1'+rs3_bin[2:],2))[3:]) rs3 = fields_dec_converter(iflen,'0x'+hex(int('1'+rs3_bin[2:],2))[3:])
if opcode in 'fadd': rs2 = gen_rs2(iflen,opcode,"3FFF8000000000000000000000000000","3FFF4000000000000000000000000000","3FFF8800000000000000000000000000")
rs2 = fields_dec_converter(iflen,result[i][0]) - rs1 # if opcode in 'fadd':
elif opcode in 'fsub': # rs2 = fields_dec_converter(iflen,result[i][0]) - rs1
rs2 = rs1 - fields_dec_converter(iflen,result[i][0]) # elif opcode in 'fsub':
elif opcode in 'fmul': # rs2 = rs1 - fields_dec_converter(iflen,result[i][0])
rs2 = fields_dec_converter(iflen,result[i][0])/rs1 # elif opcode in 'fmul':
elif opcode in 'fdiv': # rs2 = fields_dec_converter(iflen,result[i][0])/rs1
if fields_dec_converter(iflen,result[i][0]) != 0: # elif opcode in 'fdiv':
rs2 = rs1/fields_dec_converter(iflen,result[i][0]) # if fields_dec_converter(iflen,result[i][0]) != 0:
elif opcode in 'fsqrt': # rs2 = rs1/fields_dec_converter(iflen,result[i][0])
rs2 = fields_dec_converter(iflen,result[i][0])*fields_dec_converter(iflen,result[i][0]) # elif opcode in 'fsqrt':
elif opcode in 'fmadd': # rs2 = fields_dec_converter(iflen,result[i][0])*fields_dec_converter(iflen,result[i][0])
rs2 = (fields_dec_converter(iflen,result[i][0]) - rs3)/rs1 # elif opcode in 'fmadd':
elif opcode in 'fnmadd': # rs2 = (fields_dec_converter(iflen,result[i][0]) - rs3)/rs1
rs2 = (rs3 - fields_dec_converter(iflen,result[i][0]))/rs1 # elif opcode in 'fnmadd':
elif opcode in 'fmsub': # rs2 = (rs3 - fields_dec_converter(iflen,result[i][0]))/rs1
rs2 = (fields_dec_converter(iflen,result[i][0]) + rs3)/rs1 # elif opcode in 'fmsub':
elif opcode in 'fnmsub': # rs2 = (fields_dec_converter(iflen,result[i][0]) + rs3)/rs1
rs2 = -1*(rs3 + fields_dec_converter(iflen,result[i][0]))/rs1 # elif opcode in 'fnmsub':
# rs2 = -1*(rs3 + fields_dec_converter(iflen,result[i][0]))/rs1
if(iflen==32): if(iflen==32):
m = struct.unpack('f', struct.pack('f', rs2))[0] m = struct.unpack('f', struct.pack('f', rs2))[0]

6
tests/fp/quad/fpdatasetgen.py Normal file → Executable file
View File

@ -1,7 +1,9 @@
#!/usr/bin/python
from fp_dataset import * from fp_dataset import *
#coverpoints=ibm_b1(128, 128, 'fadd.q', 2) #ibm_b1(flen, iflen, opcode, ops) #coverpoints=ibm_b1(128, 128, 'fadd.q', 2) #ibm_b1(flen, iflen, opcode, ops)
#coverpoints=ibm_b2(128,128,'fadd.q',2) #ibm_b2(flen, iflen, opcode, ops, int_val = 100, seed = -1) coverpoints=ibm_b2(128,128,'fadd.q',2) #ibm_b2(flen, iflen, opcode, ops, int_val = 100, seed = -1)
coverpoints=ibm_b2(32,32,'fadd.s',2) #ibm_b2(flen, iflen, opcode, ops,seed = -1) #coverpoints=ibm_b2(32,32,'fadd.s',2) #ibm_b2(flen, iflen, opcode, ops,seed = -1)
#print(coverpoints) #print(coverpoints)
#quad_precision_hex = "0x3ff00000000000000000000000000001" # Example quad precision hexadecimal value #quad_precision_hex = "0x3ff00000000000000000000000000001" # Example quad precision hexadecimal value
#quad_precision_dec = fields_dec_converter(128, quad_precision_hex) #quad_precision_dec = fields_dec_converter(128, quad_precision_hex)