forked from Github_Repos/cvw
removed fma directory, improved plic comments
This commit is contained in:
parent
aa942feedc
commit
331ef80d0f
@ -1,23 +0,0 @@
|
||||
# Makefile
|
||||
|
||||
CC = gcc
|
||||
CFLAGS = -O3
|
||||
LIBS = -lm
|
||||
LFLAGS = -L.
|
||||
# Link against the riscv-isa-sim version of SoftFloat rather than
|
||||
# the regular version to get RISC-V NaN behavior
|
||||
IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat
|
||||
LIBS = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a
|
||||
#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
|
||||
#LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
|
||||
SRCS = $(wildcard *.c)
|
||||
|
||||
PROGS = $(patsubst %.c,%,$(SRCS))
|
||||
|
||||
all: $(PROGS)
|
||||
|
||||
%: %.c
|
||||
$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)
|
||||
|
||||
clean:
|
||||
rm -f $(PROGS)
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,23 +0,0 @@
|
||||
# fma.do
|
||||
#
|
||||
# run with vsim -do "do fma.do"
|
||||
# add -c before -do for batch simulation
|
||||
|
||||
onbreak {resume}
|
||||
|
||||
# create library
|
||||
vlib worklib
|
||||
|
||||
vlog -lint -sv -work worklib fma16.v testbench.v
|
||||
vopt +acc worklib.testbench_fma16 -work worklib -o testbenchopt
|
||||
vsim -lib worklib testbenchopt
|
||||
|
||||
add wave sim:/testbench_fma16/clk
|
||||
add wave sim:/testbench_fma16/reset
|
||||
add wave sim:/testbench_fma16/x
|
||||
add wave sim:/testbench_fma16/y
|
||||
add wave sim:/testbench_fma16/z
|
||||
add wave sim:/testbench_fma16/result
|
||||
add wave sim:/testbench_fma16/rexpected
|
||||
|
||||
run -all
|
@ -1,268 +0,0 @@
|
||||
// fma16.sv
|
||||
// David_Harris@hmc.edu 26 February 2022
|
||||
// 16-bit floating-point multiply-accumulate
|
||||
|
||||
// Operation: general purpose multiply, add, fma, with optional negation
|
||||
// If mul=1, p = x * y. Else p = x.
|
||||
// If add=1, result = p + z. Else result = p.
|
||||
// If negr or negz = 1, negate result or z to handle negations and subtractions
|
||||
// fadd: mul = 0, add = 1, negr = negz = 0
|
||||
// fsub: mul = 0, add = 1, negr = 0, negz = 1
|
||||
// fmul: mul = 1, add = 0, negr = 0, negz = 0
|
||||
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
|
||||
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
|
||||
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
|
||||
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
|
||||
|
||||
`define FFLEN 16
|
||||
`define Nf 10
|
||||
`define Ne 5
|
||||
`define BIAS 15
|
||||
`define EMIN (-(2**(`Ne-1)-1))
|
||||
`define EMAX (2**(`Ne-1)-1)
|
||||
|
||||
`define NaN 16'h7E00
|
||||
`define INF 15'h7C00
|
||||
|
||||
// rounding modes *** update
|
||||
`define RZ 3'b00
|
||||
`define RNE 3'b01
|
||||
`define RM 3'b10
|
||||
`define RP 3'b11
|
||||
|
||||
module fma16(
|
||||
input logic [`FFLEN-1:0] x, y, z,
|
||||
input logic mul, add, negr, negz,
|
||||
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rp, 11: rn
|
||||
output logic [`FFLEN-1:0] result);
|
||||
|
||||
logic [`Nf:0] xm, ym, zm; // U1.Nf
|
||||
logic [`Ne-1:0] xe, ye, ze; // B_Ne
|
||||
logic xs, ys, zs;
|
||||
logic zs1; // sign before optional negation
|
||||
logic [2*`Nf+1:0] pm; // U2.2Nf
|
||||
logic [`Ne:0] pe; // B_Ne+1
|
||||
logic ps; // sign of product
|
||||
logic [22:0] rm;
|
||||
logic [`Ne+1:0] re;
|
||||
logic rs;
|
||||
logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
|
||||
logic [`Ne+1:0] re2;
|
||||
|
||||
unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan); // unpack inputs
|
||||
//signadj16 signadj(negr, negz, xs, ys, zs1, ps, zs); // handle negations
|
||||
mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps); // p = x * y
|
||||
add16 add16(add, pm, zm, pe, ze, ps, zs, negz, rm, re, re2, rs); // r = z + p
|
||||
postproc16 post(roundmode, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result); // normalize, round, pack
|
||||
endmodule
|
||||
|
||||
module mult16(
|
||||
input logic mul,
|
||||
input logic [`Nf:0] xm, ym,
|
||||
input logic [`Ne-1:0] xe, ye,
|
||||
input logic xs, ys,
|
||||
output logic [2*`Nf+1:0] pm,
|
||||
output logic [`Ne:0] pe,
|
||||
output logic ps);
|
||||
|
||||
// only multiply if mul = 1
|
||||
assign pm = mul ? xm * ym : {1'b0, xm, 10'b0}; // multiply mantiassas
|
||||
assign pe = mul ? xe + ye - `BIAS : {1'b0, xe}; // add exponents, account for bias
|
||||
assign ps = xs ^ ys; // negative if X xor Y are negative
|
||||
endmodule
|
||||
|
||||
module add16(
|
||||
input logic add,
|
||||
input logic [2*`Nf+1:0] pm, // U2.2Nf
|
||||
input logic [`Nf:0] zm, // U1.Nf
|
||||
input logic [`Ne:0] pe, // B_Ne+1
|
||||
input logic [`Ne-1:0] ze, // B_Ne
|
||||
input logic ps, zs,
|
||||
input logic negz,
|
||||
output logic [22:0] rm,
|
||||
output logic [`Ne+1:0] re, // B_Ne+2
|
||||
output logic [`Ne+1:0] re2,
|
||||
output logic rs);
|
||||
|
||||
logic [`Nf*3+7:0] paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
|
||||
logic signed [`Ne:0] ExpDiff; // Q(Ne+2).0
|
||||
logic [`Ne:0] AlignCnt; // U(Ne+3) bits to right shift Z for alignment *** check size.
|
||||
logic [`Nf-1:0] prezsticky;
|
||||
logic zsticky;
|
||||
logic effectivesub;
|
||||
logic rs0;
|
||||
logic [`Ne:0] leadingzeros, NormCnt; // *** should paramterize size
|
||||
logic [`Ne:0] re1;
|
||||
|
||||
// Alignment shift
|
||||
assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
|
||||
assign ExpDiff = pe - {1'b0, ze}; // Compute exponent difference as signed number
|
||||
always_comb // AlignCount mux; see Muller page 254
|
||||
if (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7; re = {1'b0, pe}; end
|
||||
else if (ExpDiff <= 2) begin AlignCnt = `Nf + 4 - ExpDiff; re = {1'b0, pe}; end
|
||||
else if (ExpDiff <= `Nf+3) begin AlignCnt = `Nf + 4 - ExpDiff; re = {2'b0, ze}; end
|
||||
else begin AlignCnt = 0; re = {2'b0, ze}; end
|
||||
// Shift Zm right by AlignCnt. Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
|
||||
assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; //Right shift
|
||||
assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1
|
||||
|
||||
// Effective subtraction
|
||||
assign effectivesub = ps ^ zs ^ negz; // subtract |z| from |p|
|
||||
assign zalignedaddsub = effectivesub ? ~zaligned : zaligned; // invert zaligned for subtraction
|
||||
|
||||
// Adder
|
||||
assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
|
||||
assign rs0 = r[`Nf*3+7]; // sign of the initial result
|
||||
assign r2 = rs0 ? ~r+1 : r; // invert sum if negative; could optimize with end-around carry?
|
||||
|
||||
// Sign Logic
|
||||
assign rs = ps ^ rs0; // flip the sign if necessary
|
||||
|
||||
// Leading zero counter
|
||||
lzc lzc(r2, leadingzeros); // count number of leading zeros in 2Nf+5 lower digits of r2
|
||||
assign re1 = pe +2 - leadingzeros; // *** declare, # of bits
|
||||
|
||||
// Normalization shift
|
||||
always_comb // NormCount mux
|
||||
if (ExpDiff < 3) begin
|
||||
if (re1 >= `EMIN) begin NormCnt = `Nf + 3 + leadingzeros; re2 = {1'b0, re1}; end
|
||||
else begin NormCnt = `Nf + 5 + pe - `EMIN; re2 = `EMIN; end
|
||||
end else begin NormCnt = AlignCnt; re = {2'b00, ze}; end
|
||||
assign rnormed = r2 << NormCnt; // *** update sticky
|
||||
/* temporarily comment out to start synth
|
||||
|
||||
// One-bit secondary normalization
|
||||
if (ExpDiff <= 2) begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
|
||||
else begin // *** handle sticky
|
||||
if (rnormed[***]) begin rnormed2 = rnormed >> 1; re2 = re+1; end
|
||||
else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re; end
|
||||
else begin rnormed2 = rnormed << 1; re2 = re-1; end
|
||||
end
|
||||
|
||||
// round
|
||||
assign l = rnormed2[***]; // least significant bit
|
||||
assign r = rnormed2[***-1]; // rounding bit
|
||||
assign s = ***; // sticky bit
|
||||
always_comb
|
||||
case (roundmode)
|
||||
RZ: roundup = 0;
|
||||
RP: roundup = ~rs & (r | s);
|
||||
RM: roundup = rs & (r | s);
|
||||
RNE: roundup = r & (s | l);
|
||||
default: roundup = 0;
|
||||
endcase
|
||||
assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
|
||||
*/
|
||||
|
||||
// *** need to handle rounding to MAXNUM vs. INFINITY
|
||||
|
||||
// add or pass product through
|
||||
/* assign rm = add ? arm : {1'b0, pm};
|
||||
assign re = add ? are : {1'b0, pe};
|
||||
assign rs = add ? ars : ps; */
|
||||
endmodule
|
||||
|
||||
module lzc(
|
||||
input logic [`Nf*3+7:0] r2,
|
||||
output logic [`Ne:0] leadingzeros
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
module postproc16(
|
||||
input logic [1:0] roundmode,
|
||||
input logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
|
||||
input logic [22:0] rm,
|
||||
input logic [`Nf:0] zm, // U1.Nf
|
||||
input logic [6:0] re,
|
||||
input logic [`Ne-1:0] ze, // B_Ne
|
||||
input logic rs, zs, ps,
|
||||
input logic [`Ne+1:0] re2,
|
||||
output logic [15:0] result);
|
||||
|
||||
logic [9:0] uf, uff;
|
||||
logic [6:0] ue;
|
||||
logic [6:0] ueb, uebiased;
|
||||
logic invalid;
|
||||
|
||||
// Special cases
|
||||
// *** not handling signaling NaN
|
||||
// *** also add overflow/underflow/inexact
|
||||
always_comb begin
|
||||
if (xnan | ynan | znan) begin result = `NaN; invalid = 0; end // propagate NANs
|
||||
else if ((xinf | yinf) & zinf & (ps ^ zs)) begin result = `NaN; invalid = 1; end // infinity - infinity
|
||||
else if (xzero & yinf | xinf & yzero) begin result = `NaN; invalid = 1; end // zero times infinity
|
||||
else if (xinf | yinf) begin result = {ps, `INF}; invalid = 0; end // X or Y
|
||||
else if (zinf) begin result = {zs, `INF}; invalid = 0; end // infinite Z
|
||||
else if (xzero | yzero) begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end
|
||||
else if (re2 >= `EMAX) begin result = {rs, `INF}; invalid = 0; end
|
||||
else begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end
|
||||
end
|
||||
|
||||
always_comb
|
||||
if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
|
||||
ue = re + 7'b1;
|
||||
uf = rm[20:11];
|
||||
end else begin // no normalization shift needed
|
||||
ue = re;
|
||||
uf = rm[19:10];
|
||||
end
|
||||
|
||||
// overflow
|
||||
always_comb begin
|
||||
ueb = ue-7'd15;
|
||||
if (ue >= 7'd46) begin // overflow
|
||||
/* uebiased = 7'd30;
|
||||
uff = 10'h3ff; */
|
||||
end else begin
|
||||
uebiased = ue-7'd15;
|
||||
uff = uf;
|
||||
end
|
||||
end
|
||||
|
||||
assign result = {rs, uebiased[4:0], uff};
|
||||
|
||||
// add special case handling for zeros, NaN, Infinity
|
||||
endmodule
|
||||
|
||||
module signadj16(
|
||||
input logic negr, negz,
|
||||
input logic xs, ys, zs1,
|
||||
output logic ps, zs);
|
||||
|
||||
assign ps = xs ^ ys; // sign of product
|
||||
assign zs = zs1 ^ negz; // sign of addend
|
||||
endmodule
|
||||
|
||||
module unpack16(
|
||||
input logic [15:0] x, y, z,
|
||||
output logic [10:0] xm, ym, zm,
|
||||
output logic [4:0] xe, ye, ze,
|
||||
output logic xs, ys, zs,
|
||||
output logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);
|
||||
|
||||
unpacknum16 upx(x, xm, xe, xs, xzero, xinf, xnan);
|
||||
unpacknum16 upy(y, ym, ye, ys, yzero, yinf, ynan);
|
||||
unpacknum16 upz(z, zm, ze, zs, zzero, zinf, znan);
|
||||
endmodule
|
||||
|
||||
module unpacknum16(
|
||||
input logic [15:0] num,
|
||||
output logic [10:0] m,
|
||||
output logic [4:0] e,
|
||||
output logic s,
|
||||
output logic zero, inf, nan);
|
||||
|
||||
logic [9:0] f; // fraction without leading 1
|
||||
logic [4:0] eb; // biased exponent
|
||||
|
||||
assign {s, eb, f} = num; // pull bit fields out of floating-point number
|
||||
assign m = {1'b1, f}; // prepend leading 1 to fraction
|
||||
assign e = eb; // leave bias in exponent ***
|
||||
assign zero = (e == 0 && f == 0);
|
||||
assign inf = (e == 31 && f == 0);
|
||||
assign nan = (e == 31 && f != 0);
|
||||
endmodule
|
||||
|
||||
|
@ -1,24 +0,0 @@
|
||||
// fma16.sv
|
||||
// David_Harris@hmc.edu 26 February 2022
|
||||
// 16-bit floating-point multiply-accumulate
|
||||
|
||||
// Operation: general purpose multiply, add, fma, with optional negation
|
||||
// If mul=1, p = x * y. Else p = x.
|
||||
// If add=1, result = p + z. Else result = p.
|
||||
// If negr or negz = 1, negate result or z to handle negations and subtractions
|
||||
// fadd: mul = 0, add = 1, negr = negz = 0
|
||||
// fsub: mul = 0, add = 1, negr = 0, negz = 1
|
||||
// fmul: mul = 1, add = 0, negr = 0, negz = 0
|
||||
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
|
||||
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
|
||||
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
|
||||
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
|
||||
|
||||
module fma16(
|
||||
input logic [15:0] x, y, z,
|
||||
input logic mul, add, negr, negz,
|
||||
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rp, 11: rn
|
||||
output logic [15:0] result);
|
||||
|
||||
endmodule
|
||||
|
@ -1,240 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include "softfloat.h"
|
||||
#include "softfloat_types.h"
|
||||
|
||||
typedef union sp {
|
||||
float32_t v;
|
||||
float f;
|
||||
} sp;
|
||||
|
||||
// lists of tests, terminated with 0x8000
|
||||
uint16_t easyExponents[] = {15, 0x8000};
|
||||
uint16_t medExponents[] = {1, 14, 15, 16, 20, 30, 0x8000};
|
||||
uint16_t allExponents[] = {1, 15, 16, 30, 31, 0x8000};
|
||||
uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
|
||||
uint16_t medFracts[] = {0, 0x200, 0x001, 0x3FF, 0x8000};
|
||||
uint16_t zeros[] = {0x0000, 0x8000};
|
||||
uint16_t infs[] = {0x7C00, 0xFC00};
|
||||
uint16_t nans[] = {0x7D00, 0x7D01};
|
||||
|
||||
void softfloatInit(void) {
|
||||
softfloat_roundingMode = softfloat_round_minMag;
|
||||
softfloat_exceptionFlags = 0;
|
||||
softfloat_detectTininess = softfloat_tininess_beforeRounding;
|
||||
}
|
||||
|
||||
float convFloat(float16_t f16) {
|
||||
float32_t f32;
|
||||
float res;
|
||||
sp r;
|
||||
|
||||
f32 = f16_to_f32(f16);
|
||||
r.v = f32;
|
||||
res = r.f;
|
||||
return res;
|
||||
}
|
||||
|
||||
void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
float16_t result;
|
||||
int op, flagVals;
|
||||
char calc[80], flags[80];
|
||||
float32_t x32, y32, z32, r32;
|
||||
float xf, yf, zf, rf;
|
||||
float16_t smallest;
|
||||
|
||||
if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
|
||||
if (!add) z.v = 0x0000; // force z to 0 to avoid add
|
||||
if (negp) x.v ^= 0x8000; // flip sign of x to negate p
|
||||
if (negz) z.v ^= 0x8000; // flip sign of z to negate z
|
||||
op = roundingMode << 4 | mul<<3 | add<<2 | negp<<1 | negz;
|
||||
// printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);
|
||||
softfloat_exceptionFlags = 0; // clear exceptions
|
||||
result = f16_mulAdd(x, y, z);
|
||||
|
||||
sprintf(flags, "NV: %d OF: %d UF: %d NX: %d",
|
||||
(softfloat_exceptionFlags >> 4) % 2,
|
||||
(softfloat_exceptionFlags >> 2) % 2,
|
||||
(softfloat_exceptionFlags >> 1) % 2,
|
||||
(softfloat_exceptionFlags) % 2);
|
||||
// pack these four flags into one nibble, discarding DZ flag
|
||||
flagVals = softfloat_exceptionFlags & 0x7 | ((softfloat_exceptionFlags >> 1) & 0x8);
|
||||
|
||||
|
||||
// convert to floats for printing
|
||||
xf = convFloat(x);
|
||||
yf = convFloat(y);
|
||||
zf = convFloat(z);
|
||||
rf = convFloat(result);
|
||||
if (mul)
|
||||
if (add) sprintf(calc, "%f * %f + %f = %f", xf, yf, zf, rf);
|
||||
else sprintf(calc, "%f * %f = %f", xf, yf, rf);
|
||||
else sprintf(calc, "%f + %f = %f", xf, zf, rf);
|
||||
|
||||
// omit denorms, which aren't required for this project
|
||||
smallest.v = 0x0400;
|
||||
float16_t resultmag = result;
|
||||
resultmag.v &= 0x7FFF; // take absolute value
|
||||
if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
|
||||
if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
|
||||
if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed) fprintf(fptr, "// Skip inf: ");
|
||||
if (resultmag.v > 0x7C00 && !nanAllowed) fprintf(fptr, "// Skip NaN: ");
|
||||
fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
|
||||
}
|
||||
|
||||
void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases,
|
||||
FILE *fptr, int *numCases) {
|
||||
int i, j;
|
||||
|
||||
fprintf(fptr, desc); fprintf(fptr, "\n");
|
||||
*numCases=0;
|
||||
for (i=0; e[i] != 0x8000; i++)
|
||||
for (j=0; f[j] != 0x8000; j++) {
|
||||
cases[*numCases].v = f[j] | e[i]<<10;
|
||||
*numCases = *numCases + 1;
|
||||
}
|
||||
}
|
||||
|
||||
void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
z.v = 0x0000;
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
y.v = cases[j].v;
|
||||
for (k=0; k<=sgn; k++) {
|
||||
y.v ^= (k<<15);
|
||||
genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
void genAddTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
y.v = 0x0000;
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
z.v = cases[j].v;
|
||||
for (k=0; k<=sgn; k++) {
|
||||
z.v ^= (k<<15);
|
||||
genCase(fptr, x, y, z, 0, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
|
||||
void genFMATests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, l, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
y.v = cases[j].v;
|
||||
for (k=0; k<numCases; k++) {
|
||||
z.v = cases[k].v;
|
||||
for (l=0; l<=sgn; l++) {
|
||||
z.v ^= (l<<15);
|
||||
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
void genSpecialTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, sx, sy, sz, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
cases[numCases].v = 0x0000; // add +0 case
|
||||
cases[numCases+1].v = 0x8000; // add -0 case
|
||||
numCases += 2;
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
y.v = cases[j].v;
|
||||
for (k=0; k<numCases; k++) {
|
||||
z.v = cases[k].v;
|
||||
for (sx=0; sx<=sgn; sx++) {
|
||||
x.v ^= (sx<<15);
|
||||
for (sy=0; sy<=sgn; sy++) {
|
||||
y.v ^= (sy<<15);
|
||||
for (sz=0; sz<=sgn; sz++) {
|
||||
z.v ^= (sz<<15);
|
||||
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
softfloatInit(); // configure softfloat modes
|
||||
|
||||
// Test cases: multiplication
|
||||
genMulTests(easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
|
||||
genMulTests(medExponents, medFracts, 0, "fmul_1", "// Multiply with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
|
||||
genMulTests(medExponents, medFracts, 1, "fmul_2", "// Multiply with various exponents and signed fractions, RZ", 0, 0, 0, 0);
|
||||
|
||||
// Test cases: addition
|
||||
genAddTests(easyExponents, easyFracts, 0, "fadd_0", "// Add with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
|
||||
genAddTests(medExponents, medFracts, 0, "fadd_1", "// Add with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
|
||||
genAddTests(medExponents, medFracts, 1, "fadd_2", "// Add with various exponents and signed fractions, RZ", 0, 0, 0, 0);
|
||||
|
||||
// Test cases: FMA
|
||||
genFMATests(easyExponents, easyFracts, 0, "fma_0", "// FMA with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
|
||||
genFMATests(medExponents, medFracts, 0, "fma_1", "// FMA with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
|
||||
genFMATests(medExponents, medFracts, 1, "fma_2", "// FMA with various exponents and signed fractions, RZ", 0, 0, 0, 0);
|
||||
|
||||
// Test cases: Zero, Infinity, NaN
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rz", "// FMA with special cases, RZ", 0, 1, 1, 1);
|
||||
|
||||
// Full test cases with other rounding modes
|
||||
softfloat_roundingMode = softfloat_round_near_even;
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rne", "// FMA with special cases, RNE", 1, 1, 1, 1);
|
||||
softfloat_roundingMode = softfloat_round_min;
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rm", "// FMA with special cases, RM", 2, 1, 1, 1);
|
||||
softfloat_roundingMode = softfloat_round_max;
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rp", "// FMA with special cases, RP", 3, 1, 1, 1);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,8 +0,0 @@
|
||||
#!/bin/bash
|
||||
# check for warnings in Verilog code
|
||||
# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
|
||||
export PATH=$PATH:/usr/local/bin/
|
||||
verilator=`which verilator`
|
||||
|
||||
basepath=$(dirname $0)/..
|
||||
$verilator --lint-only --top-module fma16 fma16.v
|
@ -1,2 +0,0 @@
|
||||
vsim -do "do fma.do"
|
||||
|
@ -1 +0,0 @@
|
||||
vsim -c -do "do fma.do"
|
@ -1 +0,0 @@
|
||||
make -C ../../../synthDC synth DESIGN=fma16
|
@ -1,52 +0,0 @@
|
||||
/* verilator lint_off STMTDLY */
|
||||
module testbench_fma16;
|
||||
reg clk, reset;
|
||||
reg [15:0] x, y, z, rexpected;
|
||||
wire [15:0] result;
|
||||
reg [7:0] ctrl;
|
||||
reg [3:0] flagsexpected;
|
||||
reg mul, add, negp, negz;
|
||||
reg [1:0] roundmode;
|
||||
reg [31:0] vectornum, errors;
|
||||
reg [75:0] testvectors[10000:0];
|
||||
|
||||
// instantiate device under test
|
||||
fma16 dut(x, y, z, mul, add, negp, negz, roundmode, result);
|
||||
|
||||
// generate clock
|
||||
always
|
||||
begin
|
||||
clk = 1; #5; clk = 0; #5;
|
||||
end
|
||||
|
||||
// at start of test, load vectors and pulse reset
|
||||
initial
|
||||
begin
|
||||
$readmemh("work/fmul_0.tv", testvectors);
|
||||
vectornum = 0; errors = 0;
|
||||
reset = 1; #22; reset = 0;
|
||||
end
|
||||
|
||||
// apply test vectors on rising edge of clk
|
||||
always @(posedge clk)
|
||||
begin
|
||||
#1; {x, y, z, ctrl, rexpected, flagsexpected} = testvectors[vectornum];
|
||||
{roundmode, mul, add, negp, negz} = ctrl[5:0];
|
||||
end
|
||||
|
||||
// check results on falling edge of clk
|
||||
always @(negedge clk)
|
||||
if (~reset) begin // skip during reset
|
||||
if (result !== rexpected) begin // check result // *** should also add tests on flags eventually
|
||||
$display("Error: inputs %h * %h + %h", x, y, z);
|
||||
$display(" result = %h (%h expected)", result, rexpected);
|
||||
errors = errors + 1;
|
||||
end
|
||||
vectornum = vectornum + 1;
|
||||
if (testvectors[vectornum] === 'x) begin
|
||||
$display("%d tests completed with %d errors",
|
||||
vectornum, errors);
|
||||
$stop;
|
||||
end
|
||||
end
|
||||
endmodule
|
File diff suppressed because it is too large
Load Diff
@ -1,130 +0,0 @@
|
||||
#!/usr/bin/perl -w
|
||||
# torturegen.pl
|
||||
# David_Harris@hmc.edu 19 April 2022
|
||||
# Convert TestFloat cases into format for fma16 project torture test
|
||||
# Strip out cases involving denorms
|
||||
|
||||
use strict;
|
||||
|
||||
my @basenames = ("add", "mul", "mulAdd");
|
||||
my @roundingmodes = ("rz", "rd", "ru", "rne");
|
||||
my @names = ();
|
||||
foreach my $name (@basenames) {
|
||||
foreach my $mode (@roundingmodes) {
|
||||
push(@names, "f16_${name}_$mode.tv");
|
||||
}
|
||||
}
|
||||
|
||||
open(TORTURE, ">work/torture.tv") || die("Can't write torture.tv");
|
||||
my $datestring = localtime();
|
||||
print(TORTURE "// Torture tests generated $datestring by $0\n");
|
||||
foreach my $tv (@names) {
|
||||
open(TV, "work/$tv") || die("Can't read $tv");
|
||||
my $type = &getType($tv); # is it mul, add, mulAdd
|
||||
my $rm = &getRm($tv); # rounding mode
|
||||
# if ($rm != 0) { next; } # only do rz
|
||||
print (TORTURE "\n////////// Testcases from $tv of type $type rounding mode $rm\n");
|
||||
print ("\n////////// Testcases from $tv of type $type rounding mode $rm\n");
|
||||
my $linecount = 0;
|
||||
my $babyTorture = 0;
|
||||
while (<TV>) {
|
||||
my $line = $_;
|
||||
$linecount++;
|
||||
my $density = 10;
|
||||
if ($type eq "mulAdd") {$density = 500;}
|
||||
if ($babyTorture) {
|
||||
$density = 100;
|
||||
if ($type eq "mulAdd") {$density = 50000;}
|
||||
}
|
||||
if ((($linecount + $rm) % $density) != 0) { next }; # too many tests to use
|
||||
chomp($line); # strip off newline
|
||||
my @parts = split(/_/, $line);
|
||||
my ($x, $y, $z, $op, $w, $flags);
|
||||
$x = $parts[0];
|
||||
if ($type eq "add") { $y = "0000"; } else {$y = $parts[1]};
|
||||
if ($type eq "mul") { $z = "3CFF"; } elsif ($type eq "add") {$z = $parts[1]} else { $z = $parts[2]};
|
||||
$op = $rm << 4;
|
||||
if ($type eq "mul" || $type eq "mulAdd") { $op = $op + 8; }
|
||||
if ($type eq "add" || $type eq "mulAdd") { $op = $op + 4; }
|
||||
my $opname = sprintf("%02x", $op);
|
||||
if ($type eq "mulAdd") {$w = $parts[3];} else {$w = $parts[2]};
|
||||
if ($type eq "mulAdd") {$flags = $parts[4];} else {$flags = $parts[3]};
|
||||
$flags = substr($flags, -1); # take last character
|
||||
if (&fpval($w) eq "NaN") { $w = "7e00"; }
|
||||
my $vec = "${x}_${y}_${z}_${opname}_${w}_${flags}";
|
||||
my $skip = "";
|
||||
if (&isdenorm($x) || &isdenorm($y) || &isdenorm($z) || &isdenorm($w)) {
|
||||
$skip = "Skipped denorm";
|
||||
}
|
||||
my $summary = &summary($x, $y, $z, $w, $type);
|
||||
if ($skip ne "") {
|
||||
print TORTURE "// $skip $tv line $linecount $line $summary\n"
|
||||
}
|
||||
else { print TORTURE "$vec // $tv line $linecount $line $summary\n";}
|
||||
}
|
||||
close(TV);
|
||||
}
|
||||
close(TORTURE);
|
||||
|
||||
sub fpval {
|
||||
my $val = shift;
|
||||
$val = hex($val); # convert hex string to number
|
||||
my $frac = $val & 0x3FF;
|
||||
my $exp = ($val >> 10) & 0x1F;
|
||||
my $sign = $val >> 15;
|
||||
|
||||
my $res;
|
||||
if ($exp == 31 && $frac != 0) { return "NaN"; }
|
||||
elsif ($exp == 31) { $res = "INF"; }
|
||||
elsif ($val == 0) { $res = 0; }
|
||||
elsif ($exp == 0) { $res = "Denorm"; }
|
||||
else { $res = sprintf("1.%011b x 2^%d", $frac, $exp-15); }
|
||||
|
||||
if ($sign == 1) { $res = "-$res"; }
|
||||
return $res;
|
||||
}
|
||||
|
||||
sub summary {
|
||||
my $x = shift; my $y = shift; my $z = shift; my $w = shift; my $type = shift;
|
||||
|
||||
my $xv = &fpval($x);
|
||||
my $yv = &fpval($y);
|
||||
my $zv = &fpval($z);
|
||||
my $wv = &fpval($w);
|
||||
|
||||
if ($type eq "add") { return "$xv + $zv = $wv"; }
|
||||
elsif ($type eq "mul") { return "$xv * $yv = $wv"; }
|
||||
else {return "$xv * $yv + $zv = $wv"; }
|
||||
}
|
||||
|
||||
sub getType {
|
||||
my $tv = shift;
|
||||
|
||||
if ($tv =~ /mulAdd/) { return("mulAdd"); }
|
||||
elsif ($tv =~ /mul/) { return "mul"; }
|
||||
else { return "add"; }
|
||||
}
|
||||
|
||||
sub getRm {
|
||||
my $tv = shift;
|
||||
|
||||
if ($tv =~ /rz/) { return 0; }
|
||||
elsif ($tv =~ /rne/) { return 1; }
|
||||
elsif ($tv =~ /rd/) {return 2; }
|
||||
elsif ($tv =~ /ru/) { return 3; }
|
||||
else { return "bad"; }
|
||||
}
|
||||
|
||||
sub isdenorm {
|
||||
my $fp = shift;
|
||||
my $val = hex($fp);
|
||||
my $expv = $val >> 10;
|
||||
$expv = $expv & 0x1F;
|
||||
my $denorm = 0;
|
||||
if ($expv == 0 && $val != 0) { $denorm = 1;}
|
||||
# my $e0 = ($expv == 0);
|
||||
# my $vn0 = ($val != 0);
|
||||
# my $denorm = 0; #($exp == 0 && $val != 0); # denorm exponent but not all zero
|
||||
# print("Num $fp Exp $expv Denorm $denorm Done\n");
|
||||
return $denorm;
|
||||
}
|
@ -1,62 +0,0 @@
|
||||
onerror {resume}
|
||||
quietly WaveActivateNextPane {} 0
|
||||
add wave -noupdate /testbench_fma16/clk
|
||||
add wave -noupdate /testbench_fma16/reset
|
||||
add wave -noupdate /testbench_fma16/x
|
||||
add wave -noupdate /testbench_fma16/y
|
||||
add wave -noupdate /testbench_fma16/z
|
||||
add wave -noupdate /testbench_fma16/result
|
||||
add wave -noupdate /testbench_fma16/rexpected
|
||||
add wave -noupdate /testbench_fma16/dut/x
|
||||
add wave -noupdate /testbench_fma16/dut/y
|
||||
add wave -noupdate /testbench_fma16/dut/z
|
||||
add wave -noupdate /testbench_fma16/dut/mul
|
||||
add wave -noupdate /testbench_fma16/dut/add
|
||||
add wave -noupdate /testbench_fma16/dut/negr
|
||||
add wave -noupdate /testbench_fma16/dut/negz
|
||||
add wave -noupdate /testbench_fma16/dut/roundmode
|
||||
add wave -noupdate /testbench_fma16/dut/result
|
||||
add wave -noupdate /testbench_fma16/dut/XManE
|
||||
add wave -noupdate /testbench_fma16/dut/YManE
|
||||
add wave -noupdate /testbench_fma16/dut/ZManE
|
||||
add wave -noupdate /testbench_fma16/dut/XExpE
|
||||
add wave -noupdate /testbench_fma16/dut/YExpE
|
||||
add wave -noupdate /testbench_fma16/dut/ZExpE
|
||||
add wave -noupdate /testbench_fma16/dut/PExpE
|
||||
add wave -noupdate /testbench_fma16/dut/Ne
|
||||
add wave -noupdate /testbench_fma16/dut/upOneExt
|
||||
add wave -noupdate /testbench_fma16/dut/XSgnE
|
||||
add wave -noupdate /testbench_fma16/dut/YSgnE
|
||||
add wave -noupdate /testbench_fma16/dut/ZSgnE
|
||||
add wave -noupdate /testbench_fma16/dut/PSgnE
|
||||
add wave -noupdate /testbench_fma16/dut/ProdManE
|
||||
add wave -noupdate /testbench_fma16/dut/NfracS
|
||||
add wave -noupdate /testbench_fma16/dut/ProdManAl
|
||||
add wave -noupdate /testbench_fma16/dut/ZManExt
|
||||
add wave -noupdate /testbench_fma16/dut/ZManAl
|
||||
add wave -noupdate /testbench_fma16/dut/Nfrac
|
||||
add wave -noupdate /testbench_fma16/dut/res
|
||||
add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt
|
||||
add wave -noupdate /testbench_fma16/dut/NSamt
|
||||
add wave -noupdate /testbench_fma16/dut/ZExpGreater
|
||||
add wave -noupdate /testbench_fma16/dut/ACLess
|
||||
add wave -noupdate /testbench_fma16/dut/upOne
|
||||
add wave -noupdate /testbench_fma16/dut/KillProd
|
||||
TreeUpdate [SetDefaultTree]
|
||||
WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
|
||||
quietly wave cursor active 2
|
||||
configure wave -namecolwidth 237
|
||||
configure wave -valuecolwidth 64
|
||||
configure wave -justifyvalue left
|
||||
configure wave -signalnamewidth 0
|
||||
configure wave -snapdistance 10
|
||||
configure wave -datasetprefix 0
|
||||
configure wave -rowmargin 4
|
||||
configure wave -childrowmargin 2
|
||||
configure wave -gridoffset 0
|
||||
configure wave -gridperiod 1
|
||||
configure wave -griddelta 40
|
||||
configure wave -timeline 0
|
||||
configure wave -timelineunits ns
|
||||
update
|
||||
WaveRestoreZoom {4083 ns} {4235 ns}
|
@ -8,12 +8,10 @@
|
||||
// Based on RISC-V spec (https://github.com/riscv/riscv-plic-spec/blob/master/riscv-plic.adoc)
|
||||
// With clarifications from ROA's existing implementation (https://roalogic.github.io/plic/docs/AHB-Lite_PLIC_Datasheet.pdf)
|
||||
// Supports only 1 target core and only a global threshold.
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 15
|
||||
// This PLIC implementation serves as both the PLIC Gateways and PLIC Core.
|
||||
// It assumes interrupt sources are level-triggered wires.
|
||||
//
|
||||
// *** Big questions:
|
||||
// Do we detect requests as level-triggered or edge-trigged?
|
||||
// If edge-triggered, do we want to allow 1 source to be able to make a number of repeated requests?
|
||||
// Documentation: RISC-V System on Chip Design Chapter 15
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
@ -111,7 +109,7 @@ module plic_apb (
|
||||
if (memwrite)
|
||||
casez(entry)
|
||||
24'h0000??: intPriority[entry[7:2]] <= #1 Din[2:0];
|
||||
`ifdef PLIC_NUM_SRC_LT_32 // *** switch to a generate for loop so as to deprecate PLIC_NUM_SRC_LT_32 and allow up to 1023 sources
|
||||
`ifdef PLIC_NUM_SRC_LT_32 // eventually switch to a generate for loop so as to deprecate PLIC_NUM_SRC_LT_32 and allow up to 1023 sources
|
||||
24'h002000: intEn[0][`N:1] <= #1 Din[`N:1];
|
||||
24'h002080: intEn[1][`N:1] <= #1 Din[`N:1];
|
||||
`endif
|
||||
@ -172,8 +170,7 @@ module plic_apb (
|
||||
end
|
||||
|
||||
// pending interrupt requests
|
||||
assign nextIntPending = (intPending | requests) & ~intInProgress; // dh changed back 7/9/22 see if Buildroot still boots. Confirmed to boot successfully.
|
||||
//assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
|
||||
assign nextIntPending = (intPending | requests) & ~intInProgress;
|
||||
flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending);
|
||||
|
||||
// context-dependent signals
|
||||
@ -248,7 +245,7 @@ module plic_apb (
|
||||
end
|
||||
end
|
||||
// is the max priority > threshold?
|
||||
// *** would it be any better to first priority encode maxPriority into binary and then ">" with threshold?
|
||||
// would it be any better to first priority encode maxPriority into binary and then ">" with threshold?
|
||||
assign MExtInt = |(threshMask[0] & priorities_with_irqs[0]);
|
||||
assign SExtInt = |(threshMask[1] & priorities_with_irqs[1]);
|
||||
endmodule
|
||||
|
Loading…
Reference in New Issue
Block a user