Merge pull request #1268 from davidharrishmc/dev

E154 Lab 2 fma16_testgen and odd solutions
This commit is contained in:
Jordan Carlin 2025-02-08 16:04:41 -08:00 committed by GitHub
commit a1b4ed751a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 630 additions and 0 deletions

6
.gitignore vendored
View File

@ -161,6 +161,12 @@ examples/asm/sumtest/sumtest
examples/asm/example/example
examples/asm/trap/trap
examples/asm/etc/pause
examples/C/fmul
examples/exercises/fma16/fma16.sv
examples/exercises/fma16/fma16_testgen
examples/exercises/fma16/sol
examples/exercises/riscvsoc_solutions
# Other
external

View File

@ -0,0 +1,6 @@
#include <string.h>
// Program entry: returns the length of "Hello Wally!" (12 characters) as the exit value.
int main(void) {
char str[] = "Hello Wally!"; // local array initialized from the literal, including its terminating NUL
return strlen(str); // strlen does not count the NUL, so this returns 12
}

View File

@ -0,0 +1,11 @@
# Build the 17p1 exercise for RV64GC + Zbb and disassemble it.
#   make          builds $(TARGET).elf and $(TARGET).objdump
#   make clean    removes both generated files
TARGET = 17p1

# Remove a half-written output (e.g. a truncated objdump) if its recipe fails.
.DELETE_ON_ERROR:

# Default goal: full disassembly of the linked ELF.
$(TARGET).objdump: $(TARGET).elf
	riscv64-unknown-elf-objdump -D $< > $@

# Link the C source into a bare-metal ELF. The Makefile is a prerequisite so
# that changing the flags below forces a rebuild. The output name is given
# exactly once; the original command also passed a conflicting "-o $(TARGET)".
$(TARGET).elf: $(TARGET).c Makefile
	riscv64-unknown-elf-gcc -g -march=rv64gc_zbb -mabi=lp64 -mcmodel=medany \
	    -nostartfiles -T../../link/link.ld $< -o $@

# clean is a command, not a file.
.PHONY: clean
clean:
	rm -f $(TARGET).elf $(TARGET).objdump

View File

@ -0,0 +1,10 @@
// Minimal bare-metal program: compute t2 = 0x42 + 0xED, then spin forever.
.section .text.init
.globl rvtest_entry_point
rvtest_entry_point:
li t0, 0x42 # t0 = 0x42
li t1, 0xED # t1 = 0xED
add t2, t0, t1 # t2 = t0 + t1 = 0x12F
self_loop:
j self_loop # loop forever so execution never runs past the end of the program

View File

@ -0,0 +1,13 @@
# Build the 3p1 assembly exercise for RV64GC and disassemble it.
#   make          builds $(TARGET).elf and $(TARGET).objdump
#   make clean    removes both generated files
TARGET = 3p1

# Remove a half-written output (e.g. a truncated objdump) if its recipe fails.
.DELETE_ON_ERROR:

# Default goal: full disassembly of the linked ELF.
$(TARGET).objdump: $(TARGET).elf
	riscv64-unknown-elf-objdump -D $< > $@

# Assemble and link the .S source into a bare-metal ELF. The Makefile is a
# prerequisite so that changing the flags below forces a rebuild. The output
# name is given exactly once; the original command also passed a conflicting
# "-o $(TARGET)".
$(TARGET).elf: $(TARGET).S Makefile
	riscv64-unknown-elf-gcc -g -march=rv64gc -mabi=lp64 -mcmodel=medany \
	    -nostartfiles -T../../link/link.ld $< -o $@

# clean is a command, not a file.
.PHONY: clean
clean:
	rm -f $(TARGET).elf $(TARGET).objdump

View File

@ -0,0 +1,6 @@
#include <string.h>
// Program entry: returns the length of "Hello Wally!" (12 characters) as the exit value.
int main(void) {
char str[] = "Hello Wally!"; // local array initialized from the literal, including its terminating NUL
return strlen(str); // strlen does not count the NUL, so this returns 12
}

View File

@ -0,0 +1,11 @@
# Build the 3p13 exercise for RV32GC (ilp32 ABI) and disassemble it.
#   make          builds $(TARGET).elf and $(TARGET).objdump
#   make clean    removes both generated files
TARGET = 3p13

# Remove a half-written output (e.g. a truncated objdump) if its recipe fails.
.DELETE_ON_ERROR:

# Default goal: full disassembly of the linked ELF.
$(TARGET).objdump: $(TARGET).elf
	riscv64-unknown-elf-objdump -D $< > $@

# Link the C source into a bare-metal ELF. The Makefile is a prerequisite so
# that changing the flags below forces a rebuild. The output name is given
# exactly once; the original command also passed a conflicting "-o $(TARGET)".
$(TARGET).elf: $(TARGET).c Makefile
	riscv64-unknown-elf-gcc -g -march=rv32gc -mabi=ilp32 -mcmodel=medany \
	    -nostartfiles -T../../link/link.ld $< -o $@

# clean is a command, not a file.
.PHONY: clean
clean:
	rm -f $(TARGET).elf $(TARGET).objdump

View File

@ -0,0 +1,19 @@
# Build, disassemble and simulate the sumtest assembly example.
#   make          builds $(TARGET) and $(TARGET).objdump
#   make sim      runs $(TARGET) on riscv_sim_RV64 and checks its signature
#   make clean    removes generated files
TARGET = sumtest

# Remove a half-written output (e.g. a truncated objdump) if its recipe fails.
.DELETE_ON_ERROR:

# Default goal: full disassembly of the linked program.
$(TARGET).objdump: $(TARGET)
	riscv64-unknown-elf-objdump -D $< > $@

# Assemble and link every .S prerequisite. The Makefile itself is a
# prerequisite (so flag changes trigger a rebuild) but is filtered out of the
# compiler command line.
$(TARGET): $(TARGET).S sum.S Makefile
	riscv64-unknown-elf-gcc -g -o $@ -march=rv64gc -mabi=lp64 -mcmodel=medany \
	    -nostartfiles -T../../link/link.ld $(filter %.S,$^)

# Run the program and compare the dumped signature with the reference.
# Depends on $(TARGET) so "make sim" works from a clean tree. A failing diff
# makes the recipe (and make) stop before the success message is printed.
.PHONY: sim
sim: $(TARGET)
	riscv_sim_RV64 -T $(TARGET).signature.output --signature-granularity 8 $(TARGET)
	diff --ignore-case $(TARGET).signature.output $(TARGET).reference_output
	echo "Signature matches! Success!"

# clean is a command, not a file.
.PHONY: clean
clean:
	rm -f $(TARGET) $(TARGET).objdump $(TARGET).signature.output

View File

@ -0,0 +1,32 @@
// sum.S
// David_Harris@hmc.edu 24 December 2021
// Add up numbers from 1 to N.
// result in s0, i in s1, N in a0, return answer in a0
//
// long sum(long N) {
// long result, i;
// result = 0;
// for (i=1; i<=N; i++) result = result + i;
// return result;
// }
// s0 and s1 are saved on entry and restored before returning, so the
// caller's values in those registers survive the call.
.global sum
sum:
addi sp, sp, -16 # make room to save s0 and s1 on the stack
sd s0, 0(sp) # save s0 at offset 0
sd s1, 8(sp) # save s1 at offset 8
li s0, 0 # result = 0
li s1, 1 # i = 1
for: bgt s1, a0, done # exit loop if i > N
add s0, s0, s1 # result = result + i
addi s1, s1, 1 # i++
j for # repeat
done:
mv a0, s0 # put result in a0 to return
ld s0, 0(sp) # restore s0 and s1 from stack
ld s1, 8(sp)
addi sp, sp, 16 # release the 16 bytes reserved on entry
ret # return from function

View File

@ -0,0 +1,43 @@
// sumtest.S
// David_Harris@hmc.edu 24 December 2021
// Test harness for sum: calls sum(N), stores the result and the number of
// instructions retired around the call into the signature region, then
// writes a success code to tohost and spins.
.global rvtest_entry_point
rvtest_entry_point:
la sp, topofstack # Initialize stack pointer
la t0, N # get address of N in data
ld a0, 0(t0) # load N
csrr s8, instret # count instructions before call
jal sum # call sum(N)
csrr s9, instret # count instructions after call
sub s9, s9, s8 # length of call
la t0, begin_signature # address of signature
sd a0, 0(t0) # store sum(N) in signature
sd s9, 8(t0) # record performance
write_tohost:
la t1, tohost
li t0, 1 # 1 for success, 3 for failure
sd t0, 0(t1) # send success code
self_loop:
j self_loop # wait
.section .tohost
tohost: # write to HTIF
.dword 0
fromhost:
.dword 0
.data
// Argument passed to sum
N:
.dword 6
.EQU XLEN,64
// Signature region: 2*(XLEN/32) = 4 words of 4 bytes = 16 bytes, i.e. the two
// 64-bit slots written above (sum(N) at offset 0, instruction count at offset 8)
begin_signature:
.fill 2*(XLEN/32),4,0xdeadbeef # prefill with 0xdeadbeef
end_signature:
# Initialize stack with room for 512 bytes
.bss
.space 512
// topofstack labels the end of the 512-byte region; sp is initialized to it
topofstack:

View File

@ -0,0 +1,2 @@
0000000000000015
0000000000000025

View File

@ -0,0 +1,19 @@
# Build, disassemble and simulate the sumtest assembly example.
#   make          builds $(TARGET) and $(TARGET).objdump
#   make sim      runs $(TARGET) on spike and checks its signature
#   make clean    removes generated files
TARGET = sumtest

# Remove a half-written output (e.g. a truncated objdump) if its recipe fails.
.DELETE_ON_ERROR:

# Default goal: full disassembly of the linked program.
$(TARGET).objdump: $(TARGET)
	riscv64-unknown-elf-objdump -D $< > $@

# Assemble and link every .S prerequisite. The Makefile itself is a
# prerequisite (so flag changes trigger a rebuild) but is filtered out of the
# compiler command line.
$(TARGET): $(TARGET).S sum.S Makefile
	riscv64-unknown-elf-gcc -g -o $@ -march=rv64gc -mabi=lp64 -mcmodel=medany \
	    -nostartfiles -T../../link/link.ld $(filter %.S,$^)

# Run the program and compare the dumped signature with the reference.
# Depends on $(TARGET) so "make sim" works from a clean tree. A failing diff
# makes the recipe (and make) stop before the success message is printed.
.PHONY: sim
sim: $(TARGET)
	spike +signature=$(TARGET).signature.output +signature-granularity=8 $(TARGET)
	diff --ignore-case $(TARGET).signature.output $(TARGET).reference_output
	echo "Signature matches! Success!"

# clean is a command, not a file.
.PHONY: clean
clean:
	rm -f $(TARGET) $(TARGET).objdump $(TARGET).signature.output

View File

@ -0,0 +1,32 @@
// sum.S
// David_Harris@hmc.edu 24 December 2021
// Add up numbers from 1 to N.
// result in s0, i in s1, N in a0, return answer in a0
//
// long sum(long N) {
// long result, i;
// result = 0;
// for (i=1; i<=N; i++) result = result + i;
// return result;
// }
// s0 and s1 are saved on entry and restored before returning, so the
// caller's values in those registers survive the call.
.global sum
sum:
addi sp, sp, -16 # make room to save s0 and s1 on the stack
sd s0, 0(sp) # save s0 at offset 0
sd s1, 8(sp) # save s1 at offset 8
li s0, 0 # result = 0
li s1, 1 # i = 1
for: bgt s1, a0, done # exit loop if i > N
add s0, s0, s1 # result = result + i
addi s1, s1, 1 # i++
j for # repeat
done:
mv a0, s0 # put result in a0 to return
ld s0, 0(sp) # restore s0 and s1 from stack
ld s1, 8(sp)
addi sp, sp, 16 # release the 16 bytes reserved on entry
ret # return from function

View File

@ -0,0 +1,43 @@
// sumtest.S
// David_Harris@hmc.edu 24 December 2021
// Test harness for sum: calls sum(N), stores the result and the number of
// instructions retired around the call into the signature region, then
// writes a success code to tohost and spins.
.global rvtest_entry_point
rvtest_entry_point:
la sp, topofstack # Initialize stack pointer
la t0, N # get address of N in data
ld a0, 0(t0) # load N
csrr s8, instret # count instructions before call
jal sum # call sum(N)
csrr s9, instret # count instructions after call
sub s9, s9, s8 # length of call
la t0, begin_signature # address of signature
sd a0, 0(t0) # store sum(N) in signature
sd s9, 8(t0) # record performance
write_tohost:
la t1, tohost
li t0, 1 # 1 for success, 3 for failure
sd t0, 0(t1) # send success code
self_loop:
j self_loop # wait
.section .tohost
tohost: # write to HTIF
.dword 0
fromhost:
.dword 0
.data
// Argument passed to sum
N:
.dword 6
.EQU XLEN,64
// Signature region: 2*(XLEN/32) = 4 words of 4 bytes = 16 bytes, i.e. the two
// 64-bit slots written above (sum(N) at offset 0, instruction count at offset 8)
begin_signature:
.fill 2*(XLEN/32),4,0xdeadbeef # prefill with 0xdeadbeef
end_signature:
# Initialize stack with room for 512 bytes
.bss
.space 512
// topofstack labels the end of the 512-byte region; sp is initialized to it
topofstack:

View File

@ -0,0 +1,2 @@
0000000000000015
0000000000000025

View File

@ -0,0 +1,32 @@
# Build and disassemble the matvecmul bare-metal C example.
#   make          builds $(TARGET) and $(TARGET).objdump
#   make clean    removes both generated files
TARGET = matvecmul

# Remove a half-written output (e.g. a truncated objdump) if its recipe fails.
.DELETE_ON_ERROR:

# Default goal: disassembly of the linked program, interleaved with source (-S).
$(TARGET).objdump: $(TARGET)
	riscv64-unknown-elf-objdump -S -D $< > $@

$(TARGET): $(TARGET).c Makefile
	riscv64-unknown-elf-gcc -o $@ -gdwarf-2 -O \
	    -march=rv64gc -mabi=lp64d -mcmodel=medany \
	    -nostdlib -static -fno-tree-loop-distribute-patterns \
	    -T../../C/common/test.ld -I../../C/common \
	    $< ../../C/common/crt.S ../../C/common/syscalls.c -lm

# Compiler flags:
#   -o $@ names the output file after the rule's target, $(TARGET)
#   -gdwarf-2 generates debugging symbols (DWARF version 2) for gdb
#   -O turns on basic optimization; -O3 turns on heavy optimization; omit for no optimization
#   -march=rv64gc -mabi=lp64d -mcmodel=medany generates code for RV64GC with doubles and long/ptrs = 64 bits
#   -static forces static linking (no dynamic shared libraries on bare metal)
#   -nostdlib avoids inserting standard startup files and default libraries
#       because we are using crt.S on bare metal
#   -fno-tree-loop-distribute-patterns turns off replacing loops with memcpy/memset in the std library
#   -T specifies the linker file
#   -I specifies the include path (e.g. for util.h)
#   The last line lists the source files to compile, followed by -lm.
#       crt.S is needed as our startup file to initialize the processor
#       syscalls.c implements printf through the HTIF for Spike
#       -lm links the math library if necessary (when #include math.h); it comes
#       after the sources because the linker resolves libraries in command-line order
# Other flags from riscv-tests makefiles that don't seem to be important:
#   -ffast-math -DPREALLOCATE=1 -std=gnu99
#   -fno-common -fno-builtin-printf -nostartfiles -lgcc
# (No trailing backslashes on the two lines above: a comment line ending in a
# backslash continues onto the next line and would swallow whatever follows.)

# clean is a command, not a file.
.PHONY: clean
clean:
	rm -f $(TARGET) $(TARGET).objdump

View File

@ -0,0 +1,22 @@
#include <stdio.h> // supports printf
#include "util.h" // supports verify
// Matrix-vector product y = A*x.
// A holds an m-row by n-column matrix with each row stored contiguously
// (element (r, c) is A[r*n + c]); x has n entries and y receives m entries.
void matvecmul(int A[], int x[], int y[], int m, int n) {
  int row = 0;
  while (row < m) {
    int col;
    int acc = 0;                      // dot product of row `row` with x
    for (col = 0; col < n; col++) {
      acc = acc + A[row*n + col] * x[col];
    }
    y[row] = acc;
    row = row + 1;
  }
}
// Multiply a fixed 2x3 matrix by a 3-element vector; the result is y = {50, 122}.
// main returns int (as the other example programs in this set do) so that the
// value handed back to the startup code is defined rather than left to chance.
// NOTE(review): assumes the startup code uses main's return value as the exit
// status - confirm against crt.S / syscalls.c.
int main(void) {
  int A[6] = {1, 2, 3, 4, 5, 6};  // 2 rows x 3 columns, rows stored back to back
  int x[3] = {7, 8, 9};
  int y[2];
  matvecmul(A, x, y, 2, 3);
  return 0;
}

View File

@ -0,0 +1,32 @@
# Build and disassemble the fir bare-metal C example.
#   make          builds $(TARGET) and $(TARGET).objdump
#   make clean    removes both generated files
TARGET = fir

# Remove a half-written output (e.g. a truncated objdump) if its recipe fails.
.DELETE_ON_ERROR:

# Default goal: disassembly of the linked program, interleaved with source (-S).
$(TARGET).objdump: $(TARGET)
	riscv64-unknown-elf-objdump -S -D $< > $@

$(TARGET): $(TARGET).c Makefile
	riscv64-unknown-elf-gcc -o $@ -gdwarf-2 -O2 \
	    -march=rv64gc -mabi=lp64d -mcmodel=medany \
	    -nostdlib -static -fno-tree-loop-distribute-patterns \
	    -T../../C/common/test.ld -I../../C/common \
	    $< ../../C/common/crt.S ../../C/common/syscalls.c -lm

# Compiler flags:
#   -o $@ names the output file after the rule's target, $(TARGET)
#   -gdwarf-2 generates debugging symbols (DWARF version 2) for gdb
#   -O2 is used here; -O turns on basic optimization; -O3 turns on heavy optimization; omit for no optimization
#   -march=rv64gc -mabi=lp64d -mcmodel=medany generates code for RV64GC with doubles and long/ptrs = 64 bits
#   -static forces static linking (no dynamic shared libraries on bare metal)
#   -nostdlib avoids inserting standard startup files and default libraries
#       because we are using crt.S on bare metal
#   -fno-tree-loop-distribute-patterns turns off replacing loops with memcpy/memset in the std library
#   -T specifies the linker file
#   -I specifies the include path (e.g. for util.h)
#   The last line lists the source files to compile, followed by -lm.
#       crt.S is needed as our startup file to initialize the processor
#       syscalls.c implements printf through the HTIF for Spike
#       -lm links the math library if necessary (when #include math.h); it comes
#       after the sources because the linker resolves libraries in command-line order
# Other flags from riscv-tests makefiles that don't seem to be important:
#   -ffast-math -DPREALLOCATE=1 -std=gnu99
#   -fno-common -fno-builtin-printf -nostartfiles -lgcc
# (No trailing backslashes on the two lines above: a comment line ending in a
# backslash continues onto the next line and would swallow whatever follows.)

# clean is a command, not a file.
.PHONY: clean
clean:
	rm -f $(TARGET) $(TARGET).objdump

View File

@ -0,0 +1,83 @@
#include <stdio.h> // supports printf
#include "util.h" // supports verify
// Sum of two Q1.31 fixed point numbers.
// Adding the raw integers adds the represented values; no saturation is applied.
int add_q31(int a, int b) {
  int total = a + b;
  return total;
}
// Multiply two Q1.31 fixed point numbers.
// The full product of two 32-bit values needs 64 bits, so it is formed in
// long long, which is at least 64 bits on every target (plain long is only
// 32 bits under ilp32 ABIs and would lose the upper half of the product).
// Returns the product in Q1.31 format.
int mul_q31(int a, int b) {
  long long res = (long long)a * (long long)b;   // Q2.62 product
  int result = res >> 31; // shift right to get the 32-bit result; this is equivalent to shifting left by 1 and discarding the bottom 32 bits
  //printf("mul_q31: a = %x, b = %x, res = %llx, result = %x\n", a, b, res, result);
  return result;
}
// FIR low pass filter: filter x with coefficients c, result in y.
// n is the length of x, m is the length of c; n-m+1 outputs are written.
// The coefficients run backward across each window of m inputs:
//   y[j] = c[0]*x[j+m-1] + c[1]*x[j+m-2] + ... + c[m-1]*x[j]
// inputs, coefficients and outputs are in Q1.31 format
void fir(int x[], int c[], int y[], int n, int m) {
  int out, tap;
  for (out = 0; out < n-m+1; out++) {
    y[out] = 0;
    tap = 0;
    while (tap < m) {
      // tap 0 pairs with the newest sample of the window, tap m-1 with the oldest
      y[out] = add_q31(y[out], mul_q31(c[tap], x[out-tap+(m-1)]));
      tap++;
    }
  }
}
// Run the FIR filter over two periods of a 10-sample sine wave and check the
// result against precomputed values.
// Prints every output sample, then returns the result of verify() from util.h
// (0 means success).
int main(void) {
  // Sizes are named once so the arrays, the print loop and the verify call
  // cannot drift apart.
  enum {
    NUM_SAMPLES = 20,                           // length of the input signal
    NUM_TAPS    = 4,                            // number of filter coefficients
    NUM_OUTPUTS = NUM_SAMPLES - NUM_TAPS + 1    // 17 filtered samples
  };
  int32_t sin_table[NUM_SAMPLES] = { // in Q1.31 format
    0x00000000, // sin(0*2pi/10)
    0x4B3C8C12, // sin(1*2pi/10)
    0x79BC384D, // sin(2*2pi/10)
    0x79BC384D, // sin(3*2pi/10)
    0x4B3C8C12, // sin(4*2pi/10)
    0x00000000, // sin(5*2pi/10)
    0xB4C373EE, // sin(6*2pi/10)
    0x8643C7B3, // sin(7*2pi/10)
    0x8643C7B3, // sin(8*2pi/10)
    0xB4C373EE, // sin(9*2pi/10)
    0x00000000, // sin(10*2pi/10)
    0x4B3C8C12, // sin(11*2pi/10)
    0x79BC384D, // sin(12*2pi/10)
    0x79BC384D, // sin(13*2pi/10)
    0x4B3C8C12, // sin(14*2pi/10)
    0x00000000, // sin(15*2pi/10)
    0xB4C373EE, // sin(16*2pi/10)
    0x8643C7B3, // sin(17*2pi/10)
    0x8643C7B3, // sin(18*2pi/10)
    0xB4C373EE  // sin(19*2pi/10)
  };
  // each coefficient is 1/4 in Q1.31 format plus a small distinct offset (1..4 LSBs)
  int lowpass[NUM_TAPS] = {0x20000001, 0x20000002, 0x20000003, 0x20000004};
  int y[NUM_OUTPUTS];
  int expected[NUM_OUTPUTS] = { // in Q1.31 format
    0x4fad3f2f,
    0x627c6236,
    0x4fad3f32,
    0x1e6f0e17,
    0xe190f1eb,
    0xb052c0ce,
    0x9d839dc6,
    0xb052c0cb,
    0xe190f1e6,
    0x1e6f0e12,
    0x4fad3f2f,
    0x627c6236,
    0x4fad3f32,
    0x1e6f0e17,
    0xe190f1eb,
    0xb052c0ce,
    0x9d839dc6
  };

  setStats(1); // record initial mcycle and minstret
  fir(sin_table, lowpass, y, NUM_SAMPLES, NUM_TAPS);
  setStats(0); // record elapsed mcycle and minstret
  for (int i=0; i<NUM_OUTPUTS; i++) {
    printf("y[%d] = %x\n", i, y[i]);
  }
  // check that all NUM_OUTPUTS elements of y match expected (the previous
  // count of 16 left the last element unchecked). 0 means success
  return verify(NUM_OUTPUTS, y, expected);
}

View File

@ -0,0 +1,32 @@
# Build and disassemble the inline-assembly bare-metal C example.
#   make          builds $(TARGET) and $(TARGET).objdump
#   make clean    removes both generated files
TARGET = inline

# Remove a half-written output (e.g. a truncated objdump) if its recipe fails.
.DELETE_ON_ERROR:

# Default goal: disassembly of the linked program, interleaved with source (-S).
$(TARGET).objdump: $(TARGET)
	riscv64-unknown-elf-objdump -S -D $< > $@

$(TARGET): $(TARGET).c Makefile
	riscv64-unknown-elf-gcc -o $@ -gdwarf-2 -O \
	    -march=rv64gc -mabi=lp64d -mcmodel=medany \
	    -nostdlib -static -fno-tree-loop-distribute-patterns \
	    -T../../C/common/test.ld -I../../C/common \
	    $< ../../C/common/crt.S ../../C/common/syscalls.c -lm

# Compiler flags:
#   -o $@ names the output file after the rule's target, $(TARGET)
#   -gdwarf-2 generates debugging symbols (DWARF version 2) for gdb
#   -O turns on basic optimization; -O3 turns on heavy optimization; omit for no optimization
#   -march=rv64gc -mabi=lp64d -mcmodel=medany generates code for RV64GC with doubles and long/ptrs = 64 bits
#   -static forces static linking (no dynamic shared libraries on bare metal)
#   -nostdlib avoids inserting standard startup files and default libraries
#       because we are using crt.S on bare metal
#   -fno-tree-loop-distribute-patterns turns off replacing loops with memcpy/memset in the std library
#   -T specifies the linker file
#   -I specifies the include path (e.g. for util.h)
#   The last line lists the source files to compile, followed by -lm.
#       crt.S is needed as our startup file to initialize the processor
#       syscalls.c implements printf through the HTIF for Spike
#       -lm links the math library if necessary (when #include math.h); it comes
#       after the sources because the linker resolves libraries in command-line order
# Other flags from riscv-tests makefiles that don't seem to be important:
#   -ffast-math -DPREALLOCATE=1 -std=gnu99
#   -fno-common -fno-builtin-printf -nostartfiles -lgcc
# (No trailing backslashes on the two lines above: a comment line ending in a
# backslash continues onto the next line and would swallow whatever follows.)

# clean is a command, not a file.
.PHONY: clean
clean:
	rm -f $(TARGET) $(TARGET).objdump

View File

@ -0,0 +1,11 @@
#include <stdio.h> // supports printf
// Demonstrates GCC inline assembly: computes c = a + 2*b with two RISC-V
// instructions and prints the result.
int main(void) {
int a = 3;
int b = 4;
int c;
// compute c = a + 2*b using inline assembly
// Each asm statement has the form "template" : outputs : inputs.
// %0, %1, %2 refer to the listed operands in order, outputs first.
// The second statement reads the c produced by the first, so their order matters.
asm volatile("slli %0, %1, 1" : "=r" (c) : "r" (b)); // c = b << 1
asm volatile("add %0, %1, %2" : "=r" (c) : "r" (a), "r" (c)); // c = a + c
printf("c = %d\n", c); // with a = 3 and b = 4 this prints c = 11
}

View File

@ -0,0 +1,16 @@
# Build every C program in this directory against Berkeley SoftFloat 3.
#   make          builds one executable per *.c file
#   make clean    removes those executables
# $(WALLY) must point at the root of the checkout that contains addins/.
CC     = gcc
CFLAGS = -O3 -Wno-format-overflow
IFLAGS = -I$(WALLY)/addins/berkeley-softfloat-3/source/include/
LIBS   = $(WALLY)/addins/berkeley-softfloat-3/build/Linux-x86_64-GCC/softfloat.a -lm -lquadmath

# Simple (:=) assignment so the directory is globbed once, not on every use.
SRCS  := $(wildcard *.c)
PROGS := $(patsubst %.c,%,$(SRCS))

# all and clean are commands, not files.
.PHONY: all clean

all: $(PROGS)

# Static pattern rule: applies only to the programs found above, instead of a
# match-anything "%: %.c" implicit rule. LFLAGS is not set in this file; it
# expands to whatever the command line or environment supplies (empty by default).
$(PROGS): %: %.c
	$(CC) $(CFLAGS) -DSOFTFLOAT_FAST_INT64 $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)

clean:
	rm -f $(PROGS)

View File

@ -0,0 +1,147 @@
// fma16_testgen.c
// David_Harris 8 February 2025
// Generate tests for 16-bit FMA
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include "softfloat.h"
#include "softfloat_types.h"
// sp: overlays a SoftFloat float32_t and a native float on the same storage,
// so a value written through v can be read back through f (see convFloat).
typedef union sp {
float32_t v; // SoftFloat single-precision value
float f; // the same storage viewed as a native C float
} sp;
// lists of tests, terminated with 0x8000
// Entries are raw half-precision fields: prepTests places each exponent at
// bit 10 and ORs in each fraction, so exponent 15 with fraction 0 gives
// 0x3C00 (1.0).
uint16_t easyExponents[] = {15, 0x8000}; // biased exponent 15, i.e. an exponent of 0
uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and binary 1.1 (decimal 1.5)
// Put SoftFloat's global state into the configuration used for the tests:
// no pending exception flags, tininess detected before rounding, and
// round-toward-zero (minimum magnitude) as the rounding mode.
void softfloatInit(void) {
  softfloat_exceptionFlags = 0;
  softfloat_detectTininess = softfloat_tininess_beforeRounding;
  softfloat_roundingMode   = softfloat_round_minMag;
}
// Convert a SoftFloat half-precision value to a native float for printing.
// The half is widened to float32_t by SoftFloat, then read back as a C float
// through the sp union.
float convFloat(float16_t f16) {
  sp widened;
  widened.v = f16_to_f32(f16);
  return widened.f;
}
// genCase: compute one expected result with SoftFloat and write it to fptr as
// a test vector line of the form
//   x_y_z_op_result_flags // calculation  flag summary
//   fptr          open test vector file to append to
//   x, y, z       half-precision operands (bit patterns in .v)
//   mul           0 forces y to 1.0 so no real multiply takes place
//   add           0 forces z to 0 so no real add takes place
//   negp, negz    1 negates the product (by flipping the sign of x) / negates z
//   roundingMode  only recorded in the op field; the rounding SoftFloat applies
//                 is whatever softfloat_roundingMode is currently set to
//   zeroAllowed, infAllowed, nanAllowed
//                 when 0, results of that kind get a "// skip" prefix so the
//                 line reads as a comment
// Denormal results and results that raised underflow are always prefixed.
void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
  float16_t result, resultmag, smallest;
  int op, flagVals, exceptions;
  char calc[80], flags[80];
  float xf, yf, zf, rf;

  if (!mul) y.v = 0x3C00;  // force y to 1 to avoid multiply
  if (!add) z.v = 0x0000;  // force z to 0 to avoid add
  if (negp) x.v ^= 0x8000; // flip sign of x to negate p
  if (negz) z.v ^= 0x8000; // flip sign of z to negate z
  op = (roundingMode << 4) | (mul << 3) | (add << 2) | (negp << 1) | negz;
  // printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);

  softfloat_exceptionFlags = 0;      // clear exceptions
  result = f16_mulAdd(x, y, z);      // call SoftFloat to compute expected result
  // Snapshot the flags now: the SoftFloat calls made below (conversions,
  // comparison) may raise flags of their own.
  exceptions = softfloat_exceptionFlags;

  // Extract expected flags from SoftFloat
  snprintf(flags, sizeof(flags), "NV: %d OF: %d UF: %d NX: %d",
           (exceptions >> 4) % 2,
           (exceptions >> 2) % 2,
           (exceptions >> 1) % 2,
           (exceptions) % 2);
  // pack these four flags into one nibble, discarding DZ flag
  flagVals = (exceptions & 0x7) | ((exceptions >> 1) & 0x8);

  // convert to floats for printing
  xf = convFloat(x);
  yf = convFloat(y);
  zf = convFloat(z);
  rf = convFloat(result);
  if (mul) {
    if (add) snprintf(calc, sizeof(calc), "%f * %f + %f = %f", xf, yf, zf, rf);
    else     snprintf(calc, sizeof(calc), "%f * %f = %f", xf, yf, rf);
  } else {
    snprintf(calc, sizeof(calc), "%f + %f = %f", xf, zf, rf);
  }

  // omit denorms, which aren't required for this project
  smallest.v = 0x0400;
  resultmag = result;
  resultmag.v &= 0x7FFF; // take absolute value
  if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
  // Test the UF bit only. The previous expression, flags >> 1 % 2, parsed as
  // flags >> (1 % 2) and so fired for any flag other than NX.
  if ((exceptions >> 1) % 2) fprintf(fptr, "// skip underflow: ");

  // skip special cases if requested
  if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
  // 0x7BFF is the largest finite magnitude; it is treated like infinity here
  if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed) fprintf(fptr, "// Skip inf: ");
  if (resultmag.v > 0x7C00 && !nanAllowed) fprintf(fptr, "// Skip NaN: ");

  // print the test case
  fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
}
// prepTests: write the description line to fptr, then fill cases with every
// combination of an exponent from e and a fraction from f (both lists end at
// the 0x8000 sentinel). The number of combinations is stored in *numCases.
// testName is accepted but not used here. The caller must provide a cases
// array large enough for all combinations; no bound is checked.
void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases,
               FILE *fptr, int *numCases) {
  int expIdx, fracIdx;
  int count = 0;

  fprintf(fptr, "%s", desc);
  fprintf(fptr, "\n");

  // Loop over all of the exponents and fractions, generating and counting all cases
  expIdx = 0;
  while (e[expIdx] != 0x8000) {
    for (fracIdx = 0; f[fracIdx] != 0x8000; fracIdx++) {
      // exponent field sits above the 10 fraction bits
      cases[count].v = f[fracIdx] | e[expIdx]<<10;
      count++;
    }
    expIdx++;
  }
  *numCases = count;
}
// genMulTests: write multiply-only test vectors to work/<testName>.tv.
//   e, f          exponent and fraction lists (0x8000-terminated) combined by prepTests
//   sgn           0 tests each y as generated; 1 also tests y with its sign flipped
//   testName      base name of the output file
//   desc          description line written at the top of the file
//   roundingMode, zeroAllowed, infAllowed, nanAllowed
//                 passed through to genCase
// Every case is used as x and paired with every case as y; z is fixed at 0.
// Exits with status 1 if the file name does not fit or the file cannot be opened.
void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
  int i, j, k, numCases, fnLen;
  float16_t x, y, z;
  float16_t cases[100000];
  FILE *fptr;
  char fn[80];

  // Bounded formatting: a long testName must not overrun fn
  fnLen = snprintf(fn, sizeof(fn), "work/%s.tv", testName);
  if (fnLen < 0 || fnLen >= (int)sizeof(fn)) {
    printf("Error: file name for test %s is too long\n", testName);
    exit(1);
  }
  if ((fptr = fopen(fn, "w")) == 0) {
    printf("Error opening to write file %s. Does directory exist?\n", fn);
    exit(1);
  }
  prepTests(e, f, testName, desc, cases, fptr, &numCases);
  z.v = 0x0000;
  for (i=0; i < numCases; i++) {
    x.v = cases[i].v;
    for (j=0; j<numCases; j++) {
      y.v = cases[j].v;
      for (k=0; k<=sgn; k++) {
        y.v ^= (k<<15); // k = 0 leaves y unchanged; k = 1 flips its sign bit
        genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
      }
    }
  }
  fclose(fptr);
}
// Entry point: make sure the work directory exists, configure SoftFloat, and
// generate each test vector file. Returns 0 on success; exits with status 1
// if the work directory cannot be created.
int main()
{
  // create work directory if it doesn't exist
  int mkdirStatus = system("mkdir -p work");
  if (mkdirStatus != 0) {
    exit(1);
  }

  // configure softfloat modes
  softfloatInit();

  // Test cases: multiplication
  genMulTests(easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);

/* // example of how to generate tests with a different rounding mode
  softfloat_roundingMode = softfloat_round_near_even;
  genMulTests(easyExponents, easyFracts, 0, "fmul_0_rne", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RNE", 1, 0, 0, 0); */

  // Add your cases here

  return 0;
}