diff --git a/.gitignore b/.gitignore index f23aec9b5..11acaf0b1 100644 --- a/.gitignore +++ b/.gitignore @@ -161,6 +161,12 @@ examples/asm/sumtest/sumtest examples/asm/example/example examples/asm/trap/trap examples/asm/etc/pause +examples/C/fmul +examples/exercises/fma16/fma16.sv +examples/exercises/fma16/fma16_testgen +examples/exercises/fma16/sol +examples/exercises/riscvsoc_solutions + # Other external diff --git a/examples/exercises/17p1/17p1.c b/examples/exercises/17p1/17p1.c new file mode 100644 index 000000000..719407483 --- /dev/null +++ b/examples/exercises/17p1/17p1.c @@ -0,0 +1,6 @@ +#include + +int main(void) { + char str[] = "Hello Wally!"; + return strlen(str); +} diff --git a/examples/exercises/17p1/Makefile b/examples/exercises/17p1/Makefile new file mode 100644 index 000000000..dc50ce2fa --- /dev/null +++ b/examples/exercises/17p1/Makefile @@ -0,0 +1,11 @@ +TARGET = 17p1 + +$(TARGET).objdump: $(TARGET).elf + riscv64-unknown-elf-objdump -D $(TARGET).elf > $(TARGET).objdump + +$(TARGET).elf: $(TARGET).c Makefile + riscv64-unknown-elf-gcc -g -o $(TARGET) -march=rv64gc_zbb -mabi=lp64 -mcmodel=medany \ + -nostartfiles -T../../link/link.ld $(TARGET).c -o $(TARGET).elf + +clean: + rm -f $(TARGET).elf $(TARGET).objdump diff --git a/examples/exercises/3p1/3p1.S b/examples/exercises/3p1/3p1.S new file mode 100644 index 000000000..f17ba2718 --- /dev/null +++ b/examples/exercises/3p1/3p1.S @@ -0,0 +1,10 @@ +.section .text.init +.globl rvtest_entry_point + +rvtest_entry_point: + li t0, 0x42 + li t1, 0xED + add t2, t0, t1 + +self_loop: + j self_loop diff --git a/examples/exercises/3p1/Makefile b/examples/exercises/3p1/Makefile new file mode 100644 index 000000000..a93771689 --- /dev/null +++ b/examples/exercises/3p1/Makefile @@ -0,0 +1,13 @@ +TARGET = 3p1 + +$(TARGET).objdump: $(TARGET).elf + riscv64-unknown-elf-objdump -D $(TARGET).elf > $(TARGET).objdump + +$(TARGET).elf: $(TARGET).S Makefile + riscv64-unknown-elf-gcc -g -o $(TARGET) -march=rv64gc -mabi=lp64 
-mcmodel=medany \ + -nostartfiles -T../../link/link.ld $(TARGET).S -o $(TARGET).elf + +clean: + rm -f $(TARGET).elf $(TARGET).objdump + + diff --git a/examples/exercises/3p13/3p13.c b/examples/exercises/3p13/3p13.c new file mode 100644 index 000000000..719407483 --- /dev/null +++ b/examples/exercises/3p13/3p13.c @@ -0,0 +1,6 @@ +#include + +int main(void) { + char str[] = "Hello Wally!"; + return strlen(str); +} diff --git a/examples/exercises/3p13/Makefile b/examples/exercises/3p13/Makefile new file mode 100644 index 000000000..85ed5d3c6 --- /dev/null +++ b/examples/exercises/3p13/Makefile @@ -0,0 +1,11 @@ +TARGET = 3p13 + +$(TARGET).objdump: $(TARGET).elf + riscv64-unknown-elf-objdump -D $(TARGET).elf > $(TARGET).objdump + +$(TARGET).elf: $(TARGET).c Makefile + riscv64-unknown-elf-gcc -g -o $(TARGET) -march=rv32gc -mabi=ilp32 -mcmodel=medany \ + -nostartfiles -T../../link/link.ld $(TARGET).c -o $(TARGET).elf + +clean: + rm -f $(TARGET).elf $(TARGET).objdump diff --git a/examples/exercises/3p15/Makefile b/examples/exercises/3p15/Makefile new file mode 100644 index 000000000..cec51f09a --- /dev/null +++ b/examples/exercises/3p15/Makefile @@ -0,0 +1,19 @@ +TARGET = sumtest + +$(TARGET).objdump: $(TARGET) + riscv64-unknown-elf-objdump -D $(TARGET) > $(TARGET).objdump + +$(TARGET): $(TARGET).S sum.S Makefile + riscv64-unknown-elf-gcc -g -o $(TARGET) -march=rv64gc -mabi=lp64 -mcmodel=medany \ + -nostartfiles -T../../link/link.ld $(TARGET).S sum.S + +sim: + riscv_sim_RV64 -T $(TARGET).signature.output --signature-granularity 8 $(TARGET) + diff --ignore-case $(TARGET).signature.output $(TARGET).reference_output || exit + echo "Signature matches! Success!" 
+ +clean: + rm -f $(TARGET) $(TARGET).objdump $(TARGET).signature.output + + + diff --git a/examples/exercises/3p15/sum.S b/examples/exercises/3p15/sum.S new file mode 100644 index 000000000..ab87eb0ec --- /dev/null +++ b/examples/exercises/3p15/sum.S @@ -0,0 +1,32 @@ +// sum.s +// David_Harris@hmc.edu 24 December 2021 +// Add up numbers from 1 to N. + +// result in s0, i in s1, N in a0, return answer in a0 +// +// long sum(long N) { +// long result, i; +// result = 0; +// for (i=1; i<=N; i++) result = result + i; +// return result; +// } + +.global sum +sum: + addi sp, sp, -16 # make room to save s0 and s1 on the stack + sd s0, 0(sp) + sd s1, 8(sp) + + li s0, 0 # result = 0 + li s1, 1 # i = 1 +for: bgt s1, a0, done # exit loop if i > n + add s0, s0, s1 # result = result + i + addi s1, s1, 1 # i++ + j for # repeat + +done: + mv a0, s0 # put result in a0 to return + ld s0, 0(sp) # restore s0 and s1 from stack + ld s1, 8(sp) + addi sp, sp, 16 + ret # return from function diff --git a/examples/exercises/3p15/sumtest.S b/examples/exercises/3p15/sumtest.S new file mode 100644 index 000000000..a1b57689f --- /dev/null +++ b/examples/exercises/3p15/sumtest.S @@ -0,0 +1,43 @@ +// sumtest.S +// David_Harris@hmc.edu 24 December 2021 + +.global rvtest_entry_point +rvtest_entry_point: + la sp, topofstack # Initialize stack pointer + la t0, N # get address of N in data + ld a0, 0(t0) # load N + csrr s8, instret # count instructions before call + jal sum # call sum(N) + csrr s9, instret # count instructions after call + sub s9, s9, s8 # length of call + la t0, begin_signature # address of signature + sd a0, 0(t0) # store sum(N) in signature + sd s9, 8(t0) # record performance + +write_tohost: + la t1, tohost + li t0, 1 # 1 for success, 3 for failure + sd t0, 0(t1) # send success code + +self_loop: + j self_loop # wait + +.section .tohost +tohost: # write to HTIF + .dword 0 +fromhost: + .dword 0 + +.data +N: + .dword 6 + +.EQU XLEN,64 +begin_signature: + .fill 
2*(XLEN/32),4,0xdeadbeef # +end_signature: + +# Initialize stack with room for 512 bytes +.bss + .space 512 +topofstack: diff --git a/examples/exercises/3p15/sumtest.reference_output b/examples/exercises/3p15/sumtest.reference_output new file mode 100644 index 000000000..34200f638 --- /dev/null +++ b/examples/exercises/3p15/sumtest.reference_output @@ -0,0 +1,2 @@ +0000000000000015 +0000000000000025 diff --git a/examples/exercises/3p3/Makefile b/examples/exercises/3p3/Makefile new file mode 100644 index 000000000..ba7734ca1 --- /dev/null +++ b/examples/exercises/3p3/Makefile @@ -0,0 +1,19 @@ +TARGET = sumtest + +$(TARGET).objdump: $(TARGET) + riscv64-unknown-elf-objdump -D $(TARGET) > $(TARGET).objdump + +$(TARGET): $(TARGET).S sum.S Makefile + riscv64-unknown-elf-gcc -g -o $(TARGET) -march=rv64gc -mabi=lp64 -mcmodel=medany \ + -nostartfiles -T../../link/link.ld $(TARGET).S sum.S + +sim: + spike +signature=$(TARGET).signature.output +signature-granularity=8 $(TARGET) + diff --ignore-case $(TARGET).signature.output $(TARGET).reference_output || exit + echo "Signature matches! Success!" + +clean: + rm -f $(TARGET) $(TARGET).objdump $(TARGET).signature.output + + + diff --git a/examples/exercises/3p3/sum.S b/examples/exercises/3p3/sum.S new file mode 100644 index 000000000..ab87eb0ec --- /dev/null +++ b/examples/exercises/3p3/sum.S @@ -0,0 +1,32 @@ +// sum.s +// David_Harris@hmc.edu 24 December 2021 +// Add up numbers from 1 to N. 
+ +// result in s0, i in s1, N in a0, return answer in a0 +// +// long sum(long N) { +// long result, i; +// result = 0; +// for (i=1; i<=N; i++) result = result + i; +// return result; +// } + +.global sum +sum: + addi sp, sp, -16 # make room to save s0 and s1 on the stack + sd s0, 0(sp) + sd s1, 8(sp) + + li s0, 0 # result = 0 + li s1, 1 # i = 1 +for: bgt s1, a0, done # exit loop if i > n + add s0, s0, s1 # result = result + i + addi s1, s1, 1 # i++ + j for # repeat + +done: + mv a0, s0 # put result in a0 to return + ld s0, 0(sp) # restore s0 and s1 from stack + ld s1, 8(sp) + addi sp, sp, 16 + ret # return from function diff --git a/examples/exercises/3p3/sumtest.S b/examples/exercises/3p3/sumtest.S new file mode 100644 index 000000000..a1b57689f --- /dev/null +++ b/examples/exercises/3p3/sumtest.S @@ -0,0 +1,43 @@ +// sumtest.S +// David_Harris@hmc.edu 24 December 2021 + +.global rvtest_entry_point +rvtest_entry_point: + la sp, topofstack # Initialize stack pointer + la t0, N # get address of N in data + ld a0, 0(t0) # load N + csrr s8, instret # count instructions before call + jal sum # call sum(N) + csrr s9, instret # count instructions after call + sub s9, s9, s8 # length of call + la t0, begin_signature # address of signature + sd a0, 0(t0) # store sum(N) in signature + sd s9, 8(t0) # record performance + +write_tohost: + la t1, tohost + li t0, 1 # 1 for success, 3 for failure + sd t0, 0(t1) # send success code + +self_loop: + j self_loop # wait + +.section .tohost +tohost: # write to HTIF + .dword 0 +fromhost: + .dword 0 + +.data +N: + .dword 6 + +.EQU XLEN,64 +begin_signature: + .fill 2*(XLEN/32),4,0xdeadbeef # +end_signature: + +# Initialize stack with room for 512 bytes +.bss + .space 512 +topofstack: diff --git a/examples/exercises/3p3/sumtest.reference_output b/examples/exercises/3p3/sumtest.reference_output new file mode 100644 index 000000000..34200f638 --- /dev/null +++ b/examples/exercises/3p3/sumtest.reference_output @@ -0,0 +1,2 @@ 
+0000000000000015 +0000000000000025 diff --git a/examples/exercises/3p5/Makefile b/examples/exercises/3p5/Makefile new file mode 100644 index 000000000..d93502ee6 --- /dev/null +++ b/examples/exercises/3p5/Makefile @@ -0,0 +1,32 @@ +TARGET = matvecmul + +$(TARGET).objdump: $(TARGET) + riscv64-unknown-elf-objdump -S -D $(TARGET) > $(TARGET).objdump + +$(TARGET): $(TARGET).c Makefile + riscv64-unknown-elf-gcc -o $(TARGET) -gdwarf-2 -O\ + -march=rv64gc -mabi=lp64d -mcmodel=medany \ + -nostdlib -static -lm -fno-tree-loop-distribute-patterns \ + -T../../C/common/test.ld -I../../C/common \ + $(TARGET).c ../../C/common/crt.S ../../C/common/syscalls.c +# Compiler flags: +# -o $(TARGET) defines the name of the output file +# -g generates debugging symbols for gdb +# -O turns on basic optimization; -O3 turns on heavy optimization; omit for no optimization +# -march=rv64gc -mabi=lp64d -mcmodel=medany generates code for RV64GC with doubles and long/ptrs = 64 bits +# -static forces static linking (no dynamic shared libraries on bare metal) +# -lm links the math library if necessary (when #include math.h) +# -nostdlib avoids inserting standard startup files and default libraries +# because we are using crt.s on bare metal +# -fno-tree-loop-distribute-patterns turns off replacing loops with memcpy/memset in the std library +# -T specifies the linker file +# -I specifies the include path (e.g. for util.h) +# The last line defines the C files to compile.
+# crt.S is needed as our startup file to initialize the processor +# syscalls.c implements printf through the HTIF for Spike +# other flags from riscv-tests makefiles that don't seem to be important +# -ffast-math -DPREALLOCATE=1 -std=gnu99 \ +# -fno-common -fno-builtin-printf -nostartfiles -lgcc \ + +clean: + rm -f $(TARGET) $(TARGET).objdump diff --git a/examples/exercises/3p5/matvecmul.c b/examples/exercises/3p5/matvecmul.c new file mode 100644 index 000000000..424c27ab0 --- /dev/null +++ b/examples/exercises/3p5/matvecmul.c @@ -0,0 +1,22 @@ +#include // supports printf +#include "util.h" // supports verify + +// Matrix-vector multiplication y = Ax. +// A is an m rows x n columns matrix. +void matvecmul(int A[], int x[], int y[], int m, int n) { + int i, j, sum; + for (i=0; i $(TARGET).objdump + +$(TARGET): $(TARGET).c Makefile + riscv64-unknown-elf-gcc -o $(TARGET) -gdwarf-2 -O2\ + -march=rv64gc -mabi=lp64d -mcmodel=medany \ + -nostdlib -static -lm -fno-tree-loop-distribute-patterns \ + -T../../C/common/test.ld -I../../C/common \ + $(TARGET).c ../../C/common/crt.S ../../C/common/syscalls.c +# Compiler flags: +# -o $(TARGET) defines the name of the output file +# -g generates debugging symbols for gdb +# -O turns on basic optimization; -O3 turns on heavy optimization; omit for no optimization +# -march=rv64gc -mabi=lp64d -mcmodel=medany generates code for RV64GC with doubles and long/ptrs = 64 bits +# -static forces static linking (no dynamic shared libraries on bare metal) +# -lm links the math library if necessary (when #include math.h) +# -nostdlib avoids inserting standard startup files and default libraries +# because we are using crt.s on bare metal +# -fno-tree-loop-distribute-patterns turns off replacing loops with memcpy/memset in the std library +# -T specifies the linker file +# -I specifies the include path (e.g. for util.h) +# The last line defines the C files to compile.
+# crt.S is needed as our startup file to initialize the processor +# syscalls.c implements printf through the HTIF for Spike +# other flags from riscv-tests makefiles that don't seem to be important +# -ffast-math -DPREALLOCATE=1 -std=gnu99 \ +# -fno-common -fno-builtin-printf -nostartfiles -lgcc \ + +clean: + rm -f $(TARGET) $(TARGET).objdump diff --git a/examples/exercises/3p7/fir.c b/examples/exercises/3p7/fir.c new file mode 100644 index 000000000..05e969732 --- /dev/null +++ b/examples/exercises/3p7/fir.c @@ -0,0 +1,83 @@ +#include // supports printf +#include "util.h" // supports verify + +// Add two Q1.31 fixed point numbers +int add_q31(int a, int b) { + return a + b; +} + +// Multiply two Q1.31 fixed point numbers +int mul_q31(int a, int b) { + long res = (long)a * (long)b; + int result = res >> 31; // shift right to get the 32-bit result; this is equivalent to shifting left by 1 and discarding the bottom 32 bits + //printf("mul_q31: a = %x, b = %x, res = %lx, result = %x\n", a, b, res, result); + return result; +} + + +// low pass filter x with coefficients c, result in y +// n is the length of x, m is the length of c +// y[i] = c[0]*x[i] + c[1]*x[i+1] + ...
+ c[m-1]*x[i+m-1] +// inputs in Q1.31 format +void fir(int x[], int c[], int y[], int n, int m) { + int i, j; + for (j=0; j $(TARGET).objdump + +$(TARGET): $(TARGET).c Makefile + riscv64-unknown-elf-gcc -o $(TARGET) -gdwarf-2 -O\ + -march=rv64gc -mabi=lp64d -mcmodel=medany \ + -nostdlib -static -lm -fno-tree-loop-distribute-patterns \ + -T../../C/common/test.ld -I../../C/common \ + $(TARGET).c ../../C/common/crt.S ../../C/common/syscalls.c +# Compiler flags: +# -o $(TARGET) defines the name of the output file +# -g generates debugging symbols for gdb +# -O turns on basic optimization; -O3 turns on heavy optimization; omit for no optimization +# -march=rv64gc -mabi=lp64d -mcmodel=medany generates code for RV64GC with doubles and long/ptrs = 64 bits +# -static forces static linking (no dynamic shared libraries on bare metal) +# -lm links the math library if necessary (when #include math.h) +# -nostdlib avoids inserting standard startup files and default libraries +# because we are using crt.s on bare metal +# -fno-tree-loop-distribute-patterns turns off replacing loops with memcpy/memset in the std library +# -T specifies the linker file +# -I specifies the include path (e.g. for util.h) +# The last line defines the C files to compile.
+# crt.S is needed as our startup file to initialize the processor +# syscalls.c implements printf through the HTIF for Spike +# other flags from riscv-tests makefiles that don't seem to be important +# -ffast-math -DPREALLOCATE=1 -std=gnu99 \ +# -fno-common -fno-builtin-printf -nostartfiles -lgcc \ + +clean: + rm -f $(TARGET) $(TARGET).objdump diff --git a/examples/exercises/3p9/inline.c b/examples/exercises/3p9/inline.c new file mode 100644 index 000000000..03787eb09 --- /dev/null +++ b/examples/exercises/3p9/inline.c @@ -0,0 +1,11 @@ +#include // supports printf +int main(void) { + int a = 3; + int b = 4; + int c; + // compute c = a + 2*b using inline assembly + asm volatile("slli %0, %1, 1" : "=r" (c) : "r" (b)); // c = b << 1 + asm volatile("add %0, %1, %2" : "=r" (c) : "r" (a), "r" (c)); // c = a + c + + printf("c = %d\n", c); +} diff --git a/examples/exercises/fma16/Makefile b/examples/exercises/fma16/Makefile new file mode 100644 index 000000000..3c2da67f5 --- /dev/null +++ b/examples/exercises/fma16/Makefile @@ -0,0 +1,16 @@ + + +CC = gcc +CFLAGS = -O3 -Wno-format-overflow +IFLAGS = -I$(WALLY)/addins/berkeley-softfloat-3/source/include/ +LIBS = $(WALLY)/addins/berkeley-softfloat-3/build/Linux-x86_64-GCC/softfloat.a -lm -lquadmath +SRCS = $(wildcard *.c) +PROGS = $(patsubst %.c,%,$(SRCS)) + +all: $(PROGS) + +%: %.c + $(CC) $(CFLAGS) -DSOFTFLOAT_FAST_INT64 $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS) + +clean: + rm -f $(PROGS) diff --git a/examples/exercises/fma16/fma16_testgen.c b/examples/exercises/fma16/fma16_testgen.c new file mode 100644 index 000000000..dd9339fba --- /dev/null +++ b/examples/exercises/fma16/fma16_testgen.c @@ -0,0 +1,147 @@ +// fma16_testgen.c +// David_Harris 8 February 2025 +// Generate tests for 16-bit FMA +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +#include +#include +#include +#include "softfloat.h" +#include "softfloat_types.h" + +typedef union sp { + float32_t v; + float f; +} sp; + +// lists of tests, terminated with 0x8000 
+uint16_t easyExponents[] = {15, 0x8000};
+uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
+
+void softfloatInit(void) { // select SoftFloat rounding mode and tininess detection, clear exception flags
+  softfloat_roundingMode = softfloat_round_minMag;
+  softfloat_exceptionFlags = 0;
+  softfloat_detectTininess = softfloat_tininess_beforeRounding;
+}
+
+float convFloat(float16_t f16) { // widen a half with f16_to_f32, then read it back as a C float through the sp union
+  float32_t f32;
+  float res;
+  sp r;
+
+  // convert half to float for printing
+  f32 = f16_to_f32(f16);
+  r.v = f32;
+  res = r.f;
+  return res;
+}
+// genCase: compute x*y+z with SoftFloat f16_mulAdd and write one line "x_y_z_op_result_flags // calc flags" to fptr.
+// Denorm, underflow, and disallowed zero/inf/NaN results get a "// skip ..." prefix written ahead of the vector.
+void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
+  float16_t result;
+  int op, flagVals;
+  char calc[80], flags[80];
+  float xf, yf, zf, rf;
+  float16_t smallest;
+
+  if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
+  if (!add) z.v = 0x0000; // force z to 0 to avoid add
+  if (negp) x.v ^= 0x8000; // flip sign of x to negate p
+  if (negz) z.v ^= 0x8000; // flip sign of z to negate z
+  op = roundingMode << 4 | mul<<3 | add<<2 | negp<<1 | negz; // roundingMode is only encoded here; this function does not set softfloat_roundingMode
+// printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);
+  softfloat_exceptionFlags = 0; // clear exceptions
+  result = f16_mulAdd(x, y, z); // call SoftFloat to compute expected result
+
+  // Extract expected flags from SoftFloat
+  sprintf(flags, "NV: %d OF: %d UF: %d NX: %d",
+    (softfloat_exceptionFlags >> 4) % 2,
+    (softfloat_exceptionFlags >> 2) % 2,
+    (softfloat_exceptionFlags >> 1) % 2,
+    (softfloat_exceptionFlags) % 2);
+  // pack these four flags into one nibble, discarding DZ flag
+  flagVals = (softfloat_exceptionFlags & 0x7) | ((softfloat_exceptionFlags >> 1) & 0x8);
+
+  // convert to floats for printing
+  xf = convFloat(x);
+  yf = convFloat(y);
+  zf = convFloat(z);
+  rf = convFloat(result);
+  if (mul) {
+    if (add) sprintf(calc, "%f * %f + %f = %f", xf, yf, zf, rf);
+    else     sprintf(calc, "%f * %f = %f", xf, yf, rf);
+  } else     sprintf(calc, "%f + %f = %f", xf, zf, rf);
+
+  // omit denorms, which aren't required for this project
+  smallest.v = 0x0400;
+  float16_t resultmag = result;
+  resultmag.v &= 0x7FFF; // take absolute value
+  if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
+  if ((softfloat_exceptionFlags >> 1) % 2) fprintf(fptr, "// skip underflow: "); // test only the UF bit (bit 1); unparenthesized, % binds tighter than >> and any higher flag also matched
+
+  // skip special cases if requested
+  if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
+  if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed) fprintf(fptr, "// Skip inf: ");
+  if (resultmag.v > 0x7C00 && !nanAllowed) fprintf(fptr, "// Skip NaN: ");
+
+  // print the test case
+  fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
+}
+
+void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases,
+               FILE *fptr, int *numCases) { // writes desc to fptr, fills cases[] and returns the count in *numCases
+  int i, j;
+
+  // Loop over all of the exponents and fractions, generating and counting all cases
+  fprintf(fptr, "%s", desc); fprintf(fptr, "\n");
+  *numCases=0;
+  for (i=0; e[i] != 0x8000; i++)
+    for (j=0; f[j] != 0x8000; j++) {
+      cases[*numCases].v = f[j] | e[i]<<10;
+      *numCases = *numCases + 1;
+    }
+}
+
+void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
+  int i, j, k, numCases;
+  float16_t x, y, z;
+  float16_t cases[100000];
+  FILE *fptr;
+  char fn[80];
+
+  sprintf(fn, "work/%s.tv", testName);
+  if ((fptr = fopen(fn, "w")) == 0) {
+    printf("Error opening to write file %s. Does directory exist?\n", fn);
+    exit(1);
+  }
+  prepTests(e, f, testName, desc, cases, fptr, &numCases);
+  z.v = 0x0000;
+  for (i=0; i < numCases; i++) {
+    x.v = cases[i].v;
+    for (j=0; j