Added C test cases

This commit is contained in:
David Harris 2022-01-11 21:01:48 +00:00
parent 0b3d3b768b
commit 486cfdc3a5
20 changed files with 4298 additions and 24 deletions

3
.gitmodules vendored
View File

@ -8,3 +8,6 @@
[submodule "addins/imperas-riscv-tests"]
path = addins/imperas-riscv-tests
url = https://github.com/riscv-ovpsim/imperas-riscv-tests
[submodule "addins/riscv-tests"]
path = addins/riscv-tests
url = https://github.com/riscv-software-src/riscv-tests

1
addins/riscv-tests Submodule

@ -0,0 +1 @@
Subproject commit cf04274f50621fd9ef9147793cca6dd1657985c7

24
examples/C/common/LICENSE Normal file
View File

@ -0,0 +1,24 @@
Copyright (c) 2012-2015, The Regents of the University of California (Regents).
All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the Regents nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

1
examples/C/common/README Normal file
View File

@ -0,0 +1 @@
These files are from github.com/riscv-software-src/riscv-tests

225
examples/C/common/crt.S Normal file
View File

@ -0,0 +1,225 @@
# See LICENSE for license details.
#include "encoding.h"
#if __riscv_xlen == 64
# define LREG ld
# define SREG sd
# define REGBYTES 8
#else
# define LREG lw
# define SREG sw
# define REGBYTES 4
#endif
.section ".text.init"
.globl _start
# _start: program entry point, placed in section .text.init.
# Sequence: clear x1..x31, set the FS/XS fields of mstatus, check that the
# hardware XLEN matches the XLEN this file was compiled for, zero the FP
# registers (FP builds only), install trap_entry in mtvec, set up gp, tp
# and sp, then jump to _init with a0 = hart id and a1 = 1.
_start:
li x1, 0
li x2, 0
li x3, 0
li x4, 0
li x5, 0
li x6, 0
li x7, 0
li x8, 0
li x9, 0
li x10,0
li x11,0
li x12,0
li x13,0
li x14,0
li x15,0
li x16,0
li x17,0
li x18,0
li x19,0
li x20,0
li x21,0
li x22,0
li x23,0
li x24,0
li x25,0
li x26,0
li x27,0
li x28,0
li x29,0
li x30,0
li x31,0
# enable FPU and accelerator if present
# (MSTATUS_FS and MSTATUS_XS come from encoding.h)
li t0, MSTATUS_FS | MSTATUS_XS
csrs mstatus, t0
# make sure XLEN agrees with compilation choice
# t0 = 1 << 31: this is non-negative in a 64-bit register and negative in
# a 32-bit register, so the sign test below tells the two apart.
li t0, 1
slli t0, t0, 31
#if __riscv_xlen == 64
bgez t0, 1f
#else
bltz t0, 1f
#endif
# XLEN mismatch: keep storing 1 to tohost (t0 is the scratch register for
# the address) and never continue.
2:
li a0, 1
sw a0, tohost, t0
j 2b
1:
#ifdef __riscv_flen
# initialize FPU if we have one
# mtvec is pointed at the label that follows this block before any FP
# instruction runs; presumably so that a trap raised here (no FPU) resumes
# after the block instead of hanging - confirm against the target core.
la t0, 1f
csrw mtvec, t0
fssr x0
fmv.s.x f0, x0
fmv.s.x f1, x0
fmv.s.x f2, x0
fmv.s.x f3, x0
fmv.s.x f4, x0
fmv.s.x f5, x0
fmv.s.x f6, x0
fmv.s.x f7, x0
fmv.s.x f8, x0
fmv.s.x f9, x0
fmv.s.x f10,x0
fmv.s.x f11,x0
fmv.s.x f12,x0
fmv.s.x f13,x0
fmv.s.x f14,x0
fmv.s.x f15,x0
fmv.s.x f16,x0
fmv.s.x f17,x0
fmv.s.x f18,x0
fmv.s.x f19,x0
fmv.s.x f20,x0
fmv.s.x f21,x0
fmv.s.x f22,x0
fmv.s.x f23,x0
fmv.s.x f24,x0
fmv.s.x f25,x0
fmv.s.x f26,x0
fmv.s.x f27,x0
fmv.s.x f28,x0
fmv.s.x f29,x0
fmv.s.x f30,x0
fmv.s.x f31,x0
1:
#endif
# initialize trap vector
la t0, trap_entry
csrw mtvec, t0
# initialize global pointer
# (linker relaxation is switched off around this load of gp)
.option push
.option norelax
la gp, __global_pointer$
.option pop
# tp = _end rounded up to the next 64-byte boundary
la tp, _end + 63
and tp, tp, -64
# get core id
csrr a0, mhartid
# for now, assume only 1 core
# (any hart whose id is >= 1 spins on the branch below forever)
li a1, 1
1:bgeu a0, a1, 1b
# give each core 128KB of stack + TLS
# sp = tp + ((hartid + 1) << STKSHIFT); tp = tp + (hartid << STKSHIFT)
#define STKSHIFT 17
add sp, a0, 1
sll sp, sp, STKSHIFT
add sp, sp, tp
sll a2, a0, STKSHIFT
add tp, tp, a2
# a0 (hart id) and a1 (1) are still live here and become the two
# arguments of _init(cid, nc)
j _init
.align 2
# trap_entry: machine-mode trap handler installed in mtvec by _start.
# Pushes a 272-byte frame, stores x1..x31 at offset index*REGBYTES, calls
# handle_trap(a0 = mcause, a1 = mepc, a2 = pointer to the frame), writes
# the value returned in a0 to mepc, reloads x1..x31 and executes mret.
# Slot 0 of the frame is never written. The value stored for x2 is the
# stack pointer after the frame was pushed, so reloading x2 leaves sp
# pointing at the frame until the final addi pops it.
trap_entry:
addi sp, sp, -272
SREG x1, 1*REGBYTES(sp)
SREG x2, 2*REGBYTES(sp)
SREG x3, 3*REGBYTES(sp)
SREG x4, 4*REGBYTES(sp)
SREG x5, 5*REGBYTES(sp)
SREG x6, 6*REGBYTES(sp)
SREG x7, 7*REGBYTES(sp)
SREG x8, 8*REGBYTES(sp)
SREG x9, 9*REGBYTES(sp)
SREG x10, 10*REGBYTES(sp)
SREG x11, 11*REGBYTES(sp)
SREG x12, 12*REGBYTES(sp)
SREG x13, 13*REGBYTES(sp)
SREG x14, 14*REGBYTES(sp)
SREG x15, 15*REGBYTES(sp)
SREG x16, 16*REGBYTES(sp)
SREG x17, 17*REGBYTES(sp)
SREG x18, 18*REGBYTES(sp)
SREG x19, 19*REGBYTES(sp)
SREG x20, 20*REGBYTES(sp)
SREG x21, 21*REGBYTES(sp)
SREG x22, 22*REGBYTES(sp)
SREG x23, 23*REGBYTES(sp)
SREG x24, 24*REGBYTES(sp)
SREG x25, 25*REGBYTES(sp)
SREG x26, 26*REGBYTES(sp)
SREG x27, 27*REGBYTES(sp)
SREG x28, 28*REGBYTES(sp)
SREG x29, 29*REGBYTES(sp)
SREG x30, 30*REGBYTES(sp)
SREG x31, 31*REGBYTES(sp)
# Arguments for the C handler: cause, faulting pc, saved-register frame.
csrr a0, mcause
csrr a1, mepc
mv a2, sp
jal handle_trap
# The handler return value is the pc at which execution resumes.
csrw mepc, a0
# Remain in M-mode after eret
li t0, MSTATUS_MPP
csrs mstatus, t0
LREG x1, 1*REGBYTES(sp)
LREG x2, 2*REGBYTES(sp)
LREG x3, 3*REGBYTES(sp)
LREG x4, 4*REGBYTES(sp)
LREG x5, 5*REGBYTES(sp)
LREG x6, 6*REGBYTES(sp)
LREG x7, 7*REGBYTES(sp)
LREG x8, 8*REGBYTES(sp)
LREG x9, 9*REGBYTES(sp)
LREG x10, 10*REGBYTES(sp)
LREG x11, 11*REGBYTES(sp)
LREG x12, 12*REGBYTES(sp)
LREG x13, 13*REGBYTES(sp)
LREG x14, 14*REGBYTES(sp)
LREG x15, 15*REGBYTES(sp)
LREG x16, 16*REGBYTES(sp)
LREG x17, 17*REGBYTES(sp)
LREG x18, 18*REGBYTES(sp)
LREG x19, 19*REGBYTES(sp)
LREG x20, 20*REGBYTES(sp)
LREG x21, 21*REGBYTES(sp)
LREG x22, 22*REGBYTES(sp)
LREG x23, 23*REGBYTES(sp)
LREG x24, 24*REGBYTES(sp)
LREG x25, 25*REGBYTES(sp)
LREG x26, 26*REGBYTES(sp)
LREG x27, 27*REGBYTES(sp)
LREG x28, 28*REGBYTES(sp)
LREG x29, 29*REGBYTES(sp)
LREG x30, 30*REGBYTES(sp)
LREG x31, 31*REGBYTES(sp)
addi sp, sp, 272
mret
# Host communication words. Each is one zero-initialised 64-bit value,
# aligned to 64 bytes (.align 6) and exported so that C code can reach it.
# They live in their own writable .tohost section, which the linker script
# places separately from the code.
.section ".tohost","aw",@progbits
.align 6
.globl tohost
tohost: .dword 0
.align 6
.globl fromhost
fromhost: .dword 0

2832
examples/C/common/encoding.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,469 @@
// See LICENSE for license details.
#include <stdint.h>
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
#include <limits.h>
#include <sys/signal.h>
#include "util.h"
#define SYS_write 64
#undef strcmp
extern volatile uint64_t tohost;
extern volatile uint64_t fromhost;
// Send one request to the host through the tohost/fromhost words and wait
// for the reply.
//   which          request number (for example SYS_write)
//   arg0..arg2     request arguments
// Returns word 0 of the request record as read back after the host replied.
static uintptr_t syscall(uintptr_t which, uint64_t arg0, uint64_t arg1, uint64_t arg2)
{
// Request record; the host is given its address, not a copy.
volatile uint64_t magic_mem[8] __attribute__((aligned(64)));
magic_mem[0] = which;
magic_mem[1] = arg0;
magic_mem[2] = arg1;
magic_mem[3] = arg2;
// Make the record visible before its address is published.
__sync_synchronize();
tohost = (uintptr_t)magic_mem;
// Busy-wait until the host writes a non-zero value to fromhost.
while (fromhost == 0)
;
// Clear fromhost so the next request starts from a known state.
fromhost = 0;
__sync_synchronize();
return magic_mem[0];
}
#define NUM_COUNTERS 2
static uintptr_t counters[NUM_COUNTERS];
static char* counter_names[NUM_COUNTERS];
// Start or stop the measurement of the mcycle and minstret counters.
//   enable != 0: store the current counter values in counters[].
//   enable == 0: store (current value - stored value) in counters[] and
//                record each counter name in counter_names[].
// _init later prints every counter whose stored value is non-zero.
void setStats(int enable)
{
int i = 0;
// READ_CTR handles one CSR and advances i. The leading while loop hangs
// on purpose if more counters are read than NUM_COUNTERS allows.
#define READ_CTR(name) do { \
while (i >= NUM_COUNTERS) ; \
uintptr_t csr = read_csr(name); \
if (!enable) { csr -= counters[i]; counter_names[i] = #name; } \
counters[i++] = csr; \
} while (0)
READ_CTR(mcycle);
READ_CTR(minstret);
#undef READ_CTR
}
// Report `code` to the host and stop: writes (code << 1) | 1 to tohost
// and then spins forever. Never returns.
void __attribute__((noreturn)) tohost_exit(uintptr_t code)
{
  uintptr_t request = (code << 1) | 1;

  tohost = request;
  for (;;)
    ;
}
// Default (weak) trap handler: every trap ends the program with exit
// code 1337. Programs that expect traps supply their own handle_trap.
uintptr_t __attribute__((weak)) handle_trap(uintptr_t cause, uintptr_t epc, uintptr_t regs[32])
{
  (void)cause;
  (void)epc;
  (void)regs;
  tohost_exit(1337);
}
// Bare-metal replacement for exit(): hands the status to tohost_exit,
// which does not return.
void exit(int code)
{
  uintptr_t status = code;

  tohost_exit(status);
}
// Bare-metal replacement for abort(): exits with status 128 + SIGABRT.
void abort()
{
  const int status = 128 + SIGABRT;

  exit(status);
}
// Write the NUL-terminated string `s` through a SYS_write request with
// 1 as the first request argument.
void printstr(const char* s)
{
  size_t length = strlen(s);

  syscall(SYS_write, 1, (uintptr_t)s, length);
}
// Default (weak) per-core entry hook. Multi-threaded programs override it.
// In the single-threaded default only core 0 returns; any other core
// parks here forever.
void __attribute__((weak)) thread_entry(int cid, int nc)
{
  (void)nc;

  if (cid == 0)
    return;

  for (;;)
    ;
}
// Default (weak) main: used only when the program does not define its own.
// Prints a reminder and returns -1.
int __attribute__((weak)) main(int argc, char** argv)
{
  (void)argc;
  (void)argv;

  printstr("Implement main(), foo!\n");
  return -1;
}
// Initialise the thread-local storage area of the calling core.
// Copies the initialised thread data [_tdata_begin, _tdata_end) to the
// address held in tp, then zero-fills the following
// [_tdata_end, _tbss_end) bytes. The three symbols are defined by the
// linker script.
static void init_tls()
{
// Bound to the tp register, which crt.S sets up before calling _init.
register void* thread_pointer asm("tp");
extern char _tdata_begin, _tdata_end, _tbss_end;
size_t tdata_size = &_tdata_end - &_tdata_begin;
memcpy(thread_pointer, &_tdata_begin, tdata_size);
size_t tbss_size = &_tbss_end - &_tdata_end;
// void* arithmetic (GNU extension): the zeroed area starts right after
// the copied data.
memset(thread_pointer + tdata_size, 0, tbss_size);
}
// C-level entry point, reached from _start in crt.S.
//   cid  id of the calling core
//   nc   number of cores
// Sets up TLS, runs thread_entry (which returns only on core 0 unless the
// program overrides it), calls main, prints every non-zero counter
// recorded by setStats, and exits with the value main returned.
void _init(int cid, int nc)
{
  init_tls();
  thread_entry(cid, nc);

  // only single-threaded programs should ever get here.
  int ret = main(0, 0);

  // 32 bytes per counter is enough for the longest line:
  // "minstret = " (11) + 20 decimal digits + '\n' = 32, and the shorter
  // "mcycle = " line leaves room for the terminating NUL.
  char buf[NUM_COUNTERS * 32] __attribute__((aligned(64)));
  char* pbuf = buf;
  for (int i = 0; i < NUM_COUNTERS; i++)
    if (counters[i])
      // counters[] is uintptr_t: print it at full width. "%d" would fetch
      // an int from the argument list and drop the upper 32 bits on RV64.
      pbuf += sprintf(pbuf, "%s = %lu\n", counter_names[i], (unsigned long)counters[i]);
  if (pbuf != buf)
    printstr(buf);

  exit(ret);
}
#undef putchar
// Buffered character output. Characters accumulate in a per-thread
// 64-byte buffer that is flushed with a SYS_write request when a newline
// is stored or the buffer becomes full. Always returns 0.
int putchar(int ch)
{
  static __thread char line[64] __attribute__((aligned(64)));
  static __thread int used = 0;

  line[used] = ch;
  used += 1;

  int must_flush = (ch == '\n') || (used == sizeof(line));
  if (must_flush)
  {
    syscall(SYS_write, 1, (uintptr_t)line, used);
    used = 0;
  }

  return 0;
}
// Print `x` as exactly 16 lower-case hexadecimal digits (leading zeros
// included, no prefix, no newline).
void printhex(uint64_t x)
{
  static const char digit_of[16] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
  };
  char text[17];

  text[16] = 0;
  // Fill from the least significant nibble (rightmost digit) leftwards.
  for (int pos = 15; pos >= 0; pos--)
  {
    text[pos] = digit_of[x & 0xF];
    x >>= 4;
  }

  printstr(text);
}
// Emit `num` in the given `base` through `putch`, left-padded with `padc`
// up to `width` characters. Digits above 9 are printed as lower-case
// letters. At least one digit is always produced.
static inline void printnum(void (*putch)(int, void**), void **putdat,
                            unsigned long long num, unsigned base, int width, int padc)
{
  unsigned digs[sizeof(num)*CHAR_BIT];
  int ndigits = 0;

  // Collect digits, least significant first.
  do
  {
    digs[ndigits++] = num % base;
    num /= base;
  } while (num != 0);

  // Padding goes out first, then the digits from most significant down.
  for (int pad = width - ndigits; pad > 0; pad--)
    putch(padc, putdat);

  for (int i = ndigits - 1; i >= 0; i--)
  {
    unsigned d = digs[i];
    putch(d >= 10 ? d + ('a' - 10) : d + '0', putdat);
  }
}
// Fetch the next unsigned argument from `ap`, widened to unsigned long
// long. `lflag` counts the 'l' modifiers seen: 0 reads unsigned int,
// 2 or more reads unsigned long long, anything else reads unsigned long.
static unsigned long long getuint(va_list *ap, int lflag)
{
  if (!lflag)
    return va_arg(*ap, unsigned int);
  if (lflag < 2)
    return va_arg(*ap, unsigned long);
  return va_arg(*ap, unsigned long long);
}
// Fetch the next signed argument from `ap`, widened to long long.
// `lflag` counts the 'l' modifiers seen: 0 reads int, 2 or more reads
// long long, anything else reads long.
static long long getint(va_list *ap, int lflag)
{
  if (!lflag)
    return va_arg(*ap, int);
  if (lflag < 2)
    return va_arg(*ap, long);
  return va_arg(*ap, long long);
}
// Minimal printf engine. Walks `fmt`, sends ordinary characters to
// `putch(ch, putdat)` and expands %-conversions using arguments from `ap`.
// Conversions handled: c, s, d, u, o, x, p and %%. Flags and modifiers
// handled: '-', '0', '#', width, '*', '.', and 'l' (repeatable).
// There is no floating-point conversion; an unrecognised conversion such
// as %f prints a literal '%' and then the rest of the sequence as text,
// without consuming an argument.
static void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap)
{
register const char* p;
const char* last_fmt;
register int ch, err;
unsigned long long num;
int base, lflag, width, precision, altflag;
char padc;
while (1) {
// Copy literal text up to the next '%'; a NUL ends the whole call.
while ((ch = *(unsigned char *) fmt) != '%') {
if (ch == '\0')
return;
fmt++;
putch(ch, putdat);
}
fmt++;
// Process a %-escape sequence
// last_fmt remembers where the sequence body starts so that an
// unrecognised conversion can be replayed as plain text.
last_fmt = fmt;
padc = ' ';
width = -1;
precision = -1;
lflag = 0;
altflag = 0;
reswitch:
switch (ch = *(unsigned char *) fmt++) {
// flag to pad on the right
// NOTE(review): padc is also what printnum uses as its left-fill
// character, so "%-5d" appears to be filled with '-' characters.
case '-':
padc = '-';
goto reswitch;
// flag to pad with 0's instead of spaces
case '0':
padc = '0';
goto reswitch;
// width field
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
// Accumulate a decimal number; whether it is the width or the
// precision is decided at process_precision.
for (precision = 0; ; ++fmt) {
precision = precision * 10 + ch - '0';
ch = *fmt;
if (ch < '0' || ch > '9')
break;
}
goto process_precision;
case '*':
precision = va_arg(ap, int);
goto process_precision;
case '.':
// After a '.', any following number is kept as the precision.
if (width < 0)
width = 0;
goto reswitch;
case '#':
altflag = 1;
goto reswitch;
process_precision:
// A number seen before any width was set is the width.
if (width < 0)
width = precision, precision = -1;
goto reswitch;
// long flag (doubled for long long)
case 'l':
lflag++;
goto reswitch;
// character
case 'c':
putch(va_arg(ap, int), putdat);
break;
// string
case 's':
if ((p = va_arg(ap, char *)) == NULL)
p = "(null)";
// Left padding unless the '-' flag was given.
if (width > 0 && padc != '-')
for (width -= strnlen(p, precision); width > 0; width--)
putch(padc, putdat);
// Print at most `precision` characters (all of them if precision < 0).
for (; (ch = *p) != '\0' && (precision < 0 || --precision >= 0); width--) {
putch(ch, putdat);
p++;
}
// Right padding with spaces for whatever width is left.
for (; width > 0; width--)
putch(' ', putdat);
break;
// (signed) decimal
case 'd':
num = getint(&ap, lflag);
if ((long long) num < 0) {
putch('-', putdat);
num = -(long long) num;
}
base = 10;
goto signed_number;
// unsigned decimal
case 'u':
base = 10;
goto unsigned_number;
// (unsigned) octal
case 'o':
// should do something with padding so it's always 3 octits
base = 8;
goto unsigned_number;
// pointer
case 'p':
static_assert(sizeof(long) == sizeof(void*));
lflag = 1;
putch('0', putdat);
putch('x', putdat);
/* fall through to 'x' */
// (unsigned) hexadecimal
case 'x':
base = 16;
unsigned_number:
num = getuint(&ap, lflag);
signed_number:
printnum(putch, putdat, num, base, width, padc);
break;
// escaped '%' character
case '%':
putch(ch, putdat);
break;
// unrecognized escape sequence - just print it literally
default:
putch('%', putdat);
fmt = last_fmt;
break;
}
}
}
// Formatted output through putchar. Supports only the conversions that
// vprintfmt implements. Always returns 0 rather than the number of
// characters written.
int printf(const char* fmt, ...)
{
  va_list args;

  va_start(args, fmt);
  // putchar is passed through a void* cast; it is called with the
  // (int, void**) signature and ignores the second argument.
  vprintfmt((void*)putchar, 0, fmt, args);
  va_end(args);

  return 0;
}
// Format into the caller-supplied buffer `str` and NUL-terminate it.
// Returns the number of characters written, not counting the NUL.
// No bounds checking: the caller must provide a large enough buffer.
int sprintf(char* str, const char* fmt, ...)
{
va_list ap;
char* str0 = str;
va_start(ap, fmt);
// Output callback (GCC nested function): stores one character through
// the cursor that `data` points to and advances that cursor.
void sprintf_putch(int ch, void** data)
{
char** pstr = (char**)data;
**pstr = ch;
(*pstr)++;
}
// &str is the cursor handed to the callback, so `str` ends up just past
// the last character written.
vprintfmt(sprintf_putch, (void**)&str, fmt, ap);
*str = 0;
va_end(ap);
return str - str0;
}
// Copy `len` bytes from `src` to `dest` and return `dest`.
// When both pointers and the length are multiples of the machine word
// size the copy proceeds a word at a time; otherwise byte by byte.
void* memcpy(void* dest, const void* src, size_t len)
{
  const uintptr_t word_mask = sizeof(uintptr_t) - 1;
  uintptr_t misaligned = ((uintptr_t)dest | (uintptr_t)src | len) & word_mask;

  if (misaligned == 0) {
    uintptr_t* out = dest;
    const uintptr_t* in = src;
    uintptr_t* const stop = (uintptr_t*)((char*)dest + len);

    for (; out < stop; out++, in++)
      *out = *in;
  } else {
    char* out = dest;
    const char* in = src;
    char* const stop = (char*)dest + len;

    for (; out < stop; out++, in++)
      *out = *in;
  }

  return dest;
}
// Fill `len` bytes at `dest` with the low 8 bits of `byte` and return
// `dest`. When the pointer and the length are multiples of the machine
// word size the fill stores whole words; otherwise single bytes.
void* memset(void* dest, int byte, size_t len)
{
  const uintptr_t word_mask = sizeof(uintptr_t) - 1;

  if ((((uintptr_t)dest | len) & word_mask) == 0) {
    // Replicate the byte into every byte lane of a word:
    // (all ones) / 0xFF is 0x0101...01 for any word width.
    uintptr_t pattern = (uintptr_t)(byte & 0xFF) * ((uintptr_t)-1 / 0xFF);
    uintptr_t* out = dest;
    uintptr_t* const stop = (uintptr_t*)((char*)dest + len);

    for (; out < stop; out++)
      *out = pattern;
  } else {
    char* out = dest;
    char* const stop = (char*)dest + len;

    for (; out < stop; out++)
      *out = byte;
  }

  return dest;
}
// Return the number of characters in `s` before its terminating NUL.
size_t strlen(const char *s)
{
  size_t count = 0;

  for (; s[count] != '\0'; count++)
    ;

  return count;
}
// Return the length of `s`, looking at no more than `n` characters.
size_t strnlen(const char *s, size_t n)
{
  size_t count = 0;

  for (; count < n && s[count] != '\0'; count++)
    ;

  return count;
}
// Compare two NUL-terminated strings as sequences of unsigned bytes.
// Returns the difference between the first pair of bytes that differ,
// or 0 when the strings are equal.
int strcmp(const char* s1, const char* s2)
{
  for (;;) {
    unsigned char left = (unsigned char)*s1++;
    unsigned char right = (unsigned char)*s2++;

    if (left == 0 || left != right)
      return left - right;
  }
}
// Copy the NUL-terminated string `src`, terminator included, to `dest`
// and return `dest`. The destination must be large enough.
char* strcpy(char* dest, const char* src)
{
  char* out = dest;

  for (;;) {
    char c = *src++;

    *out++ = c;
    if (c == '\0')
      break;
  }

  return dest;
}
// Convert the leading decimal number in `str` to a long.
// Leading spaces are skipped, one optional '+' or '-' sign is accepted,
// and conversion stops at the first character that is not a decimal
// digit. Returns 0 when no digits are present. Overflow is not detected.
long atol(const char* str)
{
  long res = 0;
  int sign = 0;

  while (*str == ' ')
    str++;

  if (*str == '-' || *str == '+') {
    sign = *str == '-';
    str++;
  }

  // Stop at the first non-digit, so trailing text such as a unit suffix
  // or a newline is not folded into the result.
  while (*str >= '0' && *str <= '9') {
    res *= 10;
    res += *str++ - '0';
  }

  return sign ? -res : res;
}

66
examples/C/common/test.ld Normal file
View File

@ -0,0 +1,66 @@
/*======================================================================*/
/* Proxy kernel linker script */
/*======================================================================*/
/* This is the linker script used when building the proxy kernel. */
/*----------------------------------------------------------------------*/
/* Setup */
/*----------------------------------------------------------------------*/
/* The OUTPUT_ARCH command specifies the machine architecture, where the
argument is one of the names used in the BFD library. More
specifically, one of the entries in bfd/cpu-mips.c */
OUTPUT_ARCH( "riscv" )
ENTRY(_start)
/*----------------------------------------------------------------------*/
/* Sections */
/*----------------------------------------------------------------------*/
SECTIONS
{
/* text: test code section */
. = 0x80000000;
.text.init : { *(.text.init) }
. = ALIGN(0x1000);
.tohost : { *(.tohost) }
. = ALIGN(0x1000);
.text : { *(.text) }
/* data segment */
.data : { *(.data) }
.sdata : {
__global_pointer$ = . + 0x800;
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*)
*(.sdata .sdata.* .gnu.linkonce.s.*)
}
/* bss segment */
.sbss : {
*(.sbss .sbss.* .gnu.linkonce.sb.*)
*(.scommon)
}
.bss : { *(.bss) }
/* thread-local data segment */
.tdata :
{
_tdata_begin = .;
*(.tdata)
_tdata_end = .;
}
.tbss :
{
*(.tbss)
_tbss_end = .;
}
/* End of uninitialized data segment */
_end = .;
}

90
examples/C/common/util.h Normal file
View File

@ -0,0 +1,90 @@
// See LICENSE for license details.
#ifndef __UTIL_H
#define __UTIL_H
extern void setStats(int enable);
#include <stdint.h>
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }
// Compare the first `n` ints of `test` against `verify`.
// Returns 0 when all elements match, otherwise the 1-based index of the
// first mismatch. Elements are examined two at a time, and both elements
// of a pair are read before either is compared.
static int verify(int n, const volatile int* test, const int* verify)
{
  const int paired = n / 2 * 2;
  int idx = 0;

  while (idx < paired)
  {
    int got_lo = test[idx], got_hi = test[idx + 1];
    int want_lo = verify[idx], want_hi = verify[idx + 1];

    if (got_lo != want_lo)
      return idx + 1;
    if (got_hi != want_hi)
      return idx + 2;

    idx += 2;
  }

  // An odd-length input has one trailing element left to check.
  if (n % 2 == 0)
    return 0;
  return (test[n - 1] != verify[n - 1]) ? n : 0;
}
// Compare the first `n` doubles of `test` against `verify` using exact
// equality. Returns 0 when all elements match, otherwise the 1-based
// index of the first mismatch. Elements are examined two at a time, and
// both elements of a pair are read before either is compared.
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  const int paired = n / 2 * 2;

  for (int idx = 0; idx < paired; idx += 2)
  {
    double got_lo = test[idx], got_hi = test[idx + 1];
    double want_lo = verify[idx], want_hi = verify[idx + 1];

    if (!(got_lo == want_lo))
      return idx + 1;
    if (!(got_hi == want_hi))
      return idx + 2;
  }

  // An odd-length input has one trailing element left to check.
  if (n % 2 == 0)
    return 0;
  return (test[n - 1] != verify[n - 1]) ? n : 0;
}
// Barrier for `ncores` callers built on two shared variables (`count`,
// `sense`) and one thread-local flag (`threadsense`).
// Each caller flips its own threadsense and increments count. The caller
// whose increment finds count == ncores-1 resets count and publishes its
// threadsense in `sense`; every other caller spins until `sense` equals
// its own threadsense.
// NOTE(review): correctness relies on every participant calling barrier
// the same number of times with the same ncores - confirm at call sites.
static void __attribute__((noinline)) barrier(int ncores)
{
static volatile int sense;
static volatile int count;
static __thread int threadsense;
__sync_synchronize();
// Flip the per-thread phase for this round.
threadsense = !threadsense;
if (__sync_fetch_and_add(&count, 1) == ncores-1)
{
// Last arrival: reset the counter, then release the waiters.
count = 0;
sense = threadsense;
}
else while(sense != threadsense)
;
__sync_synchronize();
}
// One step of the shift register used to generate test data: shifts `x`
// right by one and inserts (bit0 XOR bit1) of the old value at bit 62.
static uint64_t lfsr(uint64_t x)
{
  uint64_t shifted = x >> 1;
  uint64_t feedback = (x ^ shifted) & 1;

  return shifted | (feedback << 62);
}
// Return the length in bytes (2 or 4) of the RISC-V instruction at `pc`.
// `pc` must be the address of a readable, 2-byte-aligned instruction.
// In the base encoding a 16-bit parcel whose two low bits are 11 starts a
// 32-bit instruction; the other three values (00, 01, 10) are the
// compressed quadrants and denote a 16-bit instruction.
// Longer encodings (48 bits and up) are not distinguished and are
// reported as 4.
static uintptr_t insn_len(uintptr_t pc)
{
  unsigned short parcel = *(unsigned short*)pc;

  return ((parcel & 3) == 3) ? 4 : 2;
}
#ifdef __riscv
#include "encoding.h"
#endif
#define stringify_1(s) #s
#define stringify(s) stringify_1(s)
// stats(code, iter): run `code` once and report its cost.
// Reads mcycle and minstret before and after `code`; when the variable
// `cid` in the calling scope is 0, prints the stringified code, the cycle
// count, cycles per iteration (one decimal digit) and cycles per
// instruction (one decimal digit). The caller must have `cid` in scope.
// Both ratios are computed with integer arithmetic.
#define stats(code, iter) do { \
unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
code; \
_c += read_csr(mcycle), _i += read_csr(minstret); \
if (cid == 0) \
printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \
} while(0)
#endif //__UTIL_H

16
examples/C/mm/Makefile Normal file
View File

@ -0,0 +1,16 @@
# Builds the bare-metal matrix-multiply benchmark and its disassembly.
TARGET = mm

# Default goal: annotated disassembly of the linked program.
$(TARGET).objdump: $(TARGET)
	riscv64-unknown-elf-objdump -S -D $(TARGET) > $(TARGET).objdump

# Every local C source is compiled into the target, so every local source
# and header, the shared runtime and the linker script are prerequisites.
# Option notes:
#   * only one optimisation level is given; GCC honours the last -O option,
#     so a trailing bare -O would silently replace -O2
#   * -lm -lgcc follow the sources, because the linker resolves archive
#     members only for references it has already seen
$(TARGET): $(wildcard *.c) $(wildcard *.h) ../common/crt.S ../common/syscalls.c ../common/test.ld Makefile
	riscv64-unknown-elf-gcc -g -o $(TARGET) -march=rv64gc -mabi=lp64d -mcmodel=medany \
		-DPREALLOCATE=1 -static -std=gnu99 -O2 -ffast-math -fno-common \
		-fno-builtin-printf -fno-tree-loop-distribute-patterns \
		-nostdlib -nostartfiles -T../common/test.ld \
		-I../common \
		*.c ../common/crt.S ../common/syscalls.c \
		-lm -lgcc

clean:
	rm -f $(TARGET) $(TARGET).objdump

36
examples/C/mm/common.h Normal file
View File

@ -0,0 +1,36 @@
// See LICENSE for license details.
#ifndef _MM_H
#define _MM_H
#include <string.h>
#include <stdint.h>
#include <math.h>
#ifdef SP
typedef float t;
#define fma fmaf
#else
typedef double t;
#endif
#define inline inline __attribute__((always_inline))
#define alloca_aligned(s, a) ((void*)(((uintptr_t)alloca((s)+(a)-1)+(a)-1)&~((a)-1)))
#include "rb.h"
#ifdef __cplusplus
extern "C" {
#endif
void mm(size_t m, size_t n, size_t p,
t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc);
#ifdef __cplusplus
}
#endif
//void rb(t* a, t* b, t* c, size_t lda, size_t ldb, size_t ldc);
#endif

BIN
examples/C/mm/mm Executable file

Binary file not shown.

152
examples/C/mm/mm.c Normal file
View File

@ -0,0 +1,152 @@
// See LICENSE for license details.
#include "common.h"
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <alloca.h>
#define MIN(a, b) ((a) < (b) ? (a) : (b))
// Reference kernel: C += A * B for an m x p matrix A, a p x n matrix B and
// an m x n matrix C, all row-major with leading dimensions lda/ldb/ldc.
// The inner product is unrolled by four into independent accumulators
// that are combined as (s0 + s1) + (s2 + s3).
static void mm_naive(size_t m, size_t n, size_t p,
                     t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc)
{
  const size_t p_unrolled = p / 4 * 4;

  for (size_t row = 0; row < m; row++)
  {
    t* a_row = a + row * lda;
    t* c_row = c + row * ldc;

    for (size_t col = 0; col < n; col++)
    {
      t* b_col = b + col;
      t acc0 = c_row[col], acc1 = 0, acc2 = 0, acc3 = 0;
      size_t k = 0;

      for (; k < p_unrolled; k += 4)
      {
        acc0 = fma(a_row[k + 0], b_col[(k + 0) * ldb], acc0);
        acc1 = fma(a_row[k + 1], b_col[(k + 1) * ldb], acc1);
        acc2 = fma(a_row[k + 2], b_col[(k + 2) * ldb], acc2);
        acc3 = fma(a_row[k + 3], b_col[(k + 3) * ldb], acc3);
      }

      // Remaining 0..3 terms go into the first accumulator.
      for (; k < p; k++)
        acc0 = fma(a_row[k], b_col[k * ldb], acc0);

      c_row[col] = (acc0 + acc1) + (acc2 + acc3);
    }
  }
}
// Register-blocked C += A * B. Full RBM x RBN tiles of C are handled by
// kloop; the leftover columns of each row band and the leftover rows at
// the bottom are handled by mm_naive.
static inline void mm_rb(size_t m, size_t n, size_t p,
                         t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc)
{
  const size_t full_rows = m / RBM * RBM;
  const size_t full_cols = n / RBN * RBN;

  for (size_t row = 0; row < full_rows; row += RBM)
  {
    t* a_band = a + row * lda;
    t* c_band = c + row * ldc;

    for (size_t col = 0; col < full_cols; col += RBN)
      kloop(p, a_band, lda, b + col, ldb, c_band + col, ldc);

    // Right-hand edge of this band: columns full_cols .. n-1.
    mm_naive(RBM, n - full_cols, p, a_band, lda, b + full_cols, ldb, c_band + full_cols, ldc);
  }

  // Bottom edge: rows full_rows .. m-1, all columns.
  mm_naive(m - full_rows, n, p, a + full_rows * lda, lda, b, ldb, c + full_rows * ldc, ldc);
}
// Copy an m x p block from a0 (leading dimension lda0) to a (leading
// dimension lda). Each row is moved in groups of eight elements, reading
// the whole group before writing it, with an element-wise tail.
static inline void repack(t* a, size_t lda, const t* a0, size_t lda0, size_t m, size_t p)
{
  enum { GROUP = 8 };
  const size_t p_grouped = p / GROUP * GROUP;

  for (size_t row = 0; row < m; row++)
  {
    t* dst = a + row * lda;
    const t* src = a0 + row * lda0;
    size_t col = 0;

    for (; col < p_grouped; col += GROUP)
    {
      t staged[GROUP];

      for (int e = 0; e < GROUP; e++)
        staged[e] = src[col + e];
      for (int e = 0; e < GROUP; e++)
        dst[col + e] = staged[e];
    }

    for (; col < p; col++)
      dst[col] = src[col];
  }
}
// Cache-blocked C += A * B.
// The parts of A, B and C covered by whole CBM x CBK, CBK x CBN and
// CBM x CBN blocks are first repacked into contiguous per-block storage
// allocated on the stack (a1, b1, c1). Whole blocks are multiplied with
// mm_rb on the packed copies; the edge strips that do not fill a whole
// block are multiplied directly on the original matrices. Finally the
// packed C blocks are copied back into c.
// NOTE(review): the three temporaries come from alloca, so stack use grows
// with the matrix sizes - confirm the stack is large enough for callers.
static void mm_cb(size_t m, size_t n, size_t p,
t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc)
{
// Number of whole blocks per dimension, and the extent they cover.
size_t nmb = m/CBM, nnb = n/CBN, npb = p/CBK;
size_t mb = nmb*CBM, nb = nnb*CBN, pb = npb*CBK;
//t a1[mb*pb], b1[pb*nb], c1[mb*nb];
t* a1 = (t*)alloca_aligned(sizeof(t)*mb*pb, 8192);
t* b1 = (t*)alloca_aligned(sizeof(t)*pb*nb, 8192);
t* c1 = (t*)alloca_aligned(sizeof(t)*mb*nb, 8192);
// Pack A: block (i/CBM, j/CBK) becomes a contiguous CBM x CBK tile.
for (size_t i = 0; i < mb; i += CBM)
for (size_t j = 0; j < pb; j += CBK)
repack(a1 + (npb*(i/CBM) + j/CBK)*(CBM*CBK), CBK, a + i*lda + j, lda, CBM, CBK);
// Pack B: block (i/CBK, j/CBN) becomes a contiguous CBK x CBN tile.
for (size_t i = 0; i < pb; i += CBK)
for (size_t j = 0; j < nb; j += CBN)
repack(b1 + (nnb*(i/CBK) + j/CBN)*(CBK*CBN), CBN, b + i*ldb + j, ldb, CBK, CBN);
// Pack C the same way so the accumulation happens in the packed copy.
for (size_t i = 0; i < mb; i += CBM)
for (size_t j = 0; j < nb; j += CBN)
repack(c1 + (nnb*(i/CBM) + j/CBN)*(CBM*CBN), CBN, c + i*ldc + j, ldc, CBM, CBN);
for (size_t i = 0; i < mb; i += CBM)
{
for (size_t j = 0; j < nb; j += CBN)
{
// Whole blocks along the inner dimension, all operands packed.
for (size_t k = 0; k < pb; k += CBK)
{
mm_rb(CBM, CBN, CBK,
a1 + (npb*(i/CBM) + k/CBK)*(CBM*CBK), CBK,
b1 + (nnb*(k/CBK) + j/CBN)*(CBK*CBN), CBN,
c1 + (nnb*(i/CBM) + j/CBN)*(CBM*CBN), CBN);
}
// Inner-dimension remainder: unpacked A and B, packed C.
if (pb < p)
{
mm_rb(CBM, CBN, p - pb,
a + i*lda + pb, lda,
b + pb*ldb + j, ldb,
c1 + (nnb*(i/CBM) + j/CBN)*(CBM*CBN), CBN);
}
}
// Column remainder of this row band: everything unpacked.
if (nb < n)
{
for (size_t k = 0; k < p; k += CBK)
{
mm_rb(CBM, n - nb, MIN(p - k, CBK),
a + i*lda + k, lda,
b + k*ldb + nb, ldb,
c + i*ldc + nb, ldc);
}
}
}
// Row remainder (rows mb .. m-1): everything unpacked.
if (mb < m)
{
for (size_t j = 0; j < n; j += CBN)
{
for (size_t k = 0; k < p; k += CBK)
{
mm_rb(m - mb, MIN(n - j, CBN), MIN(p - k, CBK),
a + mb*lda + k, lda,
b + k*ldb + j, ldb,
c + mb*ldc + j, ldc);
}
}
}
// Write the packed C blocks back to the caller's matrix.
for (size_t i = 0; i < mb; i += CBM)
for (size_t j = 0; j < nb; j += CBN)
repack(c + i*ldc + j, ldc, c1 + (nnb*(i/CBM) + j/CBN)*(CBM*CBN), CBN, CBM, CBN);
}
// Public entry point: C += A * B for row-major matrices with leading
// dimensions lda/ldb/ldc. Problems no larger than two cache blocks in
// every dimension (the expected case) go straight to the register-blocked
// kernel; larger ones use the cache-blocked driver.
void mm(size_t m, size_t n, size_t p,
        t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc)
{
  int fits_in_cache = m <= 2*CBM && n <= 2*CBN && p <= 2*CBK;

  if (!__builtin_expect(fits_in_cache, 1))
  {
    mm_cb(m, n, p, a, lda, b, ldb, c, ldc);
    return;
  }

  mm_rb(m, n, p, a, lda, b, ldb, c, ldc);
}

76
examples/C/mm/mm_main.c Normal file
View File

@ -0,0 +1,76 @@
// See LICENSE for license details.
#include "common.h"
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include "util.h"
#pragma GCC optimize ("unroll-loops")
// Benchmark driver: multiplies one CBM x CBK matrix by one CBK x CBN
// matrix R times (accumulating into c), reports the instruction and cycle
// counts of the last repetition, then checks c against a plain
// triple-loop product scaled by R. Exits with status 0 on success and 1
// on the first mismatch.
int main(int argc, char** argv)
//void thread_entry(int cid, int nc)
{
  const int R = 8;
  int m, n, p;
  uint64_t s = 0xdeadbeefU;
  int cid = 0;
  int nc = 0;

  m = CBM;
  n = CBN;
  p = CBK;

  t a[m*p];
  t b[p*n];
  t c[m*n];

  // Fill the inputs with values from the shift-register sequence.
  for (size_t i = 0; i < m; i++)
    for (size_t j = 0; j < p; j++)
      a[i*p+j] = (t)(s = lfsr(s));
  for (size_t i = 0; i < p; i++)
    for (size_t j = 0; j < n; j++)
      b[i*n+j] = (t)(s = lfsr(s));
  memset(c, 0, m*n*sizeof(c[0]));

  // The counters are re-armed on every pass, so after the loop they hold
  // the cost of the final repetition only.
  size_t instret, cycles;
  for (int i = 0; i < R; i++)
  {
    instret = -read_csr(minstret);
    cycles = -read_csr(mcycle);
    mm(m, n, p, a, p, b, n, c, n);
    instret += read_csr(minstret);
    cycles += read_csr(mcycle);
  }

  asm volatile("fence");

  printf("C%d: reg block %dx%dx%d, cache block %dx%dx%d\n",
         cid, RBM, RBN, RBK, CBM, CBN, CBK);
  printf("C%d: %d instructions\n", cid, (int)(instret));
  printf("C%d: %d cycles\n", cid, (int)(cycles));
  printf("C%d: %d flops\n", cid, 2*m*n*p);
  // The quotient has type size_t; convert it so that it matches "%d".
  printf("C%d: %d Mflops @ 1 GHz\n", cid, (int)(2000*m*n*p/(cycles)));

#if 1
  for (size_t i = 0; i < m; i++)
  {
    for (size_t j = 0; j < n; j++)
    {
      t s = 0;
      for (size_t k = 0; k < p; k++)
        s += a[i*p+k] * b[k*n+j];
      // c accumulated the product R times.
      s *= R;
      if (fabs(c[i*n+j]-s) > fabs(1e-6*s))
      {
        // NOTE(review): the printf in ../common/syscalls.c has no %f
        // conversion, so with that runtime the two values are not shown.
        printf("C%d: c[%lu][%lu] %f != %f\n", cid, i, j, c[i*n+j], s);
        exit(1);
      }
    }
  }
#endif

  //barrier(nc);
  exit(0);
}

View File

@ -0,0 +1,81 @@
import scala.sys.process._
// Generator for rb.h: emits the RBM/RBN/RBK and CBM/CBN/CBK constants and
// a fully unrolled register-blocked kernel `kloop` as C source text, then
// invokes the build through shell commands.
object MMGen {
// Lets Int values be used wherever a String is expected (index suffixes).
implicit def i2s(i: Int) = i.toString
// Write `contents` to the file `name`, replacing any existing file.
def writeFile(name: String, contents: String) = {
val f = new java.io.FileWriter(name)
f.write(contents)
f.close
}
// Current nesting depth of the generated C code.
var indent = 0
def spacing = " " * indent
// One C assignment statement at the current indentation.
def assign(lhs: String, rhs: String) =
spacing + lhs + " = " + rhs + ";\n"
// One C declaration with initialiser: "<t> <n> = <v>;".
def init(t: String, n: String, v: String) =
assign(t+" "+n, v)
// Emit an optional header line followed by "{" and indent one level.
def open_block(s: String = "") = {
val result = (if (s != "") spacing + s else "") + spacing + "{\n"
indent = indent + 1
result
}
// Unindent one level and emit "}".
def close_block = {
indent = indent - 1
spacing + "}\n"
}
// C array access "m[i]".
def ar(m: String, i: String) = m+"["+i+"]"
// Join name parts with underscores, e.g. r("c", "0", "1") gives "c_0_1".
def r(a: String, b: String*) = (a :: b.toList).reduceLeft(_+"_"+_)
// Build the C text of kloop for an m x n register tile unrolled p times
// along the inner dimension.
def rb(m: Int, n: Int, p: Int) = {
var s = open_block("static inline void kloop(size_t p, t* a0, size_t lda, t* b0, size_t ldb, t* c, size_t ldc)\n")
// Row pointers into C, then one scalar accumulator per tile element.
for (i <- 0 until m)
s += init("t*", r("c", i), "&"+ar("c", "ldc*"+i))
for (i <- 0 until m; j <- 0 until n)
s += init("t", r("c", i, j), ar(r("c", i), j))
// Loop body: row pointers into A and B plus m*n*p fused multiply-adds.
def doit(m: Int, n: Int, p: Int) = {
for (i <- 0 until m)
s += init("t*", r("a", i), "&"+ar("a", "lda*"+i))
for (k <- 0 until p)
s += init("t*", r("b", k), "&"+ar("b", "ldb*"+k))
for (k <- 0 until p; i <- 0 until m; j <- 0 until n)
s += assign(r("c", i, j), "fma(" + ar(r("a", i), k) + ", " + ar(r("b", k), j) + ", " + r("c", i, j) + ")")
}
// Main loop consumes RBK inner-dimension steps per iteration ...
s += open_block("for (t *a = a0, *b = b0; a < a0 + p/RBK*RBK; a += RBK, b += RBK*ldb)\n")
doit(m, n, p)
s += close_block
// ... and the tail loop handles the remaining steps one at a time.
s += open_block("for (t *a = a0 + p/RBK*RBK, *b = b0 + p/RBK*RBK*ldb; a < a0 + p; a++, b += ldb)\n")
doit(m, n, 1)
s += close_block
// Store the accumulators back into C.
for (i <- 0 until m; j <- 0 until n)
s += assign(ar(r("c", i), j), r("c", i, j))
s += close_block
s
}
// Greatest common divisor and least common multiple helpers.
def gcd(a: Int, b: Int): Int = if (b == 0) a else gcd(b, a%b)
def lcm(a: Int, b: Int): Int = a*b/gcd(a, b)
def lcm(a: Seq[Int]): Int = {
if (a.tail.isEmpty) a.head
else lcm(a.head, lcm(a.tail))
}
// Generate rb.h for register block (m, n, p) and cache block
// (m1, n1, p1), run "make run", and copy a.out to a file named after the
// six block sizes.
def test1(m: Int, n: Int, p: Int, m1: Int, n1: Int, p1: Int) = {
val decl = "static const int RBM = "+m+", RBN = "+n+", RBK = "+p+";\n" +
"static const int CBM = "+m1+", CBN = "+n1+", CBK = "+p1+";\n"
writeFile("rb.h", decl + rb(m, n, p))
//"make"!!
"make run"!
("cp a.out " + Seq("b", m, n, p, m1, n1, p1, "run").reduce(_+"."+_))!
}
// Entry point: generates and runs a single 4x5x6 / 24x25x24 configuration.
def main(args: Array[String]): Unit = {
test1(4, 5, 6, 24, 25, 24)
//for (i <- 4 to 6; j <- 4 to 6; k <- 4 to 6)
// test1(i, j, k, if (i == 5) 35 else 36, if (j == 5) 35 else 36, if (k == 5) 35 else 36)
}
}

210
examples/C/mm/rb.h Normal file
View File

@ -0,0 +1,210 @@
static const int RBM = 4, RBN = 5, RBK = 6;
static const int CBM = 24, CBN = 25, CBK = 24;
// Register-blocked inner kernel: C[0..3][0..4] += A[0..3][0..p-1] * B[0..p-1][0..4].
//   p        length of the inner dimension
//   a0, lda  top-left of the 4-row strip of A and its leading dimension
//   b0, ldb  top-left of the 5-column strip of B and its leading dimension
//   c, ldc   top-left of the 4x5 tile of C and its leading dimension
// The 20 tile elements are held in scalar accumulators c_<row>_<col>.
// The first loop consumes RBK (6) inner-dimension steps per iteration,
// the second loop handles the remaining steps one at a time.
// The text has the shape produced by the MMGen Scala generator for a
// 4x5x6 register block; prefer regenerating over editing by hand.
static inline void kloop(size_t p, t* a0, size_t lda, t* b0, size_t ldb, t* c, size_t ldc)
{
// Row pointers into the C tile.
t* c_0 = &c[ldc*0];
t* c_1 = &c[ldc*1];
t* c_2 = &c[ldc*2];
t* c_3 = &c[ldc*3];
// Load the tile into the accumulators.
t c_0_0 = c_0[0];
t c_0_1 = c_0[1];
t c_0_2 = c_0[2];
t c_0_3 = c_0[3];
t c_0_4 = c_0[4];
t c_1_0 = c_1[0];
t c_1_1 = c_1[1];
t c_1_2 = c_1[2];
t c_1_3 = c_1[3];
t c_1_4 = c_1[4];
t c_2_0 = c_2[0];
t c_2_1 = c_2[1];
t c_2_2 = c_2[2];
t c_2_3 = c_2[3];
t c_2_4 = c_2[4];
t c_3_0 = c_3[0];
t c_3_1 = c_3[1];
t c_3_2 = c_3[2];
t c_3_3 = c_3[3];
t c_3_4 = c_3[4];
// Main loop: six inner-dimension steps per iteration.
for (t *a = a0, *b = b0; a < a0 + p/RBK*RBK; a += RBK, b += RBK*ldb)
{
t* a_0 = &a[lda*0];
t* a_1 = &a[lda*1];
t* a_2 = &a[lda*2];
t* a_3 = &a[lda*3];
t* b_0 = &b[ldb*0];
t* b_1 = &b[ldb*1];
t* b_2 = &b[ldb*2];
t* b_3 = &b[ldb*3];
t* b_4 = &b[ldb*4];
t* b_5 = &b[ldb*5];
// inner step 0
c_0_0 = fma(a_0[0], b_0[0], c_0_0);
c_0_1 = fma(a_0[0], b_0[1], c_0_1);
c_0_2 = fma(a_0[0], b_0[2], c_0_2);
c_0_3 = fma(a_0[0], b_0[3], c_0_3);
c_0_4 = fma(a_0[0], b_0[4], c_0_4);
c_1_0 = fma(a_1[0], b_0[0], c_1_0);
c_1_1 = fma(a_1[0], b_0[1], c_1_1);
c_1_2 = fma(a_1[0], b_0[2], c_1_2);
c_1_3 = fma(a_1[0], b_0[3], c_1_3);
c_1_4 = fma(a_1[0], b_0[4], c_1_4);
c_2_0 = fma(a_2[0], b_0[0], c_2_0);
c_2_1 = fma(a_2[0], b_0[1], c_2_1);
c_2_2 = fma(a_2[0], b_0[2], c_2_2);
c_2_3 = fma(a_2[0], b_0[3], c_2_3);
c_2_4 = fma(a_2[0], b_0[4], c_2_4);
c_3_0 = fma(a_3[0], b_0[0], c_3_0);
c_3_1 = fma(a_3[0], b_0[1], c_3_1);
c_3_2 = fma(a_3[0], b_0[2], c_3_2);
c_3_3 = fma(a_3[0], b_0[3], c_3_3);
c_3_4 = fma(a_3[0], b_0[4], c_3_4);
// inner step 1
c_0_0 = fma(a_0[1], b_1[0], c_0_0);
c_0_1 = fma(a_0[1], b_1[1], c_0_1);
c_0_2 = fma(a_0[1], b_1[2], c_0_2);
c_0_3 = fma(a_0[1], b_1[3], c_0_3);
c_0_4 = fma(a_0[1], b_1[4], c_0_4);
c_1_0 = fma(a_1[1], b_1[0], c_1_0);
c_1_1 = fma(a_1[1], b_1[1], c_1_1);
c_1_2 = fma(a_1[1], b_1[2], c_1_2);
c_1_3 = fma(a_1[1], b_1[3], c_1_3);
c_1_4 = fma(a_1[1], b_1[4], c_1_4);
c_2_0 = fma(a_2[1], b_1[0], c_2_0);
c_2_1 = fma(a_2[1], b_1[1], c_2_1);
c_2_2 = fma(a_2[1], b_1[2], c_2_2);
c_2_3 = fma(a_2[1], b_1[3], c_2_3);
c_2_4 = fma(a_2[1], b_1[4], c_2_4);
c_3_0 = fma(a_3[1], b_1[0], c_3_0);
c_3_1 = fma(a_3[1], b_1[1], c_3_1);
c_3_2 = fma(a_3[1], b_1[2], c_3_2);
c_3_3 = fma(a_3[1], b_1[3], c_3_3);
c_3_4 = fma(a_3[1], b_1[4], c_3_4);
// inner step 2
c_0_0 = fma(a_0[2], b_2[0], c_0_0);
c_0_1 = fma(a_0[2], b_2[1], c_0_1);
c_0_2 = fma(a_0[2], b_2[2], c_0_2);
c_0_3 = fma(a_0[2], b_2[3], c_0_3);
c_0_4 = fma(a_0[2], b_2[4], c_0_4);
c_1_0 = fma(a_1[2], b_2[0], c_1_0);
c_1_1 = fma(a_1[2], b_2[1], c_1_1);
c_1_2 = fma(a_1[2], b_2[2], c_1_2);
c_1_3 = fma(a_1[2], b_2[3], c_1_3);
c_1_4 = fma(a_1[2], b_2[4], c_1_4);
c_2_0 = fma(a_2[2], b_2[0], c_2_0);
c_2_1 = fma(a_2[2], b_2[1], c_2_1);
c_2_2 = fma(a_2[2], b_2[2], c_2_2);
c_2_3 = fma(a_2[2], b_2[3], c_2_3);
c_2_4 = fma(a_2[2], b_2[4], c_2_4);
c_3_0 = fma(a_3[2], b_2[0], c_3_0);
c_3_1 = fma(a_3[2], b_2[1], c_3_1);
c_3_2 = fma(a_3[2], b_2[2], c_3_2);
c_3_3 = fma(a_3[2], b_2[3], c_3_3);
c_3_4 = fma(a_3[2], b_2[4], c_3_4);
// inner step 3
c_0_0 = fma(a_0[3], b_3[0], c_0_0);
c_0_1 = fma(a_0[3], b_3[1], c_0_1);
c_0_2 = fma(a_0[3], b_3[2], c_0_2);
c_0_3 = fma(a_0[3], b_3[3], c_0_3);
c_0_4 = fma(a_0[3], b_3[4], c_0_4);
c_1_0 = fma(a_1[3], b_3[0], c_1_0);
c_1_1 = fma(a_1[3], b_3[1], c_1_1);
c_1_2 = fma(a_1[3], b_3[2], c_1_2);
c_1_3 = fma(a_1[3], b_3[3], c_1_3);
c_1_4 = fma(a_1[3], b_3[4], c_1_4);
c_2_0 = fma(a_2[3], b_3[0], c_2_0);
c_2_1 = fma(a_2[3], b_3[1], c_2_1);
c_2_2 = fma(a_2[3], b_3[2], c_2_2);
c_2_3 = fma(a_2[3], b_3[3], c_2_3);
c_2_4 = fma(a_2[3], b_3[4], c_2_4);
c_3_0 = fma(a_3[3], b_3[0], c_3_0);
c_3_1 = fma(a_3[3], b_3[1], c_3_1);
c_3_2 = fma(a_3[3], b_3[2], c_3_2);
c_3_3 = fma(a_3[3], b_3[3], c_3_3);
c_3_4 = fma(a_3[3], b_3[4], c_3_4);
// inner step 4
c_0_0 = fma(a_0[4], b_4[0], c_0_0);
c_0_1 = fma(a_0[4], b_4[1], c_0_1);
c_0_2 = fma(a_0[4], b_4[2], c_0_2);
c_0_3 = fma(a_0[4], b_4[3], c_0_3);
c_0_4 = fma(a_0[4], b_4[4], c_0_4);
c_1_0 = fma(a_1[4], b_4[0], c_1_0);
c_1_1 = fma(a_1[4], b_4[1], c_1_1);
c_1_2 = fma(a_1[4], b_4[2], c_1_2);
c_1_3 = fma(a_1[4], b_4[3], c_1_3);
c_1_4 = fma(a_1[4], b_4[4], c_1_4);
c_2_0 = fma(a_2[4], b_4[0], c_2_0);
c_2_1 = fma(a_2[4], b_4[1], c_2_1);
c_2_2 = fma(a_2[4], b_4[2], c_2_2);
c_2_3 = fma(a_2[4], b_4[3], c_2_3);
c_2_4 = fma(a_2[4], b_4[4], c_2_4);
c_3_0 = fma(a_3[4], b_4[0], c_3_0);
c_3_1 = fma(a_3[4], b_4[1], c_3_1);
c_3_2 = fma(a_3[4], b_4[2], c_3_2);
c_3_3 = fma(a_3[4], b_4[3], c_3_3);
c_3_4 = fma(a_3[4], b_4[4], c_3_4);
// inner step 5
c_0_0 = fma(a_0[5], b_5[0], c_0_0);
c_0_1 = fma(a_0[5], b_5[1], c_0_1);
c_0_2 = fma(a_0[5], b_5[2], c_0_2);
c_0_3 = fma(a_0[5], b_5[3], c_0_3);
c_0_4 = fma(a_0[5], b_5[4], c_0_4);
c_1_0 = fma(a_1[5], b_5[0], c_1_0);
c_1_1 = fma(a_1[5], b_5[1], c_1_1);
c_1_2 = fma(a_1[5], b_5[2], c_1_2);
c_1_3 = fma(a_1[5], b_5[3], c_1_3);
c_1_4 = fma(a_1[5], b_5[4], c_1_4);
c_2_0 = fma(a_2[5], b_5[0], c_2_0);
c_2_1 = fma(a_2[5], b_5[1], c_2_1);
c_2_2 = fma(a_2[5], b_5[2], c_2_2);
c_2_3 = fma(a_2[5], b_5[3], c_2_3);
c_2_4 = fma(a_2[5], b_5[4], c_2_4);
c_3_0 = fma(a_3[5], b_5[0], c_3_0);
c_3_1 = fma(a_3[5], b_5[1], c_3_1);
c_3_2 = fma(a_3[5], b_5[2], c_3_2);
c_3_3 = fma(a_3[5], b_5[3], c_3_3);
c_3_4 = fma(a_3[5], b_5[4], c_3_4);
}
// Tail loop: the last p % RBK inner-dimension steps, one per iteration.
for (t *a = a0 + p/RBK*RBK, *b = b0 + p/RBK*RBK*ldb; a < a0 + p; a++, b += ldb)
{
t* a_0 = &a[lda*0];
t* a_1 = &a[lda*1];
t* a_2 = &a[lda*2];
t* a_3 = &a[lda*3];
t* b_0 = &b[ldb*0];
c_0_0 = fma(a_0[0], b_0[0], c_0_0);
c_0_1 = fma(a_0[0], b_0[1], c_0_1);
c_0_2 = fma(a_0[0], b_0[2], c_0_2);
c_0_3 = fma(a_0[0], b_0[3], c_0_3);
c_0_4 = fma(a_0[0], b_0[4], c_0_4);
c_1_0 = fma(a_1[0], b_0[0], c_1_0);
c_1_1 = fma(a_1[0], b_0[1], c_1_1);
c_1_2 = fma(a_1[0], b_0[2], c_1_2);
c_1_3 = fma(a_1[0], b_0[3], c_1_3);
c_1_4 = fma(a_1[0], b_0[4], c_1_4);
c_2_0 = fma(a_2[0], b_0[0], c_2_0);
c_2_1 = fma(a_2[0], b_0[1], c_2_1);
c_2_2 = fma(a_2[0], b_0[2], c_2_2);
c_2_3 = fma(a_2[0], b_0[3], c_2_3);
c_2_4 = fma(a_2[0], b_0[4], c_2_4);
c_3_0 = fma(a_3[0], b_0[0], c_3_0);
c_3_1 = fma(a_3[0], b_0[1], c_3_1);
c_3_2 = fma(a_3[0], b_0[2], c_3_2);
c_3_3 = fma(a_3[0], b_0[3], c_3_3);
c_3_4 = fma(a_3[0], b_0[4], c_3_4);
}
// Store the accumulators back into the C tile.
c_0[0] = c_0_0;
c_0[1] = c_0_1;
c_0[2] = c_0_2;
c_0[3] = c_0_3;
c_0[4] = c_0_4;
c_1[0] = c_1_0;
c_1[1] = c_1_1;
c_1[2] = c_1_2;
c_1[3] = c_1_3;
c_1[4] = c_1_4;
c_2[0] = c_2_0;
c_2[1] = c_2_1;
c_2[2] = c_2_2;
c_2[3] = c_2_3;
c_2[4] = c_2_4;
c_3[0] = c_3_0;
c_3[1] = c_3_1;
c_3[2] = c_3_2;
c_3[3] = c_3_3;
c_3[4] = c_3_4;
}

View File

@ -3,12 +3,14 @@ TARGET = simple
$(TARGET).objdump: $(TARGET)
riscv64-unknown-elf-objdump -S -D $(TARGET) > $(TARGET).objdump
$(TARGET): $(TARGET).c
$(TARGET): $(TARGET).c Makefile
riscv64-unknown-elf-gcc -g -o $(TARGET) -march=rv64gc -mabi=lp64d -mcmodel=medany \
-O $(TARGET).c
# -O -T../../link/linkc.ld $(TARGET).c
# -nostartfiles -nostdlib $(TARGET).c
# -nostartfiles -nostdlib -T../../link/link.ld $(TARGET).c
-DPREALLOCATE=1 -mcmodel=medany -static -std=gnu99 -O2 -ffast-math -fno-common \
-fno-builtin-printf -fno-tree-loop-distribute-patterns \
-static -nostdlib -nostartfiles -lm -lgcc -T../common/test.ld \
-I../common \
-O $(TARGET).c \
../common/crt.S ../common/syscalls.c
clean:
rm -f $(TARGET) $(TARGET).objdump

BIN
examples/C/simple/simple Executable file

Binary file not shown.

View File

@ -2,13 +2,20 @@
// David_Harris@hmc.edu 24 December 2021
// Simple illustration of compiling C code
#include <stdio.h>
long sum(long N) {
long result, i;
result = 0;
for (i=1; i<=N; i++) result = result + i;
for (i=1; i<=N; i++) {
result = result + i;
}
return result;
}
int main(void) {
return sum(4);
long s;
s = sum(4);
printf("s = %ld\n", s);
return 0; // 0 means success
}

View File

@ -1,17 +0,0 @@
OUTPUT_ARCH( "riscv" )
ENTRY(main)
SECTIONS
{
. = 0x80000000;
.text : { *(.text) }
. = ALIGN(0x1000);
.tohost : { *(.tohost) }
. = ALIGN(0x1000);
.data : { *(.data) }
.data.string : { *(.data.string)}
. = ALIGN(0x1000);
.bss : { *(.bss) }
_end = .;
}