mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-02 17:55:19 +00:00
Added C test cases
This commit is contained in:
parent
2a7e77d2b1
commit
e25760d8e5
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -8,3 +8,6 @@
|
||||
[submodule "addins/imperas-riscv-tests"]
|
||||
path = addins/imperas-riscv-tests
|
||||
url = https://github.com/riscv-ovpsim/imperas-riscv-tests
|
||||
[submodule "addins/riscv-tests"]
|
||||
path = addins/riscv-tests
|
||||
url = https://github.com/riscv-software-src/riscv-tests
|
||||
|
1
addins/riscv-tests
Submodule
1
addins/riscv-tests
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit cf04274f50621fd9ef9147793cca6dd1657985c7
|
24
examples/C/common/LICENSE
Normal file
24
examples/C/common/LICENSE
Normal file
@ -0,0 +1,24 @@
|
||||
Copyright (c) 2012-2015, The Regents of the University of California (Regents).
|
||||
All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. Neither the name of the Regents nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
|
||||
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
|
||||
OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
|
||||
BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
|
||||
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
|
||||
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
1
examples/C/common/README
Normal file
1
examples/C/common/README
Normal file
@ -0,0 +1 @@
|
||||
These files are from github.com/riscv-software-src/riscv-tests
|
225
examples/C/common/crt.S
Normal file
225
examples/C/common/crt.S
Normal file
@ -0,0 +1,225 @@
|
||||
# See LICENSE for license details.
|
||||
|
||||
#include "encoding.h"
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
# define LREG ld
|
||||
# define SREG sd
|
||||
# define REGBYTES 8
|
||||
#else
|
||||
# define LREG lw
|
||||
# define SREG sw
|
||||
# define REGBYTES 4
|
||||
#endif
|
||||
|
||||
.section ".text.init"
|
||||
.globl _start
|
||||
_start:
|
||||
li x1, 0
|
||||
li x2, 0
|
||||
li x3, 0
|
||||
li x4, 0
|
||||
li x5, 0
|
||||
li x6, 0
|
||||
li x7, 0
|
||||
li x8, 0
|
||||
li x9, 0
|
||||
li x10,0
|
||||
li x11,0
|
||||
li x12,0
|
||||
li x13,0
|
||||
li x14,0
|
||||
li x15,0
|
||||
li x16,0
|
||||
li x17,0
|
||||
li x18,0
|
||||
li x19,0
|
||||
li x20,0
|
||||
li x21,0
|
||||
li x22,0
|
||||
li x23,0
|
||||
li x24,0
|
||||
li x25,0
|
||||
li x26,0
|
||||
li x27,0
|
||||
li x28,0
|
||||
li x29,0
|
||||
li x30,0
|
||||
li x31,0
|
||||
|
||||
# enable FPU and accelerator if present
|
||||
li t0, MSTATUS_FS | MSTATUS_XS
|
||||
csrs mstatus, t0
|
||||
|
||||
# make sure XLEN agrees with compilation choice
|
||||
li t0, 1
|
||||
slli t0, t0, 31
|
||||
#if __riscv_xlen == 64
|
||||
bgez t0, 1f
|
||||
#else
|
||||
bltz t0, 1f
|
||||
#endif
|
||||
2:
|
||||
li a0, 1
|
||||
sw a0, tohost, t0
|
||||
j 2b
|
||||
1:
|
||||
|
||||
#ifdef __riscv_flen
|
||||
# initialize FPU if we have one
|
||||
la t0, 1f
|
||||
csrw mtvec, t0
|
||||
|
||||
fssr x0
|
||||
fmv.s.x f0, x0
|
||||
fmv.s.x f1, x0
|
||||
fmv.s.x f2, x0
|
||||
fmv.s.x f3, x0
|
||||
fmv.s.x f4, x0
|
||||
fmv.s.x f5, x0
|
||||
fmv.s.x f6, x0
|
||||
fmv.s.x f7, x0
|
||||
fmv.s.x f8, x0
|
||||
fmv.s.x f9, x0
|
||||
fmv.s.x f10,x0
|
||||
fmv.s.x f11,x0
|
||||
fmv.s.x f12,x0
|
||||
fmv.s.x f13,x0
|
||||
fmv.s.x f14,x0
|
||||
fmv.s.x f15,x0
|
||||
fmv.s.x f16,x0
|
||||
fmv.s.x f17,x0
|
||||
fmv.s.x f18,x0
|
||||
fmv.s.x f19,x0
|
||||
fmv.s.x f20,x0
|
||||
fmv.s.x f21,x0
|
||||
fmv.s.x f22,x0
|
||||
fmv.s.x f23,x0
|
||||
fmv.s.x f24,x0
|
||||
fmv.s.x f25,x0
|
||||
fmv.s.x f26,x0
|
||||
fmv.s.x f27,x0
|
||||
fmv.s.x f28,x0
|
||||
fmv.s.x f29,x0
|
||||
fmv.s.x f30,x0
|
||||
fmv.s.x f31,x0
|
||||
1:
|
||||
#endif
|
||||
|
||||
# initialize trap vector
|
||||
la t0, trap_entry
|
||||
csrw mtvec, t0
|
||||
|
||||
# initialize global pointer
|
||||
.option push
|
||||
.option norelax
|
||||
la gp, __global_pointer$
|
||||
.option pop
|
||||
|
||||
la tp, _end + 63
|
||||
and tp, tp, -64
|
||||
|
||||
# get core id
|
||||
csrr a0, mhartid
|
||||
# for now, assume only 1 core
|
||||
li a1, 1
|
||||
1:bgeu a0, a1, 1b
|
||||
|
||||
# give each core 128KB of stack + TLS
|
||||
#define STKSHIFT 17
|
||||
add sp, a0, 1
|
||||
sll sp, sp, STKSHIFT
|
||||
add sp, sp, tp
|
||||
sll a2, a0, STKSHIFT
|
||||
add tp, tp, a2
|
||||
|
||||
j _init
|
||||
|
||||
.align 2
|
||||
trap_entry:
|
||||
addi sp, sp, -272
|
||||
|
||||
SREG x1, 1*REGBYTES(sp)
|
||||
SREG x2, 2*REGBYTES(sp)
|
||||
SREG x3, 3*REGBYTES(sp)
|
||||
SREG x4, 4*REGBYTES(sp)
|
||||
SREG x5, 5*REGBYTES(sp)
|
||||
SREG x6, 6*REGBYTES(sp)
|
||||
SREG x7, 7*REGBYTES(sp)
|
||||
SREG x8, 8*REGBYTES(sp)
|
||||
SREG x9, 9*REGBYTES(sp)
|
||||
SREG x10, 10*REGBYTES(sp)
|
||||
SREG x11, 11*REGBYTES(sp)
|
||||
SREG x12, 12*REGBYTES(sp)
|
||||
SREG x13, 13*REGBYTES(sp)
|
||||
SREG x14, 14*REGBYTES(sp)
|
||||
SREG x15, 15*REGBYTES(sp)
|
||||
SREG x16, 16*REGBYTES(sp)
|
||||
SREG x17, 17*REGBYTES(sp)
|
||||
SREG x18, 18*REGBYTES(sp)
|
||||
SREG x19, 19*REGBYTES(sp)
|
||||
SREG x20, 20*REGBYTES(sp)
|
||||
SREG x21, 21*REGBYTES(sp)
|
||||
SREG x22, 22*REGBYTES(sp)
|
||||
SREG x23, 23*REGBYTES(sp)
|
||||
SREG x24, 24*REGBYTES(sp)
|
||||
SREG x25, 25*REGBYTES(sp)
|
||||
SREG x26, 26*REGBYTES(sp)
|
||||
SREG x27, 27*REGBYTES(sp)
|
||||
SREG x28, 28*REGBYTES(sp)
|
||||
SREG x29, 29*REGBYTES(sp)
|
||||
SREG x30, 30*REGBYTES(sp)
|
||||
SREG x31, 31*REGBYTES(sp)
|
||||
|
||||
csrr a0, mcause
|
||||
csrr a1, mepc
|
||||
mv a2, sp
|
||||
jal handle_trap
|
||||
csrw mepc, a0
|
||||
|
||||
# Remain in M-mode after mret
|
||||
li t0, MSTATUS_MPP
|
||||
csrs mstatus, t0
|
||||
|
||||
LREG x1, 1*REGBYTES(sp)
|
||||
LREG x2, 2*REGBYTES(sp)
|
||||
LREG x3, 3*REGBYTES(sp)
|
||||
LREG x4, 4*REGBYTES(sp)
|
||||
LREG x5, 5*REGBYTES(sp)
|
||||
LREG x6, 6*REGBYTES(sp)
|
||||
LREG x7, 7*REGBYTES(sp)
|
||||
LREG x8, 8*REGBYTES(sp)
|
||||
LREG x9, 9*REGBYTES(sp)
|
||||
LREG x10, 10*REGBYTES(sp)
|
||||
LREG x11, 11*REGBYTES(sp)
|
||||
LREG x12, 12*REGBYTES(sp)
|
||||
LREG x13, 13*REGBYTES(sp)
|
||||
LREG x14, 14*REGBYTES(sp)
|
||||
LREG x15, 15*REGBYTES(sp)
|
||||
LREG x16, 16*REGBYTES(sp)
|
||||
LREG x17, 17*REGBYTES(sp)
|
||||
LREG x18, 18*REGBYTES(sp)
|
||||
LREG x19, 19*REGBYTES(sp)
|
||||
LREG x20, 20*REGBYTES(sp)
|
||||
LREG x21, 21*REGBYTES(sp)
|
||||
LREG x22, 22*REGBYTES(sp)
|
||||
LREG x23, 23*REGBYTES(sp)
|
||||
LREG x24, 24*REGBYTES(sp)
|
||||
LREG x25, 25*REGBYTES(sp)
|
||||
LREG x26, 26*REGBYTES(sp)
|
||||
LREG x27, 27*REGBYTES(sp)
|
||||
LREG x28, 28*REGBYTES(sp)
|
||||
LREG x29, 29*REGBYTES(sp)
|
||||
LREG x30, 30*REGBYTES(sp)
|
||||
LREG x31, 31*REGBYTES(sp)
|
||||
|
||||
addi sp, sp, 272
|
||||
mret
|
||||
|
||||
.section ".tohost","aw",@progbits
|
||||
.align 6
|
||||
.globl tohost
|
||||
tohost: .dword 0
|
||||
.align 6
|
||||
.globl fromhost
|
||||
fromhost: .dword 0
|
2832
examples/C/common/encoding.h
Normal file
2832
examples/C/common/encoding.h
Normal file
File diff suppressed because it is too large
Load Diff
469
examples/C/common/syscalls.c
Normal file
469
examples/C/common/syscalls.c
Normal file
@ -0,0 +1,469 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <limits.h>
|
||||
#include <sys/signal.h>
|
||||
#include "util.h"
|
||||
|
||||
#define SYS_write 64
|
||||
|
||||
#undef strcmp
|
||||
|
||||
extern volatile uint64_t tohost;
|
||||
extern volatile uint64_t fromhost;
|
||||
|
||||
static uintptr_t syscall(uintptr_t which, uint64_t arg0, uint64_t arg1, uint64_t arg2)
|
||||
{
|
||||
volatile uint64_t magic_mem[8] __attribute__((aligned(64)));
|
||||
magic_mem[0] = which;
|
||||
magic_mem[1] = arg0;
|
||||
magic_mem[2] = arg1;
|
||||
magic_mem[3] = arg2;
|
||||
__sync_synchronize();
|
||||
|
||||
tohost = (uintptr_t)magic_mem;
|
||||
while (fromhost == 0)
|
||||
;
|
||||
fromhost = 0;
|
||||
|
||||
__sync_synchronize();
|
||||
return magic_mem[0];
|
||||
}
|
||||
|
||||
#define NUM_COUNTERS 2
|
||||
static uintptr_t counters[NUM_COUNTERS];
|
||||
static char* counter_names[NUM_COUNTERS];
|
||||
|
||||
void setStats(int enable)
|
||||
{
|
||||
int i = 0;
|
||||
#define READ_CTR(name) do { \
|
||||
while (i >= NUM_COUNTERS) ; \
|
||||
uintptr_t csr = read_csr(name); \
|
||||
if (!enable) { csr -= counters[i]; counter_names[i] = #name; } \
|
||||
counters[i++] = csr; \
|
||||
} while (0)
|
||||
|
||||
READ_CTR(mcycle);
|
||||
READ_CTR(minstret);
|
||||
|
||||
#undef READ_CTR
|
||||
}
|
||||
|
||||
void __attribute__((noreturn)) tohost_exit(uintptr_t code)
|
||||
{
|
||||
tohost = (code << 1) | 1;
|
||||
while (1);
|
||||
}
|
||||
|
||||
uintptr_t __attribute__((weak)) handle_trap(uintptr_t cause, uintptr_t epc, uintptr_t regs[32])
|
||||
{
|
||||
tohost_exit(1337);
|
||||
}
|
||||
|
||||
void exit(int code)
|
||||
{
|
||||
tohost_exit(code);
|
||||
}
|
||||
|
||||
void abort()
|
||||
{
|
||||
exit(128 + SIGABRT);
|
||||
}
|
||||
|
||||
void printstr(const char* s)
|
||||
{
|
||||
syscall(SYS_write, 1, (uintptr_t)s, strlen(s));
|
||||
}
|
||||
|
||||
void __attribute__((weak)) thread_entry(int cid, int nc)
|
||||
{
|
||||
// multi-threaded programs override this function.
|
||||
// for the case of single-threaded programs, only let core 0 proceed.
|
||||
while (cid != 0);
|
||||
}
|
||||
|
||||
int __attribute__((weak)) main(int argc, char** argv)
|
||||
{
|
||||
// single-threaded programs override this function.
|
||||
printstr("Implement main(), foo!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void init_tls()
|
||||
{
|
||||
register void* thread_pointer asm("tp");
|
||||
extern char _tdata_begin, _tdata_end, _tbss_end;
|
||||
size_t tdata_size = &_tdata_end - &_tdata_begin;
|
||||
memcpy(thread_pointer, &_tdata_begin, tdata_size);
|
||||
size_t tbss_size = &_tbss_end - &_tdata_end;
|
||||
memset(thread_pointer + tdata_size, 0, tbss_size);
|
||||
}
|
||||
|
||||
void _init(int cid, int nc)
|
||||
{
|
||||
init_tls();
|
||||
thread_entry(cid, nc);
|
||||
|
||||
// only single-threaded programs should ever get here.
|
||||
int ret = main(0, 0);
|
||||
|
||||
char buf[NUM_COUNTERS * 32] __attribute__((aligned(64)));
|
||||
char* pbuf = buf;
|
||||
for (int i = 0; i < NUM_COUNTERS; i++)
|
||||
if (counters[i])
|
||||
pbuf += sprintf(pbuf, "%s = %d\n", counter_names[i], counters[i]);
|
||||
if (pbuf != buf)
|
||||
printstr(buf);
|
||||
|
||||
exit(ret);
|
||||
}
|
||||
|
||||
#undef putchar
|
||||
int putchar(int ch)
|
||||
{
|
||||
static __thread char buf[64] __attribute__((aligned(64)));
|
||||
static __thread int buflen = 0;
|
||||
|
||||
buf[buflen++] = ch;
|
||||
|
||||
if (ch == '\n' || buflen == sizeof(buf))
|
||||
{
|
||||
syscall(SYS_write, 1, (uintptr_t)buf, buflen);
|
||||
buflen = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Print `x` as exactly 16 lower-case hexadecimal digits (zero padded, no
 * prefix, no newline) via printstr().
 */
void printhex(uint64_t x)
{
  static const char hex_digits[] = "0123456789abcdef";
  char text[17];

  /* fill from the least significant nibble backwards */
  for (int pos = 15; pos >= 0; pos--)
  {
    text[pos] = hex_digits[x & 0xF];
    x >>= 4;
  }
  text[16] = 0;

  printstr(text);
}
|
||||
|
||||
/*
 * Emit `num` in the given `base` through `putch`.
 *
 * If the number has fewer than `width` digits, `padc` is emitted first
 * until the total reaches `width`.  Digits above 9 are written as
 * lower-case letters.  `putdat` is passed to `putch` unchanged.
 */
static inline void printnum(void (*putch)(int, void**), void **putdat,
                    unsigned long long num, unsigned base, int width, int padc)
{
  unsigned digits[sizeof(num)*CHAR_BIT];
  int count = 0;
  int more;

  /* collect digits, least significant first; zero still yields one digit */
  do {
    digits[count++] = num % base;
    more = !(num < base);
    if (more)
      num /= base;
  } while (more);

  /* left padding up to the requested width */
  for (; width > count; width--)
    putch(padc, putdat);

  /* digits, most significant first */
  for (int i = count - 1; i >= 0; i--)
  {
    unsigned d = digits[i];
    putch(d + (d >= 10 ? 'a' - 10 : '0'), putdat);
  }
}
|
||||
|
||||
static unsigned long long getuint(va_list *ap, int lflag)
|
||||
{
|
||||
if (lflag >= 2)
|
||||
return va_arg(*ap, unsigned long long);
|
||||
else if (lflag)
|
||||
return va_arg(*ap, unsigned long);
|
||||
else
|
||||
return va_arg(*ap, unsigned int);
|
||||
}
|
||||
|
||||
static long long getint(va_list *ap, int lflag)
|
||||
{
|
||||
if (lflag >= 2)
|
||||
return va_arg(*ap, long long);
|
||||
else if (lflag)
|
||||
return va_arg(*ap, long);
|
||||
else
|
||||
return va_arg(*ap, int);
|
||||
}
|
||||
|
||||
/*
 * Minimal printf-style formatter.
 *
 * Walks `fmt`, sending ordinary characters to `putch` and expanding
 * %-escapes from the argument list.  `putdat` is handed to `putch`
 * unchanged on every call.
 *
 * Supported: flags '-', '0' and '#' (parsed, no effect), a decimal or
 * '*' width, '.' precision, the 'l'/'ll' length modifiers, and the
 * conversions c, s, d, u, o, p, x and %%.  An unrecognised conversion is
 * printed literally.
 */
static void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap)
{
  register const char* p;
  const char* last_fmt;
  register int ch;
  unsigned long long num;
  int base, lflag, width, precision, altflag;
  char padc;
  va_list args;

  // Work on a private copy: taking the address of a va_list *parameter*
  // is not portable (va_list may be an array type).
  va_copy(args, ap);

  while (1) {
    while ((ch = *(unsigned char *) fmt) != '%') {
      if (ch == '\0') {
        va_end(args);
        return;
      }
      fmt++;
      putch(ch, putdat);
    }
    fmt++;

    // Process a %-escape sequence
    last_fmt = fmt;
    padc = ' ';
    width = -1;
    precision = -1;
    lflag = 0;
    altflag = 0;
  reswitch:
    switch (ch = *(unsigned char *) fmt++) {

    // flag to pad on the right
    case '-':
      padc = '-';
      goto reswitch;

    // flag to pad with 0's instead of spaces
    case '0':
      padc = '0';
      goto reswitch;

    // width field
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      for (precision = 0; ; ++fmt) {
        precision = precision * 10 + ch - '0';
        ch = *fmt;
        if (ch < '0' || ch > '9')
          break;
      }
      goto process_precision;

    case '*':
      precision = va_arg(args, int);
      goto process_precision;

    case '.':
      if (width < 0)
        width = 0;
      goto reswitch;

    // alternate form: accepted but currently has no effect
    case '#':
      altflag = 1;
      goto reswitch;

    // a number seen before any '.' is the width, not the precision
    process_precision:
      if (width < 0)
        width = precision, precision = -1;
      goto reswitch;

    // long flag (doubled for long long)
    case 'l':
      lflag++;
      goto reswitch;

    // character
    case 'c':
      putch(va_arg(args, int), putdat);
      break;

    // string
    case 's':
      if ((p = va_arg(args, char *)) == NULL)
        p = "(null)";
      if (width > 0 && padc != '-')
        for (width -= strnlen(p, precision); width > 0; width--)
          putch(padc, putdat);
      for (; (ch = *p) != '\0' && (precision < 0 || --precision >= 0); width--) {
        putch(ch, putdat);
        p++;
      }
      for (; width > 0; width--)
        putch(' ', putdat);
      break;

    // (signed) decimal
    case 'd':
      num = getint(&args, lflag);
      if ((long long) num < 0) {
        putch('-', putdat);
        // negate in unsigned arithmetic: well defined even for LLONG_MIN
        num = -num;
      }
      base = 10;
      goto signed_number;

    // unsigned decimal
    case 'u':
      base = 10;
      goto unsigned_number;

    // (unsigned) octal
    case 'o':
      // should do something with padding so it's always 3 octits
      base = 8;
      goto unsigned_number;

    // pointer
    case 'p':
      static_assert(sizeof(long) == sizeof(void*));
      lflag = 1;
      putch('0', putdat);
      putch('x', putdat);
      /* fall through to 'x' */

    // (unsigned) hexadecimal
    case 'x':
      base = 16;
    unsigned_number:
      num = getuint(&args, lflag);
    signed_number:
      if (padc == '-') {
        // left-justify: digits first, then blanks up to the width
        // (as for right-justified output, a sign is not counted)
        int digits = 1;
        for (unsigned long long rest = num; rest >= (unsigned) base; rest /= base)
          digits++;
        printnum(putch, putdat, num, base, -1, ' ');
        for (; width > digits; width--)
          putch(' ', putdat);
      } else {
        printnum(putch, putdat, num, base, width, padc);
      }
      break;

    // escaped '%' character
    case '%':
      putch(ch, putdat);
      break;

    // unrecognized escape sequence - just print it literally
    default:
      putch('%', putdat);
      fmt = last_fmt;
      break;
    }
  }
}
|
||||
|
||||
/*
 * vprintfmt output callback for printf: forwards the character to
 * putchar() and bumps the int counter that *data points to.
 */
static void printf_putch(int ch, void** data)
{
  int* written = (int*)*data;

  putchar(ch);
  (*written)++;
}

/*
 * Formatted output through putchar().
 *
 * Returns the number of characters produced.  putchar is reached through
 * an adapter with the exact callback signature vprintfmt expects, rather
 * than by casting putchar to an incompatible function-pointer type.
 */
int printf(const char* fmt, ...)
{
  va_list ap;
  int written = 0;
  void* cookie = &written;

  va_start(ap, fmt);
  vprintfmt(printf_putch, &cookie, fmt, ap);
  va_end(ap);

  return written;
}
|
||||
|
||||
/*
 * vprintfmt output callback for sprintf: `data` points at the caller's
 * write cursor; store the character there and advance the cursor.
 */
static void sprintf_putch(int ch, void** data)
{
  char** pstr = (char**)data;
  **pstr = ch;
  (*pstr)++;
}

/*
 * Format into `str` (no bounds checking) and NUL-terminate the result.
 *
 * Returns the number of characters written, not counting the terminator.
 * The output callback is an ordinary file-scope function instead of a
 * GNU C nested function, so the code does not depend on that extension.
 */
int sprintf(char* str, const char* fmt, ...)
{
  va_list ap;
  char* str0 = str;
  va_start(ap, fmt);

  // vprintfmt advances `str` through the callback
  vprintfmt(sprintf_putch, (void**)&str, fmt, ap);
  *str = 0;

  va_end(ap);
  return str - str0;
}
|
||||
|
||||
/*
 * Copy `len` bytes from `src` to `dest` and return `dest`.
 *
 * When both pointers and the length are multiples of the machine word
 * size the copy is done a word at a time, otherwise byte by byte.
 */
void* memcpy(void* dest, const void* src, size_t len)
{
  const uintptr_t word_mask = sizeof(uintptr_t) - 1;
  uintptr_t misaligned = ((uintptr_t)dest | (uintptr_t)src | len) & word_mask;

  if (misaligned) {
    const char* in = src;
    char* out = dest;
    for (size_t i = 0; i < len; i++)
      out[i] = in[i];
    return dest;
  }

  const uintptr_t* in = src;
  uintptr_t* out = dest;
  size_t words = len / sizeof(uintptr_t);
  for (size_t i = 0; i < words; i++)
    out[i] = in[i];
  return dest;
}
|
||||
|
||||
/*
 * Fill `len` bytes at `dest` with the low 8 bits of `byte`; returns `dest`.
 *
 * A word-aligned destination with a word-multiple length is filled one
 * machine word at a time; anything else is filled byte by byte.
 */
void* memset(void* dest, int byte, size_t len)
{
  const uintptr_t word_mask = sizeof(uintptr_t) - 1;

  if ((((uintptr_t)dest | len) & word_mask) != 0) {
    char* out = dest;
    for (size_t i = 0; i < len; i++)
      out[i] = byte;
    return dest;
  }

  /* replicate the byte into every lane of a word */
  uintptr_t pattern = byte & 0xFF;
  pattern |= pattern << 8;
  pattern |= pattern << 16;
  /* two 16-bit shifts stay well defined when uintptr_t is only 32 bits */
  pattern |= pattern << 16 << 16;

  uintptr_t* out = dest;
  size_t words = len / sizeof(uintptr_t);
  for (size_t i = 0; i < words; i++)
    out[i] = pattern;
  return dest;
}
|
||||
|
||||
/* Return the number of characters in `s` before its terminating NUL. */
size_t strlen(const char *s)
{
  size_t count = 0;

  while (s[count] != '\0')
    count++;
  return count;
}
|
||||
|
||||
/*
 * Return the length of `s`, looking at no more than `n` characters:
 * the result is `n` when no NUL occurs in the first `n` bytes.
 */
size_t strnlen(const char *s, size_t n)
{
  size_t i;

  for (i = 0; i < n && s[i] != '\0'; i++)
    ;
  return i;
}
|
||||
|
||||
/*
 * Compare two NUL-terminated strings as sequences of unsigned chars.
 * Returns the difference of the first pair of characters that differ,
 * or 0 when the strings are equal.
 */
int strcmp(const char* s1, const char* s2)
{
  for (;;) {
    unsigned char left = *s1++;
    unsigned char right = *s2++;

    if (left == 0 || left != right)
      return left - right;
  }
}
|
||||
|
||||
/* Copy `src`, including its terminating NUL, to `dest`; returns `dest`. */
char* strcpy(char* dest, const char* src)
{
  size_t i = 0;

  for (;;) {
    char c = src[i];

    dest[i] = c;
    if (c == '\0')
      break;
    i++;
  }
  return dest;
}
|
||||
|
||||
/*
 * Convert the leading decimal number in `str` to a long.
 *
 * Leading spaces are skipped and one optional '+' or '-' sign is
 * accepted.  Conversion stops at the first character that is not a
 * decimal digit, so trailing text ("12abc", "7\n") no longer corrupts
 * the result.  Returns 0 when no digits are present.  Overflow is not
 * detected.
 */
long atol(const char* str)
{
  long res = 0;
  int sign = 0;

  while (*str == ' ')
    str++;

  if (*str == '-' || *str == '+') {
    sign = *str == '-';
    str++;
  }

  // consume digits only; anything else ends the number
  while (*str >= '0' && *str <= '9') {
    res *= 10;
    res += *str++ - '0';
  }

  return sign ? -res : res;
}
|
66
examples/C/common/test.ld
Normal file
66
examples/C/common/test.ld
Normal file
@ -0,0 +1,66 @@
|
||||
/*======================================================================*/
|
||||
/* Proxy kernel linker script */
|
||||
/*======================================================================*/
|
||||
/* This is the linker script used when building the proxy kernel. */
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* Setup */
|
||||
/*----------------------------------------------------------------------*/
|
||||
|
||||
/* The OUTPUT_ARCH command specifies the machine architecture where the
|
||||
argument is one of the names used in the BFD library. More
|
||||
specifically one of the entries in bfd/cpu-mips.c */
|
||||
|
||||
OUTPUT_ARCH( "riscv" )
|
||||
ENTRY(_start)
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* Sections */
|
||||
/*----------------------------------------------------------------------*/
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
|
||||
/* text: test code section */
|
||||
. = 0x80000000;
|
||||
.text.init : { *(.text.init) }
|
||||
|
||||
. = ALIGN(0x1000);
|
||||
.tohost : { *(.tohost) }
|
||||
|
||||
. = ALIGN(0x1000);
|
||||
.text : { *(.text) }
|
||||
|
||||
/* data segment */
|
||||
.data : { *(.data) }
|
||||
|
||||
.sdata : {
|
||||
__global_pointer$ = . + 0x800;
|
||||
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*)
|
||||
*(.sdata .sdata.* .gnu.linkonce.s.*)
|
||||
}
|
||||
|
||||
/* bss segment */
|
||||
.sbss : {
|
||||
*(.sbss .sbss.* .gnu.linkonce.sb.*)
|
||||
*(.scommon)
|
||||
}
|
||||
.bss : { *(.bss) }
|
||||
|
||||
/* thread-local data segment */
|
||||
.tdata :
|
||||
{
|
||||
_tdata_begin = .;
|
||||
*(.tdata)
|
||||
_tdata_end = .;
|
||||
}
|
||||
.tbss :
|
||||
{
|
||||
*(.tbss)
|
||||
_tbss_end = .;
|
||||
}
|
||||
|
||||
/* End of uninitialized data segment */
|
||||
_end = .;
|
||||
}
|
||||
|
90
examples/C/common/util.h
Normal file
90
examples/C/common/util.h
Normal file
@ -0,0 +1,90 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
#ifndef __UTIL_H
|
||||
#define __UTIL_H
|
||||
|
||||
extern void setStats(int enable);
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }
|
||||
|
||||
// Compare the first n ints of `test` against `verify`.
// Returns 0 when they all match, otherwise the 1-based position of the
// first element that differs.
static int verify(int n, const volatile int* test, const int* verify)
{
  const int paired = n/2*2;
  int idx = 0;

  // Two elements per step for faster verification
  while (idx < paired)
  {
    int got_a = test[idx], got_b = test[idx+1];
    int want_a = verify[idx], want_b = verify[idx+1];

    if (got_a != want_a)
      return idx+1;
    if (got_b != want_b)
      return idx+2;
    idx += 2;
  }

  // odd element left over at the end
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}
|
||||
|
||||
// Compare the first n doubles of `test` against `verify` using ==.
// Returns 0 when they all match, otherwise the 1-based position of the
// first element that differs.
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  const int paired = n/2*2;
  int idx = 0;

  // Two elements per step for faster verification
  while (idx < paired)
  {
    double got_a = test[idx], got_b = test[idx+1];
    double want_a = verify[idx], want_b = verify[idx+1];
    int first_ok = got_a == want_a;
    int second_ok = got_b == want_b;

    if (!first_ok)
      return idx+1;
    if (!second_ok)
      return idx+2;
    idx += 2;
  }

  // odd element left over at the end
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}
|
||||
|
||||
static void __attribute__((noinline)) barrier(int ncores)
|
||||
{
|
||||
static volatile int sense;
|
||||
static volatile int count;
|
||||
static __thread int threadsense;
|
||||
|
||||
__sync_synchronize();
|
||||
|
||||
threadsense = !threadsense;
|
||||
if (__sync_fetch_and_add(&count, 1) == ncores-1)
|
||||
{
|
||||
count = 0;
|
||||
sense = threadsense;
|
||||
}
|
||||
else while(sense != threadsense)
|
||||
;
|
||||
|
||||
__sync_synchronize();
|
||||
}
|
||||
|
||||
// One step of a shift-register pseudo-random generator: shift x right by
// one and insert (bit0 XOR bit1) of the old value at bit 62.
static uint64_t lfsr(uint64_t x)
{
  uint64_t shifted = x >> 1;
  uint64_t feedback = (x ^ shifted) & 1;

  return shifted | (feedback << 62);
}
|
||||
|
||||
// Return the length in bytes (2 or 4) of the instruction at address `pc`.
//
// RISC-V encodes the length in the two lowest bits of the first 16-bit
// parcel: 0b11 marks a 32-bit instruction, while 0b00, 0b01 and 0b10 are
// the three compressed (16-bit) quadrants.  Testing for "non-zero" would
// wrongly report quadrants 1 and 2 as 4 bytes long.
static uintptr_t insn_len(uintptr_t pc)
{
  unsigned short parcel = *(unsigned short*)pc;

  return (parcel & 3) == 3 ? 4 : 2;
}
|
||||
|
||||
#ifdef __riscv
|
||||
#include "encoding.h"
|
||||
#endif
|
||||
|
||||
#define stringify_1(s) #s
|
||||
#define stringify(s) stringify_1(s)
|
||||
#define stats(code, iter) do { \
|
||||
unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
|
||||
code; \
|
||||
_c += read_csr(mcycle), _i += read_csr(minstret); \
|
||||
if (cid == 0) \
|
||||
printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
|
||||
stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \
|
||||
} while(0)
|
||||
|
||||
#endif //__UTIL_H
|
16
examples/C/mm/Makefile
Normal file
16
examples/C/mm/Makefile
Normal file
@ -0,0 +1,16 @@
|
||||
# Builds the bare-metal `mm` benchmark and its disassembly listing.
TARGET = mm

# Everything that is compiled into the executable, and the headers and
# linker script it depends on, so that editing any of them triggers a rebuild.
SRCS = $(wildcard *.c) ../common/crt.S ../common/syscalls.c
HDRS = $(wildcard *.h) $(wildcard ../common/*.h)

.PHONY: clean

# Default goal: disassembly of the linked executable.
$(TARGET).objdump: $(TARGET)
	riscv64-unknown-elf-objdump -S -D $(TARGET) > $(TARGET).objdump

# Notes on the flags:
#  * a later bare -O would override -O2 (the last -O option wins), so only
#    -O2 is given;
#  * libraries come after the sources, because the linker resolves symbols
#    from an archive only for objects that precede it on the command line.
$(TARGET): $(SRCS) $(HDRS) ../common/test.ld Makefile
	riscv64-unknown-elf-gcc -g -o $(TARGET) -march=rv64gc -mabi=lp64d -mcmodel=medany \
	    -DPREALLOCATE=1 -static -std=gnu99 -O2 -ffast-math -fno-common \
	    -fno-builtin-printf -fno-tree-loop-distribute-patterns \
	    -nostdlib -nostartfiles -T../common/test.ld \
	    -I../common \
	    $(SRCS) \
	    -lm -lgcc

clean:
	rm -f $(TARGET) $(TARGET).objdump
|
36
examples/C/mm/common.h
Normal file
36
examples/C/mm/common.h
Normal file
@ -0,0 +1,36 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
#ifndef _MM_H
|
||||
#define _MM_H
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifdef SP
|
||||
typedef float t;
|
||||
#define fma fmaf
|
||||
#else
|
||||
typedef double t;
|
||||
#endif
|
||||
|
||||
#define inline inline __attribute__((always_inline))
|
||||
|
||||
#define alloca_aligned(s, a) ((void*)(((uintptr_t)alloca((s)+(a)-1)+(a)-1)&~((a)-1)))
|
||||
|
||||
#include "rb.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void mm(size_t m, size_t n, size_t p,
|
||||
t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
//void rb(t* a, t* b, t* c, size_t lda, size_t ldb, size_t ldc);
|
||||
|
||||
#endif
|
BIN
examples/C/mm/mm
Executable file
BIN
examples/C/mm/mm
Executable file
Binary file not shown.
152
examples/C/mm/mm.c
Normal file
152
examples/C/mm/mm.c
Normal file
@ -0,0 +1,152 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
#include "common.h"
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <alloca.h>
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
/*
 * Reference kernel: c += a * b, where a is m x p, b is p x n and c is
 * m x n, all row-major with leading dimensions lda, ldb and ldc.
 *
 * The k loop is unrolled by four into independent accumulators; the
 * first accumulator starts from the existing c element and also absorbs
 * the p % 4 leftover terms.
 */
static void mm_naive(size_t m, size_t n, size_t p,
                     t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc)
{
  const size_t p4 = p/4*4;

  for (size_t row = 0; row < m; row++)
  {
    for (size_t col = 0; col < n; col++)
    {
      t* a_row = a + row*lda;
      t* b_col = b + col;
      t* out = c + row*ldc + col;
      t acc0 = *out, acc1 = 0, acc2 = 0, acc3 = 0;
      size_t k = 0;

      while (k < p4)
      {
        acc0 = fma(a_row[k+0], b_col[(k+0)*ldb], acc0);
        acc1 = fma(a_row[k+1], b_col[(k+1)*ldb], acc1);
        acc2 = fma(a_row[k+2], b_col[(k+2)*ldb], acc2);
        acc3 = fma(a_row[k+3], b_col[(k+3)*ldb], acc3);
        k += 4;
      }
      while (k < p)
      {
        acc0 = fma(a_row[k], b_col[k*ldb], acc0);
        k++;
      }

      *out = (acc0 + acc1) + (acc2 + acc3);
    }
  }
}
|
||||
|
||||
/*
 * Register-blocked multiply: c += a * b.
 *
 * Full RBM x RBN tiles of c are handed to kloop(); the columns right of
 * the last full tile and the rows below the last full tile are finished
 * with mm_naive().
 */
static inline void mm_rb(size_t m, size_t n, size_t p,
                         t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc)
{
  const size_t full_rows = m/RBM*RBM;
  const size_t full_cols = n/RBN*RBN;

  for (size_t row = 0; row < full_rows; row += RBM)
  {
    t* a_blk = a + row*lda;
    t* c_blk = c + row*ldc;

    for (size_t col = 0; col < full_cols; col += RBN)
      kloop(p, a_blk, lda, b+col, ldb, c_blk+col, ldc);

    /* right-hand edge of this block row */
    mm_naive(RBM, n - full_cols, p, a_blk, lda, b+full_cols, ldb, c_blk+full_cols, ldc);
  }

  /* bottom edge, over the full width */
  mm_naive(m - full_rows, n, p, a+full_rows*lda, lda, b, ldb, c+full_rows*ldc, ldc);
}
|
||||
|
||||
/*
 * Copy an m x p row-major matrix from a0 (leading dimension lda0) to a
 * (leading dimension lda).
 *
 * Each row is moved in groups of eight elements -- all eight are loaded
 * before any is stored -- followed by the p % 8 remaining elements one
 * at a time.
 */
static inline void repack(t* a, size_t lda, const t* a0, size_t lda0, size_t m, size_t p)
{
  const size_t p8 = p/8*8;

  for (size_t row = 0; row < m; row++)
  {
    const t* src = a0 + row*lda0;
    t* dst = a + row*lda;
    size_t col = 0;

    for (; col < p8; col += 8)
    {
      t chunk[8];

      for (int u = 0; u < 8; u++)
        chunk[u] = src[col+u];
      for (int u = 0; u < 8; u++)
        dst[col+u] = chunk[u];
    }
    for (; col < p; col++)
      dst[col] = src[col];
  }
}
|
||||
|
||||
static void mm_cb(size_t m, size_t n, size_t p,
|
||||
t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc)
|
||||
{
|
||||
size_t nmb = m/CBM, nnb = n/CBN, npb = p/CBK;
|
||||
size_t mb = nmb*CBM, nb = nnb*CBN, pb = npb*CBK;
|
||||
//t a1[mb*pb], b1[pb*nb], c1[mb*nb];
|
||||
t* a1 = (t*)alloca_aligned(sizeof(t)*mb*pb, 8192);
|
||||
t* b1 = (t*)alloca_aligned(sizeof(t)*pb*nb, 8192);
|
||||
t* c1 = (t*)alloca_aligned(sizeof(t)*mb*nb, 8192);
|
||||
|
||||
for (size_t i = 0; i < mb; i += CBM)
|
||||
for (size_t j = 0; j < pb; j += CBK)
|
||||
repack(a1 + (npb*(i/CBM) + j/CBK)*(CBM*CBK), CBK, a + i*lda + j, lda, CBM, CBK);
|
||||
|
||||
for (size_t i = 0; i < pb; i += CBK)
|
||||
for (size_t j = 0; j < nb; j += CBN)
|
||||
repack(b1 + (nnb*(i/CBK) + j/CBN)*(CBK*CBN), CBN, b + i*ldb + j, ldb, CBK, CBN);
|
||||
|
||||
for (size_t i = 0; i < mb; i += CBM)
|
||||
for (size_t j = 0; j < nb; j += CBN)
|
||||
repack(c1 + (nnb*(i/CBM) + j/CBN)*(CBM*CBN), CBN, c + i*ldc + j, ldc, CBM, CBN);
|
||||
|
||||
for (size_t i = 0; i < mb; i += CBM)
|
||||
{
|
||||
for (size_t j = 0; j < nb; j += CBN)
|
||||
{
|
||||
for (size_t k = 0; k < pb; k += CBK)
|
||||
{
|
||||
mm_rb(CBM, CBN, CBK,
|
||||
a1 + (npb*(i/CBM) + k/CBK)*(CBM*CBK), CBK,
|
||||
b1 + (nnb*(k/CBK) + j/CBN)*(CBK*CBN), CBN,
|
||||
c1 + (nnb*(i/CBM) + j/CBN)*(CBM*CBN), CBN);
|
||||
}
|
||||
if (pb < p)
|
||||
{
|
||||
mm_rb(CBM, CBN, p - pb,
|
||||
a + i*lda + pb, lda,
|
||||
b + pb*ldb + j, ldb,
|
||||
c1 + (nnb*(i/CBM) + j/CBN)*(CBM*CBN), CBN);
|
||||
}
|
||||
}
|
||||
if (nb < n)
|
||||
{
|
||||
for (size_t k = 0; k < p; k += CBK)
|
||||
{
|
||||
mm_rb(CBM, n - nb, MIN(p - k, CBK),
|
||||
a + i*lda + k, lda,
|
||||
b + k*ldb + nb, ldb,
|
||||
c + i*ldc + nb, ldc);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (mb < m)
|
||||
{
|
||||
for (size_t j = 0; j < n; j += CBN)
|
||||
{
|
||||
for (size_t k = 0; k < p; k += CBK)
|
||||
{
|
||||
mm_rb(m - mb, MIN(n - j, CBN), MIN(p - k, CBK),
|
||||
a + mb*lda + k, lda,
|
||||
b + k*ldb + j, ldb,
|
||||
c + mb*ldc + j, ldc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < mb; i += CBM)
|
||||
for (size_t j = 0; j < nb; j += CBN)
|
||||
repack(c + i*ldc + j, ldc, c1 + (nnb*(i/CBM) + j/CBN)*(CBM*CBN), CBN, CBM, CBN);
|
||||
}
|
||||
|
||||
/*
 * Matrix multiply entry point: c += a * b with a of size m x p, b of
 * size p x n and c of size m x n (row-major, leading dimensions lda,
 * ldb, ldc).
 *
 * Problems no larger than twice the cache-block size in every dimension
 * (the expected case) go straight to the register-blocked kernel;
 * anything bigger takes the cache-blocked path.
 */
void mm(size_t m, size_t n, size_t p,
        t* a, size_t lda, t* b, size_t ldb, t* c, size_t ldc)
{
  int is_small = m <= 2*CBM && n <= 2*CBN && p <= 2*CBK;

  if (__builtin_expect(is_small, 1))
  {
    mm_rb(m, n, p, a, lda, b, ldb, c, ldc);
    return;
  }
  mm_cb(m, n, p, a, lda, b, ldb, c, ldc);
}
|
76
examples/C/mm/mm_main.c
Normal file
76
examples/C/mm/mm_main.c
Normal file
@ -0,0 +1,76 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
#include "common.h"
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "util.h"
|
||||
|
||||
#pragma GCC optimize ("unroll-loops")
|
||||
|
||||
int main(int argc, char** argv)
|
||||
//void thread_entry(int cid, int nc)
|
||||
{
|
||||
const int R = 8;
|
||||
int m, n, p;
|
||||
uint64_t s = 0xdeadbeefU;
|
||||
|
||||
int cid = 0;
|
||||
int nc = 0;
|
||||
|
||||
m = CBM;
|
||||
n = CBN;
|
||||
p = CBK;
|
||||
|
||||
t a[m*p];
|
||||
t b[p*n];
|
||||
t c[m*n];
|
||||
|
||||
for (size_t i = 0; i < m; i++)
|
||||
for (size_t j = 0; j < p; j++)
|
||||
a[i*p+j] = (t)(s = lfsr(s));
|
||||
for (size_t i = 0; i < p; i++)
|
||||
for (size_t j = 0; j < n; j++)
|
||||
b[i*n+j] = (t)(s = lfsr(s));
|
||||
memset(c, 0, m*n*sizeof(c[0]));
|
||||
|
||||
size_t instret, cycles;
|
||||
for (int i = 0; i < R; i++)
|
||||
{
|
||||
instret = -read_csr(minstret);
|
||||
cycles = -read_csr(mcycle);
|
||||
mm(m, n, p, a, p, b, n, c, n);
|
||||
instret += read_csr(minstret);
|
||||
cycles += read_csr(mcycle);
|
||||
}
|
||||
|
||||
asm volatile("fence");
|
||||
|
||||
printf("C%d: reg block %dx%dx%d, cache block %dx%dx%d\n",
|
||||
cid, RBM, RBN, RBK, CBM, CBN, CBK);
|
||||
printf("C%d: %d instructions\n", cid, (int)(instret));
|
||||
printf("C%d: %d cycles\n", cid, (int)(cycles));
|
||||
printf("C%d: %d flops\n", cid, 2*m*n*p);
|
||||
printf("C%d: %d Mflops @ 1 GHz\n", cid, 2000*m*n*p/(cycles));
|
||||
|
||||
#if 1
|
||||
for (size_t i = 0; i < m; i++)
|
||||
{
|
||||
for (size_t j = 0; j < n; j++)
|
||||
{
|
||||
t s = 0;
|
||||
for (size_t k = 0; k < p; k++)
|
||||
s += a[i*p+k] * b[k*n+j];
|
||||
s *= R;
|
||||
if (fabs(c[i*n+j]-s) > fabs(1e-6*s))
|
||||
{
|
||||
printf("C%d: c[%lu][%lu] %f != %f\n", cid, i, j, c[i*n+j], s);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
//barrier(nc);
|
||||
exit(0);
|
||||
}
|
81
examples/C/mm/old.gen.scala
Normal file
81
examples/C/mm/old.gen.scala
Normal file
@ -0,0 +1,81 @@
|
||||
import scala.sys.process._
|
||||
/** Generates the C header `rb.h` containing an unrolled `kloop` kernel for a
  * given block shape, then runs `make run` and keeps a copy of `a.out`.
  */
object MMGen {
  /** Lets Int indices be passed where the string helpers below take a String. */
  implicit def i2s(i: Int): String = i.toString

  /** Writes `contents` to the file called `name`. */
  def writeFile(name: String, contents: String) = {
    val f = new java.io.FileWriter(name)
    // Close the writer even when write() throws, so the handle is not leaked.
    try f.write(contents)
    finally f.close()
  }

  /** Current nesting depth of the emitted code. */
  var indent = 0

  /** Leading whitespace for a line at the current nesting depth. */
  def spacing = " " * indent

  /** Emits the C statement `lhs = rhs;`. */
  def assign(lhs: String, rhs: String) =
    spacing + lhs + " = " + rhs + ";\n"

  /** Emits a C declaration with initialiser: `t n = v;`. */
  def init(t: String, n: String, v: String) =
    assign(t+" "+n, v)

  /** Emits the optional header text `s` followed by `{`, and deepens the indent. */
  def open_block(s: String = "") = {
    val result = (if (s != "") spacing + s else "") + spacing + "{\n"
    indent = indent + 1
    result
  }

  /** Restores the previous indent and emits the closing `}`. */
  def close_block = {
    indent = indent - 1
    spacing + "}\n"
  }

  /** Array subscript expression `m[i]`. */
  def ar(m: String, i: String) = m+"["+i+"]"

  /** Joins the name parts with underscores, e.g. r("c", "0", "1") == "c_0_1". */
  def r(a: String, b: String*) = (a :: b.toList).reduceLeft(_+"_"+_)

  /** Returns the C source of `kloop` for an m x n tile of c, unrolled p deep in k. */
  def rb(m: Int, n: Int, p: Int) = {
    var s = open_block("static inline void kloop(size_t p, t* a0, size_t lda, t* b0, size_t ldb, t* c, size_t ldc)\n")

    // Row pointers into c, then one local accumulator per tile element.
    for (i <- 0 until m)
      s += init("t*", r("c", i), "&"+ar("c", "ldc*"+i))
    for (i <- 0 until m; j <- 0 until n)
      s += init("t", r("c", i, j), ar(r("c", i), j))

    // Appends one loop body: row pointers for a and b, then an fma per (k, i, j).
    def doit(m: Int, n: Int, p: Int) = {
      for (i <- 0 until m)
        s += init("t*", r("a", i), "&"+ar("a", "lda*"+i))
      for (k <- 0 until p)
        s += init("t*", r("b", k), "&"+ar("b", "ldb*"+k))
      for (k <- 0 until p; i <- 0 until m; j <- 0 until n)
        s += assign(r("c", i, j), "fma(" + ar(r("a", i), k) + ", " + ar(r("b", k), j) + ", " + r("c", i, j) + ")")
    }

    // Main loop: RBK values of k per iteration.
    s += open_block("for (t *a = a0, *b = b0; a < a0 + p/RBK*RBK; a += RBK, b += RBK*ldb)\n")
    doit(m, n, p)
    s += close_block

    // Remainder loop: one k per iteration for the part not divisible by RBK.
    s += open_block("for (t *a = a0 + p/RBK*RBK, *b = b0 + p/RBK*RBK*ldb; a < a0 + p; a++, b += ldb)\n")
    doit(m, n, 1)
    s += close_block

    // Write the accumulators back to c.
    for (i <- 0 until m; j <- 0 until n)
      s += assign(ar(r("c", i), j), r("c", i, j))
    s += close_block

    s
  }

  /** Greatest common divisor (Euclid). */
  def gcd(a: Int, b: Int): Int = if (b == 0) a else gcd(b, a%b)

  /** Least common multiple of two values. */
  def lcm(a: Int, b: Int): Int = a*b/gcd(a, b)

  /** Least common multiple of a non-empty sequence. */
  def lcm(a: Seq[Int]): Int = {
    if (a.tail.isEmpty) a.head
    else lcm(a.head, lcm(a.tail))
  }

  /** Writes rb.h for register block (m, n, p) and cache block (m1, n1, p1),
    * runs `make run`, and copies a.out to b.<m>.<n>.<p>.<m1>.<n1>.<p1>.run.
    * Returns the exit code of the copy command.
    */
  def test1(m: Int, n: Int, p: Int, m1: Int, n1: Int, p1: Int) = {
    val decl = "static const int RBM = "+m+", RBN = "+n+", RBK = "+p+";\n" +
               "static const int CBM = "+m1+", CBN = "+n1+", CBK = "+p1+";\n"
    writeFile("rb.h", decl + rb(m, n, p))
    //"make"!!

    // Method-call form of `!` needs no blank-line separator or postfixOps.
    "make run".!

    ("cp a.out " + Seq("b", m, n, p, m1, n1, p1, "run").mkString(".")).!
  }

  def main(args: Array[String]): Unit = {
    test1(4, 5, 6, 24, 25, 24)
    //for (i <- 4 to 6; j <- 4 to 6; k <- 4 to 6)
    //  test1(i, j, k, if (i == 5) 35 else 36, if (j == 5) 35 else 36, if (k == 5) 35 else 36)
  }
}
|
210
examples/C/mm/rb.h
Normal file
210
examples/C/mm/rb.h
Normal file
@ -0,0 +1,210 @@
|
||||
static const int RBM = 4, RBN = 5, RBK = 6;
|
||||
static const int CBM = 24, CBN = 25, CBK = 24;
|
||||
static inline void kloop(size_t p, t* a0, size_t lda, t* b0, size_t ldb, t* c, size_t ldc)
|
||||
{
|
||||
t* c_0 = &c[ldc*0];
|
||||
t* c_1 = &c[ldc*1];
|
||||
t* c_2 = &c[ldc*2];
|
||||
t* c_3 = &c[ldc*3];
|
||||
t c_0_0 = c_0[0];
|
||||
t c_0_1 = c_0[1];
|
||||
t c_0_2 = c_0[2];
|
||||
t c_0_3 = c_0[3];
|
||||
t c_0_4 = c_0[4];
|
||||
t c_1_0 = c_1[0];
|
||||
t c_1_1 = c_1[1];
|
||||
t c_1_2 = c_1[2];
|
||||
t c_1_3 = c_1[3];
|
||||
t c_1_4 = c_1[4];
|
||||
t c_2_0 = c_2[0];
|
||||
t c_2_1 = c_2[1];
|
||||
t c_2_2 = c_2[2];
|
||||
t c_2_3 = c_2[3];
|
||||
t c_2_4 = c_2[4];
|
||||
t c_3_0 = c_3[0];
|
||||
t c_3_1 = c_3[1];
|
||||
t c_3_2 = c_3[2];
|
||||
t c_3_3 = c_3[3];
|
||||
t c_3_4 = c_3[4];
|
||||
for (t *a = a0, *b = b0; a < a0 + p/RBK*RBK; a += RBK, b += RBK*ldb)
|
||||
{
|
||||
t* a_0 = &a[lda*0];
|
||||
t* a_1 = &a[lda*1];
|
||||
t* a_2 = &a[lda*2];
|
||||
t* a_3 = &a[lda*3];
|
||||
t* b_0 = &b[ldb*0];
|
||||
t* b_1 = &b[ldb*1];
|
||||
t* b_2 = &b[ldb*2];
|
||||
t* b_3 = &b[ldb*3];
|
||||
t* b_4 = &b[ldb*4];
|
||||
t* b_5 = &b[ldb*5];
|
||||
c_0_0 = fma(a_0[0], b_0[0], c_0_0);
|
||||
c_0_1 = fma(a_0[0], b_0[1], c_0_1);
|
||||
c_0_2 = fma(a_0[0], b_0[2], c_0_2);
|
||||
c_0_3 = fma(a_0[0], b_0[3], c_0_3);
|
||||
c_0_4 = fma(a_0[0], b_0[4], c_0_4);
|
||||
c_1_0 = fma(a_1[0], b_0[0], c_1_0);
|
||||
c_1_1 = fma(a_1[0], b_0[1], c_1_1);
|
||||
c_1_2 = fma(a_1[0], b_0[2], c_1_2);
|
||||
c_1_3 = fma(a_1[0], b_0[3], c_1_3);
|
||||
c_1_4 = fma(a_1[0], b_0[4], c_1_4);
|
||||
c_2_0 = fma(a_2[0], b_0[0], c_2_0);
|
||||
c_2_1 = fma(a_2[0], b_0[1], c_2_1);
|
||||
c_2_2 = fma(a_2[0], b_0[2], c_2_2);
|
||||
c_2_3 = fma(a_2[0], b_0[3], c_2_3);
|
||||
c_2_4 = fma(a_2[0], b_0[4], c_2_4);
|
||||
c_3_0 = fma(a_3[0], b_0[0], c_3_0);
|
||||
c_3_1 = fma(a_3[0], b_0[1], c_3_1);
|
||||
c_3_2 = fma(a_3[0], b_0[2], c_3_2);
|
||||
c_3_3 = fma(a_3[0], b_0[3], c_3_3);
|
||||
c_3_4 = fma(a_3[0], b_0[4], c_3_4);
|
||||
c_0_0 = fma(a_0[1], b_1[0], c_0_0);
|
||||
c_0_1 = fma(a_0[1], b_1[1], c_0_1);
|
||||
c_0_2 = fma(a_0[1], b_1[2], c_0_2);
|
||||
c_0_3 = fma(a_0[1], b_1[3], c_0_3);
|
||||
c_0_4 = fma(a_0[1], b_1[4], c_0_4);
|
||||
c_1_0 = fma(a_1[1], b_1[0], c_1_0);
|
||||
c_1_1 = fma(a_1[1], b_1[1], c_1_1);
|
||||
c_1_2 = fma(a_1[1], b_1[2], c_1_2);
|
||||
c_1_3 = fma(a_1[1], b_1[3], c_1_3);
|
||||
c_1_4 = fma(a_1[1], b_1[4], c_1_4);
|
||||
c_2_0 = fma(a_2[1], b_1[0], c_2_0);
|
||||
c_2_1 = fma(a_2[1], b_1[1], c_2_1);
|
||||
c_2_2 = fma(a_2[1], b_1[2], c_2_2);
|
||||
c_2_3 = fma(a_2[1], b_1[3], c_2_3);
|
||||
c_2_4 = fma(a_2[1], b_1[4], c_2_4);
|
||||
c_3_0 = fma(a_3[1], b_1[0], c_3_0);
|
||||
c_3_1 = fma(a_3[1], b_1[1], c_3_1);
|
||||
c_3_2 = fma(a_3[1], b_1[2], c_3_2);
|
||||
c_3_3 = fma(a_3[1], b_1[3], c_3_3);
|
||||
c_3_4 = fma(a_3[1], b_1[4], c_3_4);
|
||||
c_0_0 = fma(a_0[2], b_2[0], c_0_0);
|
||||
c_0_1 = fma(a_0[2], b_2[1], c_0_1);
|
||||
c_0_2 = fma(a_0[2], b_2[2], c_0_2);
|
||||
c_0_3 = fma(a_0[2], b_2[3], c_0_3);
|
||||
c_0_4 = fma(a_0[2], b_2[4], c_0_4);
|
||||
c_1_0 = fma(a_1[2], b_2[0], c_1_0);
|
||||
c_1_1 = fma(a_1[2], b_2[1], c_1_1);
|
||||
c_1_2 = fma(a_1[2], b_2[2], c_1_2);
|
||||
c_1_3 = fma(a_1[2], b_2[3], c_1_3);
|
||||
c_1_4 = fma(a_1[2], b_2[4], c_1_4);
|
||||
c_2_0 = fma(a_2[2], b_2[0], c_2_0);
|
||||
c_2_1 = fma(a_2[2], b_2[1], c_2_1);
|
||||
c_2_2 = fma(a_2[2], b_2[2], c_2_2);
|
||||
c_2_3 = fma(a_2[2], b_2[3], c_2_3);
|
||||
c_2_4 = fma(a_2[2], b_2[4], c_2_4);
|
||||
c_3_0 = fma(a_3[2], b_2[0], c_3_0);
|
||||
c_3_1 = fma(a_3[2], b_2[1], c_3_1);
|
||||
c_3_2 = fma(a_3[2], b_2[2], c_3_2);
|
||||
c_3_3 = fma(a_3[2], b_2[3], c_3_3);
|
||||
c_3_4 = fma(a_3[2], b_2[4], c_3_4);
|
||||
c_0_0 = fma(a_0[3], b_3[0], c_0_0);
|
||||
c_0_1 = fma(a_0[3], b_3[1], c_0_1);
|
||||
c_0_2 = fma(a_0[3], b_3[2], c_0_2);
|
||||
c_0_3 = fma(a_0[3], b_3[3], c_0_3);
|
||||
c_0_4 = fma(a_0[3], b_3[4], c_0_4);
|
||||
c_1_0 = fma(a_1[3], b_3[0], c_1_0);
|
||||
c_1_1 = fma(a_1[3], b_3[1], c_1_1);
|
||||
c_1_2 = fma(a_1[3], b_3[2], c_1_2);
|
||||
c_1_3 = fma(a_1[3], b_3[3], c_1_3);
|
||||
c_1_4 = fma(a_1[3], b_3[4], c_1_4);
|
||||
c_2_0 = fma(a_2[3], b_3[0], c_2_0);
|
||||
c_2_1 = fma(a_2[3], b_3[1], c_2_1);
|
||||
c_2_2 = fma(a_2[3], b_3[2], c_2_2);
|
||||
c_2_3 = fma(a_2[3], b_3[3], c_2_3);
|
||||
c_2_4 = fma(a_2[3], b_3[4], c_2_4);
|
||||
c_3_0 = fma(a_3[3], b_3[0], c_3_0);
|
||||
c_3_1 = fma(a_3[3], b_3[1], c_3_1);
|
||||
c_3_2 = fma(a_3[3], b_3[2], c_3_2);
|
||||
c_3_3 = fma(a_3[3], b_3[3], c_3_3);
|
||||
c_3_4 = fma(a_3[3], b_3[4], c_3_4);
|
||||
c_0_0 = fma(a_0[4], b_4[0], c_0_0);
|
||||
c_0_1 = fma(a_0[4], b_4[1], c_0_1);
|
||||
c_0_2 = fma(a_0[4], b_4[2], c_0_2);
|
||||
c_0_3 = fma(a_0[4], b_4[3], c_0_3);
|
||||
c_0_4 = fma(a_0[4], b_4[4], c_0_4);
|
||||
c_1_0 = fma(a_1[4], b_4[0], c_1_0);
|
||||
c_1_1 = fma(a_1[4], b_4[1], c_1_1);
|
||||
c_1_2 = fma(a_1[4], b_4[2], c_1_2);
|
||||
c_1_3 = fma(a_1[4], b_4[3], c_1_3);
|
||||
c_1_4 = fma(a_1[4], b_4[4], c_1_4);
|
||||
c_2_0 = fma(a_2[4], b_4[0], c_2_0);
|
||||
c_2_1 = fma(a_2[4], b_4[1], c_2_1);
|
||||
c_2_2 = fma(a_2[4], b_4[2], c_2_2);
|
||||
c_2_3 = fma(a_2[4], b_4[3], c_2_3);
|
||||
c_2_4 = fma(a_2[4], b_4[4], c_2_4);
|
||||
c_3_0 = fma(a_3[4], b_4[0], c_3_0);
|
||||
c_3_1 = fma(a_3[4], b_4[1], c_3_1);
|
||||
c_3_2 = fma(a_3[4], b_4[2], c_3_2);
|
||||
c_3_3 = fma(a_3[4], b_4[3], c_3_3);
|
||||
c_3_4 = fma(a_3[4], b_4[4], c_3_4);
|
||||
c_0_0 = fma(a_0[5], b_5[0], c_0_0);
|
||||
c_0_1 = fma(a_0[5], b_5[1], c_0_1);
|
||||
c_0_2 = fma(a_0[5], b_5[2], c_0_2);
|
||||
c_0_3 = fma(a_0[5], b_5[3], c_0_3);
|
||||
c_0_4 = fma(a_0[5], b_5[4], c_0_4);
|
||||
c_1_0 = fma(a_1[5], b_5[0], c_1_0);
|
||||
c_1_1 = fma(a_1[5], b_5[1], c_1_1);
|
||||
c_1_2 = fma(a_1[5], b_5[2], c_1_2);
|
||||
c_1_3 = fma(a_1[5], b_5[3], c_1_3);
|
||||
c_1_4 = fma(a_1[5], b_5[4], c_1_4);
|
||||
c_2_0 = fma(a_2[5], b_5[0], c_2_0);
|
||||
c_2_1 = fma(a_2[5], b_5[1], c_2_1);
|
||||
c_2_2 = fma(a_2[5], b_5[2], c_2_2);
|
||||
c_2_3 = fma(a_2[5], b_5[3], c_2_3);
|
||||
c_2_4 = fma(a_2[5], b_5[4], c_2_4);
|
||||
c_3_0 = fma(a_3[5], b_5[0], c_3_0);
|
||||
c_3_1 = fma(a_3[5], b_5[1], c_3_1);
|
||||
c_3_2 = fma(a_3[5], b_5[2], c_3_2);
|
||||
c_3_3 = fma(a_3[5], b_5[3], c_3_3);
|
||||
c_3_4 = fma(a_3[5], b_5[4], c_3_4);
|
||||
}
|
||||
for (t *a = a0 + p/RBK*RBK, *b = b0 + p/RBK*RBK*ldb; a < a0 + p; a++, b += ldb)
|
||||
{
|
||||
t* a_0 = &a[lda*0];
|
||||
t* a_1 = &a[lda*1];
|
||||
t* a_2 = &a[lda*2];
|
||||
t* a_3 = &a[lda*3];
|
||||
t* b_0 = &b[ldb*0];
|
||||
c_0_0 = fma(a_0[0], b_0[0], c_0_0);
|
||||
c_0_1 = fma(a_0[0], b_0[1], c_0_1);
|
||||
c_0_2 = fma(a_0[0], b_0[2], c_0_2);
|
||||
c_0_3 = fma(a_0[0], b_0[3], c_0_3);
|
||||
c_0_4 = fma(a_0[0], b_0[4], c_0_4);
|
||||
c_1_0 = fma(a_1[0], b_0[0], c_1_0);
|
||||
c_1_1 = fma(a_1[0], b_0[1], c_1_1);
|
||||
c_1_2 = fma(a_1[0], b_0[2], c_1_2);
|
||||
c_1_3 = fma(a_1[0], b_0[3], c_1_3);
|
||||
c_1_4 = fma(a_1[0], b_0[4], c_1_4);
|
||||
c_2_0 = fma(a_2[0], b_0[0], c_2_0);
|
||||
c_2_1 = fma(a_2[0], b_0[1], c_2_1);
|
||||
c_2_2 = fma(a_2[0], b_0[2], c_2_2);
|
||||
c_2_3 = fma(a_2[0], b_0[3], c_2_3);
|
||||
c_2_4 = fma(a_2[0], b_0[4], c_2_4);
|
||||
c_3_0 = fma(a_3[0], b_0[0], c_3_0);
|
||||
c_3_1 = fma(a_3[0], b_0[1], c_3_1);
|
||||
c_3_2 = fma(a_3[0], b_0[2], c_3_2);
|
||||
c_3_3 = fma(a_3[0], b_0[3], c_3_3);
|
||||
c_3_4 = fma(a_3[0], b_0[4], c_3_4);
|
||||
}
|
||||
c_0[0] = c_0_0;
|
||||
c_0[1] = c_0_1;
|
||||
c_0[2] = c_0_2;
|
||||
c_0[3] = c_0_3;
|
||||
c_0[4] = c_0_4;
|
||||
c_1[0] = c_1_0;
|
||||
c_1[1] = c_1_1;
|
||||
c_1[2] = c_1_2;
|
||||
c_1[3] = c_1_3;
|
||||
c_1[4] = c_1_4;
|
||||
c_2[0] = c_2_0;
|
||||
c_2[1] = c_2_1;
|
||||
c_2[2] = c_2_2;
|
||||
c_2[3] = c_2_3;
|
||||
c_2[4] = c_2_4;
|
||||
c_3[0] = c_3_0;
|
||||
c_3[1] = c_3_1;
|
||||
c_3[2] = c_3_2;
|
||||
c_3[3] = c_3_3;
|
||||
c_3[4] = c_3_4;
|
||||
}
|
@ -3,12 +3,14 @@ TARGET = simple
|
||||
$(TARGET).objdump: $(TARGET)
|
||||
riscv64-unknown-elf-objdump -S -D $(TARGET) > $(TARGET).objdump
|
||||
|
||||
$(TARGET): $(TARGET).c
|
||||
$(TARGET): $(TARGET).c Makefile
|
||||
riscv64-unknown-elf-gcc -g -o $(TARGET) -march=rv64gc -mabi=lp64d -mcmodel=medany \
|
||||
-O $(TARGET).c
|
||||
# -O -T../../link/linkc.ld $(TARGET).c
|
||||
# -nostartfiles -nostdlib $(TARGET).c
|
||||
# -nostartfiles -nostdlib -T../../link/link.ld $(TARGET).c
|
||||
-DPREALLOCATE=1 -mcmodel=medany -static -std=gnu99 -O2 -ffast-math -fno-common \
|
||||
-fno-builtin-printf -fno-tree-loop-distribute-patterns \
|
||||
-static -nostdlib -nostartfiles -lm -lgcc -T../common/test.ld \
|
||||
-I../common \
|
||||
-O $(TARGET).c \
|
||||
../common/crt.S ../common/syscalls.c
|
||||
|
||||
clean:
|
||||
rm -f $(TARGET) $(TARGET).objdump
|
||||
|
BIN
examples/C/simple/simple
Executable file
BIN
examples/C/simple/simple
Executable file
Binary file not shown.
@ -2,13 +2,20 @@
|
||||
// David_Harris@hmc.edu 24 December 2021
|
||||
// Simple illustration of compiling C code
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
long sum(long N) {
|
||||
long result, i;
|
||||
result = 0;
|
||||
for (i=1; i<=N; i++) result = result + i;
|
||||
for (i=1; i<=N; i++) {
|
||||
result = result + i;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
return sum(4);
|
||||
long s;
|
||||
s = sum(4);
|
||||
printf("s = %ld\n", s);
|
||||
return 0; // 0 means success
|
||||
}
|
@ -1,17 +0,0 @@
|
||||
OUTPUT_ARCH( "riscv" )
|
||||
ENTRY(main)
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
. = 0x80000000;
|
||||
.text : { *(.text) }
|
||||
. = ALIGN(0x1000);
|
||||
.tohost : { *(.tohost) }
|
||||
. = ALIGN(0x1000);
|
||||
.data : { *(.data) }
|
||||
.data.string : { *(.data.string)}
|
||||
. = ALIGN(0x1000);
|
||||
.bss : { *(.bss) }
|
||||
_end = .;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user