1
0
mirror of https://github.com/openhwgroup/cvw synced 2025-02-11 06:05:49 +00:00

Merge branch 'main' into dcache

This commit is contained in:
Ross Thompson 2021-07-15 11:55:20 -05:00
commit 4549a9f1c9
185 changed files with 28127 additions and 10721 deletions
.gitignore
riscv-coremark
wally-pipelined

7
.gitignore vendored
View File

@ -28,5 +28,12 @@ wally-pipelined/linux-testgen/nohup*
wally-pipelined/linux-testgen/x*
!wally-pipelined/linux-testgen/linux-testvectors/tvCopier.py
!wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh
!wally-pipelined/linux-testgen/linux-testvectors/tvUnlinker.sh
!wally-pipelined/linux-testgen/linux-testvectors/intermediate-outputs
wally-pipelined/linux-testgen/linux-testvectors/intermediate-outputs/*
!wally-pipelined/linux-testgen/linux-testvectors/intermediate-outputs/git_create_dir.txt
wally-pipelined/linux-testgen/buildroot/
wally-pipelined/linux-testgen/buildroot-image-output
wally-pipelined/linux-testgen/buildroot-config-src/main.config.old
wally-pipelined/regression/slack-notifier/slack-webhook-url.txt

View File

@ -3,6 +3,8 @@
CoreMark's primary goals are simplicity and providing a method for testing only a processor's core features. For more information about EEMBC's comprehensive embedded benchmark suites, please see www.eembc.org.
For a more compute-intensive version of CoreMark that uses larger datasets and execution loops taken from common applications, please check out EEMBC's [CoreMark-PRO](https://www.github.com/eembc/coremark-pro) benchmark, also on GitHub.
# Building and Running
To build and run the benchmark, type
@ -83,7 +85,9 @@ Use `XCFLAGS=-DMULTITHREAD=N` where N is number of threads to run in parallel. S
% make XCFLAGS="-DMULTITHREAD=4 -DUSE_PTHREAD"
~~~
Above will compile the benchmark for execution on 4 cores, using POSIX Threads API.
The above will compile the benchmark for execution on 4 cores, using POSIX Threads API.
Note: linking may fail on the previous command if your linker does not automatically add the `pthread` library. If you encounter `undefined reference` errors, please modify the `core_portme.mak` file for your platform, (e.g. `linux/core_portme.mak`) and add `-lpthread` to the `LFLAGS_END` parameter.
# Run Parameters for the Benchmark Executable
CoreMark's executable takes several parameters as follows (but only if `main()` accepts arguments):
@ -109,7 +113,7 @@ The default for such a target when testing different configurations could be:
# Submitting Results
CoreMark results can be submitted on the web. Open a web browser and go to https://www.eembc.org/coremark/login.php?url=enter_score.php. After registering an account you may enter a score.
CoreMark results can be submitted on the web. Open a web browser and go to the [submission page](https://www.eembc.org/coremark/submit.php). After registering an account you may enter a score.
# Run Rules
What is and is not allowed.

159
riscv-coremark/coremark/barebones/core_portme.c Executable file → Normal file
View File

@ -19,110 +19,135 @@ Original Author: Shay Gal-on
#include "core_portme.h"
#if VALIDATION_RUN
volatile ee_s32 seed1_volatile=0x3415;
volatile ee_s32 seed2_volatile=0x3415;
volatile ee_s32 seed3_volatile=0x66;
volatile ee_s32 seed1_volatile = 0x3415;
volatile ee_s32 seed2_volatile = 0x3415;
volatile ee_s32 seed3_volatile = 0x66;
#endif
#if PERFORMANCE_RUN
volatile ee_s32 seed1_volatile=0x0;
volatile ee_s32 seed2_volatile=0x0;
volatile ee_s32 seed3_volatile=0x66;
volatile ee_s32 seed1_volatile = 0x0;
volatile ee_s32 seed2_volatile = 0x0;
volatile ee_s32 seed3_volatile = 0x66;
#endif
#if PROFILE_RUN
volatile ee_s32 seed1_volatile=0x8;
volatile ee_s32 seed2_volatile=0x8;
volatile ee_s32 seed3_volatile=0x8;
volatile ee_s32 seed1_volatile = 0x8;
volatile ee_s32 seed2_volatile = 0x8;
volatile ee_s32 seed3_volatile = 0x8;
#endif
volatile ee_s32 seed4_volatile=ITERATIONS;
volatile ee_s32 seed5_volatile=0;
volatile ee_s32 seed4_volatile = ITERATIONS;
volatile ee_s32 seed5_volatile = 0;
/* Porting : Timing functions
How to capture time and convert to seconds must be ported to whatever is supported by the platform.
e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc.
Sample implementation for standard time.h and windows.h definitions included.
How to capture time and convert to seconds must be ported to whatever is
supported by the platform. e.g. Read value from on board RTC, read value from
cpu clock cycles performance counter etc. Sample implementation for standard
time.h and windows.h definitions included.
*/
CORETIMETYPE barebones_clock() {
#error "You must implement a method to measure time in barebones_clock()! This function should return current time.\n"
CORETIMETYPE
barebones_clock()
{
#error \
"You must implement a method to measure time in barebones_clock()! This function should return current time.\n"
}
/* Define : TIMER_RES_DIVIDER
Divider to trade off timer resolution and total time that can be measured.
Divider to trade off timer resolution and total time that can be
measured.
Use lower values to increase resolution, but make sure that overflow does not occur.
If there are issues with the return value overflowing, increase this value.
*/
#define GETMYTIME(_t) (*_t=barebones_clock())
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
#define TIMER_RES_DIVIDER 1
Use lower values to increase resolution, but make sure that overflow
does not occur. If there are issues with the return value overflowing,
increase this value.
*/
#define GETMYTIME(_t) (*_t = barebones_clock())
#define MYTIMEDIFF(fin, ini) ((fin) - (ini))
#define TIMER_RES_DIVIDER 1
#define SAMPLE_TIME_IMPLEMENTATION 1
#define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER)
#define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER)
/** Define Host specific (POSIX), or target specific global time variables. */
static CORETIMETYPE start_time_val, stop_time_val;
/* Function : start_time
This function will be called right before starting the timed portion of the benchmark.
This function will be called right before starting the timed portion of
the benchmark.
Implementation may be capturing a system timer (as implemented in the example code)
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
Implementation may be capturing a system timer (as implemented in the
example code) or zeroing some system parameters - e.g. setting the cpu clocks
cycles to 0.
*/
void start_time(void) {
GETMYTIME(&start_time_val );
void
start_time(void)
{
GETMYTIME(&start_time_val);
}
/* Function : stop_time
This function will be called right after ending the timed portion of the benchmark.
This function will be called right after ending the timed portion of the
benchmark.
Implementation may be capturing a system timer (as implemented in the example code)
or other system parameters - e.g. reading the current value of cpu cycles counter.
Implementation may be capturing a system timer (as implemented in the
example code) or other system parameters - e.g. reading the current value of
cpu cycles counter.
*/
void stop_time(void) {
GETMYTIME(&stop_time_val );
void
stop_time(void)
{
GETMYTIME(&stop_time_val);
}
/* Function : get_time
Return an abstract "ticks" number that signifies time on the system.
Actual value returned may be cpu cycles, milliseconds or any other value,
as long as it can be converted to seconds by <time_in_secs>.
This methodology is taken to accommodate any hardware or simulated platform.
The sample implementation returns millisecs by default,
and the resolution is controlled by <TIMER_RES_DIVIDER>
Return an abstract "ticks" number that signifies time on the system.
Actual value returned may be cpu cycles, milliseconds or any other
value, as long as it can be converted to seconds by <time_in_secs>. This
methodology is taken to accommodate any hardware or simulated platform. The
sample implementation returns millisecs by default, and the resolution is
controlled by <TIMER_RES_DIVIDER>
*/
CORE_TICKS get_time(void) {
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
return elapsed;
CORE_TICKS
get_time(void)
{
CORE_TICKS elapsed
= (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
return elapsed;
}
/* Function : time_in_secs
Convert the value returned by get_time to seconds.
Convert the value returned by get_time to seconds.
The <secs_ret> type is used to accommodate systems with no support for floating point.
Default implementation implemented by the EE_TICKS_PER_SEC macro above.
The <secs_ret> type is used to accommodate systems with no support for
floating point. Default implementation implemented by the EE_TICKS_PER_SEC
macro above.
*/
secs_ret time_in_secs(CORE_TICKS ticks) {
secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
return retval;
secs_ret
time_in_secs(CORE_TICKS ticks)
{
secs_ret retval = ((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
return retval;
}
ee_u32 default_num_contexts=1;
ee_u32 default_num_contexts = 1;
/* Function : portable_init
Target specific initialization code
Test for some common mistakes.
Target specific initialization code
Test for some common mistakes.
*/
void portable_init(core_portable *p, int *argc, char *argv[])
void
portable_init(core_portable *p, int *argc, char *argv[])
{
#error "Call board initialization routines in portable init (if needed), in particular initialize UART!\n"
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) {
ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n");
}
if (sizeof(ee_u32) != 4) {
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
}
p->portable_id=1;
#error \
"Call board initialization routines in portable init (if needed), in particular initialize UART!\n"
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *))
{
ee_printf(
"ERROR! Please define ee_ptr_int to a type that holds a "
"pointer!\n");
}
if (sizeof(ee_u32) != 4)
{
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
}
p->portable_id = 1;
}
/* Function : portable_fini
Target specific final code
Target specific final code
*/
void portable_fini(core_portable *p)
void
portable_fini(core_portable *p)
{
p->portable_id=0;
p->portable_id = 0;
}

179
riscv-coremark/coremark/barebones/core_portme.h Executable file → Normal file
View File

@ -16,178 +16,189 @@ limitations under the License.
Original Author: Shay Gal-on
*/
/* Topic : Description
This file contains configuration constants required to execute on different platforms
This file contains configuration constants required to execute on
different platforms
*/
#ifndef CORE_PORTME_H
#define CORE_PORTME_H
/************************/
/* Data types and settings */
/************************/
/* Configuration : HAS_FLOAT
Define to 1 if the platform supports floating point.
/* Configuration : HAS_FLOAT
Define to 1 if the platform supports floating point.
*/
#ifndef HAS_FLOAT
#ifndef HAS_FLOAT
#define HAS_FLOAT 1
#endif
/* Configuration : HAS_TIME_H
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
*/
#ifndef HAS_TIME_H
#define HAS_TIME_H 1
#endif
/* Configuration : USE_CLOCK
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
*/
#ifndef USE_CLOCK
#define USE_CLOCK 1
#endif
/* Configuration : HAS_STDIO
Define to 1 if the platform has stdio.h.
Define to 1 if the platform has stdio.h.
*/
#ifndef HAS_STDIO
#define HAS_STDIO 0
#endif
/* Configuration : HAS_PRINTF
Define to 1 if the platform has stdio.h and implements the printf function.
Define to 1 if the platform has stdio.h and implements the printf
function.
*/
#ifndef HAS_PRINTF
#define HAS_PRINTF 0
#endif
/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
Initialize these strings per platform
Initialize these strings per platform
*/
#ifndef COMPILER_VERSION
#ifdef __GNUC__
#define COMPILER_VERSION "GCC"__VERSION__
#else
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
#endif
#ifndef COMPILER_VERSION
#ifdef __GNUC__
#define COMPILER_VERSION "GCC"__VERSION__
#else
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
#endif
#ifndef COMPILER_FLAGS
#define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
#endif
#ifndef MEM_LOCATION
#define MEM_LOCATION "STACK"
#ifndef COMPILER_FLAGS
#define COMPILER_FLAGS \
FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
#endif
#ifndef MEM_LOCATION
#define MEM_LOCATION "STACK"
#endif
/* Data Types :
To avoid compiler issues, define the data types that need to be used for 8b, 16b and 32b in <core_portme.h>.
*Important* :
ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!!
To avoid compiler issues, define the data types that need to be used for
8b, 16b and 32b in <core_portme.h>.
*Important* :
ee_ptr_int needs to be the data type used to hold pointers, otherwise
coremark may fail!!!
*/
typedef signed short ee_s16;
typedef signed short ee_s16;
typedef unsigned short ee_u16;
typedef signed int ee_s32;
typedef double ee_f32;
typedef unsigned char ee_u8;
typedef unsigned int ee_u32;
typedef ee_u32 ee_ptr_int;
typedef size_t ee_size_t;
typedef signed int ee_s32;
typedef double ee_f32;
typedef unsigned char ee_u8;
typedef unsigned int ee_u32;
typedef ee_u32 ee_ptr_int;
typedef size_t ee_size_t;
#define NULL ((void *)0)
/* align_mem :
This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks.
This macro is used to align an offset to point to a 32b value. It is
used in the Matrix algorithm to initialize the input memory blocks.
*/
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3))
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3))
/* Configuration : CORE_TICKS
Define type of return from the timing functions.
Define type of return from the timing functions.
*/
#define CORETIMETYPE ee_u32
#define CORETIMETYPE ee_u32
typedef ee_u32 CORE_TICKS;
/* Configuration : SEED_METHOD
Defines method to get seed values that cannot be computed at compile time.
Valid values :
SEED_ARG - from command line.
SEED_FUNC - from a system function.
SEED_VOLATILE - from volatile variables.
Defines method to get seed values that cannot be computed at compile
time.
Valid values :
SEED_ARG - from command line.
SEED_FUNC - from a system function.
SEED_VOLATILE - from volatile variables.
*/
#ifndef SEED_METHOD
#define SEED_METHOD SEED_VOLATILE
#endif
/* Configuration : MEM_METHOD
Defines method to get a block of memory.
Valid values :
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
MEM_STATIC - to use a static memory array.
MEM_STACK - to allocate the data block on the stack (NYI).
Defines method to get a block of memory.
Valid values :
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
MEM_STATIC - to use a static memory array.
MEM_STACK - to allocate the data block on the stack (NYI).
*/
#ifndef MEM_METHOD
#define MEM_METHOD MEM_STACK
#endif
/* Configuration : MULTITHREAD
Define for parallel execution
Valid values :
1 - only one context (default).
N>1 - will execute N copies in parallel.
Note :
If this flag is defined to more than 1, an implementation for launching parallel contexts must be defined.
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK> to enable them.
It is valid to have a different implementation of <core_start_parallel> and <core_end_parallel> in <core_portme.c>,
to fit a particular architecture.
Define for parallel execution
Valid values :
1 - only one context (default).
N>1 - will execute N copies in parallel.
Note :
If this flag is defined to more than 1, an implementation for launching
parallel contexts must be defined.
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK>
to enable them.
It is valid to have a different implementation of <core_start_parallel>
and <core_end_parallel> in <core_portme.c>, to fit a particular architecture.
*/
#ifndef MULTITHREAD
#define MULTITHREAD 1
#define USE_PTHREAD 0
#define USE_FORK 0
#define USE_SOCKET 0
#define USE_FORK 0
#define USE_SOCKET 0
#endif
/* Configuration : MAIN_HAS_NOARGC
Needed if platform does not support getting arguments to main.
Valid values :
0 - argc/argv to main is supported
1 - argc/argv to main is not supported
Note :
This flag only matters if MULTITHREAD has been defined to a value greater than 1.
Needed if platform does not support getting arguments to main.
Valid values :
0 - argc/argv to main is supported
1 - argc/argv to main is not supported
Note :
This flag only matters if MULTITHREAD has been defined to a value
greater than 1.
*/
#ifndef MAIN_HAS_NOARGC
#ifndef MAIN_HAS_NOARGC
#define MAIN_HAS_NOARGC 0
#endif
/* Configuration : MAIN_HAS_NORETURN
Needed if platform does not support returning a value from main.
Valid values :
0 - main returns an int, and return value will be 0.
1 - platform does not support returning a value from main
Needed if platform does not support returning a value from main.
Valid values :
0 - main returns an int, and return value will be 0.
1 - platform does not support returning a value from main
*/
#ifndef MAIN_HAS_NORETURN
#define MAIN_HAS_NORETURN 0
#endif
/* Variable : default_num_contexts
Not used for this simple port, must contain the value 1.
Not used for this simple port, must contain the value 1.
*/
extern ee_u32 default_num_contexts;
typedef struct CORE_PORTABLE_S {
ee_u8 portable_id;
typedef struct CORE_PORTABLE_S
{
ee_u8 portable_id;
} core_portable;
/* target specific init/fini */
void portable_init(core_portable *p, int *argc, char *argv[]);
void portable_fini(core_portable *p);
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) && !defined(VALIDATION_RUN)
#if (TOTAL_DATA_SIZE==1200)
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) \
&& !defined(VALIDATION_RUN)
#if (TOTAL_DATA_SIZE == 1200)
#define PROFILE_RUN 1
#elif (TOTAL_DATA_SIZE==2000)
#elif (TOTAL_DATA_SIZE == 2000)
#define PERFORMANCE_RUN 1
#else
#define VALIDATION_RUN 1

164
riscv-coremark/coremark/barebones/cvt.c Executable file → Normal file
View File

@ -17,101 +17,111 @@ limitations under the License.
#define CVTBUFSIZE 80
static char CVTBUF[CVTBUFSIZE];
static char *cvt(double arg, int ndigits, int *decpt, int *sign, char *buf, int eflag)
static char *
cvt(double arg, int ndigits, int *decpt, int *sign, char *buf, int eflag)
{
int r2;
double fi, fj;
char *p, *p1;
int r2;
double fi, fj;
char * p, *p1;
if (ndigits < 0) ndigits = 0;
if (ndigits >= CVTBUFSIZE - 1) ndigits = CVTBUFSIZE - 2;
r2 = 0;
*sign = 0;
p = &buf[0];
if (arg < 0)
{
*sign = 1;
arg = -arg;
}
arg = modf(arg, &fi);
p1 = &buf[CVTBUFSIZE];
if (ndigits < 0)
ndigits = 0;
if (ndigits >= CVTBUFSIZE - 1)
ndigits = CVTBUFSIZE - 2;
r2 = 0;
*sign = 0;
p = &buf[0];
if (arg < 0)
{
*sign = 1;
arg = -arg;
}
arg = modf(arg, &fi);
p1 = &buf[CVTBUFSIZE];
if (fi != 0)
{
p1 = &buf[CVTBUFSIZE];
while (fi != 0)
if (fi != 0)
{
fj = modf(fi / 10, &fi);
*--p1 = (int)((fj + .03) * 10) + '0';
r2++;
p1 = &buf[CVTBUFSIZE];
while (fi != 0)
{
fj = modf(fi / 10, &fi);
*--p1 = (int)((fj + .03) * 10) + '0';
r2++;
}
while (p1 < &buf[CVTBUFSIZE])
*p++ = *p1++;
}
while (p1 < &buf[CVTBUFSIZE]) *p++ = *p1++;
}
else if (arg > 0)
{
while ((fj = arg * 10) < 1)
else if (arg > 0)
{
arg = fj;
r2--;
while ((fj = arg * 10) < 1)
{
arg = fj;
r2--;
}
}
}
p1 = &buf[ndigits];
if (eflag == 0) p1 += r2;
*decpt = r2;
if (p1 < &buf[0])
{
buf[0] = '\0';
p1 = &buf[ndigits];
if (eflag == 0)
p1 += r2;
*decpt = r2;
if (p1 < &buf[0])
{
buf[0] = '\0';
return buf;
}
while (p <= p1 && p < &buf[CVTBUFSIZE])
{
arg *= 10;
arg = modf(arg, &fj);
*p++ = (int)fj + '0';
}
if (p1 >= &buf[CVTBUFSIZE])
{
buf[CVTBUFSIZE - 1] = '\0';
return buf;
}
p = p1;
*p1 += 5;
while (*p1 > '9')
{
*p1 = '0';
if (p1 > buf)
++*--p1;
else
{
*p1 = '1';
(*decpt)++;
if (eflag == 0)
{
if (p > buf)
*p = '0';
p++;
}
}
}
*p = '\0';
return buf;
}
while (p <= p1 && p < &buf[CVTBUFSIZE])
{
arg *= 10;
arg = modf(arg, &fj);
*p++ = (int) fj + '0';
}
if (p1 >= &buf[CVTBUFSIZE])
{
buf[CVTBUFSIZE - 1] = '\0';
return buf;
}
p = p1;
*p1 += 5;
while (*p1 > '9')
{
*p1 = '0';
if (p1 > buf)
++*--p1;
else
{
*p1 = '1';
(*decpt)++;
if (eflag == 0)
{
if (p > buf) *p = '0';
p++;
}
}
}
*p = '\0';
return buf;
}
char *ecvt(double arg, int ndigits, int *decpt, int *sign)
char *
ecvt(double arg, int ndigits, int *decpt, int *sign)
{
return cvt(arg, ndigits, decpt, sign, CVTBUF, 1);
return cvt(arg, ndigits, decpt, sign, CVTBUF, 1);
}
char *ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
char *
ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
{
return cvt(arg, ndigits, decpt, sign, buf, 1);
return cvt(arg, ndigits, decpt, sign, buf, 1);
}
char *fcvt(double arg, int ndigits, int *decpt, int *sign)
char *
fcvt(double arg, int ndigits, int *decpt, int *sign)
{
return cvt(arg, ndigits, decpt, sign, CVTBUF, 0);
return cvt(arg, ndigits, decpt, sign, CVTBUF, 0);
}
char *fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
char *
fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
{
return cvt(arg, ndigits, decpt, sign, buf, 0);
return cvt(arg, ndigits, decpt, sign, buf, 0);
}

1067
riscv-coremark/coremark/barebones/ee_printf.c Executable file → Normal file

File diff suppressed because it is too large Load Diff

View File

@ -17,8 +17,8 @@ Original Author: Shay Gal-on
*/
#include "coremark.h"
#include <stdlib.h>
#include <string.h>
//#include <stdlib.h>
//#include <string.h>
/*
Topic: Description
Benchmark using a linked list.
@ -118,7 +118,7 @@ ee_s32 cmp_idx(list_data *a, list_data *b, core_results *res) {
return a->idx - b->idx;
}
void ehitoa(int value, char *str, int base){
/*void ehitoa(int value, char *str, int base){
if (value>100000) strcpy(str,"too big");
else{
int places[6] = {100000, 10000, 1000, 100, 10, 1};
@ -135,7 +135,7 @@ void ehitoa(int value, char *str, int base){
}
str[6]=0;
}
}
}*/
void copy_info(list_data *to,list_data *from) {
to->data16=from->data16;
@ -158,22 +158,22 @@ ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) {
list_head *finder, *remover;
list_data info;
ee_s16 i;
ee_printf("entered corebenchlist \n");
//ee_printf("entered corebenchlist \n");
info.idx=finder_idx;
/* find <find_num> values in the list, and change the list each time (reverse and cache if value found) */
for (i=0; i<find_num; i++) {
ee_printf("for loop \n");
//ee_printf("for loop \n");
info.data16= (i & 0xff) ;
this_find=core_list_find(list,&info);
list=core_list_reverse(list);
if (this_find==NULL) {
missed++;
retval+=(list->next->info->data16 >> 8) & 1;
ee_printf("if statement \n");
//ee_printf("if statement \n");
}
else {
found++;
ee_printf("else statement \n");
//ee_printf("else statement \n");
if (this_find->info->data16 & 0x1) /* use found value */
retval+=(this_find->info->data16 >> 9) & 1;
/* and cache next item at the head of the list (if any) */
@ -187,7 +187,7 @@ ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) {
if (info.idx>=0)
info.idx++;
#if CORE_DEBUG
ee_printf("List find %d: [%d,%d,%d]\n",i,retval,missed,found);
//ee_printf("List find %d: [%d,%d,%d]\n",i,retval,missed,found);
#endif
}
retval+=found*4-missed;
@ -204,7 +204,7 @@ ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) {
finder=finder->next;
}
#if CORE_DEBUG
ee_printf("List sort 1: %04x\n",retval);
//ee_printf("List sort 1: %04x\n",retval);
#endif
remover=core_list_undo_remove(remover,list->next);
/* sort the list by index, in effect returning the list to original state */
@ -216,7 +216,7 @@ ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) {
finder=finder->next;
}
#if CORE_DEBUG
ee_printf("List sort 2: %04x\n",retval);
//ee_printf("List sort 2: %04x\n",retval);
#endif
return retval;
}
@ -235,26 +235,26 @@ ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) {
*/
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed) {
/* calculated pointers for the list */
ee_printf("%d \n blksize", blksize);
//ee_printf("%d \n blksize", blksize);
ee_u32 per_item=16+sizeof(struct list_data_s);
ee_printf("%d \n sizeof", sizeof(struct list_data_s));
ee_printf("%d \n per_item", per_item);
//ee_printf("%d \n sizeof", sizeof(struct list_data_s));
//ee_printf("%d \n per_item", per_item);
ee_u32 size=(blksize/per_item)-2;
char bufftwo[200];
ehitoa(size, bufftwo, 10);
ee_printf(" size = %s done \n", bufftwo);
ee_printf("%d", size);/* to accomodate systems with 64b pointers, and make sure same code is executed, set max list elements */
//char bufftwo[200];
//ehitoa(size, bufftwo, 10);
//ee_printf(" size = %s done \n", bufftwo);
//ee_printf("%d", size);/* to accomodate systems with 64b pointers, and make sure same code is executed, set max list elements */
list_head *memblock_end=memblock+size;
list_data *datablock=(list_data *)(memblock_end);
list_data *datablock_end=datablock+size;
ee_printf("datablock_end");
//ee_printf("datablock_end");
/* some useful variables */
ee_u32 i;
list_head *finder,*list=memblock;
list_data info;
ehitoa(size, bufftwo, 10);
ee_printf(" size2 = %s done \n", bufftwo);
//ehitoa(size, bufftwo, 10);
//ee_printf(" size2 = %s done \n", bufftwo);
/* create a fake items for the list head and tail */
list->next=NULL;
@ -265,58 +265,58 @@ list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed) {
datablock++;
info.idx=0x7fff;
info.data16=(ee_s16)0xffff;
ehitoa(size, bufftwo, 10);
ee_printf(" size3 = %s done \n", bufftwo);
//ehitoa(size, bufftwo, 10);
//ee_printf(" size3 = %s done \n", bufftwo);
core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end);
ehitoa(size, bufftwo, 10);
ee_printf(" size4 = %s done \n", bufftwo);;
//ehitoa(size, bufftwo, 10);
//ee_printf(" size4 = %s done \n", bufftwo);;
/* then insert size items */
for (i=0; i<size; i++) {
ee_u16 datpat=((ee_u16)(seed^i) & 0xf);
ee_u16 dat=(datpat<<3) | (i&0x7); /* alternate between algorithms */
info.data16=(dat<<8) | dat; /* fill the data with actual data and upper bits with rebuild value */
core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end);
ehitoa(i, bufftwo, 10);
ee_printf(" i = %s done \n", bufftwo);
//ehitoa(i, bufftwo, 10);
//ee_printf(" i = %s done \n", bufftwo);
//ee_printf("%d \n", i);
/*char grow[200];
char growtwo[200];
itoa(i, growtwo, 10);
sprintf(grow, "test %u buff2 %s goodbyeadd \n", i, growtwo);*/
}
ee_printf("exited for \n");
//ee_printf("exited for \n");
/* and now index the list so we know initial seed order of the list */
finder=list->next;
i=1;
ehitoa(i, bufftwo, 10);
ee_printf(" i = %s done \n", bufftwo);
//ehitoa(i, bufftwo, 10);
//ee_printf(" i = %s done \n", bufftwo);
while (finder->next!=NULL) {
ee_printf("enter while statement \n");
//ee_printf("enter while statement \n");
if (i<size/5){ /* first 20% of the list in order */
finder->info->idx=i++;
ehitoa(i, bufftwo, 10);
ee_printf(" if i = %s done \n", bufftwo);
//ehitoa(i, bufftwo, 10);
//ee_printf(" if i = %s done \n", bufftwo);
}
else {
ee_u16 pat=(ee_u16)(i++ ^ seed); /* get a pseudo random number */
finder->info->idx=0x3fff & (((i & 0x07) << 8) | pat); /* make sure the mixed items end up after the ones in sequence */
ehitoa(i, bufftwo, 10);
ee_printf(" else i = %s done \n", bufftwo);
//ehitoa(i, bufftwo, 10);
//ee_printf(" else i = %s done \n", bufftwo);
}
finder=finder->next;
}
ehitoa(i, bufftwo, 10);
ee_printf(" i2 = %s done \n", bufftwo);
//ehitoa(i, bufftwo, 10);
//ee_printf(" i2 = %s done \n", bufftwo);
list = core_list_mergesort(list,cmp_idx,NULL);
#if CORE_DEBUG
ee_printf("Initialized list:\n");
//ee_printf("Initialized list:\n");
finder=list;
while (finder) {
ee_printf("[%04x,%04x]",finder->info->idx,(ee_u16)finder->info->data16);
//ee_printf("[%04x,%04x]",finder->info->idx,(ee_u16)finder->info->data16);
finder=finder->next;
}
ee_printf("\n");
//ee_printf("\n");
#endif
return list;
}
@ -424,20 +424,22 @@ list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modifi
Found item, or NULL if not found.
*/
list_head *core_list_find(list_head *list,list_data *info) {
ee_printf("entered core_list_find \n");
//ee_printf("entered core_list_find \n");
if (info->idx>=0) {
ee_printf("find if \n");
//ee_printf("find if \n");
while (list && (list->info->idx != info->idx)){
list=list->next;
ee_printf("find while if \n");}
ee_printf("core_list_find end \n");
//ee_printf("find while if \n");
}
//ee_printf("core_list_find end \n");
return list;
} else {
ee_printf("find else");
//ee_printf("find else");
while (list && ((list->info->data16 & 0xff) != info->data16)){
list=list->next;
ee_printf("find while else \n");}
ee_printf("core list find end \n");
//ee_printf("find while else \n");
}
//ee_printf("core list find end \n");
return list;
}
}
@ -456,7 +458,7 @@ list_head *core_list_find(list_head *list,list_data *info) {
*/
list_head *core_list_reverse(list_head *list) {
ee_printf("entered core_list_reverse");
// ee_printf("entered core_list_reverse");
list_head *next=NULL, *tmp;
while (list) {
tmp=list->next;
@ -464,7 +466,7 @@ list_head *core_list_reverse(list_head *list) {
next=list;
list=tmp;
}
ee_printf("core_list_reverse done");
//ee_printf("core_list_reverse done");
return next;
}
/* Function: core_list_mergesort
@ -493,27 +495,27 @@ list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res)
ee_s32 insize, nmerges, psize, qsize, i;
insize = 1;
char bufftwo[200];
//char bufftwo[200];
while (1) {
p = list;
list = NULL;
tail = NULL;
nmerges = 0; /* count number of merges we do in this pass */
ehitoa(nmerges, bufftwo, 10);
ee_printf(" nmerges default value = %s done \n", bufftwo);
//ehitoa(nmerges, bufftwo, 10);
//ee_printf(" nmerges default value = %s done \n", bufftwo);
while (p) {
nmerges++; /* there exists a merge to be done */
ehitoa(nmerges, bufftwo, 10);
ee_printf(" current nmerges = %s done \n", bufftwo);
//ehitoa(nmerges, bufftwo, 10);
//ee_printf(" current nmerges = %s done \n", bufftwo);
/* step `insize' places along from p */
q = p;
psize = 0;
ehitoa(insize, bufftwo, 10);
ee_printf(" insize = %s done \n", bufftwo);
//ehitoa(insize, bufftwo, 10);
//ee_printf(" insize = %s done \n", bufftwo);
for (i = 0; i < insize; i++) {
ehitoa(i, bufftwo, 10);
ee_printf(" i = %s done \n", bufftwo);
//ehitoa(i, bufftwo, 10);
//ee_printf(" i = %s done \n", bufftwo);
psize++;
q = q->next;
if (!q) break;
@ -521,37 +523,37 @@ list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res)
/* if q hasn't fallen off end, we have two lists to merge */
qsize = insize;
ehitoa(qsize, bufftwo, 10);
ee_printf(" qsize = %s done \n", bufftwo);
//ehitoa(qsize, bufftwo, 10);
//ee_printf(" qsize = %s done \n", bufftwo);
/* now we have two lists; merge them */
while (psize > 0 || (qsize > 0 && q)) {
/* decide whether next element of merge comes from p or q */
if (psize == 0) {
ee_printf("if \n");
//ee_printf("if \n");
/* p is empty; e must come from q. */
e = q; q = q->next; qsize--;
} else if (qsize == 0 || !q) {
ee_printf("else if \n");
//ee_printf("else if \n");
/* q is empty; e must come from p. */
e = p; p = p->next; psize--;
} else if (cmp(p->info,q->info,res) <= 0) {
ee_printf("else if 2 \n");
//ee_printf("else if 2 \n");
/* First element of p is lower (or same); e must come from p. */
e = p; p = p->next; psize--;
} else {
ee_printf("else \n");
//ee_printf("else \n");
/* First element of q is lower; e must come from q. */
e = q; q = q->next; qsize--;
}
/* add the next element to the merged list */
if (tail) {
ee_printf("tail if \n");
//ee_printf("tail if \n");
tail->next = e;
} else {
ee_printf("tail else \n");
//ee_printf("tail else \n");
list = e;
}
tail = e;
@ -569,8 +571,8 @@ list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res)
/* Otherwise repeat, merging lists twice the size */
insize *= 2;
ehitoa(insize, bufftwo, 10);
ee_printf(" insize2 = %s done \n", bufftwo);
//ehitoa(insize, bufftwo, 10);
//ee_printf(" insize2 = %s done \n", bufftwo);
}
#if COMPILER_REQUIRES_SORT_RETURN
return list;

View File

@ -17,396 +17,431 @@ Original Author: Shay Gal-on
*/
/* File: core_main.c
This file contains the framework to acquire a block of memory, seed initial parameters, run the benchmark and report the results.
This file contains the framework to acquire a block of memory, seed
initial parameters, run the benchmark and report the results.
*/
#include "coremark.h"
/* Function: iterate
Run the benchmark for a specified number of iterations.
Run the benchmark for a specified number of iterations.
Operation:
For each type of benchmarked algorithm:
a - Initialize the data block for the algorithm.
b - Execute the algorithm N times.
Operation:
For each type of benchmarked algorithm:
a - Initialize the data block for the algorithm.
b - Execute the algorithm N times.
Returns:
NULL.
Returns:
NULL.
*/
static ee_u16 list_known_crc[] = {(ee_u16)0xd4b0,(ee_u16)0x3340,(ee_u16)0x6a79,(ee_u16)0xe714,(ee_u16)0xe3c1};
static ee_u16 matrix_known_crc[] = {(ee_u16)0xbe52,(ee_u16)0x1199,(ee_u16)0x5608,(ee_u16)0x1fd7,(ee_u16)0x0747};
static ee_u16 state_known_crc[] = {(ee_u16)0x5e47,(ee_u16)0x39bf,(ee_u16)0xe5a4,(ee_u16)0x8e3a,(ee_u16)0x8d84};
int gg_printf(const char *fmt, ...);
int sendstring(const char *p);
void _send_char(char c);
void *iterate(void *pres) {
ee_u32 i;
ee_u16 crc;
core_results *res=(core_results *)pres;
ee_u32 iterations=res->iterations;
res->crc=0;
res->crclist=0;
res->crcmatrix=0;
res->crcstate=0;
/* Reference CRC tables used by main() to validate a run.
   Each table is indexed by the known_id that main() selects from seedcrc:
   0 - 6k performance run, 1 - 6k validation run,
   2 - profile generation run,
   3 - 2K performance run, 4 - 2K validation run. */
/* Expected list CRCs; main() compares them against results[i].crclist. */
static ee_u16 list_known_crc[] = { (ee_u16)0xd4b0,
(ee_u16)0x3340,
(ee_u16)0x6a79,
(ee_u16)0xe714,
(ee_u16)0xe3c1 };
/* Expected matrix CRCs; main() compares them against results[i].crcmatrix. */
static ee_u16 matrix_known_crc[] = { (ee_u16)0xbe52,
(ee_u16)0x1199,
(ee_u16)0x5608,
(ee_u16)0x1fd7,
(ee_u16)0x0747 };
/* Expected state CRCs; main() compares them against results[i].crcstate. */
static ee_u16 state_known_crc[] = { (ee_u16)0x5e47,
(ee_u16)0x39bf,
(ee_u16)0xe5a4,
(ee_u16)0x8e3a,
(ee_u16)0x8d84 };
void *
iterate(void *pres)
{
ee_u32 i;
ee_u16 crc;
core_results *res = (core_results *)pres;
ee_u32 iterations = res->iterations;
res->crc = 0;
res->crclist = 0;
res->crcmatrix = 0;
res->crcstate = 0;
for (i=0; i<iterations; i++) {
crc=core_bench_list(res,1);
res->crc=crcu16(crc,res->crc);
crc=core_bench_list(res,-1);
res->crc=crcu16(crc,res->crc);
if (i==0) res->crclist=res->crc;
}
return NULL;
for (i = 0; i < iterations; i++)
{
crc = core_bench_list(res, 1);
res->crc = crcu16(crc, res->crc);
crc = core_bench_list(res, -1);
res->crc = crcu16(crc, res->crc);
if (i == 0)
res->crclist = res->crc;
}
return NULL;
}
#if (SEED_METHOD==SEED_ARG)
#if (SEED_METHOD == SEED_ARG)
ee_s32 get_seed_args(int i, int argc, char *argv[]);
#define get_seed(x) (ee_s16)get_seed_args(x,argc,argv)
#define get_seed_32(x) get_seed_args(x,argc,argv)
#define get_seed(x) (ee_s16) get_seed_args(x, argc, argv)
#define get_seed_32(x) get_seed_args(x, argc, argv)
#else /* via function or volatile */
ee_s32 get_seed_32(int i);
#define get_seed(x) (ee_s16)get_seed_32(x)
#define get_seed(x) (ee_s16) get_seed_32(x)
#endif
#if (MEM_METHOD==MEM_STATIC)
#if (MEM_METHOD == MEM_STATIC)
ee_u8 static_memblk[TOTAL_DATA_SIZE];
#endif
char *mem_name[3] = {"Static","Heap","Stack"};
char *mem_name[3] = { "Static", "Heap", "Stack" };
/* Function: main
Main entry routine for the benchmark.
This function is responsible for the following steps:
Main entry routine for the benchmark.
This function is responsible for the following steps:
1 - Initialize input seeds from a source that cannot be determined at compile time.
2 - Initialize memory block for use.
3 - Run and time the benchmark.
4 - Report results, testing the validity of the output if the seeds are known.
1 - Initialize input seeds from a source that cannot be determined at
compile time.
2 - Initialize memory block for use.
3 - Run and time the benchmark.
4 - Report results, testing the validity of the output if the seeds are
known.
Arguments:
1 - first seed : Any value
2 - second seed : Must be identical to first for iterations to be identical
3 - third seed : Any value, should be at least an order of magnitude less than the input size, but bigger than 32.
4 - Iterations : Special, if set to 0, iterations will be automatically determined such that the benchmark will run between 10 to 100 secs
Arguments:
1 - first seed : Any value
2 - second seed : Must be identical to first for iterations to be
identical
3 - third seed : Any value, should be at least an order of magnitude
less than the input size, but bigger than 32.
4 - Iterations : Special, if set to 0, iterations will be automatically
determined such that the benchmark will run between 10 to 100 secs
*/
#if MAIN_HAS_NOARGC
MAIN_RETURN_TYPE main(void) {
int argc=0;
char *argv[1];
MAIN_RETURN_TYPE
main(void)
{
int argc = 0;
char *argv[1];
#else
MAIN_RETURN_TYPE main(int argc, char *argv[]) {
MAIN_RETURN_TYPE
main(int argc, char *argv[])
{
#endif
//const char s[] = "Elizabeth";
//ee_printf("eeprint");
//ee_printf("Trying to print: %d", 0);
/*gg_printf("Elizabeth");*/
//sendstring("Elizabeth");
//sendstring(s);
//return(0);
ee_u16 i,j=0,num_algorithms=0;
ee_s16 known_id=-1,total_errors=0;
ee_u16 seedcrc=0;
CORE_TICKS total_time;
core_results results[MULTITHREAD];
#if (MEM_METHOD==MEM_STACK)
ee_u8 stack_memblock[TOTAL_DATA_SIZE*MULTITHREAD];
ee_printf("SHOWTIME\n");
ee_u16 i, j = 0, num_algorithms = 0;
ee_s16 known_id = -1, total_errors = 0;
ee_u16 seedcrc = 0;
CORE_TICKS total_time;
core_results results[MULTITHREAD];
#if (MEM_METHOD == MEM_STACK)
ee_u8 stack_memblock[TOTAL_DATA_SIZE * MULTITHREAD];
#endif
/* first call any initializations needed */
portable_init(&(results[0].port), &argc, argv);
/* First some checks to make sure benchmark will run ok */
if (sizeof(struct list_head_s)>128) {
ee_printf("list_head structure too big for comparable data!\n");
return MAIN_RETURN_VAL;
}
results[0].seed1=get_seed(1);
results[0].seed2=get_seed(2);
results[0].seed3=get_seed(3);
results[0].iterations=get_seed_32(4);
/* first call any initializations needed */
portable_init(&(results[0].port), &argc, argv);
/* First some checks to make sure benchmark will run ok */
if (sizeof(struct list_head_s) > 128)
{
ee_printf("list_head structure too big for comparable data!\n");
return MAIN_RETURN_VAL;
}
results[0].seed1 = get_seed(1);
results[0].seed2 = get_seed(2);
results[0].seed3 = get_seed(3);
results[0].iterations = get_seed_32(4);
#if CORE_DEBUG
results[0].iterations=1;
results[0].iterations = 1;
#endif
results[0].execs=get_seed_32(5);
if (results[0].execs==0) { /* if not supplied, execute all algorithms */
results[0].execs=ALL_ALGORITHMS_MASK;
}
/* put in some default values based on one seed only for easy testing */
if ((results[0].seed1==0) && (results[0].seed2==0) && (results[0].seed3==0)) { /* validation run */
results[0].seed1=0;
results[0].seed2=0;
results[0].seed3=0x66;
}
if ((results[0].seed1==1) && (results[0].seed2==0) && (results[0].seed3==0)) { /* perfromance run */
results[0].seed1=0x3415;
results[0].seed2=0x3415;
results[0].seed3=0x66;
}
#if (MEM_METHOD==MEM_STATIC)
results[0].memblock[0]=(void *)static_memblk;
results[0].size=TOTAL_DATA_SIZE;
ee_printf("%d \n total data size", TOTAL_DATA_SIZE);
results[0].err=0;
#if (MULTITHREAD>1)
#error "Cannot use a static data area with multiple contexts!"
#endif
#elif (MEM_METHOD==MEM_MALLOC)
for (i=0 ; i<MULTITHREAD; i++) {
ee_s32 malloc_override=get_seed(7);
if (malloc_override != 0)
results[i].size=malloc_override;
ee_printf("%d \n malloc datasize", malloc_override);
else
results[i].size=TOTAL_DATA_SIZE;
results[i].memblock[0]=portable_malloc(results[i].size);
results[i].seed1=results[0].seed1;
results[i].seed2=results[0].seed2;
results[i].seed3=results[0].seed3;
results[i].err=0;
results[i].execs=results[0].execs;
}
#elif (MEM_METHOD==MEM_STACK)
for (i=0 ; i<MULTITHREAD; i++) {
results[i].memblock[0]=stack_memblock+i*TOTAL_DATA_SIZE;
results[i].size=TOTAL_DATA_SIZE;
results[i].seed1=results[0].seed1;
results[i].seed2=results[0].seed2;
results[i].seed3=results[0].seed3;
results[i].err=0;
results[i].execs=results[0].execs;
}
results[0].execs = get_seed_32(5);
if (results[0].execs == 0)
{ /* if not supplied, execute all algorithms */
results[0].execs = ALL_ALGORITHMS_MASK;
}
/* put in some default values based on one seed only for easy testing */
if ((results[0].seed1 == 0) && (results[0].seed2 == 0)
&& (results[0].seed3 == 0))
{ /* perfromance run */
results[0].seed1 = 0;
results[0].seed2 = 0;
results[0].seed3 = 0x66;
}
if ((results[0].seed1 == 1) && (results[0].seed2 == 0)
&& (results[0].seed3 == 0))
{ /* validation run */
results[0].seed1 = 0x3415;
results[0].seed2 = 0x3415;
results[0].seed3 = 0x66;
}
#if (MEM_METHOD == MEM_STATIC)
results[0].memblock[0] = (void *)static_memblk;
results[0].size = TOTAL_DATA_SIZE;
results[0].err = 0;
#if (MULTITHREAD > 1)
#error "Cannot use a static data area with multiple contexts!"
#endif
#elif (MEM_METHOD == MEM_MALLOC)
for (i = 0; i < MULTITHREAD; i++)
{
ee_s32 malloc_override = get_seed(7);
if (malloc_override != 0)
results[i].size = malloc_override;
else
results[i].size = TOTAL_DATA_SIZE;
results[i].memblock[0] = portable_malloc(results[i].size);
results[i].seed1 = results[0].seed1;
results[i].seed2 = results[0].seed2;
results[i].seed3 = results[0].seed3;
results[i].err = 0;
results[i].execs = results[0].execs;
}
#elif (MEM_METHOD == MEM_STACK)
for (i = 0; i < MULTITHREAD; i++)
{
results[i].memblock[0] = stack_memblock + i * TOTAL_DATA_SIZE;
results[i].size = TOTAL_DATA_SIZE;
results[i].seed1 = results[0].seed1;
results[i].seed2 = results[0].seed2;
results[i].seed3 = results[0].seed3;
results[i].err = 0;
results[i].execs = results[0].execs;
}
#else
#error "Please define a way to initialize a memory block."
#endif
/* Data init */
/* Find out how much space we have based on number of algorithms */
for (i=0; i<NUM_ALGORITHMS; i++) {
if ((1<<(ee_u32)i) & results[0].execs)
num_algorithms++;
}
for (i=0 ; i<MULTITHREAD; i++)
results[i].size=results[i].size/num_algorithms;
/* Assign pointers */
for (i=0; i<NUM_ALGORITHMS; i++) {
ee_u32 ctx;
if ((1<<(ee_u32)i) & results[0].execs) {
for (ctx=0 ; ctx<MULTITHREAD; ctx++)
results[ctx].memblock[i+1]=(char *)(results[ctx].memblock[0])+results[0].size*j;
j++;
}
}
/* call inits */
for (i=0 ; i<MULTITHREAD; i++) {
if (results[i].execs & ID_LIST) {
ee_printf("loop");
ee_printf("%d \n", MULTITHREAD);
ee_printf("%d \n sizethread ", results[0].size);
/* Data init */
/* Find out how much space we have based on number of algorithms */
for (i = 0; i < NUM_ALGORITHMS; i++)
{
if ((1 << (ee_u32)i) & results[0].execs)
num_algorithms++;
}
for (i = 0; i < MULTITHREAD; i++)
results[i].size = results[i].size / num_algorithms;
/* Assign pointers */
for (i = 0; i < NUM_ALGORITHMS; i++)
{
ee_u32 ctx;
if ((1 << (ee_u32)i) & results[0].execs)
{
for (ctx = 0; ctx < MULTITHREAD; ctx++)
results[ctx].memblock[i + 1]
= (char *)(results[ctx].memblock[0]) + results[0].size * j;
j++;
}
}
/* call inits */
for (i = 0; i < MULTITHREAD; i++)
{
if (results[i].execs & ID_LIST)
{
results[i].list = core_list_init(
results[0].size, results[i].memblock[1], results[i].seed1);
}
if (results[i].execs & ID_MATRIX)
{
core_init_matrix(results[0].size,
results[i].memblock[2],
(ee_s32)results[i].seed1
| (((ee_s32)results[i].seed2) << 16),
&(results[i].mat));
}
if (results[i].execs & ID_STATE)
{
core_init_state(
results[0].size, results[i].seed1, results[i].memblock[3]);
}
}
results[i].list=core_list_init(results[0].size,results[i].memblock[1],results[i].seed1);
}
if (results[i].execs & ID_MATRIX) {
core_init_matrix(results[0].size, results[i].memblock[2], (ee_s32)results[i].seed1 | (((ee_s32)results[i].seed2) << 16), &(results[i].mat) );
}
if (results[i].execs & ID_STATE) {
core_init_state(results[0].size,results[i].seed1,results[i].memblock[3]);
}
}
/*int foreverLoop = 1;
secs_ret timing = 0;
int timingInt;
ee_printf("\nENTERING FOREVER WHILE LOOP\n");
while(foreverLoop == 1)
{
start_time();
//filler
stop_time();
timing += time_in_secs(get_time());
timingInt = (int)timing;
ee_printf("Timing is %d\n", timingInt);
}*/
/* automatically determine number of iterations if not set */
if (results[0].iterations==0) {
secs_ret secs_passed=0;
ee_u32 divisor;
results[0].iterations=1;
int iterationInc = 0;
ee_printf("\n\nENTERING ITERATION WHILE LOOP\n");
while (secs_passed < (secs_ret)1) {
if(iterationInc != 0)
{
results[0].iterations++;
}
ee_printf("iterations is %d\n", results[0].iterations);
start_time();
iterate(&results[0]);
stop_time();
secs_passed = time_in_secs(get_time());
int secs_passed_int = (int)secs_passed;
ee_printf("secs passed is %d\n", secs_passed_int);
iterationInc++;
}
ee_printf("LEAVING ITERATION WHILE LOOP!\n\n");
/* now we know it executes for at least 1 sec, set actual run time at about 10 secs */
divisor=(ee_u32)secs_passed;
ee_printf("divisor is %lu\n", divisor);
if (divisor==0) /* some machines cast float to int as 0 since this conversion is not defined by ANSI, but we know at least one second passed */
divisor=1;
results[0].iterations*=1+10/divisor;
ee_printf("iterations is %d\n", results[0].iterations);
}
/* perform actual benchmark */
ee_printf("Starting benchmark\n");
start_time();
#if (MULTITHREAD>1)
if (default_num_contexts>MULTITHREAD) {
default_num_contexts=MULTITHREAD;
}
for (i=0 ; i<default_num_contexts; i++) {
results[i].iterations=results[0].iterations;
results[i].execs=results[0].execs;
core_start_parallel(&results[i]);
}
for (i=0 ; i<default_num_contexts; i++) {
core_stop_parallel(&results[i]);
}
/* automatically determine number of iterations if not set */
if (results[0].iterations == 0)
{
secs_ret secs_passed = 0;
ee_u32 divisor;
results[0].iterations = 1;
while (secs_passed < (secs_ret)1)
{
results[0].iterations *= 10;
start_time();
iterate(&results[0]);
stop_time();
secs_passed = time_in_secs(get_time());
}
/* now we know it executes for at least 1 sec, set actual run time at
* about 10 secs */
divisor = (ee_u32)secs_passed;
if (divisor == 0) /* some machines cast float to int as 0 since this
conversion is not defined by ANSI, but we know at
least one second passed */
divisor = 1;
results[0].iterations *= 1 + 10 / divisor;
}
/* perform actual benchmark */
start_time();
#if (MULTITHREAD > 1)
if (default_num_contexts > MULTITHREAD)
{
default_num_contexts = MULTITHREAD;
}
for (i = 0; i < default_num_contexts; i++)
{
results[i].iterations = results[0].iterations;
results[i].execs = results[0].execs;
core_start_parallel(&results[i]);
}
for (i = 0; i < default_num_contexts; i++)
{
core_stop_parallel(&results[i]);
}
#else
iterate(&results[0]);
iterate(&results[0]);
#endif
stop_time();
total_time=get_time();
ee_printf("total time is %u\n", total_time);
ee_printf("ending benchmark\n");
/* get a function of the input to report */
seedcrc=crc16(results[0].seed1,seedcrc);
seedcrc=crc16(results[0].seed2,seedcrc);
seedcrc=crc16(results[0].seed3,seedcrc);
seedcrc=crc16(results[0].size,seedcrc);
switch (seedcrc) { /* test known output for common seeds */
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
known_id=0;
ee_printf("6k performance run parameters for coremark.\n");
break;
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per algorithm */
known_id=1;
ee_printf("6k validation run parameters for coremark.\n");
break;
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm */
known_id=2;
ee_printf("Profile generation run parameters for coremark.\n");
break;
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
known_id=3;
ee_printf("2K performance run parameters for coremark.\n");
break;
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per algorithm */
known_id=4;
ee_printf("2K validation run parameters for coremark.\n");
break;
default:
total_errors=-1;
break;
}
if (known_id>=0) {
for (i=0 ; i<default_num_contexts; i++) {
results[i].err=0;
if ((results[i].execs & ID_LIST) &&
(results[i].crclist!=list_known_crc[known_id])) {
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",i,results[i].crclist,list_known_crc[known_id]);
results[i].err++;
}
if ((results[i].execs & ID_MATRIX) &&
(results[i].crcmatrix!=matrix_known_crc[known_id])) {
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",i,results[i].crcmatrix,matrix_known_crc[known_id]);
results[i].err++;
}
if ((results[i].execs & ID_STATE) &&
(results[i].crcstate!=state_known_crc[known_id])) {
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",i,results[i].crcstate,state_known_crc[known_id]);
results[i].err++;
}
total_errors+=results[i].err;
}
}
total_errors+=check_data_types();
/* and report results */
//ee_printf("CoreMark Size : %lu\n", (long unsigned) results[0].size);
ee_printf("CoreMark Size : %lu\n", (long unsigned) results[0].size);
ee_printf("Total ticks : %lu\n", (long unsigned) total_time);
#if HAS_FLOAT
ee_printf("Total time (secs): %f\n",time_in_secs(total_time));
if (time_in_secs(total_time) > 0)
ee_printf("Iterations/Sec : %f\n",default_num_contexts*results[0].iterations/time_in_secs(total_time));
#else
ee_printf("Total time (secs): %d\n,time_in_secs(total_time)");
if (time_in_secs(total_time) > 0)
ee_printf("Iterations/Sec : %d\n",default_num_contexts*results[0].iterations/time_in_secs(total_time));
#endif
if (time_in_secs(total_time) < 10) {
ee_printf("ERROR! Must execute for at least 10 secs for a valid result!\n");
total_errors++;
}
stop_time();
total_time = get_time();
/* get a function of the input to report */
seedcrc = crc16(results[0].seed1, seedcrc);
seedcrc = crc16(results[0].seed2, seedcrc);
seedcrc = crc16(results[0].seed3, seedcrc);
seedcrc = crc16(results[0].size, seedcrc);
ee_printf("Iterations : %lu\n", (long unsigned) default_num_contexts*results[0].iterations);
ee_printf("Compiler version : %s\n",COMPILER_VERSION);
ee_printf("Compiler flags : %s\n",COMPILER_FLAGS);
#if (MULTITHREAD>1)
ee_printf("Parallel %s : %d\n",PARALLEL_METHOD,default_num_contexts);
#endif
ee_printf("Memory location : %s\n",MEM_LOCATION);
/* output for verification */
ee_printf("seedcrc : 0x%04x\n",seedcrc);
if (results[0].execs & ID_LIST)
for (i=0 ; i<default_num_contexts; i++)
ee_printf("[%d]crclist : 0x%04x\n",i,results[i].crclist);
if (results[0].execs & ID_MATRIX)
for (i=0 ; i<default_num_contexts; i++)
ee_printf("[%d]crcmatrix : 0x%04x\n",i,results[i].crcmatrix);
if (results[0].execs & ID_STATE)
for (i=0 ; i<default_num_contexts; i++)
ee_printf("[%d]crcstate : 0x%04x\n",i,results[i].crcstate);
for (i=0 ; i<default_num_contexts; i++)
ee_printf("[%d]crcfinal : 0x%04x\n",i,results[i].crc);
if (total_errors==0) {
ee_printf("Correct operation validated. See README.md for run and reporting rules.\n");
switch (seedcrc)
{ /* test known output for common seeds */
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
known_id = 0;
ee_printf("6k performance run parameters for coremark.\n");
break;
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per
algorithm */
known_id = 1;
ee_printf("6k validation run parameters for coremark.\n");
break;
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm
*/
known_id = 2;
ee_printf("Profile generation run parameters for coremark.\n");
break;
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
known_id = 3;
ee_printf("2K performance run parameters for coremark.\n");
break;
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per
algorithm */
known_id = 4;
ee_printf("2K validation run parameters for coremark.\n");
break;
default:
total_errors = -1;
break;
}
if (known_id >= 0)
{
for (i = 0; i < default_num_contexts; i++)
{
results[i].err = 0;
if ((results[i].execs & ID_LIST)
&& (results[i].crclist != list_known_crc[known_id]))
{
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",
i,
results[i].crclist,
list_known_crc[known_id]);
results[i].err++;
}
if ((results[i].execs & ID_MATRIX)
&& (results[i].crcmatrix != matrix_known_crc[known_id]))
{
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",
i,
results[i].crcmatrix,
matrix_known_crc[known_id]);
results[i].err++;
}
if ((results[i].execs & ID_STATE)
&& (results[i].crcstate != state_known_crc[known_id]))
{
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",
i,
results[i].crcstate,
state_known_crc[known_id]);
results[i].err++;
}
total_errors += results[i].err;
}
}
total_errors += check_data_types();
/* and report results */
ee_printf("CoreMark Size : %lu\n", (long unsigned)results[0].size);
ee_printf("Total ticks : %lu\n", (long unsigned)total_time);
#if HAS_FLOAT
if (known_id==3) {
unsigned long long tmp = (unsigned long long) 1000.0*default_num_contexts*results[0].iterations/time_in_secs(total_time);
ee_printf("Total time (secs): %f\n", time_in_secs(total_time));
if (time_in_secs(total_time) > 0)
ee_printf("Iterations/Sec : %f\n",
default_num_contexts * results[0].iterations
/ time_in_secs(total_time));
#else
ee_printf("Total time (secs): %d\n", time_in_secs(total_time));
if (time_in_secs(total_time) > 0)
ee_printf("Iterations/Sec : %d\n",
default_num_contexts * results[0].iterations
/ time_in_secs(total_time));
#endif
if (time_in_secs(total_time) < 10)
{
ee_printf(
"ERROR! Must execute for at least 10 secs for a valid result!\n");
total_errors++;
}
ee_printf("Iterations : %lu\n",
(long unsigned)default_num_contexts * results[0].iterations);
ee_printf("Compiler version : %s\n", COMPILER_VERSION);
ee_printf("Compiler flags : %s\n", COMPILER_FLAGS);
#if (MULTITHREAD > 1)
ee_printf("Parallel %s : %d\n", PARALLEL_METHOD, default_num_contexts);
#endif
ee_printf("Memory location : %s\n", MEM_LOCATION);
/* output for verification */
ee_printf("seedcrc : 0x%04x\n", seedcrc);
if (results[0].execs & ID_LIST)
for (i = 0; i < default_num_contexts; i++)
ee_printf("[%d]crclist : 0x%04x\n", i, results[i].crclist);
if (results[0].execs & ID_MATRIX)
for (i = 0; i < default_num_contexts; i++)
ee_printf("[%d]crcmatrix : 0x%04x\n", i, results[i].crcmatrix);
if (results[0].execs & ID_STATE)
for (i = 0; i < default_num_contexts; i++)
ee_printf("[%d]crcstate : 0x%04x\n", i, results[i].crcstate);
for (i = 0; i < default_num_contexts; i++)
ee_printf("[%d]crcfinal : 0x%04x\n", i, results[i].crc);
if (total_errors == 0)
{
ee_printf(
"Correct operation validated. See README.md for run and reporting "
"rules.\n");
#if HAS_FLOAT
if (known_id == 3)
{
unsigned long long tmp = (unsigned long long) 1000.0*default_num_contexts*results[0].iterations/time_in_secs(total_time);
secs_ret totalmsecs = time_in_secs(total_time);
int totalmint = (int) totalmsecs;
ee_printf("ELAPSED S: %d\n", totalmint);
ee_printf("ELAPSED TIME: %d\n", totalmint);
ee_printf("CoreMark 1.0 : %d / %s %s\n",tmp,COMPILER_VERSION,COMPILER_FLAGS);
ee_printf("CoreMark 1.0 : %d / %s %s",
tmp,
COMPILER_VERSION,
COMPILER_FLAGS);
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
ee_printf(" / %s",MEM_LOCATION);
ee_printf(" / %s", MEM_LOCATION);
#else
ee_printf(" / %s",mem_name[MEM_METHOD]);
ee_printf(" / %s", mem_name[MEM_METHOD]);
#endif
#if (MULTITHREAD>1)
ee_printf(" / %d:%s",default_num_contexts,PARALLEL_METHOD);
#if (MULTITHREAD > 1)
ee_printf(" / %d:%s", default_num_contexts, PARALLEL_METHOD);
#endif
ee_printf("\n");
}
ee_printf("\n");
}
#endif
}
if (total_errors>0)
ee_printf("Errors detected\n");
if (total_errors<0)
ee_printf("Cannot validate operation for these seed values, please compare with results on a known platform.\n");
}
if (total_errors > 0)
ee_printf("Errors detected\n");
if (total_errors < 0)
ee_printf(
"Cannot validate operation for these seed values, please compare "
"with results on a known platform.\n");
#if (MEM_METHOD==MEM_MALLOC)
for (i=0 ; i<MULTITHREAD; i++)
portable_free(results[i].memblock[0]);
#if (MEM_METHOD == MEM_MALLOC)
for (i = 0; i < MULTITHREAD; i++)
portable_free(results[i].memblock[0]);
#endif
/* And last call any target specific code for finalizing */
portable_fini(&(results[0].port));
/* And last call any target specific code for finalizing */
portable_fini(&(results[0].port));
return MAIN_RETURN_VAL;
return MAIN_RETURN_VAL;
}
//pls

View File

@ -19,290 +19,341 @@ Original Author: Shay Gal-on
#include "coremark.h"
/*
Topic: Description
Matrix manipulation benchmark
This very simple algorithm forms the basis of many more complex algorithms.
The tight inner loop is the focus of many optimizations (compiler as well as hardware based)
and is thus relevant for embedded processing.
The total available data space will be divided to 3 parts:
NxN Matrix A - initialized with small values (upper 3/4 of the bits all zero).
NxN Matrix B - initialized with medium values (upper half of the bits all zero).
NxN Matrix C - used for the result.
Matrix manipulation benchmark
The actual values for A and B must be derived based on input that is not available at compile time.
This very simple algorithm forms the basis of many more complex
algorithms.
The tight inner loop is the focus of many optimizations (compiler as
well as hardware based) and is thus relevant for embedded processing.
The total available data space will be divided to 3 parts:
NxN Matrix A - initialized with small values (upper 3/4 of the bits all
zero).
NxN Matrix B - initialized with medium values (upper half of the bits all
zero).
NxN Matrix C - used for the result.
The actual values for A and B must be derived based on input that is not
available at compile time.
*/
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val);
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval);
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val);
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val);
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val);
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val);
#define matrix_test_next(x) (x+1)
#define matrix_clip(x,y) ((y) ? (x) & 0x0ff : (x) & 0x0ffff)
#define matrix_big(x) (0xf000 | (x))
#define bit_extract(x,from,to) (((x)>>(from)) & (~(0xffffffff << (to))))
/* Helper macros for the matrix benchmark. Every argument is parenthesised
   so that an expression argument keeps its own grouping after expansion. */
/* matrix_test_next: the value following x. */
#define matrix_test_next(x) ((x) + 1)
/* matrix_clip: keep the low 8 bits of x when y is non-zero, otherwise keep
   the low 16 bits. */
#define matrix_clip(x, y) ((y) ? (x)&0x0ff : (x)&0x0ffff)
/* matrix_big: x with the 0xf000 bits forced on. */
#define matrix_big(x) (0xf000 | (x))
/* bit_extract: shift x right by `from`, then keep the low `to` bits.
   Despite its name, `to` acts as a bit count, not an end position. */
#define bit_extract(x, from, to) (((x) >> (from)) & (~(0xffffffff << (to))))
#if CORE_DEBUG
void printmat(MATDAT *A, ee_u32 N, char *name) {
ee_u32 i,j;
ee_printf("Matrix %s [%dx%d]:\n",name,N,N);
for (i=0; i<N; i++) {
for (j=0; j<N; j++) {
if (j!=0)
ee_printf(",");
ee_printf("%d",A[i*N+j]);
}
ee_printf("\n");
}
/* Debug helper: print the N x N matrix A (stored row by row) under a
   header line carrying `name`, one comma-separated text line per row. */
void
printmat(MATDAT *A, ee_u32 N, char *name)
{
    ee_u32 row, col;

    ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
    for (row = 0; row < N; row++)
    {
        /* Index of the first element of this row. */
        ee_u32 base = row * N;

        /* The loop guard ensures N >= 1 here, so the row has a first
           element; the remaining ones are each preceded by a comma. */
        ee_printf("%d", A[base]);
        for (col = 1; col < N; col++)
        {
            ee_printf(",");
            ee_printf("%d", A[base + col]);
        }
        ee_printf("\n");
    }
}
void printmatC(MATRES *C, ee_u32 N, char *name) {
ee_u32 i,j;
ee_printf("Matrix %s [%dx%d]:\n",name,N,N);
for (i=0; i<N; i++) {
for (j=0; j<N; j++) {
if (j!=0)
ee_printf(",");
ee_printf("%d",C[i*N+j]);
}
ee_printf("\n");
}
/* Debug helper: print the N x N result matrix C (stored row by row) under
   a header line carrying `name`, one comma-separated text line per row. */
void
printmatC(MATRES *C, ee_u32 N, char *name)
{
    ee_u32 row, col;

    ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
    for (row = 0; row < N; row++)
    {
        /* Index of the first element of this row. */
        ee_u32 base = row * N;

        /* The loop guard ensures N >= 1 here, so the row has a first
           element; the remaining ones are each preceded by a comma. */
        ee_printf("%d", C[base]);
        for (col = 1; col < N; col++)
        {
            ee_printf(",");
            ee_printf("%d", C[base + col]);
        }
        ee_printf("\n");
    }
}
#endif
/* Function: core_bench_matrix
Benchmark function
Benchmark function
Iterate <matrix_test> N times,
changing the matrix values slightly by a constant amount each time.
Iterate <matrix_test> N times,
changing the matrix values slightly by a constant amount each time.
*/
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc) {
ee_u32 N=p->N;
MATRES *C=p->C;
MATDAT *A=p->A;
MATDAT *B=p->B;
MATDAT val=(MATDAT)seed;
ee_u16
core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc)
{
ee_u32 N = p->N;
MATRES *C = p->C;
MATDAT *A = p->A;
MATDAT *B = p->B;
MATDAT val = (MATDAT)seed;
crc=crc16(matrix_test(N,C,A,B,val),crc);
crc = crc16(matrix_test(N, C, A, B, val), crc);
return crc;
return crc;
}
/* Function: matrix_test
Perform matrix manipulation.
Perform matrix manipulation.
Parameters:
N - Dimensions of the matrix.
C - memory for result matrix.
A - input matrix
B - operator matrix (not changed during operations)
Parameters:
N - Dimensions of the matrix.
C - memory for result matrix.
A - input matrix
B - operator matrix (not changed during operations)
Returns:
A CRC value that captures all results calculated in the function.
In particular, crc of the value calculated on the result matrix
after each step by <matrix_sum>.
Returns:
A CRC value that captures all results calculated in the function.
In particular, crc of the value calculated on the result matrix
after each step by <matrix_sum>.
Operation:
1 - Add a constant value to all elements of a matrix.
2 - Multiply a matrix by a constant.
3 - Multiply a matrix by a vector.
4 - Multiply a matrix by a matrix.
5 - Add a constant value to all elements of a matrix.
Operation:
After the last step, matrix A is back to original contents.
1 - Add a constant value to all elements of a matrix.
2 - Multiply a matrix by a constant.
3 - Multiply a matrix by a vector.
4 - Multiply a matrix by a matrix.
5 - Add a constant value to all elements of a matrix.
After the last step, matrix A is back to original contents.
*/
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val) {
ee_u16 crc=0;
MATDAT clipval=matrix_big(val);
ee_s16
matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val)
{
ee_u16 crc = 0;
MATDAT clipval = matrix_big(val);
matrix_add_const(N,A,val); /* make sure data changes */
matrix_add_const(N, A, val); /* make sure data changes */
#if CORE_DEBUG
printmat(A,N,"matrix_add_const");
printmat(A, N, "matrix_add_const");
#endif
matrix_mul_const(N,C,A,val);
crc=crc16(matrix_sum(N,C,clipval),crc);
matrix_mul_const(N, C, A, val);
crc = crc16(matrix_sum(N, C, clipval), crc);
#if CORE_DEBUG
printmatC(C,N,"matrix_mul_const");
printmatC(C, N, "matrix_mul_const");
#endif
matrix_mul_vect(N,C,A,B);
crc=crc16(matrix_sum(N,C,clipval),crc);
matrix_mul_vect(N, C, A, B);
crc = crc16(matrix_sum(N, C, clipval), crc);
#if CORE_DEBUG
printmatC(C,N,"matrix_mul_vect");
printmatC(C, N, "matrix_mul_vect");
#endif
matrix_mul_matrix(N,C,A,B);
crc=crc16(matrix_sum(N,C,clipval),crc);
matrix_mul_matrix(N, C, A, B);
crc = crc16(matrix_sum(N, C, clipval), crc);
#if CORE_DEBUG
printmatC(C,N,"matrix_mul_matrix");
printmatC(C, N, "matrix_mul_matrix");
#endif
matrix_mul_matrix_bitextract(N,C,A,B);
crc=crc16(matrix_sum(N,C,clipval),crc);
matrix_mul_matrix_bitextract(N, C, A, B);
crc = crc16(matrix_sum(N, C, clipval), crc);
#if CORE_DEBUG
printmatC(C,N,"matrix_mul_matrix_bitextract");
printmatC(C, N, "matrix_mul_matrix_bitextract");
#endif
matrix_add_const(N,A,-val); /* return matrix to initial value */
return crc;
matrix_add_const(N, A, -val); /* return matrix to initial value */
return crc;
}
/* Function : matrix_init
Initialize the memory block for matrix benchmarking.
Initialize the memory block for matrix benchmarking.
Parameters:
blksize - Size of memory to be initialized.
memblk - Pointer to memory block.
seed - Actual values chosen depend on the seed parameter.
p - pointers to <mat_params> containing initialized matrixes.
Parameters:
blksize - Size of memory to be initialized.
memblk - Pointer to memory block.
seed - Actual values chosen depend on the seed parameter.
p - pointers to <mat_params> containing initialized matrixes.
Returns:
Matrix dimensions.
Note:
The seed parameter MUST be supplied from a source that cannot be determined at compile time
Returns:
Matrix dimensions.
Note:
The seed parameter MUST be supplied from a source that cannot be
determined at compile time
*/
ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p) {
ee_u32 N=0;
MATDAT *A;
MATDAT *B;
ee_s32 order=1;
MATDAT val;
ee_u32 i=0,j=0;
if (seed==0)
seed=1;
while (j<blksize) {
i++;
j=i*i*2*4;
}
N=i-1;
A=(MATDAT *)align_mem(memblk);
B=A+N*N;
ee_u32
core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p)
{
ee_u32 N = 0;
MATDAT *A;
MATDAT *B;
ee_s32 order = 1;
MATDAT val;
ee_u32 i = 0, j = 0;
if (seed == 0)
seed = 1;
while (j < blksize)
{
i++;
j = i * i * 2 * 4;
}
N = i - 1;
A = (MATDAT *)align_mem(memblk);
B = A + N * N;
for (i=0; i<N; i++) {
for (j=0; j<N; j++) {
seed = ( ( order * seed ) % 65536 );
val = (seed + order);
val=matrix_clip(val,0);
B[i*N+j] = val;
val = (val + order);
val=matrix_clip(val,1);
A[i*N+j] = val;
order++;
}
}
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
seed = ((order * seed) % 65536);
val = (seed + order);
val = matrix_clip(val, 0);
B[i * N + j] = val;
val = (val + order);
val = matrix_clip(val, 1);
A[i * N + j] = val;
order++;
}
}
p->A=A;
p->B=B;
p->C=(MATRES *)align_mem(B+N*N);
p->N=N;
p->A = A;
p->B = B;
p->C = (MATRES *)align_mem(B + N * N);
p->N = N;
#if CORE_DEBUG
printmat(A,N,"A");
printmat(B,N,"B");
printmat(A, N, "A");
printmat(B, N, "B");
#endif
return N;
return N;
}
/* Function: matrix_sum
Calculate a function that depends on the values of elements in the matrix.
Calculate a function that depends on the values of elements in the
matrix.
For each element, accumulate into a temporary variable.
As long as this value is under the parameter clipval,
add 1 to the result if the element is bigger than the previous.
Otherwise, reset the accumulator and add 10 to the result.
For each element, accumulate into a temporary variable.
As long as this value is under the parameter clipval,
add 1 to the result if the element is bigger than the previous.
Otherwise, reset the accumulator and add 10 to the result.
*/
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval) {
MATRES tmp=0,prev=0,cur=0;
ee_s16 ret=0;
ee_u32 i,j;
for (i=0; i<N; i++) {
for (j=0; j<N; j++) {
cur=C[i*N+j];
tmp+=cur;
if (tmp>clipval) {
ret+=10;
tmp=0;
} else {
ret += (cur>prev) ? 1 : 0;
}
prev=cur;
}
}
return ret;
ee_s16
matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval)
{
    /* Walks the N x N matrix C in row-major order and builds a score:
     *   - every element is added to a running total;
     *   - when the total exceeds clipval, 10 is added to the score and the
     *     total restarts from zero;
     *   - otherwise 1 is added when the element is larger than the one
     *     visited just before it.
     * Returns the accumulated score. */
    MATRES running  = 0; /* running total, reset after each clip */
    MATRES previous = 0; /* element seen on the preceding step */
    ee_s16 score    = 0;
    ee_u32 row, col;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            MATRES current = C[row * N + col];

            running += current;
            if (running > clipval)
            {
                score += 10;
                running = 0;
            }
            else if (current > previous)
            {
                score += 1;
            }
            previous = current;
        }
    }
    return score;
}
/* Function: matrix_mul_const
Multiply a matrix by a constant.
This could be used as a scaler for instance.
Multiply a matrix by a constant.
This could be used as a scaler for instance.
*/
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val) {
ee_u32 i,j;
for (i=0; i<N; i++) {
for (j=0; j<N; j++) {
C[i*N+j]=(MATRES)A[i*N+j] * (MATRES)val;
}
}
void
matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val)
{
    /* C[r][c] = A[r][c] * val for every element of the N x N matrices.
     * Both operands are widened to MATRES before the multiply. */
    const MATRES factor = (MATRES)val;
    ee_u32       row, col;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            const ee_u32 at = row * N + col;

            C[at] = (MATRES)A[at] * factor;
        }
    }
}
/* Function: matrix_add_const
Add a constant value to all elements of a matrix.
Add a constant value to all elements of a matrix.
*/
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val) {
ee_u32 i,j;
for (i=0; i<N; i++) {
for (j=0; j<N; j++) {
A[i*N+j] += val;
}
}
void
matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val)
{
    /* Adds val, in place, to each element of the N x N matrix A. */
    ee_u32 row, col;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            const ee_u32 at = row * N + col;

            A[at] += val;
        }
    }
}
/* Function: matrix_mul_vect
Multiply a matrix by a vector.
This is common in many simple filters (e.g. fir where a vector of coefficients is applied to the matrix.)
Multiply a matrix by a vector.
This is common in many simple filters (e.g. fir where a vector of
coefficients is applied to the matrix.)
*/
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) {
ee_u32 i,j;
for (i=0; i<N; i++) {
C[i]=0;
for (j=0; j<N; j++) {
C[i]+=(MATRES)A[i*N+j] * (MATRES)B[j];
}
}
void
matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
{
    /* C[r] = sum over c of A[r][c] * B[c]; only the first N entries of B
     * and of C are used. Each partial sum is written straight to C[r], as
     * in the original, rather than being kept in a local. */
    ee_u32 row, col;

    for (row = 0; row < N; row++)
    {
        MATRES *out = &C[row];

        *out = 0;
        for (col = 0; col < N; col++)
        {
            *out += (MATRES)A[row * N + col] * (MATRES)B[col];
        }
    }
}
/* Function: matrix_mul_matrix
Multiply a matrix by a matrix.
Basic code is used in many algorithms, mostly with minor changes such as scaling.
Multiply a matrix by a matrix.
Basic code is used in many algorithms, mostly with minor changes such as
scaling.
*/
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) {
ee_u32 i,j,k;
for (i=0; i<N; i++) {
for (j=0; j<N; j++) {
C[i*N+j]=0;
for(k=0;k<N;k++)
{
C[i*N+j]+=(MATRES)A[i*N+k] * (MATRES)B[k*N+j];
}
}
}
void
matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
{
    /* Plain N x N matrix product: C = A * B, operands widened to MATRES.
     * Partial sums are stored into the destination cell on every step. */
    ee_u32 row, col, inner;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            MATRES *cell = &C[row * N + col];

            *cell = 0;
            for (inner = 0; inner < N; inner++)
            {
                *cell += (MATRES)A[row * N + inner] * (MATRES)B[inner * N + col];
            }
        }
    }
}
/* Function: matrix_mul_matrix_bitextract
Multiply a matrix by a matrix, and extract some bits from the result.
Basic code is used in many algorithms, mostly with minor changes such as scaling.
Multiply a matrix by a matrix, and extract some bits from the result.
Basic code is used in many algorithms, mostly with minor changes such as
scaling.
*/
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) {
ee_u32 i,j,k;
for (i=0; i<N; i++) {
for (j=0; j<N; j++) {
C[i*N+j]=0;
for(k=0;k<N;k++)
{
MATRES tmp=(MATRES)A[i*N+k] * (MATRES)B[k*N+j];
C[i*N+j]+=bit_extract(tmp,2,4)*bit_extract(tmp,5,7);
}
}
}
void
matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
{
    /* Same traversal as a matrix product, but each elementwise product is
     * not summed directly: two bit fields are pulled out of it with
     * bit_extract() and their product is what gets accumulated. */
    ee_u32 row, col, inner;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            MATRES *cell = &C[row * N + col];

            *cell = 0;
            for (inner = 0; inner < N; inner++)
            {
                MATRES product
                    = (MATRES)A[row * N + inner] * (MATRES)B[inner * N + col];

                *cell += bit_extract(product, 2, 4) * bit_extract(product, 5, 7);
            }
        }
    }
}

View File

@ -18,260 +18,313 @@ Original Author: Shay Gal-on
#include "coremark.h"
/* local functions */
enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count);
enum CORE_STATE core_state_transition(ee_u8 **instr, ee_u32 *transition_count);
/*
Topic: Description
Simple state machines like this one are used in many embedded products.
For more complex state machines, sometimes a state transition table implementation is used instead,
trading speed of direct coding for ease of maintenance.
Since the main goal of using a state machine in CoreMark is to exercise the switch/if behaviour,
we are using a small moore machine.
In particular, this machine tests type of string input,
trying to determine whether the input is a number or something else.
(see core_state.png).
Simple state machines like this one are used in many embedded products.
For more complex state machines, sometimes a state transition table
implementation is used instead, trading speed of direct coding for ease of
maintenance.
Since the main goal of using a state machine in CoreMark is to exercise
the switch/if behaviour, we are using a small Moore machine.
In particular, this machine tests type of string input,
trying to determine whether the input is a number or something else.
(see core_state.png).
*/
/* Function: core_bench_state
Benchmark function
Benchmark function
Go over the input twice, once direct, and once after introducing some corruption.
Go over the input twice, once direct, and once after introducing some
corruption.
*/
ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock,
ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc)
ee_u16
core_bench_state(ee_u32 blksize,
ee_u8 *memblock,
ee_s16 seed1,
ee_s16 seed2,
ee_s16 step,
ee_u16 crc)
{
ee_u32 final_counts[NUM_CORE_STATES];
ee_u32 track_counts[NUM_CORE_STATES];
ee_u8 *p=memblock;
ee_u32 i;
ee_u32 final_counts[NUM_CORE_STATES];
ee_u32 track_counts[NUM_CORE_STATES];
ee_u8 *p = memblock;
ee_u32 i;
#if CORE_DEBUG
ee_printf("State Bench: %d,%d,%d,%04x\n",seed1,seed2,step,crc);
ee_printf("State Bench: %d,%d,%d,%04x\n", seed1, seed2, step, crc);
#endif
for (i=0; i<NUM_CORE_STATES; i++) {
final_counts[i]=track_counts[i]=0;
}
/* run the state machine over the input */
while (*p!=0) {
enum CORE_STATE fstate=core_state_transition(&p,track_counts);
final_counts[fstate]++;
for (i = 0; i < NUM_CORE_STATES; i++)
{
final_counts[i] = track_counts[i] = 0;
}
/* run the state machine over the input */
while (*p != 0)
{
enum CORE_STATE fstate = core_state_transition(&p, track_counts);
final_counts[fstate]++;
#if CORE_DEBUG
ee_printf("%d,",fstate);
}
ee_printf("\n");
ee_printf("%d,", fstate);
}
ee_printf("\n");
#else
}
}
#endif
p=memblock;
while (p < (memblock+blksize)) { /* insert some corruption */
if (*p!=',')
*p^=(ee_u8)seed1;
p+=step;
}
p=memblock;
/* run the state machine over the input again */
while (*p!=0) {
enum CORE_STATE fstate=core_state_transition(&p,track_counts);
final_counts[fstate]++;
p = memblock;
while (p < (memblock + blksize))
{ /* insert some corruption */
if (*p != ',')
*p ^= (ee_u8)seed1;
p += step;
}
p = memblock;
/* run the state machine over the input again */
while (*p != 0)
{
enum CORE_STATE fstate = core_state_transition(&p, track_counts);
final_counts[fstate]++;
#if CORE_DEBUG
ee_printf("%d,",fstate);
}
ee_printf("\n");
ee_printf("%d,", fstate);
}
ee_printf("\n");
#else
}
}
#endif
p=memblock;
while (p < (memblock+blksize)) { /* undo corruption is seed1 and seed2 are equal */
if (*p!=',')
*p^=(ee_u8)seed2;
p+=step;
}
/* end timing */
for (i=0; i<NUM_CORE_STATES; i++) {
crc=crcu32(final_counts[i],crc);
crc=crcu32(track_counts[i],crc);
}
return crc;
p = memblock;
while (p < (memblock + blksize))
{ /* undo corruption is seed1 and seed2 are equal */
if (*p != ',')
*p ^= (ee_u8)seed2;
p += step;
}
/* end timing */
for (i = 0; i < NUM_CORE_STATES; i++)
{
crc = crcu32(final_counts[i], crc);
crc = crcu32(track_counts[i], crc);
}
return crc;
}
/* Default initialization patterns */
static ee_u8 *intpat[4] ={(ee_u8 *)"5012",(ee_u8 *)"1234",(ee_u8 *)"-874",(ee_u8 *)"+122"};
static ee_u8 *floatpat[4]={(ee_u8 *)"35.54400",(ee_u8 *)".1234500",(ee_u8 *)"-110.700",(ee_u8 *)"+0.64400"};
static ee_u8 *scipat[4] ={(ee_u8 *)"5.500e+3",(ee_u8 *)"-.123e-2",(ee_u8 *)"-87e+832",(ee_u8 *)"+0.6e-12"};
static ee_u8 *errpat[4] ={(ee_u8 *)"T0.3e-1F",(ee_u8 *)"-T.T++Tq",(ee_u8 *)"1T3.4e4z",(ee_u8 *)"34.0e-T^"};
static ee_u8 *intpat[4]
= { (ee_u8 *)"5012", (ee_u8 *)"1234", (ee_u8 *)"-874", (ee_u8 *)"+122" };
static ee_u8 *floatpat[4] = { (ee_u8 *)"35.54400",
(ee_u8 *)".1234500",
(ee_u8 *)"-110.700",
(ee_u8 *)"+0.64400" };
static ee_u8 *scipat[4] = { (ee_u8 *)"5.500e+3",
(ee_u8 *)"-.123e-2",
(ee_u8 *)"-87e+832",
(ee_u8 *)"+0.6e-12" };
static ee_u8 *errpat[4] = { (ee_u8 *)"T0.3e-1F",
(ee_u8 *)"-T.T++Tq",
(ee_u8 *)"1T3.4e4z",
(ee_u8 *)"34.0e-T^" };
/* Function: core_init_state
Initialize the input data for the state machine.
Initialize the input data for the state machine.
Populate the input with several predetermined strings, interspersed.
Actual patterns chosen depend on the seed parameter.
Note:
The seed parameter MUST be supplied from a source that cannot be determined at compile time
Populate the input with several predetermined strings, interspersed.
Actual patterns chosen depend on the seed parameter.
Note:
The seed parameter MUST be supplied from a source that cannot be
determined at compile time
*/
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p) {
ee_u32 total=0,next=0,i;
ee_u8 *buf=0;
void
core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p)
{
ee_u32 total = 0, next = 0, i;
ee_u8 *buf = 0;
#if CORE_DEBUG
ee_u8 *start=p;
ee_printf("State: %d,%d\n",size,seed);
ee_u8 *start = p;
ee_printf("State: %d,%d\n", size, seed);
#endif
size--;
next=0;
while ((total+next+1)<size) {
if (next>0) {
for(i=0;i<next;i++)
*(p+total+i)=buf[i];
*(p+total+i)=',';
total+=next+1;
}
seed++;
switch (seed & 0x7) {
case 0: /* int */
case 1: /* int */
case 2: /* int */
buf=intpat[(seed>>3) & 0x3];
next=4;
break;
case 3: /* float */
case 4: /* float */
buf=floatpat[(seed>>3) & 0x3];
next=8;
break;
case 5: /* scientific */
case 6: /* scientific */
buf=scipat[(seed>>3) & 0x3];
next=8;
break;
case 7: /* invalid */
buf=errpat[(seed>>3) & 0x3];
next=8;
break;
default: /* Never happen, just to make some compilers happy */
break;
}
}
size++;
while (total<size) { /* fill the rest with 0 */
*(p+total)=0;
total++;
}
size--;
next = 0;
while ((total + next + 1) < size)
{
if (next > 0)
{
for (i = 0; i < next; i++)
*(p + total + i) = buf[i];
*(p + total + i) = ',';
total += next + 1;
}
seed++;
switch (seed & 0x7)
{
case 0: /* int */
case 1: /* int */
case 2: /* int */
buf = intpat[(seed >> 3) & 0x3];
next = 4;
break;
case 3: /* float */
case 4: /* float */
buf = floatpat[(seed >> 3) & 0x3];
next = 8;
break;
case 5: /* scientific */
case 6: /* scientific */
buf = scipat[(seed >> 3) & 0x3];
next = 8;
break;
case 7: /* invalid */
buf = errpat[(seed >> 3) & 0x3];
next = 8;
break;
default: /* Never happen, just to make some compilers happy */
break;
}
}
size++;
while (total < size)
{ /* fill the rest with 0 */
*(p + total) = 0;
total++;
}
#if CORE_DEBUG
ee_printf("State Input: %s\n",start);
ee_printf("State Input: %s\n", start);
#endif
}
static ee_u8 ee_isdigit(ee_u8 c) {
ee_u8 retval;
retval = ((c>='0') & (c<='9')) ? 1 : 0;
return retval;
static ee_u8
ee_isdigit(ee_u8 c)
{
    /* 1 when c is an ASCII decimal digit, 0 otherwise. The two range
     * checks are combined with bitwise & (not &&), as in the original. */
    return (ee_u8)(((c >= '0') & (c <= '9')) ? 1 : 0);
}
/* Function: core_state_transition
Actual state machine.
Actual state machine.
The state machine will continue scanning until either:
1 - an invalid input is detected.
2 - a valid number has been detected.
The input pointer is updated to point to the end of the token, and the end state is returned (either specific format determined or invalid).
The state machine will continue scanning until either:
1 - an invalid input is detected.
2 - a valid number has been detected.
The input pointer is updated to point to the end of the token, and the
end state is returned (either specific format determined or invalid).
*/
enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count) {
ee_u8 *str=*instr;
ee_u8 NEXT_SYMBOL;
enum CORE_STATE state=CORE_START;
for( ; *str && state != CORE_INVALID; str++ ) {
NEXT_SYMBOL = *str;
if (NEXT_SYMBOL==',') /* end of this input */ {
str++;
break;
}
switch(state) {
case CORE_START:
if(ee_isdigit(NEXT_SYMBOL)) {
state = CORE_INT;
}
else if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) {
state = CORE_S1;
}
else if( NEXT_SYMBOL == '.' ) {
state = CORE_FLOAT;
}
else {
state = CORE_INVALID;
transition_count[CORE_INVALID]++;
}
transition_count[CORE_START]++;
break;
case CORE_S1:
if(ee_isdigit(NEXT_SYMBOL)) {
state = CORE_INT;
transition_count[CORE_S1]++;
}
else if( NEXT_SYMBOL == '.' ) {
state = CORE_FLOAT;
transition_count[CORE_S1]++;
}
else {
state = CORE_INVALID;
transition_count[CORE_S1]++;
}
break;
case CORE_INT:
if( NEXT_SYMBOL == '.' ) {
state = CORE_FLOAT;
transition_count[CORE_INT]++;
}
else if(!ee_isdigit(NEXT_SYMBOL)) {
state = CORE_INVALID;
transition_count[CORE_INT]++;
}
break;
case CORE_FLOAT:
if( NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e' ) {
state = CORE_S2;
transition_count[CORE_FLOAT]++;
}
else if(!ee_isdigit(NEXT_SYMBOL)) {
state = CORE_INVALID;
transition_count[CORE_FLOAT]++;
}
break;
case CORE_S2:
if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) {
state = CORE_EXPONENT;
transition_count[CORE_S2]++;
}
else {
state = CORE_INVALID;
transition_count[CORE_S2]++;
}
break;
case CORE_EXPONENT:
if(ee_isdigit(NEXT_SYMBOL)) {
state = CORE_SCIENTIFIC;
transition_count[CORE_EXPONENT]++;
}
else {
state = CORE_INVALID;
transition_count[CORE_EXPONENT]++;
}
break;
case CORE_SCIENTIFIC:
if(!ee_isdigit(NEXT_SYMBOL)) {
state = CORE_INVALID;
transition_count[CORE_INVALID]++;
}
break;
default:
break;
}
}
*instr=str;
return state;
enum CORE_STATE
core_state_transition(ee_u8 **instr, ee_u32 *transition_count)
{
    /* Runs the number-recognising state machine over one token.
     *
     * Scanning starts at *instr and stops at the first of: a NUL byte, a
     * ',' separator (which is consumed), or the symbol that drives the
     * machine into CORE_INVALID (which is also consumed).
     * On return *instr points just past the consumed input and the state
     * reached is returned. transition_count[] is bumped per state as
     * noted in each case below. */
    ee_u8 *         cursor = *instr;
    enum CORE_STATE state  = CORE_START;

    while (*cursor != 0 && state != CORE_INVALID)
    {
        const ee_u8 sym = *cursor;

        if (sym == ',')
        {
            /* end of this token: step over the separator and stop */
            cursor++;
            break;
        }

        switch (state)
        {
            case CORE_START:
                /* every symbol examined in the start state is counted */
                transition_count[CORE_START]++;
                if (ee_isdigit(sym))
                {
                    state = CORE_INT;
                }
                else if (sym == '+' || sym == '-')
                {
                    state = CORE_S1;
                }
                else if (sym == '.')
                {
                    state = CORE_FLOAT;
                }
                else
                {
                    state = CORE_INVALID;
                    transition_count[CORE_INVALID]++;
                }
                break;

            case CORE_S1:
                /* after a leading sign: always leaves, always counted */
                transition_count[CORE_S1]++;
                if (ee_isdigit(sym))
                {
                    state = CORE_INT;
                }
                else if (sym == '.')
                {
                    state = CORE_FLOAT;
                }
                else
                {
                    state = CORE_INVALID;
                }
                break;

            case CORE_INT:
                /* further digits stay here and are not counted */
                if (sym == '.')
                {
                    state = CORE_FLOAT;
                }
                else if (!ee_isdigit(sym))
                {
                    state = CORE_INVALID;
                }
                if (state != CORE_INT)
                {
                    transition_count[CORE_INT]++;
                }
                break;

            case CORE_FLOAT:
                /* further digits stay here and are not counted */
                if (sym == 'E' || sym == 'e')
                {
                    state = CORE_S2;
                }
                else if (!ee_isdigit(sym))
                {
                    state = CORE_INVALID;
                }
                if (state != CORE_FLOAT)
                {
                    transition_count[CORE_FLOAT]++;
                }
                break;

            case CORE_S2:
                /* after the exponent marker a sign is required */
                transition_count[CORE_S2]++;
                state = (sym == '+' || sym == '-') ? CORE_EXPONENT
                                                   : CORE_INVALID;
                break;

            case CORE_EXPONENT:
                /* after the exponent sign a digit is required */
                transition_count[CORE_EXPONENT]++;
                state = ee_isdigit(sym) ? CORE_SCIENTIFIC : CORE_INVALID;
                break;

            case CORE_SCIENTIFIC:
                /* NOTE: unlike the other states, leaving this one is
                 * counted in the CORE_INVALID slot. Kept exactly as in
                 * the original; callers fold these counts into a CRC. */
                if (!ee_isdigit(sym))
                {
                    state = CORE_INVALID;
                    transition_count[CORE_INVALID]++;
                }
                break;

            default:
                break;
        }

        cursor++;
    }

    *instr = cursor;
    return state;
}

View File

@ -18,193 +18,232 @@ Original Author: Shay Gal-on
#include "coremark.h"
/* Function: get_seed
Get a value that cannot be determined at compile time.
Get a value that cannot be determined at compile time.
Since different embedded systems and compilers are used, 3 different methods are provided:
1 - Using a volatile variable. This method is only valid if the compiler is forced to generate code that
reads the value of a volatile variable from memory at run time.
Please note, if using this method, you would need to modify core_portme.c to generate training profile.
2 - Command line arguments. This is the preferred method if command line arguments are supported.
3 - System function. If none of the first 2 methods is available on the platform,
a system function which is not a stub can be used.
e.g. read the value on GPIO pins connected to switches, or invoke special simulator functions.
Since different embedded systems and compilers are used, 3 different
methods are provided:
1 - Using a volatile variable. This method is only valid if the compiler
is forced to generate code that reads the value of a volatile variable
from memory at run time. Please note, if using this method, you would
need to modify core_portme.c to generate training profile.
2 - Command line arguments. This is the preferred method if command line
arguments are supported.
3 - System function. If none of the first 2 methods is available on the
platform, a system function which is not a stub can be used,
e.g. read the value on GPIO pins connected to switches, or invoke
special simulator functions.
*/
#if (SEED_METHOD==SEED_VOLATILE)
extern volatile ee_s32 seed1_volatile;
extern volatile ee_s32 seed2_volatile;
extern volatile ee_s32 seed3_volatile;
extern volatile ee_s32 seed4_volatile;
extern volatile ee_s32 seed5_volatile;
ee_s32 get_seed_32(int i) {
ee_s32 retval;
switch (i) {
case 1:
retval=seed1_volatile;
break;
case 2:
retval=seed2_volatile;
break;
case 3:
retval=seed3_volatile;
break;
case 4:
retval=seed4_volatile;
break;
case 5:
retval=seed5_volatile;
break;
default:
retval=0;
break;
}
return retval;
}
#elif (SEED_METHOD==SEED_ARG)
ee_s32 parseval(char *valstring) {
ee_s32 retval=0;
ee_s32 neg=1;
int hexmode=0;
if (*valstring == '-') {
neg=-1;
valstring++;
}
if ((valstring[0] == '0') && (valstring[1] == 'x')) {
hexmode=1;
valstring+=2;
}
/* first look for digits */
if (hexmode) {
while (((*valstring >= '0') && (*valstring <= '9')) || ((*valstring >= 'a') && (*valstring <= 'f'))) {
ee_s32 digit=*valstring-'0';
if (digit>9)
digit=10+*valstring-'a';
retval*=16;
retval+=digit;
valstring++;
}
} else {
while ((*valstring >= '0') && (*valstring <= '9')) {
ee_s32 digit=*valstring-'0';
retval*=10;
retval+=digit;
valstring++;
}
}
/* now add qualifiers */
if (*valstring=='K')
retval*=1024;
if (*valstring=='M')
retval*=1024*1024;
#if (SEED_METHOD == SEED_VOLATILE)
extern volatile ee_s32 seed1_volatile;
extern volatile ee_s32 seed2_volatile;
extern volatile ee_s32 seed3_volatile;
extern volatile ee_s32 seed4_volatile;
extern volatile ee_s32 seed5_volatile;
ee_s32
get_seed_32(int i)
{
    /* Returns seed number i (1..5) by reading the matching volatile
     * variable; any other index yields 0. Only the selected variable is
     * read. */
    switch (i)
    {
        case 1:
            return seed1_volatile;
        case 2:
            return seed2_volatile;
        case 3:
            return seed3_volatile;
        case 4:
            return seed4_volatile;
        case 5:
            return seed5_volatile;
        default:
            return 0;
    }
}
#elif (SEED_METHOD == SEED_ARG)
ee_s32
parseval(char *valstring)
{
ee_s32 retval = 0;
ee_s32 neg = 1;
int hexmode = 0;
if (*valstring == '-')
{
neg = -1;
valstring++;
}
if ((valstring[0] == '0') && (valstring[1] == 'x'))
{
hexmode = 1;
valstring += 2;
}
/* first look for digits */
if (hexmode)
{
while (((*valstring >= '0') && (*valstring <= '9'))
|| ((*valstring >= 'a') && (*valstring <= 'f')))
{
ee_s32 digit = *valstring - '0';
if (digit > 9)
digit = 10 + *valstring - 'a';
retval *= 16;
retval += digit;
valstring++;
}
}
else
{
while ((*valstring >= '0') && (*valstring <= '9'))
{
ee_s32 digit = *valstring - '0';
retval *= 10;
retval += digit;
valstring++;
}
}
/* now add qualifiers */
if (*valstring == 'K')
retval *= 1024;
if (*valstring == 'M')
retval *= 1024 * 1024;
retval*=neg;
return retval;
retval *= neg;
return retval;
}
ee_s32 get_seed_args(int i, int argc, char *argv[]) {
if (argc>i)
return parseval(argv[i]);
return 0;
ee_s32
get_seed_args(int i, int argc, char *argv[])
{
    /* Parses argv[i] with parseval() when that argument exists
     * (argc > i); otherwise the seed is 0. */
    return (argc > i) ? parseval(argv[i]) : 0;
}
#elif (SEED_METHOD==SEED_FUNC)
/* If using OS based function, you must define and implement the functions below in core_portme.h and core_portme.c ! */
ee_s32 get_seed_32(int i) {
ee_s32 retval;
switch (i) {
case 1:
retval=portme_sys1();
break;
case 2:
retval=portme_sys2();
break;
case 3:
retval=portme_sys3();
break;
case 4:
retval=portme_sys4();
break;
case 5:
retval=portme_sys5();
break;
default:
retval=0;
break;
}
return retval;
#elif (SEED_METHOD == SEED_FUNC)
/* If using OS based function, you must define and implement the functions below
* in core_portme.h and core_portme.c ! */
ee_s32
get_seed_32(int i)
{
    /* Returns seed number i (1..5) from the matching port-supplied
     * portme_sysN() function; any other index yields 0. Only the selected
     * function is called. */
    switch (i)
    {
        case 1:
            return portme_sys1();
        case 2:
            return portme_sys2();
        case 3:
            return portme_sys3();
        case 4:
            return portme_sys4();
        case 5:
            return portme_sys5();
        default:
            return 0;
    }
}
#endif
/* Function: crc*
Service functions to calculate 16b CRC code.
Service functions to calculate 16b CRC code.
*/
ee_u16 crcu8(ee_u8 data, ee_u16 crc )
ee_u16
crcu8(ee_u8 data, ee_u16 crc)
{
ee_u8 i=0,x16=0,carry=0;
ee_u8 i = 0, x16 = 0, carry = 0;
for (i = 0; i < 8; i++)
for (i = 0; i < 8; i++)
{
x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1));
data >>= 1;
x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1));
data >>= 1;
if (x16 == 1)
{
crc ^= 0x4002;
carry = 1;
}
else
carry = 0;
crc >>= 1;
if (carry)
crc |= 0x8000;
else
crc &= 0x7fff;
if (x16 == 1)
{
crc ^= 0x4002;
carry = 1;
}
else
carry = 0;
crc >>= 1;
if (carry)
crc |= 0x8000;
else
crc &= 0x7fff;
}
return crc;
}
ee_u16 crcu16(ee_u16 newval, ee_u16 crc) {
crc=crcu8( (ee_u8) (newval) ,crc);
crc=crcu8( (ee_u8) ((newval)>>8) ,crc);
return crc;
return crc;
}
ee_u16 crcu32(ee_u32 newval, ee_u16 crc) {
crc=crc16((ee_s16) newval ,crc);
crc=crc16((ee_s16) (newval>>16) ,crc);
return crc;
ee_u16
crcu16(ee_u16 newval, ee_u16 crc)
{
    /* Folds a 16-bit value into crc one byte at a time, low byte first. */
    const ee_u8 low_byte  = (ee_u8)(newval);
    const ee_u8 high_byte = (ee_u8)((newval) >> 8);

    return crcu8(high_byte, crcu8(low_byte, crc));
}
ee_u16 crc16(ee_s16 newval, ee_u16 crc) {
return crcu16((ee_u16)newval, crc);
ee_u16
crcu32(ee_u32 newval, ee_u16 crc)
{
    /* Folds a 32-bit value into crc as two 16-bit halves, low half
     * first, each passed through crc16(). */
    const ee_s16 low_half  = (ee_s16)newval;
    const ee_s16 high_half = (ee_s16)(newval >> 16);

    return crc16(high_half, crc16(low_half, crc));
}
ee_u16
crc16(ee_s16 newval, ee_u16 crc)
{
    /* Signed entry point: reinterpret the value as unsigned and defer to
     * crcu16(). */
    const ee_u16 as_unsigned = (ee_u16)newval;

    return crcu16(as_unsigned, crc);
}
ee_u8 check_data_types() {
ee_u8 retval=0;
if (sizeof(ee_u8) != 1) {
ee_printf("ERROR: ee_u8 is not an 8b datatype!\n");
retval++;
}
if (sizeof(ee_u16) != 2) {
ee_printf("ERROR: ee_u16 is not a 16b datatype!\n");
retval++;
}
if (sizeof(ee_s16) != 2) {
ee_printf("ERROR: ee_s16 is not a 16b datatype!\n");
retval++;
}
if (sizeof(ee_s32) != 4) {
ee_printf("ERROR: ee_s32 is not a 32b datatype!\n");
retval++;
}
if (sizeof(ee_u32) != 4) {
ee_printf("ERROR: ee_u32 is not a 32b datatype!\n");
retval++;
}
if (sizeof(ee_ptr_int) != sizeof(int *)) {
ee_printf("ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n");
retval++;
}
if (retval>0) {
ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n");
}
return retval;
ee_u8
check_data_types(void)
{
    /* Sanity-checks the sizes of the port-defined integer types.
     *
     * Prints one error line per type whose size is not what the benchmark
     * expects, followed by a final hint when anything failed.
     * Returns the number of failed checks (0 when all types are fine).
     *
     * The parameter list is spelled (void) so the definition is a real
     * prototype and matches the declaration in coremark.h. */
    ee_u8 retval = 0;

    if (sizeof(ee_u8) != 1)
    {
        ee_printf("ERROR: ee_u8 is not an 8b datatype!\n");
        retval++;
    }
    if (sizeof(ee_u16) != 2)
    {
        ee_printf("ERROR: ee_u16 is not a 16b datatype!\n");
        retval++;
    }
    if (sizeof(ee_s16) != 2)
    {
        ee_printf("ERROR: ee_s16 is not a 16b datatype!\n");
        retval++;
    }
    if (sizeof(ee_s32) != 4)
    {
        ee_printf("ERROR: ee_s32 is not a 32b datatype!\n");
        retval++;
    }
    if (sizeof(ee_u32) != 4)
    {
        ee_printf("ERROR: ee_u32 is not a 32b datatype!\n");
        retval++;
    }
    /* ee_ptr_int must be exactly as wide as a data pointer */
    if (sizeof(ee_ptr_int) != sizeof(int *))
    {
        ee_printf(
            "ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n");
        retval++;
    }
    if (retval > 0)
    {
        ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n");
    }
    return retval;
}

View File

@ -17,23 +17,23 @@ Original Author: Shay Gal-on
*/
/* Topic: Description
This file contains declarations of the various benchmark functions.
This file contains declarations of the various benchmark functions.
*/
/* Configuration: TOTAL_DATA_SIZE
Define total size for data algorithms will operate on
Define total size for data algorithms will operate on
*/
#ifndef TOTAL_DATA_SIZE
#define TOTAL_DATA_SIZE 2*1000
#ifndef TOTAL_DATA_SIZE
#define TOTAL_DATA_SIZE 2 * 1000
#endif
#define SEED_ARG 0
#define SEED_FUNC 1
#define SEED_ARG 0
#define SEED_FUNC 1
#define SEED_VOLATILE 2
#define MEM_STATIC 0
#define MEM_MALLOC 1
#define MEM_STACK 2
#define MEM_STACK 2
#include "core_portme.h"
@ -48,8 +48,8 @@ Original Author: Shay Gal-on
void *iterate(void *pres);
/* Typedef: secs_ret
For machines that have floating point support, get number of seconds as a double.
Otherwise an unsigned int.
For machines that have floating point support, get number of seconds as
a double. Otherwise an unsigned int.
*/
#if HAS_FLOAT
typedef double secs_ret;
@ -58,47 +58,48 @@ typedef ee_u32 secs_ret;
#endif
#if MAIN_HAS_NORETURN
#define MAIN_RETURN_VAL
#define MAIN_RETURN_VAL
#define MAIN_RETURN_TYPE void
#else
#define MAIN_RETURN_VAL 0
#define MAIN_RETURN_VAL 0
#define MAIN_RETURN_TYPE int
#endif
#endif
void start_time(void);
void stop_time(void);
void start_time(void);
void stop_time(void);
CORE_TICKS get_time(void);
secs_ret time_in_secs(CORE_TICKS ticks);
secs_ret time_in_secs(CORE_TICKS ticks);
/* Misc useful functions */
ee_u16 crcu8(ee_u8 data, ee_u16 crc);
ee_u16 crc16(ee_s16 newval, ee_u16 crc);
ee_u16 crcu16(ee_u16 newval, ee_u16 crc);
ee_u16 crcu32(ee_u32 newval, ee_u16 crc);
ee_u8 check_data_types();
void *portable_malloc(ee_size_t size);
void portable_free(void *p);
ee_u8 check_data_types(void);
void * portable_malloc(ee_size_t size);
void portable_free(void *p);
ee_s32 parseval(char *valstring);
/* Algorithm IDS */
#define ID_LIST (1<<0)
#define ID_MATRIX (1<<1)
#define ID_STATE (1<<2)
#define ALL_ALGORITHMS_MASK (ID_LIST|ID_MATRIX|ID_STATE)
#define NUM_ALGORITHMS 3
#define ID_LIST (1 << 0)
#define ID_MATRIX (1 << 1)
#define ID_STATE (1 << 2)
#define ALL_ALGORITHMS_MASK (ID_LIST | ID_MATRIX | ID_STATE)
#define NUM_ALGORITHMS 3
/* list data structures */
typedef struct list_data_s {
ee_s16 data16;
ee_s16 idx;
/* Payload record pointed to by a list_head node: two signed 16-bit fields.
 * NOTE(review): presumably data16 is the value and idx an ordering index;
 * confirm against the list benchmark source. */
typedef struct list_data_s
{
ee_s16 data16;
ee_s16 idx;
} list_data;
typedef struct list_head_s {
struct list_head_s *next;
struct list_data_s *info;
/* Singly linked list node: link to the following node plus a pointer to
 * the node's list_data payload. */
typedef struct list_head_s
{
struct list_head_s *next; /* following node */
struct list_data_s *info; /* payload of this node */
} list_head;
/*matrix benchmark related stuff */
#define MATDAT_INT 1
#if MATDAT_INT
@ -109,66 +110,74 @@ typedef ee_f16 MATDAT;
typedef ee_f32 MATRES;
#endif
typedef struct MAT_PARAMS_S {
int N;
MATDAT *A;
MATDAT *B;
MATRES *C;
/* Matrix benchmark parameters, filled in by core_init_matrix(). */
typedef struct MAT_PARAMS_S
{
int N; /* matrix dimension (matrices are N x N) */
MATDAT *A; /* first input matrix */
MATDAT *B; /* second input matrix */
MATRES *C; /* result matrix */
} mat_params;
/* state machine related stuff */
/* List of all the possible states for the FSM */
typedef enum CORE_STATE {
CORE_START=0,
CORE_INVALID,
CORE_S1,
CORE_S2,
CORE_INT,
CORE_FLOAT,
CORE_EXPONENT,
CORE_SCIENTIFIC,
NUM_CORE_STATES
} core_state_e ;
/* States of the number-recognising FSM in core_state_transition().
 * The values also index the per-state counter arrays, which are sized with
 * NUM_CORE_STATES. */
typedef enum CORE_STATE
{
CORE_START = 0, /* nothing consumed yet */
CORE_INVALID, /* input rejected */
CORE_S1, /* leading '+' or '-' seen */
CORE_S2, /* exponent marker 'e'/'E' seen */
CORE_INT, /* digits of an integer */
CORE_FLOAT, /* '.' seen, fractional digits */
CORE_EXPONENT, /* sign after the exponent marker seen */
CORE_SCIENTIFIC, /* exponent digits */
NUM_CORE_STATES /* number of states, not a state itself */
} core_state_e;
/* Helper structure to hold results */
typedef struct RESULTS_S {
/* inputs */
ee_s16 seed1; /* Initializing seed */
ee_s16 seed2; /* Initializing seed */
ee_s16 seed3; /* Initializing seed */
void *memblock[4]; /* Pointer to safe memory location */
ee_u32 size; /* Size of the data */
ee_u32 iterations; /* Number of iterations to execute */
ee_u32 execs; /* Bitmask of operations to execute */
struct list_head_s *list;
mat_params mat;
/* outputs */
ee_u16 crc;
ee_u16 crclist;
ee_u16 crcmatrix;
ee_u16 crcstate;
ee_s16 err;
/* ultithread specific */
core_portable port;
/* Per-run (or per-thread) context: benchmark inputs, the data structures
 * the algorithms operate on, and the CRC outputs. */
typedef struct RESULTS_S
{
/* inputs */
ee_s16 seed1; /* Initializing seed */
ee_s16 seed2; /* Initializing seed */
ee_s16 seed3; /* Initializing seed */
void * memblock[4]; /* Pointer to safe memory location */
ee_u32 size; /* Size of the data */
ee_u32 iterations; /* Number of iterations to execute */
ee_u32 execs; /* Bitmask of operations to execute (presumably ID_* bits; confirm) */
struct list_head_s *list; /* list benchmark data */
mat_params mat; /* matrix benchmark data */
/* outputs */
ee_u16 crc;
ee_u16 crclist;
ee_u16 crcmatrix;
ee_u16 crcstate;
ee_s16 err;
/* multithread specific */
core_portable port;
} core_results;
/* Multicore execution handling */
#if (MULTITHREAD>1)
#if (MULTITHREAD > 1)
ee_u8 core_start_parallel(core_results *res);
ee_u8 core_stop_parallel(core_results *res);
#endif
/* list benchmark functions */
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed);
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx);
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx);
/* state benchmark functions */
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p);
ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock,
ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc);
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p);
ee_u16 core_bench_state(ee_u32 blksize,
ee_u8 *memblock,
ee_s16 seed1,
ee_s16 seed2,
ee_s16 step,
ee_u16 crc);
/* matrix benchmark functions */
ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p);
ee_u32 core_init_matrix(ee_u32 blksize,
void * memblk,
ee_s32 seed,
mat_params *p);
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc);

View File

@ -0,0 +1,6 @@
8d082dc4a9676c02731a8cf209339072 core_list_join.c
c984863b84b59185d8b5fb81c1ca7535 core_main.c
5fa21a0f7c3964167c9691db531ca652 core_matrix.c
edcfc7a0b146a50028014f06e6826aa3 core_state.c
45540ba2145adea1ec7ea2c72a1fbbcb core_util.c
8ca974c013b380dc7f0d6d1afb76eb2d coremark.h

126
riscv-coremark/coremark/cygwin/core_portme.mak Executable file → Normal file
View File

@ -14,128 +14,4 @@
#
# Original Author: Shay Gal-on
#File: core_portme.mak
# Flag: OUTFLAG
# Use this flag to define how to to get an executable (e.g -o)
OUTFLAG= -o
# Flag: CC
# Use this flag to define compiler to use
CC = gcc
# Flag: CFLAGS
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
PORT_CFLAGS = -O2
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
#Flag: LFLAGS_END
# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
LFLAGS_END =
# Flag: PORT_SRCS
# Port specific source files can be added here
PORT_SRCS = $(PORT_DIR)/core_portme.c
# Flag: LOAD
# Define this flag if you need to load to a target, as in a cross compile environment.
# Flag: RUN
# Define this flag if running does not consist of simple invocation of the binary.
# In a cross compile environment, you need to define this.
#For flashing and using a tera term macro, you could use
#LOAD = flash ADDR
#RUN = ttpmacro coremark.ttl
#For copying to target and executing via SSH connection, you could use
#LOAD = scp $(OUTFILE) user@target:~
#RUN = ssh user@target -c
#For native compilation and execution
LOAD = echo Loading done
RUN =
OEXT = .o
EXE = .exe
# Flag: SEPARATE_COMPILE
# Define if you need to separate compilation from link stage.
# In this case, you also need to define below how to create an object file, and how to link.
ifdef SEPARATE_COMPILE
LD = gcc
OBJOUT = -o
LFLAGS =
OFLAG = -o
COUT = -c
# Flag: PORT_OBJS
# Port specific object files can be added here
PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT)
PORT_CLEAN = *$(OEXT)
$(OPATH)%$(OEXT) : %.c
$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
endif
# Target: port_prebuild
# Generate any files that are needed before actual build starts.
# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1
# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line.
# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it.
# Note - Using REBUILD=1
#
# Use make PGO=1 to invoke this sample processing.
ifdef PGO
ifeq (,$(findstring $(PGO),gen))
PGO_STAGE=build_pgo_gcc
CFLAGS+=-fprofile-use
endif
PORT_CLEAN+=*.gcda *.gcno gmon.out
endif
.PHONY: port_prebuild
port_prebuild: $(PGO_STAGE)
.PHONY: build_pgo_gcc
build_pgo_gcc:
$(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1
# Target: port_postbuild
# Generate any files that are needed after actual build end.
# E.g. change format to srec, bin, zip in order to be able to load into flash
.PHONY: port_postbuild
port_postbuild:
# Target: port_postrun
# Do platform specific after run stuff.
# E.g. reset the board, backup the logfiles etc.
.PHONY: port_postrun
port_postrun:
# Target: port_prerun
# Do platform specific after run stuff.
# E.g. reset the board, backup the logfiles etc.
.PHONY: port_prerun
port_prerun:
# Target: port_postload
# Do platform specific after load stuff.
# E.g. reset the reset power to the flash eraser
.PHONY: port_postload
port_postload:
# Target: port_preload
# Do platform specific before load stuff.
# E.g. reset the reset power to the flash eraser
.PHONY: port_preload
port_preload:
# FLAG: OPATH
# Path to the output folder. Default - current folder.
OPATH = ./
MKDIR = mkdir -p
# FLAG: PERL
# Define perl executable to calculate the geomean if running separate.
PERL=perl
include posix/core_portme.mak

View File

@ -0,0 +1,17 @@
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Original Author: Shay Gal-on
include posix/core_portme.mak

125
riscv-coremark/coremark/linux/core_portme.mak Executable file → Normal file
View File

@ -14,127 +14,4 @@
#
# Original Author: Shay Gal-on
#File: core_portme.mak
# Flag: OUTFLAG
# Use this flag to define how to to get an executable (e.g -o)
OUTFLAG= -o
# Flag: CC
# Use this flag to define compiler to use
CC = gcc
# Flag: CFLAGS
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
PORT_CFLAGS = -O2
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
#Flag: LFLAGS_END
# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
LFLAGS_END += -lrt
# Flag: PORT_SRCS
# Port specific source files can be added here
PORT_SRCS = $(PORT_DIR)/core_portme.c
# Flag: LOAD
# Define this flag if you need to load to a target, as in a cross compile environment.
# Flag: RUN
# Define this flag if running does not consist of simple invocation of the binary.
# In a cross compile environment, you need to define this.
#For flashing and using a tera term macro, you could use
#LOAD = flash ADDR
#RUN = ttpmacro coremark.ttl
#For copying to target and executing via SSH connection, you could use
#LOAD = scp $(OUTFILE) user@target:~
#RUN = ssh user@target -c
#For native compilation and execution
LOAD = echo Loading done
RUN =
OEXT = .o
EXE = .exe
# Flag: SEPARATE_COMPILE
# Define if you need to separate compilation from link stage.
# In this case, you also need to define below how to create an object file, and how to link.
ifdef SEPARATE_COMPILE
LD = gcc
OBJOUT = -o
LFLAGS =
OFLAG = -o
COUT = -c
# Flag: PORT_OBJS
# Port specific object files can be added here
PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT)
PORT_CLEAN = *$(OEXT)
$(OPATH)%$(OEXT) : %.c
$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
endif
# Target: port_prebuild
# Generate any files that are needed before actual build starts.
# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1
# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line.
# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it.
# Note - Using REBUILD=1
#
# Use make PGO=1 to invoke this sample processing.
ifdef PGO
ifeq (,$(findstring $(PGO),gen))
PGO_STAGE=build_pgo_gcc
CFLAGS+=-fprofile-use
endif
PORT_CLEAN+=*.gcda *.gcno gmon.out
endif
.PHONY: port_prebuild
port_prebuild: $(PGO_STAGE)
.PHONY: build_pgo_gcc
build_pgo_gcc:
$(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1
# Target: port_postbuild
# Generate any files that are needed after actual build end.
# E.g. change format to srec, bin, zip in order to be able to load into flash
.PHONY: port_postbuild
port_postbuild:
# Target: port_postrun
# Do platform specific after run stuff.
# E.g. reset the board, backup the logfiles etc.
.PHONY: port_postrun
port_postrun:
# Target: port_prerun
# Do platform specific after run stuff.
# E.g. reset the board, backup the logfiles etc.
.PHONY: port_prerun
port_prerun:
# Target: port_postload
# Do platform specific after load stuff.
# E.g. reset the reset power to the flash eraser
.PHONY: port_postload
port_postload:
# Target: port_preload
# Do platform specific before load stuff.
# E.g. reset the reset power to the flash eraser
.PHONY: port_preload
port_preload:
# FLAG: OPATH
# Path to the output folder. Default - current folder.
OPATH = ./
MKDIR = mkdir -p
# FLAG: PERL
# Define perl executable to calculate the geomean if running separate.
PERL=/usr/bin/perl
include posix/core_portme.mak

View File

@ -0,0 +1,18 @@
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Original Author: Shay Gal-on
NO_LIBRT = 1
include posix/core_portme.mak

View File

@ -0,0 +1,419 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include <stdio.h>
#include <stdlib.h>
#include "coremark.h"
#if CALLGRIND_RUN
#include <valgrind/callgrind.h>
#endif
#if (MEM_METHOD == MEM_MALLOC)
/* Function: portable_malloc
        Allocation hook for the benchmark: the request is forwarded to the
        C library allocator and its result is handed back unchanged.
*/
void *
portable_malloc(size_t size)
{
    void *block = malloc(size);
    return block;
}
/* Function: portable_free
        Release hook for the benchmark: the block is handed back to the
        C library allocator.
*/
void
portable_free(void *p)
{
    free(p);
}
#else
/* No heap based memory method selected: allocation always yields NULL and
   releasing a block is a no-op. */
void *
portable_malloc(size_t size)
{
    (void)size; /* intentionally unused */
    return NULL;
}
void
portable_free(void *p)
{
    (void)p; /* nothing to release */
}
#endif
/* Seed values used when SEED_METHOD is SEED_VOLATILE. They are declared
   volatile so that they cannot be computed at compile time. Exactly one of
   VALIDATION_RUN, PERFORMANCE_RUN or PROFILE_RUN is expected to be non-zero;
   core_portme.h selects PROFILE_RUN or PERFORMANCE_RUN when none is given. */
#if (SEED_METHOD == SEED_VOLATILE)
#if VALIDATION_RUN
/* Seeds for a validation run. */
volatile ee_s32 seed1_volatile = 0x3415;
volatile ee_s32 seed2_volatile = 0x3415;
volatile ee_s32 seed3_volatile = 0x66;
#endif
#if PERFORMANCE_RUN
/* Seeds for a performance run. */
volatile ee_s32 seed1_volatile = 0x0;
volatile ee_s32 seed2_volatile = 0x0;
volatile ee_s32 seed3_volatile = 0x66;
#endif
#if PROFILE_RUN
/* Seeds for a profile run. */
volatile ee_s32 seed1_volatile = 0x8;
volatile ee_s32 seed2_volatile = 0x8;
volatile ee_s32 seed3_volatile = 0x8;
#endif
/* Initialised from the ITERATIONS build setting. */
volatile ee_s32 seed4_volatile = ITERATIONS;
/* NOTE(review): meaning of seed5 is not visible in this file - confirm in
   core_main.c before documenting further. */
volatile ee_s32 seed5_volatile = 0;
#endif
/* Porting: Timing functions
How to capture time and convert to seconds must be ported to whatever is
supported by the platform. e.g. Read value from on board RTC, read value from
cpu clock cycles performance counter etc. Sample implementation for standard
time.h and windows.h definitions included.
*/
/* Define: TIMER_RES_DIVIDER
Divider to trade off timer resolution and total time that can be
measured.
Use lower values to increase resolution, but make sure that overflow
does not occur. If there are issues with the return value overflowing,
increase this value.
*/
/* Timer back-end selection.
   Each branch provides:
     NSECS_PER_SEC     - native timer units per second
     CORETIMETYPE      - type used to store one time sample
     GETMYTIME(_t)     - capture the current time into *_t
     MYTIMEDIFF(f, i)  - difference f - i, already divided by
                         TIMER_RES_DIVIDER where the branch uses one
   Macro arguments are fully parenthesized so that any lvalue expression
   (not just a plain identifier) can be passed safely. */
#if USE_CLOCK
/* C library clock() based timing. */
#define NSECS_PER_SEC              CLOCKS_PER_SEC
#define EE_TIMER_TICKER_RATE       1000
#define CORETIMETYPE               clock_t
#define GETMYTIME(_t)              (*(_t) = clock())
#define MYTIMEDIFF(fin, ini)       ((fin) - (ini))
#define TIMER_RES_DIVIDER          1
#define SAMPLE_TIME_IMPLEMENTATION 1
#elif defined(_MSC_VER)
/* Windows FILETIME based timing (10000000 units per second). */
#define NSECS_PER_SEC        10000000
#define EE_TIMER_TICKER_RATE 1000
#define CORETIMETYPE         FILETIME
#define GETMYTIME(_t)        GetSystemTimeAsFileTime(_t)
#define MYTIMEDIFF(fin, ini) \
    (((*(__int64 *)&(fin)) - (*(__int64 *)&(ini))) / TIMER_RES_DIVIDER)
/* default divider with MSDEV: 10000 ticks per second */
#ifndef TIMER_RES_DIVIDER
#define TIMER_RES_DIVIDER 1000
#endif
#define SAMPLE_TIME_IMPLEMENTATION 1
#elif HAS_TIME_H
/* POSIX clock_gettime() based timing (nanosecond units). */
#define NSECS_PER_SEC        1000000000
#define EE_TIMER_TICKER_RATE 1000
#define CORETIMETYPE         struct timespec
#define GETMYTIME(_t)        clock_gettime(CLOCK_REALTIME, (_t))
#define MYTIMEDIFF(fin, ini)                                               \
    (((fin).tv_sec - (ini).tv_sec) * (NSECS_PER_SEC / TIMER_RES_DIVIDER)   \
     + ((fin).tv_nsec - (ini).tv_nsec) / TIMER_RES_DIVIDER)
/* setting to 1/1000 of a second resolution by default with linux */
#ifndef TIMER_RES_DIVIDER
#define TIMER_RES_DIVIDER 1000000
#endif
#define SAMPLE_TIME_IMPLEMENTATION 1
#else
/* No sample timer available: the port must supply its own. */
#define SAMPLE_TIME_IMPLEMENTATION 0
#endif
/* Number of values returned by MYTIMEDIFF that make up one second. */
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
#if SAMPLE_TIME_IMPLEMENTATION
/** Define Host specific (POSIX), or target specific global time variables. */
static CORETIMETYPE start_time_val, stop_time_val;
/* Function: start_time
This function will be called right before starting the timed portion of
the benchmark.
Implementation may be capturing a system timer (as implemented in the
example code) or zeroing some system parameters - e.g. setting the cpu clocks
cycles to 0.
The sample implementation stores the current time in start_time_val and
only then enables the optional instrumentation hooks.
*/
void
start_time(void)
{
/* Capture the start sample first. */
GETMYTIME(&start_time_val);
#if CALLGRIND_RUN
/* Valgrind/callgrind: start collecting once the timer has been read. */
CALLGRIND_START_INSTRUMENTATION
#endif
#if MICA
/* x86 breakpoint instruction; presumably a marker for the MICA tool -
   TODO confirm. */
asm volatile("int3"); /*1 */
#endif
}
/* Function: stop_time
This function will be called right after ending the timed portion of the
benchmark.
Implementation may be capturing a system timer (as implemented in the
example code) or other system parameters - e.g. reading the current value of
cpu cycles counter.
The sample implementation disables the optional instrumentation hooks
first and then stores the current time in stop_time_val.
*/
void
stop_time(void)
{
#if CALLGRIND_RUN
/* Valgrind/callgrind: stop collecting before the timer is read. */
CALLGRIND_STOP_INSTRUMENTATION
#endif
#if MICA
/* x86 breakpoint instruction; presumably a marker for the MICA tool -
   TODO confirm. */
asm volatile("int3"); /*1 */
#endif
/* Capture the stop sample last. */
GETMYTIME(&stop_time_val);
}
/* Function: get_time
        Report the length of the last timed interval as an abstract "ticks"
        number.
        The value is the difference between the samples captured by
        <stop_time> and <start_time>. It may represent cpu cycles,
        milliseconds or any other unit, as long as <time_in_secs> can convert
        it to seconds. This methodology is taken to accommodate any hardware
        or simulated platform. The sample implementation returns millisecs by
        default, and the resolution is controlled by <TIMER_RES_DIVIDER>.
*/
CORE_TICKS
get_time(void)
{
    return (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
}
/* Function: time_in_secs
        Convert a tick count obtained from <get_time> into seconds.
        The <secs_ret> type is used to accommodate systems with no support
        for floating point. The conversion divides by EE_TICKS_PER_SEC, which
        is derived from the timer macros above.
*/
secs_ret
time_in_secs(CORE_TICKS ticks)
{
    const secs_ret ticks_per_sec = (secs_ret)EE_TICKS_PER_SEC;
    const secs_ret tick_count    = (secs_ret)ticks;
    return tick_count / ticks_per_sec;
}
#else
#error "Please implement timing functionality in core_portme.c"
#endif /* SAMPLE_TIME_IMPLEMENTATION */
/* Number of parallel contexts to run. Starts at the compile time MULTITHREAD
   value; portable_init may replace it (capped at MULTITHREAD) when the first
   command line argument has the form M<n>. */
ee_u32 default_num_contexts = MULTITHREAD;
/* Function: portable_init
        Target specific initialization code.
        Tests for some common porting mistakes (reported through ee_printf,
        execution continues), optionally consumes a leading "M<n>" argument
        that selects the number of parallel contexts, and marks the port as
        initialized.

        Parameters:
        p    - port state; portable_id is set to 1 before returning.
        argc - in/out argument count; decremented when "M<n>" is consumed.
        argv - argument vector; entries are shifted down by one when "M<n>"
               is consumed.
*/
void
portable_init(core_portable *p, int *argc, char *argv[])
{
#if PRINT_ARGS
    /* Own scope: keeps the loop index from clashing with the one used by
       the multithread argument handling below when both are enabled. */
    {
        int i;
        for (i = 0; i < *argc; i++)
        {
            ee_printf("Arg[%d]=%s\n", i, argv[i]);
        }
    }
#endif
    if (sizeof(ee_ptr_int) != sizeof(ee_u8 *))
    {
        ee_printf(
            "ERROR! Please define ee_ptr_int to a type that holds a "
            "pointer!\n");
    }
    if (sizeof(ee_u32) != 4)
    {
        ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
    }
#if (MAIN_HAS_NOARGC && (SEED_METHOD == SEED_ARG))
    ee_printf(
        "ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n");
#endif
#if (MULTITHREAD > 1) && (SEED_METHOD == SEED_ARG)
    /* Sample of potential platform specific init via command line: reset
       the number of contexts being used if the first argument is M<n>. */
    {
        int nargs = *argc;
        int i;
        if ((nargs > 1) && (*argv[1] == 'M'))
        {
            default_num_contexts = parseval(argv[1] + 1);
            if (default_num_contexts > MULTITHREAD)
            {
                default_num_contexts = MULTITHREAD;
            }
            /* Shift args since first arg is directed to the portable part
             * and not to coremark main */
            --nargs;
            for (i = 1; i < nargs; i++)
            {
                argv[i] = argv[i + 1];
            }
            *argc = nargs;
        }
    }
#endif
    p->portable_id = 1;
}
/* Function: portable_fini
Target specific final code.
The sample implementation only clears portable_id, which portable_init
sets to 1.
*/
void
portable_fini(core_portable *p)
{
p->portable_id = 0;
}
#if (MULTITHREAD > 1)
/* Function: core_start_parallel
Start benchmarking in a parallel context.
Three implementations are provided, one using pthreads, one using fork
and shared mem, and one using fork and sockets. Other implementations using
MCAPI or other standards can easily be devised.
*/
/* Function: core_stop_parallel
Stop a parallel context execution of coremark, and gather the results.
Three implementations are provided, one using pthreads, one using fork
and shared mem, and one using fork and sockets. Other implementations using
MCAPI or other standards can easily be devised.
*/
#if USE_PTHREAD
/* PThreads variant: run iterate(res) on a new thread whose handle is kept
   in res->port.thread. Returns the pthread_create() result narrowed to
   ee_u8. */
ee_u8
core_start_parallel(core_results *res)
{
    pthread_t *worker = &(res->port.thread);
    int        rc     = pthread_create(worker, NULL, iterate, (void *)res);
    return (ee_u8)rc;
}
/* PThreads variant: wait for the thread stored in res->port.thread. The
   thread's exit value is collected but not used. Returns the pthread_join()
   result narrowed to ee_u8. */
ee_u8
core_stop_parallel(core_results *res)
{
    void *thread_exit_value;
    int   rc = pthread_join(res->port.thread, &thread_exit_value);
    return (ee_u8)rc;
}
#elif USE_FORK
/* Offset added to the base System V IPC key so that every context started
   gets its own shared memory segment. */
static int key_id = 0;
/* Fork + shared memory variant.
   Forks a child that runs iterate(res) and then copies the 8 bytes starting
   at res->crc into a shared memory segment for the parent to collect.
   Both processes look the segment up by the same key.

   Returns 1 when the child was started, 0 when fork() failed. The child
   never returns from this function. */
ee_u8
core_start_parallel(core_results *res)
{
    key_t key = 4321 + key_id;
    key_id++;
    res->port.pid   = fork();
    res->port.shmid = shmget(key, 8, IPC_CREAT | 0666);
    if (res->port.shmid < 0)
    {
        ee_printf("ERROR in shmget!\n");
    }
    if (res->port.pid < 0)
    {
        /* No child exists: report it instead of claiming success. */
        ee_printf("ERROR in fork!\n");
        return 0;
    }
    if (res->port.pid == 0)
    {
        iterate(res);
        res->port.shm = shmat(res->port.shmid, NULL, 0);
        /* copy the validation values to the shared memory area and quit*/
        if (res->port.shm == (void *)-1)
        {
            ee_printf("ERROR in child shmat!\n");
        }
        else
        {
            memcpy(res->port.shm, &(res->crc), 8);
            shmdt(res->port.shm);
        }
        exit(0);
    }
    return 1;
}
/* Fork + shared memory variant.
   Waits for the child recorded in res->port.pid, then copies the 8 bytes
   the child left in the shared memory segment back to res->crc and removes
   the segment.

   Returns 1 on success, 0 when there is no child to wait for, when waiting
   fails, or when the segment cannot be attached. */
ee_u8
core_stop_parallel(core_results *res)
{
    int   status;
    pid_t wpid;

    if (res->port.pid < 0)
    {
        /* fork() failed earlier; waitpid(-1, ...) would wait for (and
           possibly reap) an unrelated child. */
        ee_printf("ERROR waiting for child.\n");
        return 0;
    }
    wpid = waitpid(res->port.pid, &status, WUNTRACED);
    if (wpid != res->port.pid)
    {
        /* Save errno before printing, which may overwrite it. */
        int wait_errno = errno;
        ee_printf("ERROR waiting for child.\n");
        if (wait_errno == ECHILD)
        {
            ee_printf("errno=No such child %d\n", res->port.pid);
        }
        if (wait_errno == EINTR)
        {
            ee_printf("errno=Interrupted\n");
        }
        return 0;
    }
    /* after process is done, get the values from the shared memory area */
    res->port.shm = shmat(res->port.shmid, NULL, 0);
    if (res->port.shm == (void *)-1)
    {
        ee_printf("ERROR in parent shmat!\n");
        return 0;
    }
    memcpy(&(res->crc), res->port.shm, 8);
    shmdt(res->port.shm);
    /* System V segments outlive the process unless removed explicitly. */
    if (shmctl(res->port.shmid, IPC_RMID, NULL) < 0)
    {
        ee_printf("ERROR in shmctl!\n");
    }
    return 1;
}
#elif USE_SOCKET
/* Offset added to the base UDP port so that every context started gets its
   own port. */
static int key_id = 0;
/* Fork + UDP socket variant.
   Forks a child that runs iterate(res) and then sends the 8 bytes starting
   at res->crc to a loopback UDP port. The parent opens and binds the
   receiving socket, kept in res->port.sock.

   Returns 1 when the child is running and the receiving socket is bound.
   Returns 0 on failure, with res->port.sock set to -1 so that a later
   receive fails immediately instead of blocking. The child never returns
   from this function. */
ee_u8
core_start_parallel(core_results *res)
{
    int bound, buffer_length = 8;
    res->port.sa.sin_family      = AF_INET;
    res->port.sa.sin_addr.s_addr = htonl(0x7F000001); /* 127.0.0.1 */
    res->port.sa.sin_port        = htons(7654 + key_id);
    key_id++;
    res->port.pid = fork();
    if (res->port.pid < 0)
    {
        /* Nobody will ever send results: do not open a socket to wait on. */
        ee_printf("fork(): %s\n", strerror(errno));
        res->port.sock = -1;
        return 0;
    }
    if (res->port.pid == 0)
    { /* benchmark child */
        iterate(res);
        res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
        if (-1 == res->port.sock) /* if socket failed to initialize, exit */
        {
            ee_printf("Error Creating Socket");
        }
        else
        {
            int bytes_sent = sendto(res->port.sock,
                                    &(res->crc),
                                    buffer_length,
                                    0,
                                    (struct sockaddr *)&(res->port.sa),
                                    sizeof(struct sockaddr_in));
            if (bytes_sent < 0)
            {
                ee_printf("Error sending packet: %s\n", strerror(errno));
            }
            close(res->port.sock); /* close the socket */
        }
        exit(0);
    }
    /* parent process, open the socket */
    res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (res->port.sock < 0)
    {
        ee_printf("socket(): %s\n", strerror(errno));
        return 0;
    }
    bound = bind(res->port.sock,
                 (struct sockaddr *)&(res->port.sa),
                 sizeof(res->port.sa));
    if (bound < 0)
    {
        /* An unbound socket can never receive the child's datagram. */
        ee_printf("bind(): %s\n", strerror(errno));
        close(res->port.sock);
        res->port.sock = -1;
        return 0;
    }
    return 1;
}
/* Fork + UDP socket variant.
   Receives the 8 result bytes sent by the child into res->crc, closes the
   receiving socket and waits for the child recorded in res->port.pid.

   Returns 1 when the results were received and the child was reaped,
   0 otherwise. */
ee_u8
core_stop_parallel(core_results *res)
{
    int       status;
    ee_u8     received = 1;
    pid_t     wpid;
    /* recvfrom() takes a socklen_t *, not an int *. */
    socklen_t fromlen = sizeof(res->port.sa);
    ssize_t   recsize = recvfrom(res->port.sock,
                               &(res->crc),
                               8,
                               0,
                               (struct sockaddr *)&(res->port.sa),
                               &fromlen);
    if (recsize < 0)
    {
        ee_printf("Error in receive: %s\n", strerror(errno));
        received = 0;
    }
    /* The socket is only needed for this single datagram. */
    if (res->port.sock >= 0)
    {
        close(res->port.sock);
        res->port.sock = -1;
    }
    if (res->port.pid < 0)
    {
        /* fork() failed earlier; waitpid(-1, ...) would wait for an
           unrelated child. */
        return 0;
    }
    /* Reap the child even when the receive failed, so no zombie is left. */
    wpid = waitpid(res->port.pid, &status, WUNTRACED);
    if (wpid != res->port.pid)
    {
        /* Save errno before printing, which may overwrite it. */
        int wait_errno = errno;
        ee_printf("ERROR waiting for child.\n");
        if (wait_errno == ECHILD)
        {
            ee_printf("errno=No such child %d\n", res->port.pid);
        }
        if (wait_errno == EINTR)
        {
            ee_printf("errno=Interrupted\n");
        }
        return 0;
    }
    return received;
}
#else /* no standard multicore implementation */
#error \
"Please implement multicore functionality in core_portme.c to use multiple contexts."
#endif /* multithread implementations */
#endif

View File

@ -0,0 +1,314 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
/* Topic: Description
This file contains configuration constants required to execute on
different platforms
*/
#ifndef CORE_PORTME_H
#define CORE_PORTME_H
#include "core_portme_posix_overrides.h"
/************************/
/* Data types and settings */
/************************/
/* Configuration: HAS_FLOAT
Define to 1 if the platform supports floating point.
*/
#ifndef HAS_FLOAT
#define HAS_FLOAT 1
#endif
/* Configuration: HAS_TIME_H
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
*/
#ifndef HAS_TIME_H
#define HAS_TIME_H 1
#endif
/* Configuration: USE_CLOCK
Define to 1 to time the benchmark with clock() from time.h
instead of the platform's default timer implementation.
*/
#ifndef USE_CLOCK
#define USE_CLOCK 0
#endif
/* Configuration: HAS_STDIO
Define to 1 if the platform has stdio.h.
*/
#ifndef HAS_STDIO
#define HAS_STDIO 1
#endif
/* Configuration: HAS_PRINTF
Define to 1 if the platform has stdio.h and implements the printf
function.
*/
#ifndef HAS_PRINTF
#define HAS_PRINTF 1
#endif
/* Configuration: CORE_TICKS
Define type of return from the timing functions.
*/
#if defined(_MSC_VER)
#include <windows.h>
typedef size_t CORE_TICKS;
#elif HAS_TIME_H
#include <time.h>
typedef clock_t CORE_TICKS;
#else
#error \
"Please define type of CORE_TICKS and implement start_time, end_time get_time and time_in_secs functions!"
#endif
/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
Initialize these strings per platform
*/
#ifndef COMPILER_VERSION
#ifdef __GNUC__
#define COMPILER_VERSION "GCC"__VERSION__
#else
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
#endif
#endif
#ifndef COMPILER_FLAGS
#define COMPILER_FLAGS \
FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
#endif
#ifndef MEM_LOCATION
#define MEM_LOCATION \
"Please put data memory location here\n\t\t\t(e.g. code in flash, data " \
"on heap etc)"
#define MEM_LOCATION_UNSPEC 1
#endif
#include <stdint.h>
/* Data Types:
To avoid compiler issues, define the data types that need to be used for
8b, 16b and 32b in <core_portme.h>.
*Important*:
ee_ptr_int needs to be the data type used to hold pointers, otherwise
coremark may fail!!!
*/
typedef signed short ee_s16;
typedef unsigned short ee_u16;
typedef signed int ee_s32;
typedef double ee_f32;
typedef unsigned char ee_u8;
typedef unsigned int ee_u32;
typedef uintptr_t ee_ptr_int;
typedef size_t ee_size_t;
/* align an offset to point to a 32b value */
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3))
/* Configuration: SEED_METHOD
Defines method to get seed values that cannot be computed at compile
time.
Valid values:
SEED_ARG - from command line.
SEED_FUNC - from a system function.
SEED_VOLATILE - from volatile variables.
*/
#ifndef SEED_METHOD
#define SEED_METHOD SEED_ARG
#endif
/* Configuration: MEM_METHOD
Defines method to get a block of memory.
Valid values:
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
MEM_STATIC - to use a static memory array.
MEM_STACK - to allocate the data block on the stack (NYI).
*/
#ifndef MEM_METHOD
#define MEM_METHOD MEM_MALLOC
#endif
/* Configuration: MULTITHREAD
Define for parallel execution
Valid values:
1 - only one context (default).
N>1 - will execute N copies in parallel.
Note:
If this flag is defined to more than 1, an implementation for launching
parallel contexts must be defined.
Three sample implementations are provided. Use <USE_PTHREAD>, <USE_FORK>
or <USE_SOCKET> to enable them.
It is valid to have a different implementation of <core_start_parallel>
and <core_stop_parallel> in <core_portme.c>, to fit a particular architecture.
*/
#ifndef MULTITHREAD
#define MULTITHREAD 1
#endif
/* Configuration: USE_PTHREAD
Sample implementation for launching parallel contexts
This implementation uses pthread_create and pthread_join.
Valid values:
0 - Do not use pthreads API.
1 - Use pthreads API
Note:
This flag only matters if MULTITHREAD has been defined to a value
greater than 1.
*/
#ifndef USE_PTHREAD
#define USE_PTHREAD 0
#endif
/* Configuration: USE_FORK
Sample implementation for launching parallel contexts
This implementation uses fork, waitpid, shmget,shmat and shmdt.
Valid values:
0 - Do not use fork API.
1 - Use fork API
Note:
This flag only matters if MULTITHREAD has been defined to a value
greater than 1.
*/
#ifndef USE_FORK
#define USE_FORK 0
#endif
/* Configuration: USE_SOCKET
Sample implementation for launching parallel contexts
This implementation uses fork, socket, sendto and recvfrom
Valid values:
0 - Do not use fork and sockets API.
1 - Use fork and sockets API
Note:
This flag only matters if MULTITHREAD has been defined to a value
greater than 1.
*/
#ifndef USE_SOCKET
#define USE_SOCKET 0
#endif
/* Configuration: MAIN_HAS_NOARGC
Needed if platform does not support getting arguments to main.
Valid values:
0 - argc/argv to main is supported
1 - argc/argv to main is not supported
*/
#ifndef MAIN_HAS_NOARGC
#define MAIN_HAS_NOARGC 0
#endif
/* Configuration: MAIN_HAS_NORETURN
Needed if platform does not support returning a value from main.
Valid values:
0 - main returns an int, and return value will be 0.
1 - platform does not support returning a value from main
*/
#ifndef MAIN_HAS_NORETURN
#define MAIN_HAS_NORETURN 0
#endif
/* Variable: default_num_contexts
Number of contexts to spawn in multicore context.
Override this global value to change number of contexts used.
Note:
This value may not be set higher than the <MULTITHREAD> define.
To experiment, you can set the <MULTITHREAD> define to the highest value
expected, and use argc/argv in the <portable_init> to set this value from the
command line.
*/
extern ee_u32 default_num_contexts;
#if (MULTITHREAD > 1)
#if USE_PTHREAD
#include <pthread.h>
#define PARALLEL_METHOD "PThreads"
#elif USE_FORK
#include <unistd.h>
#include <errno.h>
#include <sys/wait.h>
#include <sys/shm.h>
#include <string.h> /* for memcpy */
#define PARALLEL_METHOD "Fork"
#elif USE_SOCKET
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/wait.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#define PARALLEL_METHOD "Sockets"
#else
#define PARALLEL_METHOD "Proprietary"
#error \
"Please implement multicore functionality in core_portme.c to use multiple contexts."
#endif /* Method for multithreading */
#endif /* MULTITHREAD > 1 */
/* Per-context port state. Which members exist depends on the parallel
   method selected at build time; portable_id is always present. */
typedef struct CORE_PORTABLE_S
{
#if (MULTITHREAD > 1)
#if USE_PTHREAD
/* Thread handle filled in by pthread_create and used by pthread_join. */
pthread_t thread;
#elif USE_FORK
/* Result of fork() for this context. */
pid_t pid;
/* Shared memory identifier returned by shmget. */
int shmid;
/* Address returned by shmat while the segment is attached. */
void *shm;
#elif USE_SOCKET
/* Result of fork() for this context. */
pid_t pid;
/* UDP socket descriptor used to pass the results. */
int sock;
/* Loopback address and port the results are sent to. */
struct sockaddr_in sa;
#endif /* Method for multithreading */
#endif /* MULTITHREAD>1 */
/* Set to 1 by portable_init and to 0 by portable_fini. */
ee_u8 portable_id;
} core_portable;
/* target specific init/fini */
void portable_init(core_portable *p, int *argc, char *argv[]);
void portable_fini(core_portable *p);
#if (SEED_METHOD == SEED_VOLATILE)
#if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN)
#define RUN_TYPE_FLAG 1
#else
#if (TOTAL_DATA_SIZE == 1200)
#define PROFILE_RUN 1
#else
#define PERFORMANCE_RUN 1
#endif
#endif
#endif /* SEED_METHOD==SEED_VOLATILE */
#endif /* CORE_PORTME_H */

View File

@ -0,0 +1,151 @@
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Original Author: Shay Gal-on
#File: core_portme.mak
# Flag: OUTFLAG
# Use this flag to define how to get an executable (e.g -o)
OUTFLAG= -o
# Flag: CC
# Use this flag to define compiler to use
CC?= cc
# Flag: CFLAGS
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
PORT_CFLAGS = -O2
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -Iposix -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
# Flag: NO_LIBRT
# Define if the platform does not provide a librt
ifndef NO_LIBRT
#Flag: LFLAGS_END
# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
LFLAGS_END += -lrt
endif
# Flag: PORT_SRCS
# Port specific source files can be added here
PORT_SRCS = posix/core_portme.c
vpath %.c posix
vpath %.h posix
vpath %.mak posix
# Flag: EXTRA_DEPENDS
# Port specific extra build dependencies.
# Some ports inherit from us, so ensure this Makefile is always a dependency.
EXTRA_DEPENDS += posix/core_portme.mak
# Flag: LOAD
# Define this flag if you need to load to a target, as in a cross compile environment.
# Flag: RUN
# Define this flag if running does not consist of simple invocation of the binary.
# In a cross compile environment, you need to define this.
#For flashing and using a tera term macro, you could use
#LOAD = flash ADDR
#RUN = ttpmacro coremark.ttl
#For copying to target and executing via SSH connection, you could use
#LOAD = scp $(OUTFILE) user@target:~
#RUN = ssh user@target -c
#For native compilation and execution
LOAD = echo Loading done
RUN =
OEXT = .o
EXE = .exe
# Flag: SEPARATE_COMPILE
# Define if you need to separate compilation from link stage.
# In this case, you also need to define below how to create an object file, and how to link.
ifdef SEPARATE_COMPILE
LD = gcc
OBJOUT = -o
LFLAGS =
OFLAG = -o
COUT = -c
# Flag: PORT_OBJS
# Port specific object files can be added here
PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT)
PORT_CLEAN = *$(OEXT)
$(OPATH)%$(OEXT) : %.c
$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
endif
# Target: port_prebuild
# Generate any files that are needed before actual build starts.
# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1
# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line.
# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it.
# Note - Using REBUILD=1
#
# Use make PGO=1 to invoke this sample processing.
ifdef PGO
ifeq (,$(findstring $(PGO),gen))
PGO_STAGE=build_pgo_gcc
CFLAGS+=-fprofile-use
endif
PORT_CLEAN+=*.gcda *.gcno gmon.out
endif
.PHONY: port_prebuild
port_prebuild: $(PGO_STAGE)
.PHONY: build_pgo_gcc
build_pgo_gcc:
$(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1
# Target: port_postbuild
# Generate any files that are needed after actual build end.
# E.g. change format to srec, bin, zip in order to be able to load into flash
.PHONY: port_postbuild
port_postbuild:
# Target: port_postrun
# Do platform specific after run stuff.
# E.g. reset the board, backup the logfiles etc.
.PHONY: port_postrun
port_postrun:
# Target: port_prerun
# Do platform specific before run stuff.
# E.g. reset the board, backup the logfiles etc.
.PHONY: port_prerun
port_prerun:
# Target: port_postload
# Do platform specific after load stuff.
# E.g. reset the reset power to the flash eraser
.PHONY: port_postload
port_postload:
# Target: port_preload
# Do platform specific before load stuff.
# E.g. reset the reset power to the flash eraser
.PHONY: port_preload
port_preload:
# FLAG: OPATH
# Path to the output folder. Default - current folder.
OPATH = ./
MKDIR = mkdir -p
# FLAG: PERL
# Define perl executable to calculate the geomean if running separate.
PERL=/usr/bin/perl

View File

@ -0,0 +1,28 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
/* Topic: Description
This file contains additional configuration constants required to execute on
different platforms over and above the POSIX defaults
*/
#ifndef CORE_PORTME_POSIX_OVERRIDES_H
#define CORE_PORTME_POSIX_OVERRIDES_H
/* None by default */
#endif

View File

@ -0,0 +1,18 @@
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Original Author: Shay Gal-on
NO_LIBRT = 1
include posix/core_portme.mak

View File

@ -0,0 +1,63 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2021 Hesham Almatary
*
* This software was developed by SRI International and the University of
* Cambridge Computer Laboratory (Department of Computer Science and
* Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the
* DARPA SSITH research programme.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdlib.h>
#include <bsp.h>
int main(
int argc,
void **args
);
rtems_task Init(
rtems_task_argument ignored
);
/*
 * Init task entry point: calls main() with an argument count of 0 and a
 * NULL argument vector, then hands main()'s return value to exit().
 * The task argument is not used.
 */
rtems_task Init(rtems_task_argument ignored)
{
  exit(main(0, NULL));
}
/* configuration information */
#define CONFIGURE_APPLICATION_NEEDS_SIMPLE_CONSOLE_DRIVER
#define CONFIGURE_APPLICATION_NEEDS_CLOCK_DRIVER
#define CONFIGURE_MAXIMUM_TASKS 20
#define CONFIGURE_RTEMS_INIT_TASKS_TABLE
#define CONFIGURE_INIT
#include <rtems/confdefs.h>

153
riscv-coremark/coremark/simple/core_portme.c Executable file → Normal file
View File

@ -21,108 +21,129 @@ Original Author: Shay Gal-on
#include "coremark.h"
#if VALIDATION_RUN
volatile ee_s32 seed1_volatile=0x3415;
volatile ee_s32 seed2_volatile=0x3415;
volatile ee_s32 seed3_volatile=0x66;
volatile ee_s32 seed1_volatile = 0x3415;
volatile ee_s32 seed2_volatile = 0x3415;
volatile ee_s32 seed3_volatile = 0x66;
#endif
#if PERFORMANCE_RUN
volatile ee_s32 seed1_volatile=0x0;
volatile ee_s32 seed2_volatile=0x0;
volatile ee_s32 seed3_volatile=0x66;
volatile ee_s32 seed1_volatile = 0x0;
volatile ee_s32 seed2_volatile = 0x0;
volatile ee_s32 seed3_volatile = 0x66;
#endif
#if PROFILE_RUN
volatile ee_s32 seed1_volatile=0x8;
volatile ee_s32 seed2_volatile=0x8;
volatile ee_s32 seed3_volatile=0x8;
volatile ee_s32 seed1_volatile = 0x8;
volatile ee_s32 seed2_volatile = 0x8;
volatile ee_s32 seed3_volatile = 0x8;
#endif
volatile ee_s32 seed4_volatile=ITERATIONS;
volatile ee_s32 seed5_volatile=0;
volatile ee_s32 seed4_volatile = ITERATIONS;
volatile ee_s32 seed5_volatile = 0;
/* Porting : Timing functions
How to capture time and convert to seconds must be ported to whatever is supported by the platform.
e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc.
Sample implementation for standard time.h and windows.h definitions included.
How to capture time and convert to seconds must be ported to whatever is
supported by the platform. e.g. Read value from on board RTC, read value from
cpu clock cycles performance counter etc. Sample implementation for standard
time.h and windows.h definitions included.
*/
/* Define : TIMER_RES_DIVIDER
Divider to trade off timer resolution and total time that can be measured.
Divider to trade off timer resolution and total time that can be
measured.
Use lower values to increase resolution, but make sure that overflow does not occur.
If there are issues with the return value overflowing, increase this value.
*/
#define NSECS_PER_SEC CLOCKS_PER_SEC
#define CORETIMETYPE clock_t
#define GETMYTIME(_t) (*_t=clock())
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
#define TIMER_RES_DIVIDER 1
Use lower values to increase resolution, but make sure that overflow
does not occur. If there are issues with the return value overflowing,
increase this value.
*/
#define NSECS_PER_SEC CLOCKS_PER_SEC
#define CORETIMETYPE clock_t
#define GETMYTIME(_t) (*_t = clock())
#define MYTIMEDIFF(fin, ini) ((fin) - (ini))
#define TIMER_RES_DIVIDER 1
#define SAMPLE_TIME_IMPLEMENTATION 1
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
/** Define Host specific (POSIX), or target specific global time variables. */
static CORETIMETYPE start_time_val, stop_time_val;
/* Function : start_time
This function will be called right before starting the timed portion of the benchmark.
This function will be called right before starting the timed portion of
the benchmark.
Implementation may be capturing a system timer (as implemented in the example code)
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
Implementation may be capturing a system timer (as implemented in the
example code) or zeroing some system parameters - e.g. setting the cpu clocks
cycles to 0.
*/
void start_time(void) {
GETMYTIME(&start_time_val );
void
start_time(void)
{
GETMYTIME(&start_time_val);
}
/* Function : stop_time
This function will be called right after ending the timed portion of the benchmark.
This function will be called right after ending the timed portion of the
benchmark.
Implementation may be capturing a system timer (as implemented in the example code)
or other system parameters - e.g. reading the current value of cpu cycles counter.
Implementation may be capturing a system timer (as implemented in the
example code) or other system parameters - e.g. reading the current value of
cpu cycles counter.
*/
void stop_time(void) {
GETMYTIME(&stop_time_val );
void
stop_time(void)
{
GETMYTIME(&stop_time_val);
}
/* Function : get_time
Return an abstract "ticks" number that signifies time on the system.
Actual value returned may be cpu cycles, milliseconds or any other value,
as long as it can be converted to seconds by <time_in_secs>.
This methodology is taken to accomodate any hardware or simulated platform.
The sample implementation returns millisecs by default,
and the resolution is controlled by <TIMER_RES_DIVIDER>
Return an abstract "ticks" number that signifies time on the system.
Actual value returned may be cpu cycles, milliseconds or any other
value, as long as it can be converted to seconds by <time_in_secs>. This
methodology is taken to accomodate any hardware or simulated platform. The
sample implementation returns millisecs by default, and the resolution is
controlled by <TIMER_RES_DIVIDER>
*/
CORE_TICKS get_time(void) {
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
return elapsed;
CORE_TICKS
get_time(void)
{
CORE_TICKS elapsed
= (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
return elapsed;
}
/* Function : time_in_secs
Convert the value returned by get_time to seconds.
Convert the value returned by get_time to seconds.
The <secs_ret> type is used to accomodate systems with no support for floating point.
Default implementation implemented by the EE_TICKS_PER_SEC macro above.
The <secs_ret> type is used to accomodate systems with no support for
floating point. Default implementation implemented by the EE_TICKS_PER_SEC
macro above.
*/
secs_ret time_in_secs(CORE_TICKS ticks) {
secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
return retval;
secs_ret
time_in_secs(CORE_TICKS ticks)
{
secs_ret retval = ((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
return retval;
}
ee_u32 default_num_contexts=1;
ee_u32 default_num_contexts = 1;
/* Function : portable_init
Target specific initialization code
Test for some common mistakes.
Target specific initialization code
Test for some common mistakes.
*/
void portable_init(core_portable *p, int *argc, char *argv[])
void
portable_init(core_portable *p, int *argc, char *argv[])
{
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) {
ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n");
}
if (sizeof(ee_u32) != 4) {
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
}
p->portable_id=1;
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *))
{
ee_printf(
"ERROR! Please define ee_ptr_int to a type that holds a "
"pointer!\n");
}
if (sizeof(ee_u32) != 4)
{
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
}
p->portable_id = 1;
}
/* Function : portable_fini
Target specific final code
Target specific final code
*/
void portable_fini(core_portable *p)
void
portable_fini(core_portable *p)
{
p->portable_id=0;
p->portable_id = 0;
}

176
riscv-coremark/coremark/simple/core_portme.h Executable file → Normal file
View File

@ -17,176 +17,188 @@ Original Author: Shay Gal-on
*/
/* Topic : Description
This file contains configuration constants required to execute on different platforms
This file contains configuration constants required to execute on
different platforms
*/
#ifndef CORE_PORTME_H
#define CORE_PORTME_H
/************************/
/* Data types and settings */
/************************/
/* Configuration : HAS_FLOAT
Define to 1 if the platform supports floating point.
/* Configuration : HAS_FLOAT
Define to 1 if the platform supports floating point.
*/
#ifndef HAS_FLOAT
#ifndef HAS_FLOAT
#define HAS_FLOAT 1
#endif
/* Configuration : HAS_TIME_H
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
*/
#ifndef HAS_TIME_H
#define HAS_TIME_H 1
#endif
/* Configuration : USE_CLOCK
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
*/
#ifndef USE_CLOCK
#define USE_CLOCK 1
#endif
/* Configuration : HAS_STDIO
Define to 1 if the platform has stdio.h.
Define to 1 if the platform has stdio.h.
*/
#ifndef HAS_STDIO
#define HAS_STDIO 1
#endif
/* Configuration : HAS_PRINTF
Define to 1 if the platform has stdio.h and implements the printf function.
Define to 1 if the platform has stdio.h and implements the printf
function.
*/
#ifndef HAS_PRINTF
#define HAS_PRINTF 1
#endif
/* Configuration : CORE_TICKS
Define type of return from the timing functions.
Define type of return from the timing functions.
*/
#include <time.h>
typedef clock_t CORE_TICKS;
/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
Initialize these strings per platform
Initialize these strings per platform
*/
#ifndef COMPILER_VERSION
#ifdef __GNUC__
#define COMPILER_VERSION "GCC"__VERSION__
#else
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
#endif
#ifndef COMPILER_VERSION
#ifdef __GNUC__
#define COMPILER_VERSION "GCC"__VERSION__
#else
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
#endif
#ifndef COMPILER_FLAGS
#define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
#endif
#ifndef MEM_LOCATION
#define MEM_LOCATION "STACK"
#ifndef COMPILER_FLAGS
#define COMPILER_FLAGS \
FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
#endif
#ifndef MEM_LOCATION
#define MEM_LOCATION "STACK"
#endif
/* Data Types :
To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in <core_portme.h>.
*Imprtant* :
ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!!
To avoid compiler issues, define the data types that need to be used for
8b, 16b and 32b in <core_portme.h>.
*Important* :
ee_ptr_int needs to be the data type used to hold pointers, otherwise
coremark may fail!!!
*/
typedef signed short ee_s16;
typedef signed short ee_s16;
typedef unsigned short ee_u16;
typedef signed int ee_s32;
typedef double ee_f32;
typedef unsigned char ee_u8;
typedef unsigned int ee_u32;
typedef ee_u32 ee_ptr_int;
typedef size_t ee_size_t;
typedef signed int ee_s32;
typedef double ee_f32;
typedef unsigned char ee_u8;
typedef unsigned int ee_u32;
typedef ee_u32 ee_ptr_int;
typedef size_t ee_size_t;
/* align_mem :
This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks.
This macro is used to align an offset to point to a 32b value. It is
used in the Matrix algorithm to initialize the input memory blocks.
*/
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3))
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3))
/* Configuration : SEED_METHOD
Defines method to get seed values that cannot be computed at compile time.
Valid values :
SEED_ARG - from command line.
SEED_FUNC - from a system function.
SEED_VOLATILE - from volatile variables.
Defines method to get seed values that cannot be computed at compile
time.
Valid values :
SEED_ARG - from command line.
SEED_FUNC - from a system function.
SEED_VOLATILE - from volatile variables.
*/
#ifndef SEED_METHOD
#define SEED_METHOD SEED_VOLATILE
#endif
/* Configuration : MEM_METHOD
Defines method to get a block of memry.
Valid values :
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
MEM_STATIC - to use a static memory array.
MEM_STACK - to allocate the data block on the stack (NYI).
Defines method to get a block of memry.
Valid values :
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
MEM_STATIC - to use a static memory array.
MEM_STACK - to allocate the data block on the stack (NYI).
*/
#ifndef MEM_METHOD
#define MEM_METHOD MEM_STACK
#endif
/* Configuration : MULTITHREAD
Define for parallel execution
Valid values :
1 - only one context (default).
N>1 - will execute N copies in parallel.
Note :
If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined.
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK> to enable them.
It is valid to have a different implementation of <core_start_parallel> and <core_end_parallel> in <core_portme.c>,
to fit a particular architecture.
Define for parallel execution
Valid values :
1 - only one context (default).
N>1 - will execute N copies in parallel.
Note :
If this flag is defined to more then 1, an implementation for launching
parallel contexts must be defined.
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK>
to enable them.
It is valid to have a different implementation of <core_start_parallel>
and <core_end_parallel> in <core_portme.c>, to fit a particular architecture.
*/
#ifndef MULTITHREAD
#define MULTITHREAD 1
#define USE_PTHREAD 0
#define USE_FORK 0
#define USE_SOCKET 0
#define USE_FORK 0
#define USE_SOCKET 0
#endif
/* Configuration : MAIN_HAS_NOARGC
Needed if platform does not support getting arguments to main.
Valid values :
0 - argc/argv to main is supported
1 - argc/argv to main is not supported
Note :
This flag only matters if MULTITHREAD has been defined to a value greater then 1.
Needed if platform does not support getting arguments to main.
Valid values :
0 - argc/argv to main is supported
1 - argc/argv to main is not supported
Note :
This flag only matters if MULTITHREAD has been defined to a value
greater then 1.
*/
#ifndef MAIN_HAS_NOARGC
#ifndef MAIN_HAS_NOARGC
#define MAIN_HAS_NOARGC 0
#endif
/* Configuration : MAIN_HAS_NORETURN
Needed if platform does not support returning a value from main.
Valid values :
0 - main returns an int, and return value will be 0.
1 - platform does not support returning a value from main
Needed if platform does not support returning a value from main.
Valid values :
0 - main returns an int, and return value will be 0.
1 - platform does not support returning a value from main
*/
#ifndef MAIN_HAS_NORETURN
#define MAIN_HAS_NORETURN 0
#endif
/* Variable : default_num_contexts
Not used for this simple port, must cintain the value 1.
Not used for this simple port, must contain the value 1.
*/
extern ee_u32 default_num_contexts;
typedef struct CORE_PORTABLE_S {
ee_u8 portable_id;
typedef struct CORE_PORTABLE_S
{
ee_u8 portable_id;
} core_portable;
/* target specific init/fini */
void portable_init(core_portable *p, int *argc, char *argv[]);
void portable_fini(core_portable *p);
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) && !defined(VALIDATION_RUN)
#if (TOTAL_DATA_SIZE==1200)
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) \
&& !defined(VALIDATION_RUN)
#if (TOTAL_DATA_SIZE == 1200)
#define PROFILE_RUN 1
#elif (TOTAL_DATA_SIZE==2000)
#elif (TOTAL_DATA_SIZE == 2000)
#define PERFORMANCE_RUN 1
#else
#define VALIDATION_RUN 1

View File

@ -125,6 +125,28 @@ void portable_free(void *p) {
#if SAMPLE_TIME_IMPLEMENTATION
/** Define Host specific (POSIX), or target specific global time variables. */
static CORETIMETYPE start_time_val, stop_time_val;
static unsigned long start_instr_val, stop_instr_val;
/* Function: minstretFunc
This function will count the number of instructions.
*/
unsigned long minstretFunc(void)
{
unsigned long minstretRead = read_csr(minstret);
//ee_printf("Minstret is %lu\n", minstretRead);
return minstretRead;
}
/* Function: minstretDiff
	Return MYTIMEDIFF(stop_instr_val, start_instr_val): the difference between
	the two saved instruction-counter readings, i.e. the number of instructions
	retired between the start and stop samples.
*/
unsigned long minstretDiff(void)
{
	return MYTIMEDIFF(stop_instr_val, start_instr_val);
}
/* Function: start_time
This function will be called right before starting the timed portion of the benchmark.
@ -133,9 +155,10 @@ static CORETIMETYPE start_time_val, stop_time_val;
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
*/
void start_time(void) {
start_instr_val = minstretFunc();
GETMYTIME(start_time_val);
ee_printf("Timer started\n");
ee_printf(" MTIME: %u\n", start_time_val);
//ee_printf("Timer started\n");
//ee_printf(" MTIME: %u\n", start_time_val);
#if CALLGRIND_RUN
CALLGRIND_START_INSTRUMENTATION
#endif
@ -157,8 +180,9 @@ void stop_time(void) {
asm volatile("int3");/*1 */
#endif
GETMYTIME(stop_time_val);
ee_printf("Timer stopped\n");
ee_printf(" MTIME: %u\n", stop_time_val);
stop_instr_val = minstretFunc();
//ee_printf("Timer stopped\n");
//ee_printf(" MTIME: %u\n", stop_time_val);
}
/* Function: get_time
Return an abstract "ticks" number that signifies time on the system.
@ -171,7 +195,8 @@ void stop_time(void) {
*/
CORE_TICKS get_time(void) {
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
ee_printf(" Elapsed MTIME: %u\n", elapsed);
//ee_printf(" Elapsed MTIME: %u\n", elapsed);
//ee_printf(" Elapsed MINSTRET: %lu\n", minstretDiff());
return elapsed;
}
/* Function: time_in_secs
@ -183,7 +208,7 @@ CORE_TICKS get_time(void) {
secs_ret time_in_secs(CORE_TICKS ticks) {
secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
int retvalint = (int)retval;
ee_printf(" RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retvalint);
//ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retvalint);
return retval;
}
#else

View File

@ -34,7 +34,8 @@
`define XLEN 64
//`define MISA (32'h00000104)
`define MISA (32'h00001104 | 1<<5 | 1<<18 | 1 << 20 | 1 << 12 | 1 << 0)
//`define MISA (32'h00001104 | 1<<5 | 1<<18 | 1 << 20 | 1 << 12 | 1 << 0)
`define MISA (32'h00000104 | 0 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0)
`define ZCSR_SUPPORTED 1
`define COUNTERS 32
`define ZCOUNTERS_SUPPORTED 1
@ -53,7 +54,7 @@
`define DTLB_ENTRIES 32
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 16
`define PMP_ENTRIES 64
// Address space
`define RESET_VECTOR 64'h0000000080000000
@ -66,23 +67,23 @@
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
`define BOOTTIM_SUPPORTED 1'b1
`define BOOTTIM_BASE 34'h00001000
`define BOOTTIM_RANGE 34'h00000FFF
`define BOOTTIM_BASE 56'h00001000
`define BOOTTIM_RANGE 56'h00000FFF
`define TIM_SUPPORTED 1'b1
`define TIM_BASE 34'h80000000
`define TIM_RANGE 34'h07FFFFFF
`define TIM_BASE 56'h80000000
`define TIM_RANGE 56'h07FFFFFF
`define CLINT_SUPPORTED 1'b1
`define CLINT_BASE 34'h02000000
`define CLINT_RANGE 34'h0000FFFF
`define CLINT_BASE 56'h02000000
`define CLINT_RANGE 56'h0000FFFF
`define GPIO_SUPPORTED 1'b1
`define GPIO_BASE 34'h10012000
`define GPIO_RANGE 34'h000000FF
`define GPIO_BASE 56'h10012000
`define GPIO_RANGE 56'h000000FF
`define UART_SUPPORTED 1'b1
`define UART_BASE 34'h10000000
`define UART_RANGE 34'h00000007
`define UART_BASE 56'h10000000
`define UART_RANGE 56'h00000007
`define PLIC_SUPPORTED 1'b1
`define PLIC_BASE 34'h0C000000
`define PLIC_RANGE 34'h03FFFFFF
`define PLIC_BASE 56'h0C000000
`define PLIC_RANGE 56'h03FFFFFF
// Test modes

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,106 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// include shared configuration
`include "wally-shared.vh"
`define BUILDROOT 0
`define BUSYBEAR 0
// RV32 or RV64: XLEN = 32 or 64
`define XLEN 32
`define MISA (32'h00000104 | 1 << 5 | 1 << 20 | 1 << 18 | 1 << 12)
`define ZCSR_SUPPORTED 1
`define COUNTERS 32
`define ZCOUNTERS_SUPPORTED 1
// Microarchitectural Features
`define UARCH_PIPELINED 1
`define UARCH_SUPERSCALR 0
`define UARCH_SINGLECYCLE 0
`define MEM_DCACHE 0
`define MEM_DTIM 1
`define MEM_ICACHE 0
`define MEM_VIRTMEM 1
`define VECTORED_INTERRUPTS_SUPPORTED 1
`define ITLB_ENTRIES 32
`define DTLB_ENTRIES 32
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 16
// Address space
`define RESET_VECTOR 32'h80000000
// Peripheral Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
// *** each of these is `PA_BITS wide. Is this parameterizable INSIDE the config file?
`define BOOTTIM_SUPPORTED 1'b1
`define BOOTTIM_BASE 34'h00001000
`define BOOTTIM_RANGE 34'h00000FFF
`define TIM_SUPPORTED 1'b1
`define TIM_BASE 34'h80000000
`define TIM_RANGE 34'h07FFFFFF
`define CLINT_SUPPORTED 1'b1
`define CLINT_BASE 34'h02000000
`define CLINT_RANGE 34'h0000FFFF
`define GPIO_SUPPORTED 1'b1
`define GPIO_BASE 34'h10012000
`define GPIO_RANGE 34'h000000FF
`define UART_SUPPORTED 1'b1
`define UART_BASE 34'h10000000
`define UART_RANGE 34'h00000007
`define PLIC_SUPPORTED 1'b1
`define PLIC_BASE 34'h0C000000
`define PLIC_RANGE 34'h03FFFFFF
// Bus Interface width
`define AHBW 32
// Test modes
// Tie GPIO outputs back to inputs
`define GPIO_LOOPBACK_TEST 1
// Hardware configuration
`define UART_PRESCALE 1
// Interrupt configuration
`define PLIC_NUM_SRC 4
// comment out the following if >=32 sources
`define PLIC_NUM_SRC_LT_32
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 4
`define TWO_BIT_PRELOAD "../config/rv32icfd/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv32icfd/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -46,7 +46,7 @@
`define MEM_DCACHE 0
`define MEM_DTIM 1
`define MEM_ICACHE 0
`define MEM_VIRTMEM 0\1
`define MEM_VIRTMEM 1
`define VECTORED_INTERRUPTS_SUPPORTED 1
`define ITLB_ENTRIES 32
@ -56,10 +56,7 @@
`define PMP_ENTRIES 16
// Address space
`define RESET_VECTOR 64'h0000000080000000
// Bus Interface width
`define AHBW 64
`define RESET_VECTOR 64'h80000000
// Peripheral Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
@ -84,6 +81,9 @@
`define PLIC_BASE 56'h0C000000
`define PLIC_RANGE 56'h03FFFFFF
// Bus Interface width
`define AHBW 64
// Test modes
// Tie GPIO outputs back to inputs
@ -101,6 +101,7 @@
`define TWO_BIT_PRELOAD "../config/rv64icfd/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv64icfd/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -0,0 +1,57 @@
If you do not need to update the Linux image, then go to ./linux-testvectors and
use tvCopier.py or tvLinker.sh to copy/link premade RAMs and testvectors from Tera.
The RAMs are needed for Wally to run the Linux code, and the testvectors are needed
to verify Wally is executing the code correctly.
If you instead wish to regenerate the RAMs and testvectors from a new Linux image,
you'll need to build the new Linux image, simulate it, and parse its output,
as described below.
*To build a new Linux image:
1. Git clone the Buildroot repository to ./buildroot:
git clone https://github.com/buildroot/buildroot.git
For reference, Wally (*** will) be proven to work on an image built using
Buildroot when the following was the most recent commit to the Buildroot repo:
commit 4047e10ed6e20492bae572d4929eaa5d67eed746
Author: Gwenhael Goavec-Merou <gwenhael.goavec-merou@trabucayre.com>
Date: Wed Jun 30 06:27:10 2021 +0200
2. If you wish to modify the configs, then in ./buildroot:
a. Run "make menuconfig" or "make linux-menuconfig" or "make busybox-menuconfig".
b. Use the TUI (terminal UI) to load in the existing configs.
For menuconfig, you can load in the source file from
"../buildroot-config-src/main.config"
For linux-menuconfig or busybox-menuconfig, load in from
"../../../../buildroot-config-src/<type>.config"
because for linux and busybox, make traverses down to
./buildroot/output/build/<linux or busybox>.
One annoying thing about the TUI is that if it has a path already loaded,
then before you can enter the new path to buildroot-config-src, you need to
delete the existing one from the textbox. Doing so requires more than backspace.
Once you've deleted as much of the existing path as you can see, arrow left to
check if there is more text you need to delete.
c. Likewise, when you are done editing, tell the TUI to save to the same location.
3. Finally go to ./buildroot-config-src and run make-buildroot.sh.
This script copies ./buildroot-config-src/main.config to ./buildroot/.config
and then invokes make. This is clumsy but effective because buildroot
sometimes does weird things to .config, like moving it to .config.old and
making a new .config -- doing so can really mess up symbolic/hard links.
4. If you'd like debugging symbols, then reconfigure Buildroot to output "vmlinux"
and run make-buildroot again.
*To generate new RAMs and testvectors from a Linux image:
1. Symlink ./buildroot-image-output to either your new image in ./buildroot/output/images
or the existing image at /courses/e190ax/buildroot-image-output on Tera.
This might require first deleting the empty buildroot-image-output directory.
2. Then run ./testvector-generation/logBuildrootMem.sh to generate RAMs.
3. Then run ./testvector-generation/logAllBuildroot.sh to generate testvectors.
These latter two steps require QEMU.
Note that you can only have one instance of QEMU open at a time!
At least on Tera, it seems. Check "ps -ef" to see if anybody else is running QEMU.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,3 @@
# Install the tracked Buildroot configuration as ../buildroot/.config and build.
# Run this from the directory that contains main.config.
# Stop at the first failure so that make never runs with a stale .config
# or from the wrong directory.
cp main.config ../buildroot/.config || exit 1
cd ../buildroot || exit 1
make

View File

@ -1,9 +0,0 @@
#! /usr/bin/python3
test_dir = '/courses/e190ax/buildroot_boot/'
infiles = ['bootmemGDB.txt', 'ramGDB.txt']
outfiles = ['bootmem.txt', 'ram.txt']
for i in range(len(infiles)):
with open(f'{test_dir}{infiles[i]}', 'r') as f:
with open(f'{test_dir}{outfiles[i]}', 'w') as w:
for l in f:
w.write(f'{"".join([x[2:] for x in l.split()[:0:-1]])}\n')

View File

@ -1,10 +0,0 @@
set pagination off
target extended-remote :1234
b *0xffffffe00020144e
c
c
c
c
set confirm off
kill
q

View File

@ -0,0 +1 @@
This file only exists so that git will create ./.

View File

@ -0,0 +1,10 @@
# Unlink the testvector and RAM files in the current directory.
# Useful when you want to experiment with the testvectors
# without corrupting the stable copies on Tera.
for target in parsedCSRs.txt parsedMemRead.txt parsedMemWrite.txt parsedPC.txt parsedRegs.txt bootmem.txt ram.txt
do
    unlink "$target"
done
echo "Done!"

View File

@ -1,40 +0,0 @@
# Runs QEMU under GDB control and turns QEMU's execution log into the
# trace files used by the buildroot testbench. Exactly one of the command
# lines below is meant to be active at a time; the rest are kept as
# commented-out alternatives for debugging individual stages.
#
# Oftentimes this script runs so long you'll go to sleep.
# But you don't want the script to die when your computer goes to sleep.
# So consider invoking this with nohup (i.e. "nohup ./logAllBuildroot.sh")
# You can run "tail -f nohup.out" to see what would have been
# printed to the terminal if you had not used nohup
# =========== Debug the Process ==========
# Uncomment this version for GDB/QEMU debugging
# - Opens up GDB interactively
# - Logs raw QEMU output to qemu_output.txt
#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2> qemu_output.txt) & riscv64-unknown-elf-gdb
# Uncomment this version to generate qemu_output.txt
# - Uses GDB script
# - Logs raw QEMU output to qemu_output.txt
#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>qemu_output.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog_debug
# Uncomment this version for parse_qemu.py debugging
# - Uses qemu_output.txt
# - Makes qemu_in_gdb_format.txt
# - Logs parse_qemu.py's simulated gdb output to qemu_in_gdb_format.txt
#cat qemu_output.txt | ./parse_qemu.py >qemu_in_gdb_format.txt
#cat qemu_output.txt | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/"
# Uncomment this version in case you just want to have qemu_in_gdb_format.txt around
# It is often helpful for general debugging
# (This is the variant that is currently active.)
# Redirection order matters: "2>&1 >/dev/null" sends QEMU's stderr into the
# pipe to parse_qemu.py while QEMU's stdout is discarded. QEMU runs in a
# backgrounded subshell; GDB runs in the foreground with gdbinit_qemulog.
(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py >/courses/e190ax/buildroot_boot/qemu_in_gdb_format.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog
# Split qemu_in_gdb_format.txt into chunks of 100,000 instructions for easier inspection
#cd /courses/e190ax/buildroot_boot
#split -d -l 5600000 qemu_in_gdb_format.txt --verbose
# Uncomment this version for parse_gdb_output.py debugging
# - Uses qemu_in_gdb_format.txt
# - Logs info needed by buildroot testbench
#cat qemu_in_gdb_format.txt | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/"
# =========== Just Do the Thing ==========
# Uncomment this version for the whole thing
# - Logs info needed by buildroot testbench
#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog

View File

@ -1,4 +0,0 @@
# Start QEMU in a backgrounded subshell with stdout and stderr discarded,
# then run GDB in the foreground with the gdbinit_mem command script.
(
    qemu-system-riscv64 -M virt -nographic \
        -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf \
        -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image \
        -append "root=/dev/vda ro" \
        -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio \
        -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S \
        >/dev/null 2>&1
) &
riscv64-unknown-elf-gdb -x gdbinit_mem
#sed -i '$d' $file
echo "Done"

View File

@ -0,0 +1,11 @@
#! /usr/bin/python3
# Rewrites the GDB memory dumps found in the intermediate-outputs directory
# as one hex string per line in ../linux-testvectors/.
#
# For every input line the whitespace-separated fields are taken, the first
# field is dropped, the remaining fields are visited last-to-first, the first
# two characters of each are removed, and the results are concatenated.
# Presumably the first field is GDB's address label and the others are
# "0x"-prefixed bytes (the dumps are produced with x/...xb) - confirm against
# an actual dump.
test_dir = '../'
gdbMemfileDir = '../linux-testvectors/intermediate-outputs/'
fixedMemfileDir = '../linux-testvectors/'
infiles = ['bootmemGDB.txt', 'ramGDB.txt']
outfiles = ['bootmem.txt', 'ram.txt']

for srcName, dstName in zip(infiles, outfiles):
    with open(gdbMemfileDir + srcName, 'r') as dump, \
         open(fixedMemfileDir + dstName, 'w') as fixed:
        for row in dump:
            # [:0:-1] walks from the last field down to index 1, skipping index 0.
            reversedFields = row.split()[:0:-1]
            word = ''.join(field[2:] for field in reversedFields)
            fixed.write(word + '\n')

View File

@ -0,0 +1,3 @@
# GDB command script for interactive kernel debugging: loads symbols from the
# buildroot vmlinux image and attaches to a GDB server on local port 1236.
# No further commands follow, so GDB is left at its prompt after attaching.
file ../buildroot-image-output/vmlinux
set pagination off
target extended-remote :1236

View File

@ -1,20 +1,20 @@
set pagination off
target extended-remote :1234
target extended-remote :1235
set logging overwrite on
set logging redirect on
printf "Creating bootmemGDB.txt\n"
set logging file /courses/e190ax/buildroot_boot/bootmemGDB.txt
set logging file ../linux-testvectors/intermediate-outputs/bootmemGDB.txt
set logging on
x/4096xb 0x1000
set logging off
printf "Creating bootmem_untrimmed_GDB.txt\n"
printf "Warning - please verify that the second half of bootmem_untrimmed_GDB.txt is all 0s\n"
set logging file /courses/e190ax/buildroot_boot/bootmem_untrimmed_GDB.txt
set logging file ../linux-testvectors/intermediate-outputs/bootmem_untrimmed_GDB.txt
set logging on
x/8192xb 0x1000
set logging off
printf "Creating ramGDB.txt\n"
set logging file /courses/e190ax/buildroot_boot/ramGDB.txt
set logging file ../linux-testvectors/intermediate-outputs/ramGDB.txt
set logging on
x/134217728xb 0x80000000
set logging off

View File

@ -0,0 +1,11 @@
# GDB command script: attach to the GDB server on local port 1236, let the
# target run until the breakpoint at arch_cpu_idle has been reached four
# times, then kill the target and quit GDB.
set pagination off
target extended-remote :1236
file ../buildroot-image-output/vmlinux
b arch_cpu_idle
# Each "c" resumes until the next stop; four in a row means execution ends at
# the fourth stop (the fourth breakpoint hit, assuming nothing else stops the
# target first).
c
c
c
c
# Disable confirmation prompts so kill/quit run unattended.
set confirm off
kill
q

View File

@ -1,9 +1,10 @@
set pagination off
target extended-remote :1234
maint print symbols symbols.txt
b *0x000000008020103c
c
del 1
stepi 100
stepi 100000
set confirm off
kill
q

View File

@ -0,0 +1,44 @@
# Runs QEMU under GDB control and converts QEMU's execution log into the
# testvector files written to $outDir. Exactly one of the command lines below
# is meant to be active at a time; the others are commented-out alternatives
# for debugging individual stages of the flow.
#
# Oftentimes this script runs so long you'll go to sleep.
# But you don't want the script to die when your computer goes to sleep.
# So consider invoking this with nohup (i.e. "nohup ./logAllBuildroot.sh")
# You can run "tail -f nohup.out" to see what would have been
# printed to the terminal if you had not used nohup
customQemu="/courses/e190ax/qemu_sim/rv64_initrd/qemu_experimental/qemu/build/qemu-system-riscv64"
#customQemu="qemu-system-riscv64"
imageDir="../buildroot-image-output"
intermedDir="../linux-testvectors/intermediate-outputs"
outDir="../linux-testvectors"
# =========== Debug the Process ==========
# Uncomment this version for QEMU debugging of kernel
# - good for poking around VM if it boots up
# - good for running QEMU commands (press "Ctrl-A" then "c" to open QEMU command prompt)
#$customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio
# Uncomment this version for GDB debugging of kernel
# - attempts to load in symbols from "vmlinux"
# - good for looking at backtraces when Linux gets stuck for some reason
#$customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -gdb tcp::1236 -S & riscv64-unknown-elf-gdb -x gdbinit_debug
# Uncomment this version to generate qemu_output.txt
# - Uses GDB script
# - Logs raw QEMU output to qemu_output.txt
#($customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -gdb tcp::1236 -S 2> $intermedDir/qemu_output.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog_debug
# Uncomment this version for parse_qemu.py debugging
# - Uses qemu_output.txt
# - Makes qemu_in_gdb_format.txt
# - Splits qemu_in_gdb_format.txt into chunks of 100,000 instrs
#cat $intermedDir/qemu_output.txt | ./parse_qemu.py >$intermedDir/qemu_in_gdb_format.txt
#cd $intermedDir
#split -d -l 5600000 ./qemu_in_gdb_format.txt --verbose
#cd ../../testvector-generation
# Uncomment this version for parse_gdb_output.py debugging
# - Uses qemu_in_gdb_format.txt
# - Makes testvectors
#cat $intermedDir/qemu_in_gdb_format.txt | ./parse_gdb_output.py "$outDir"
# =========== Just Do the Thing ==========
# This version does the whole thing (it is the one currently enabled)
# - Logs info needed by buildroot testbench
# Redirection order matters: "2>&1 >/dev/null" sends QEMU's stderr into the
# pipe to parse_qemu.py while QEMU's stdout is discarded. The QEMU pipeline
# runs in a backgrounded subshell; GDB runs in the foreground with
# gdbinit_qemulog and talks to QEMU's GDB server on tcp port 1236.
($customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -gdb tcp::1236 -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "$outDir") & riscv64-unknown-elf-gdb -x gdbinit_qemulog

View File

@ -0,0 +1,7 @@
# Dumps memory through GDB and then converts the dumps with fix_mem.py.
# QEMU runs in a backgrounded subshell with stdout and stderr discarded and
# exposes a GDB server on tcp port 1235; GDB runs in the foreground with the
# gdbinit_mem command script.
customQemu="/courses/e190ax/qemu_sim/rv64_initrd/qemu_experimental/qemu/build/qemu-system-riscv64"
imageDir="../buildroot-image-output"

(
    "$customQemu" -M virt -nographic \
        -bios "$imageDir/fw_jump.elf" \
        -kernel "$imageDir/Image" \
        -append "root=/dev/vda ro" \
        -initrd "$imageDir/rootfs.cpio" \
        -d nochain,cpu,in_asm -serial /dev/null -singlestep \
        -gdb tcp::1235 -S \
        >/dev/null 2>&1
) &

riscv64-unknown-elf-gdb -x gdbinit_mem

echo "Translating Mem from GDB to Questa format"
./fix_mem.py
echo "Done"

View File

@ -9,7 +9,7 @@ csrs = ['fcsr','mcause','mcounteren','medeleg','mepc','mhartid','mideleg','mie',
list(map(csrs.remove, ['fcsr','mhartid','pmpcfg0','pmpaddr0','mip']))
#output_path = '/courses/e190ax/busybear_boot_new/'
#output_path = '/courses/e190ax/buildroot_boot/'
output_path = sys.argv[1]
output_path = sys.argv[1]+'/'
print(f'output dir: {output_path}')
instrs = -1
try:

View File

@ -3,13 +3,16 @@ import fileinput, sys
sys.stderr.write("reminder: this script takes input from stdin\n")
parseState = "idle"
beginPageFault = 0
inPageFault = 0
endPageFault = 0
CSRs = {}
pageFaultCSRs = {}
regs = {}
pageFaultRegs = {}
instrs = {}
instrCount = 0
returnAdr = 0
def printPC(l):
global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs, instrCount
@ -33,8 +36,8 @@ def printCSRs():
def parseCSRs(l):
global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs
if l.strip() and (not l.startswith("Disassembler")) and (not l.startswith("Please")):
if l.startswith(' x0/zero'):
if l.strip() and (not l.startswith("Disassembler")) and (not l.startswith("Please")) and not inPageFault:
if l.startswith(' x0/zero'):
parseState = "regFile"
instr = instrs[CSRs["pc"]]
printPC(instr)
@ -42,24 +45,31 @@ def parseCSRs(l):
else:
csr = l.split()[0]
val = int(l.split()[1],16)
if inPageFault:
# Commented out this conditional because the pageFault instrs don't corrupt CSRs
#if inPageFault:
# Not sure if these CSRs should be updated or not during page fault.
if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"):
#if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"):
# We do update some CSRs
CSRs[csr] = val
else:
# CSRs[csr] = val
#else:
# Others we preserve until changed later
pageFaultCSRs[csr] = val
elif pageFaultCSRs and (csr in pageFaultCSRs):
if (val != pageFaultCSRs[csr]):
del pageFaultCSRs[csr]
CSRs[csr] = val
# pageFaultCSRs[csr] = val
#elif pageFaultCSRs and (csr in pageFaultCSRs):
# if (val != pageFaultCSRs[csr]):
# del pageFaultCSRs[csr]
# CSRs[csr] = val
#else:
# CSRs[csr] = val
#
# However SEPC and STVAL do get corrupted upon exiting
if endPageFault and ((csr == 'sepc') or (csr == 'stval')):
CSRs[csr] = returnAdr
else:
CSRs[csr] = val
def parseRegs(l):
global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs
if "mcounteren" in l:
if "pc" in l:
printCSRs()
# New non-disassembled instruction
parseState = "CSRs"
@ -100,8 +110,12 @@ for l in fileinput.input():
elif (parseState == "instr") and l.startswith('0x'):
if "out of bounds" in l:
sys.stderr.write("Detected QEMU page fault error\n")
# A page fault begins only if we are not already inside one. Logical "not" is
# required here: bitwise "~" on an int is always truthy (~0 == -1, ~1 == -2),
# which would make every "out of bounds" line look like the start of a fault.
beginPageFault = not inPageFault
if beginPageFault:
returnAdr = int(l.split()[0][2:-1], 16)
inPageFault = 1
else:
endPageFault = inPageFault
inPageFault = 0
adr = int(l.split()[0][2:-1], 16)
instrs[adr] = l

View File

@ -0,0 +1,3 @@
# Runs vsim in command-line mode (-c) and feeds it one command on stdin via a
# here-document: execute wally-pipelined-batch-rv32icfd.do with the arguments
# "../config/rv32icfd" and "rv32icfd".
vsim -c <<!
do wally-pipelined-batch-rv32icfd.do ../config/rv32icfd rv32icfd
!

View File

@ -0,0 +1 @@
# Starts vsim and has it execute wally-pipelined-rv32icfd.do (no -c, so the
# simulator is not forced into command-line mode).
vsim -do wally-pipelined-rv32icfd.do

View File

@ -0,0 +1,42 @@
# wally-pipelined-batch.do
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# Use this wally-pipelined-batch.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined-batch.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined-batch.do -c
# (omit the "-c" to see the GUI while running from the shell)
#
# Arguments as used below:
#   $1 - configuration include directory (passed to vlog as +incdir+$1)
#   $2 - suffix for the per-run libraries work_$2 and workopt_$2
onbreak {resume}
# create library
# An existing work_$2 library is deleted first so every run starts clean.
if [file exists work_$2] {
vdel -lib work_$2 -all
}
vlib work_$2
# compile source files
# suppress spurious warnings about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# With no arguments the include directory defaults to ../config/rv32icfd;
# it can be overridden at the command line.
# NOTE(review): only the two-argument case compiles into work_$2 (-work);
# the 0- and 1-argument cases omit -work, yet work_$2 is referenced
# unconditionally above and below - confirm those cases are still usable.
switch $argc {
0 {vlog +incdir+../config/rv32icfd +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
1 {vlog +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
2 {vlog -work work_$2 +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
}
# start and run simulation
# vopt is run without the +acc flag here: faster sim during regressions, at
# the cost of access to internal signals
vopt work_$2.testbench -work work_$2 -o workopt_$2
vsim -lib work_$2 workopt_$2
run -all
quit

View File

@ -0,0 +1,50 @@
# wally-pipelined.do
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library
# An existing "work" library is deleted first so every run starts clean.
if [file exists work] {
vdel -all
}
vlib work
# compile source files
# suppress spurious warnings about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# With no arguments the include directory defaults to ../config/rv32icfd;
# it can be overridden at the command line. For example:
# do wally-pipelined.do ../config/rv32ic
# Note that the one-argument case additionally compiles
# ../testbench/function_radix.sv, which the default case does not.
switch $argc {
0 {vlog +incdir+../config/rv32icfd +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
1 {vlog +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../testbench/function_radix.sv ../src/*/*.sv -suppress 2583}
}
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
vopt +acc work.testbench -o workopt
vsim workopt
view wave
# NOTE(review): the lines below that start with "--" are not Tcl "#"
# comments; confirm the simulator tolerates them before relying on the
# commands that follow.
-- display input and output signals as hexidecimal values
do ./wave-dos/default-waves.do
-- Run the Simulation
#run 5000
run -all
#quit
# Close the testbench source view and bring the wave window back up.
noview ../testbench/testbench-imperas.sv
view wave

View File

@ -152,7 +152,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/PCTargetE
add wave -noupdate -radix hexadecimal /testbench/dut/hart/CSRReadValW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/PrivilegedNextPCM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/MemRWM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/InstrValidW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/InstrValidM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/InstrMisalignedFaultM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/DataMisalignedM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/IllegalBaseInstrFaultD
@ -168,7 +168,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/CSRWritePendingDEM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/LoadStallD
add wave -noupdate -radix hexadecimal /testbench/dut/hart/SetFflagsM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/FRM_REGW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/FloatRegWriteW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/FRegWriteM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/MemRWAlignedM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/Funct3M
add wave -noupdate -radix hexadecimal /testbench/dut/hart/MemAdrM
@ -337,7 +337,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/Funct3M
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/ReadDataW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/CSRReadValW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/PCLinkW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/InstrValidW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/InstrValidM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/StallD
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/FlushD
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/FlushE
@ -397,7 +397,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/RegWriteM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/FlushW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/RegWriteW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/ResultSrcW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/InstrValidW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/InstrValidM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/CSRWritePendingDEM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/RegWriteD
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/RegWriteE
@ -740,8 +740,8 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/CSRReadValW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/PrivilegedNextPCM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/RetM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/TrapM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/InstrValidW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/FloatRegWriteW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/InstrValidM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/FRegWriteM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/LoadStallD
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/PrivilegedM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/InstrMisalignedFaultM
@ -842,8 +842,8 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/uretM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/TimerIntM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/ExtIntM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/SwIntM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/InstrValidW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/FloatRegWriteW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/InstrValidM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/FRegWriteM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/LoadStallD
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/NextPrivilegeModeM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/PrivilegeModeW
@ -937,7 +937,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/WriteSSTATUSM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/WriteUSTATUSM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/TrapM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/FloatRegWriteW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/FRegWriteM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/NextPrivilegeModeM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/PrivilegeModeW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/mretM
@ -972,7 +972,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/STATUS_UIE
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/clk
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/reset
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/InstrValidW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/InstrValidM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/LoadStallD
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/CSRMWriteM
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/CSRAdrM

View File

@ -8,7 +8,7 @@ add wave /testbench/clk
add wave /testbench/reset
add wave -divider
#add wave /testbench/dut/hart/ebu/IReadF
add wave /testbench/dut/hart/DataStall
#add wave /testbench/dut/hart/DataStall
add wave /testbench/dut/hart/ICacheStallF
add wave /testbench/dut/hart/StallF
add wave /testbench/dut/hart/StallD

View File

@ -1,65 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
//
// Block Name:  add.v
// Author:      David Harris
// Date:        11/12/1995
//
// Block Description:
//   This block performs the addition of the product and addend.  It also
//   contains logic necessary to adjust the signs for effective subtracts
//   and negative results.
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
module add(rM, sM, tM, sum,
           negsum, invz, selsum1, negsum0, negsum1, killprodM);
////////////////////////////////////////////////////////////////////////////////

    input  logic [105:0] rM;        // partial product 1
    input  logic [105:0] sM;        // partial product 2
    input  logic [163:0] tM;        // aligned addend
    input  logic         invz;      // invert addend
    input  logic         selsum1;   // select +1 mode of compound adder
    input  logic         killprodM; // z >> product
    input  logic         negsum;    // Negate sum
    output logic [163:0] sum;       // sum
    output logic         negsum0;   // sum was negative in +0 mode
    output logic         negsum1;   // sum was negative in +1 mode

    // Internal nodes
    wire [105:0] r2;    // partial product 1, zeroed when killprodM is set
    wire [105:0] s2;    // partial product 2, zeroed when killprodM is set
    wire [164:0] t2;    // addend (zero-extended by one bit) after optional inversion
    wire [164:0] prod;  // r2+s2 (106 bits, carry-out dropped) placed at bits [107:2]
    wire [164:0] sum0;  // sum of compound adder +0 mode
    wire [164:0] sum1;  // sum of compound adder +1 mode

    // Invert addend if z's sign is different from the product's sign
    assign t2 = invz ? ~{1'b0,tM} : {1'b0,tM};

    // Zero out the product when it is to be ignored (killprodM)
    assign r2 = killprodM ? 106'b0 : rM;
    assign s2 = killprodM ? 106'b0 : sM;

    //***replace this with a more structural cpa that synthesizes better

    // Compound adder: both t2+prod and t2+prod+1 are formed so the +1 that
    // completes a two's-complement inversion of z can be selected afterwards.
    // r2+s2 sits inside a concatenation, so it is evaluated at 106 bits.
    assign prod = {57'b0, r2+s2, 2'b0};
    assign sum0 = t2 + prod;
    assign sum1 = t2 + prod + 165'b1;   // +1 from invert of z above

    // Check sign bits in +0/+1 modes
    assign negsum0 = sum0[164];
    assign negsum1 = sum1[164];

    // Mux proper result (+0/+1 mode and optional negation) using a 4:1 mux
    assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0])
                         : (negsum ? -sum0[163:0] : sum0[163:0]);

endmodule

View File

@ -1,88 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name:  align.v
// Author:      David Harris
// Date:        11/2/1995
//
// Block Description:
//   This block implements the alignment shifter.  It is responsible for
//   adjusting the fraction portion of the addend relative to the fraction
//   produced in the multiplier array.
//
/////////////////////////////////////////////////////////////////////////////

/////////////////////////////////////////////////////////////////////////////
module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
             killprodE, sumshiftE, sumshiftzeroE);
/////////////////////////////////////////////////////////////////////////////

    input  logic [51:0]  zman;          // Fraction of addend z
    input  logic [12:0]  aligncntE;     // amount to shift (compared as a signed value)
    input  logic         xzeroE;        // Input X = 0
    input  logic         yzeroE;        // Input Y = 0
    input  logic         zzeroE;        // Input Z = 0
    input  logic         zdenormE;      // Input Z is denormalized
    output logic [163:0] tE;            // aligned addend (54 bits left of bpt)
    output logic         bsE;           // sticky bit of addend
    output logic         killprodE;     // Z >> product
    output logic [8:0]   sumshiftE;     // right-shift amount applied to the addend
    output logic         sumshiftzeroE; // set only in the saturated addend-anchored case

    // Internal nodes
    reg   [215:0] shift;    // aligned addend from shifter
    logic [12:0]  tmp;      // 57 - aligncntE
    logic         leadbit;  // bit placed above zman before shifting

    always_comb
        begin
            // Defaults. Every variable written in this block gets a value on
            // every path, so no latch is inferred (tmp used to be assigned
            // in only two of the four branches).
            killprodE     = xzeroE | yzeroE;
            sumshiftzeroE = 0;
            tmp           = 13'd57 - aligncntE;
            leadbit       = ~zdenormE;

            // d = aligncntE
            // p = 53
            if ($signed(aligncntE) <= $signed(-(13'd105))) begin    // d <= -2p+1
                // product anchored case with saturated shift
                sumshiftE = 163;                                    // 3p+4
                leadbit   = 1'b1;   // this case forces the leading one regardless of zdenormE
            end else if ($signed(aligncntE) <= $signed(13'd55)) begin // -2p+1 < d <= p+2
                // product anchored / cancellation (d <= 2) and addend anchored
                // (2 < d <= p+2) cases use the same shift amount
                sumshiftE = tmp[8:0];                               // p + 4 - d
            end else begin                                          // d >= p+3
                // addend anchored case with saturated shift
                sumshiftE     = 0;
                sumshiftzeroE = 1;
                killprodE     = 1;
            end

            // One shared shifter: the bits that fall below tE form the sticky bit.
            shift = {leadbit, zman, 163'b0} >> sumshiftE;
            tE    = zzeroE ? 0 : {shift[215:52]};
            bsE   = |(shift[51:0]);
        end

endmodule

View File

@ -1,53 +0,0 @@
// Encoder selected by the 3-bit "choose" field. For each value of choose it
// produces a 55-bit term pp derived from the multiplicand xExt, plus two
// side outputs:
//   e    - 1 for every encoding with choose[2] set (the negative encodings)
//   add1 - 2'b10 for the -2 encoding, 2'b01 for the -1 and -0 encodings,
//          2'b00 otherwise
// The negative encodings use the bitwise inverse of xExt; presumably add1 is
// the correction that turns that inverse into a true negation further down
// the multiplier - confirm against the array that consumes it.
module booth(xExt, choose, add1, e, pp);
/////////////////////////////////////////////////////////////////////////////

    input  logic [53:0] xExt;   // multiplicand xExt
    input  logic [2:0]  choose; // bits needed to choose which encoding
    output logic [1:0]  add1;   // do you add 1
    output logic        e;      // set for the negative encodings
    output logic [54:0] pp;     // the resultant encoding

    logic [53:0] negx;          // bitwise inverse of the multiplicand

    assign negx = ~xExt;

    // A single table keeps pp, e and add1 for one encoding on one line so
    // the three outputs cannot drift apart.
    always_comb
        case (choose)
            3'b000 : begin pp = 55'b0;        e = 0; add1 = 2'b0;  end //  0
            3'b001 : begin pp = {1'b0, xExt}; e = 0; add1 = 2'b0;  end //  1
            3'b010 : begin pp = {1'b0, xExt}; e = 0; add1 = 2'b0;  end //  1
            3'b011 : begin pp = {xExt, 1'b0}; e = 0; add1 = 2'b0;  end //  2
            3'b100 : begin pp = {negx, 1'b0}; e = 1; add1 = 2'b10; end // -2
            3'b101 : begin pp = {1'b1, negx}; e = 1; add1 = 2'b1;  end // -1
            3'b110 : begin pp = {1'b1, negx}; e = 1; add1 = 2'b1;  end // -1
            3'b111 : begin pp = '1;           e = 1; add1 = 2'b1;  end // -0
        endcase

endmodule

View File

@ -1,90 +0,0 @@
// BITS-wide 3:2 compressor: one independent sng3comp2 per bit position, so
// carry[k] and sum[k] depend only on a[k], b[k] and c[k].
//look into different implementations of the compressors?
module add3comp2 #(parameter BITS = 4) (
    input  logic [BITS-1:0] a,
    input  logic [BITS-1:0] b,
    input  logic [BITS-1:0] c,
    output logic [BITS-1:0] carry,
    output logic [BITS-1:0] sum
);

    genvar k;
    generate
        for (k = 0; k < BITS; k = k + 1) begin
            sng3comp2 add0(.a(a[k]), .b(b[k]), .c(c[k]),
                           .carry(carry[k]), .sum(sum[k]));
        end
    endgenerate

endmodule
// BITS-wide 4:2 compressor built from a chain of sng4comp2 cells. The cout
// of each cell feeds the cin of the next; the first cell's cin is tied low.
// The top cell's carry and cout are combined into carry[BITS-1] (AND) and
// carry[BITS] (XOR), which makes carry one bit wider than sum.
// The indexing below (cout[BITS-2]) requires BITS >= 2.
module add4comp2 #(parameter BITS = 4) (
    input  logic [BITS-1:0] a,
    input  logic [BITS-1:0] b,
    input  logic [BITS-1:0] c,
    input  logic [BITS-1:0] d,
    output logic [BITS:0]   carry,
    output logic [BITS-1:0] sum
);

    logic [BITS-1:0] cout;      // cell-to-cell chain
    logic            carryTmp;  // carry output of the top cell
    genvar k;

    // Bit 0: no incoming chain bit
    sng4comp2 add0(.a(a[0]), .b(b[0]), .c(c[0]), .d(d[0]), .cin(1'b0),
                   .cout(cout[0]), .carry(carry[0]), .sum(sum[0]));

    // Bits 1 .. BITS-2
    generate
        for (k = 1; k < BITS-1; k = k + 1) begin
            sng4comp2 add1(.a(a[k]), .b(b[k]), .c(c[k]), .d(d[k]), .cin(cout[k-1]),
                           .cout(cout[k]), .carry(carry[k]), .sum(sum[k]));
        end
    endgenerate

    // Top bit: its carry is kept aside and merged with the final chain bit
    sng4comp2 add2(.a(a[BITS-1]), .b(b[BITS-1]), .c(c[BITS-1]), .d(d[BITS-1]),
                   .cin(cout[BITS-2]), .cout(cout[BITS-1]),
                   .carry(carryTmp), .sum(sum[BITS-1]));

    assign carry[BITS]   = carryTmp ^ cout[BITS-1];
    assign carry[BITS-1] = carryTmp & cout[BITS-1];

endmodule
// Single-bit 3:2 compressor: sum is the XOR of the three inputs; carry is
// selected by a^b (c when a and b differ, a when they agree).
//look into different implementations of the compressors?
module sng3comp2(
    input  logic a,
    input  logic b,
    input  logic c,
    output logic carry,
    output logic sum
);

    logic axorb;    // shared by the sum and the carry select

    assign axorb = a ^ b;
    assign carry = axorb ? c : a;
    assign sum   = axorb ^ c;

endmodule
// Single-bit 4:2 compressor made of two cascaded sng3comp2 cells: the first
// compresses a, b, c (its carry leaves as cout); the second compresses the
// first cell's sum with d and cin.
//look into pass gate 4:2 counters?
module sng4comp2(
    input  logic a,
    input  logic b,
    input  logic c,
    input  logic d,
    input  logic cin,
    output logic cout,
    output logic carry,
    output logic sum
);

    logic TmpSum;   // sum of the first stage

    sng3comp2 add1(.a(a),      .b(b), .c(c),   .carry(cout),  .sum(TmpSum));
    sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .carry(carry), .sum(sum));

endmodule

View File

@ -1,140 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: expgen.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block implements the exponent path of the FMAC. It performs the
// following operations:
//
// 1) Compute exponent of multiply.
// 2) Compare multiply and add exponents to generate alignment shift count
// 3) Adjust exponent based on normalization
// 4) Increment exponent based on postrounding renormalization
//
// As written here, step 3 is not performed inside this module: the
// normalized exponent arrives on the de0 input and the local computation
// of it is commented out.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module expgen(xexp, yexp, zexp,
killprod, sumzero, resultdenorm, normcnt, infinity,
FmaFlagsM, inf, xzero, yzero,expplus1,
nan, de0, xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, specialsel, zexpsel,
aligncnt, wexp,
prodof, sumof, sumuf, denorm0, ae);
/////////////////////////////////////////////////////////////////////////////
input [62:52] xexp; // Exponent of multiplicand x
input [62:52] yexp; // Exponent of multiplicand y
input [62:52] zexp; // Exponent of addend z
input killprod; // Z >> product
input sumzero; // sum exactly equals zero
input resultdenorm; // postnormalize rounded result
input [8:0] normcnt; // normalization shift count
input infinity; // generate infinity on overflow
input [4:0] FmaFlagsM; // FMA flags; bits [4], [2], [1] are read below as invalid, overflow, underflow
input inf; // Some input is infinity
input nan; // Some input is NaN
input [12:0] de0; // Normalized exponent, supplied from outside this module
input xnan; // X is NaN
input ynan; // Y is NaN
input znan; // Z is NaN
input xdenorm; // X is denorm
input ydenorm; // Y is denorm
input zdenorm; // Z is denorm
input xzero; // X is zero
input yzero; // Y is zero
input expplus1; // added to the low 11 bits of de when forming wexp
input proddenorm; // product is denorm
input specialsel; // Select special result
input zexpsel; // Select Z exponent (only referenced in commented-out code below)
output [12:0] aligncnt; // shift count for alignment shifter
output [62:52] wexp; // Exponent of result
output prodof; // X*Y exponent out of bounds
output sumof; // X*Y+Z exponent out of bounds
output sumuf; // X*Y+Z exponent underflows
output denorm0; // exponent = 0 for denorm
output [12:0] ae; //exponent of multiply
// Internal nodes
// NOTE(review): aligncnt0 and aligncnt1 are assigned below but never read;
// be and de1 are neither assigned nor read in the live code.
wire [12:0] aligncnt0; // Shift count for alignment
wire [12:0] aligncnt1; // Shift count for alignment
wire [12:0] be; // Exponent of multiply
wire [12:0] de1; // Normalized exponent
wire [12:0] de; // Normalized exponent
wire [10:0] infinityres; // Infinity or max number
wire [10:0] nanres; // Nan propagated or generated
wire [10:0] specialres; // Exceptional case result
// Compute exponent of multiply
// Note that the exponent does not have to be incremented on a postrounding
// normalization of X because the mantissa was already increased. Report
// if exponent is out of bounds
// The product exponent is forced to 0 when either multiplicand is zero.
assign ae = xzero|yzero ? 0 : xexp + yexp -1023;
// Out of bounds only when ae is non-negative (bit 12 clear) and above 2046.
assign prodof = (ae > 2046 && ~ae[12]);
// Compute alignment shift count
// Adjust for postrounding normalization of Z.
// This should not increase the critical path because the time to
// check if a round overflows is shorter than the actual round and
// is masked by the bypass mux and two 10 bit adder delays.
// NOTE(review): in aligncnt0 the 1-bit denorm flags are inverted without
// being explicitly widened first, unlike aligncnt1 and aligncnt, which
// zero-extend the inverted flag with {12'b0, ...}; aligncnt0 is unused.
assign aligncnt0 = - 1 + ~xdenorm + ~ydenorm - ~zdenorm;
assign aligncnt1 = - 1 + {12'b0,~xdenorm} + {12'b0,~ydenorm} - {12'b0,~zdenorm};
assign aligncnt = zexp -ae - 1 + {12'b0,~xdenorm} + {12'b0,~ydenorm} - {12'b0,~zdenorm};
//assign aligncnt = zexp -ae - 1 + ~xdenorm + ~ydenorm - ~zdenorm;
//assign aligncnt = zexp - ae;// KEP use all of ae
// Select exponent (usually from product except in case of huge addend)
//assign be = zexpsel ? zexp : ae;
// Adjust exponent based on normalization
// A compound adder takes care of the case of post-rounding normalization
// requiring an extra increment
//assign de0 = sumzero ? 13'b0 : be + normcnt + 2;
// assign de1 = sumzero ? 13'b0 : be + normcnt + 2;
// If the exponent becomes exactly zero (denormalized)
// signal such to adjust R bit before rounding
assign denorm0 = (de0 == 0);
// check for exponent out of bounds after add
// de is forced to 0 for a denormalized result or an exactly-zero sum.
assign de = resultdenorm | sumzero ? 0 : de0;
assign sumof = ~de[12] && de > 2046;
assign sumuf = de == 0 && ~sumzero && ~resultdenorm;
// bypass occurs before rounding or taking early results
//assign wbypass = de0[10:0];
// In a non-critical special mux, we combine the early result from other
// FPU blocks with the results of exceptional conditions. Overflow
// produces either infinity or the largest finite number, depending on the
// rounding mode. NaNs are propagated or generated.
// Priority order: invalid/NaN, then overflow, then infinite input, then
// underflow; when none applies the value is left as X (don't care).
assign specialres = FmaFlagsM[4] | nan ? nanres : // invalid
FmaFlagsM[2] ? infinityres : //overflow
inf ? 11'b11111111111 :
FmaFlagsM[1] ? 11'b0 : 11'bx; //underflow
// All-ones exponent when "infinity" is set, otherwise all-ones minus one.
assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;
// IEEE 754-2008 section 6.2.3 states:
// "If two or more inputs are NaN, then the payload of the resulting NaN should be
// identical to the payload of one of the input NaNs if representable in the destination
// format. This standard does not specify which of the input NaNs will provide the payload."
// Priority here is X, then Y, then Z; all-ones if none of them is NaN.
assign nanres = xnan ? xexp : (ynan ? yexp : (znan? zexp : 11'b11111111111));
// A mux selects the early result from other FPU blocks or the
// normalized FMAC result. Special cases are also detected.
assign wexp = specialsel ? specialres[10:0] : de[10:0] + expplus1;
endmodule

View File

@ -1,90 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: expgen.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block implements the first part of the exponent path of the FMAC
// (module expgen1). Of the exponent-path operations listed below, it
// performs steps 1 and 2; steps 3 and 4 are not implemented in this module.
//
// 1) Compute exponent of multiply.
// 2) Compare multiply and add exponents to generate alignment shift count
// 3) Adjust exponent based on normalization
// 4) Increment exponent based on postrounding renormalization
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// expgen1: front half of the FMA exponent path.
//   - aeE       : biased exponent of the product X*Y (xexp + yexp - 1023),
//                 forced to 0 when either multiplicand is zero.
//   - prodof    : aeE is positive (bit 12 clear) and above 2046.
//   - aligncntE : alignment shift count, zexp - aeE, corrected by one for each
//                 operand according to its denormal flag.
// All arithmetic is carried out modulo 2^13; bit 12 of aeE acts as a sign bit.
module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
               xdenormE, ydenormE, zdenormE,
               aligncntE, prodof, aeE);

  input  logic [62:52] xexp;       // Exponent of multiplicand x
  input  logic [62:52] yexp;       // Exponent of multiplicand y
  input  logic [62:52] zexp;       // Exponent of addend z
  input  logic         xdenormE;   // X is denorm
  input  logic         ydenormE;   // Y is denorm
  input  logic         zdenormE;   // Z is denorm
  input  logic         xzeroE;     // X is zero
  input  logic         yzeroE;     // Y is zero
  output logic [12:0]  aligncntE;  // Shift count for alignment shifter
  output logic         prodof;     // X*Y exponent out of bounds
  output logic [12:0]  aeE;        // Exponent of multiply

  // Compute exponent of multiply. The 11-bit exponents are zero-extended to
  // 13 bits so that a negative result is visible in aeE[12].
  assign aeE = (xzeroE | yzeroE) ? 13'd0
                                 : {2'b0, xexp} + {2'b0, yexp} - 13'd1023;

  // Product exponent overflow: only a non-negative aeE can be too large.
  assign prodof = (aeE > 2046 && ~aeE[12]);

  // Compute alignment shift count. Each operand contributes a +/-1
  // correction when it is NOT denormal (the ~denorm terms).
  assign aligncntE = {2'b0, zexp} - aeE - 1
                   + {12'b0, ~xdenormE}
                   + {12'b0, ~ydenormE}
                   - {12'b0, ~zdenormE};

endmodule

View File

@ -1,108 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: expgen.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
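// This block implements the second part of the exponent path of the FMAC
// (module expgen2). The full exponent path performs the operations listed
// below; this module receives the already-normalized exponent (de0) as an
// input, checks it for overflow/underflow, applies the post-rounding
// increment (step 4) and selects the special-case exponent. Steps 1 and 2
// are not implemented in this module.
//
// 1) Compute exponent of multiply.
// 2) Compare multiply and add exponents to generate alignment shift count
// 3) Adjust exponent based on normalization
// 4) Increment exponent based on postrounding renormalization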
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// expgen2: back half of the FMA exponent path.
//   - Clamps the normalized exponent de0 to zero for zero/denormal results.
//   - Reports exponent overflow (sumof) and underflow (sumuf).
//   - Builds the exponent field for exceptional results (NaN, overflow,
//     infinity, underflow) and muxes it with the regular result exponent,
//     which is incremented by expplus1.
module expgen2(xexp, yexp, zexp,
               sumzero, resultdenorm, infinity,
               FmaFlagsM, inf, expplus1,
               nanM, de0, xnanM, ynanM, znanM, specialsel,
               wexp,
               sumof, sumuf);

  input  logic [62:52] xexp;          // Exponent of multiplicand x
  input  logic [62:52] yexp;          // Exponent of multiplicand y
  input  logic [62:52] zexp;          // Exponent of addend z
  input  logic         sumzero;       // Sum exactly equals zero
  input  logic         resultdenorm;  // Result is denormalized
  input  logic         infinity;      // Generate infinity on overflow
  input  logic [4:0]   FmaFlagsM;     // Flags; [4] invalid, [2] overflow, [1] underflow are read here
  input  logic         inf;           // Infinity indication from the flag logic
  input  logic         nanM;          // Some input is NaN
  input  logic [12:0]  de0;           // Normalized exponent before clamping
  input  logic         xnanM;         // X is NaN
  input  logic         ynanM;         // Y is NaN
  input  logic         znanM;         // Z is NaN
  input  logic         expplus1;      // Add one to the result exponent
  input  logic         specialsel;    // Select special result
  output logic [62:52] wexp;          // Exponent of result
  output logic         sumof;         // X*Y+Z exponent out of bounds
  output logic         sumuf;         // X*Y+Z exponent underflows

  // Internal nodes
  wire [12:0] de;           // Normalized exponent, zeroed for zero/denormal results
  wire [10:0] infinityres;  // Infinity or max number
  wire [10:0] nanres;       // NaN propagated or generated
  wire [10:0] specialres;   // Exceptional case result

  // Check for exponent out of bounds after add. de[12] set means the
  // exponent is negative, so it can never count as an overflow.
  assign de    = (resultdenorm | sumzero) ? 13'd0 : de0;
  assign sumof = ~de[12] && de > 2046;
  assign sumuf = de == 0 && ~sumzero && ~resultdenorm;

  // In a non-critical special mux, combine the results of exceptional
  // conditions. Overflow produces either infinity or the largest finite
  // number, depending on the rounding mode. NaNs are propagated or generated.
  // When none of the conditions holds the value is a don't-care (x).
  assign specialres = (FmaFlagsM[4] | nanM) ? nanres :          // invalid
                      FmaFlagsM[2]          ? infinityres :     // overflow
                      inf                   ? 11'b11111111111 :
                      FmaFlagsM[1]          ? 11'b0 : 11'bx;    // underflow

  assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;

  // IEEE 754-2008 section 6.2.3 states:
  // "If two or more inputs are NaN, then the payload of the resulting NaN should be
  // identical to the payload of one of the input NaNs if representable in the destination
  // format. This standard does not specify which of the input NaNs will provide the payload."
  // Priority here is X, then Y, then Z; otherwise an all-ones exponent is generated.
  assign nanres = xnanM ? xexp :
                  ynanM ? yexp :
                  znanM ? zexp : 11'b11111111111;

  // Select the special-case exponent or the regular (possibly incremented) one.
  assign wexp = specialsel ? specialres[10:0] : de[10:0] + {10'b0, expplus1};

endmodule

View File

@ -1,88 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: flag.v
// Author: David Harris
// Date: 12/6/1995
//
// Block Description:
// This block generates the flags: invalid, overflow, underflow, inexact.
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// flag: generates the FMA status flags and the NaN / infinity indications.
//   FmaFlagsM[4] invalid, [3] divide-by-zero (always 0), [2] overflow,
//   FmaFlagsM[1] underflow, [0] inexact.
//
// Fix relative to the previous version: the overflow and inexact flags were
// gated with ~inf, but inf itself includes suminf (exponent overflow of the
// sum). That made "suminf && ~inf" constant 0, so overflow could never be
// flagged and the suminf term of the inexact flag was always masked. Both
// flags are now gated on *input* infinities only. The inf output is unchanged.
module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
            psign, zsign, xzero, yzero, zzero, vbits, killprod,
            inf, nan, FmaFlagsM, sticky);

  input        xnan;      // X is NaN
  input        ynan;      // Y is NaN
  input        znan;      // Z is NaN
  input        sticky;    // Sticky bit of the result
  input        xinf;      // X is Inf
  input        yinf;      // Y is Inf
  input        zinf;      // Z is Inf
  input        prodof;    // X*Y overflows exponent
  input        sumof;     // X*Y + Z overflows exponent
  input        sumuf;     // X*Y + Z underflows exponent
  input        psign;     // Sign of product
  input        zsign;     // Sign of z
  input        xzero;     // x = 0
  input        yzero;     // y = 0
  input        zzero;     // z = 0
  input        killprod;  // Product-kill indication (used in the underflow term)
  input  [1:0] vbits;     // R and S bits of result
  output       inf;       // Some source is Inf, or the sum overflowed
  output       nan;       // Some source is NaN
  output [4:0] FmaFlagsM;

  // Internal nodes
  wire prodinf;   // X*Y larger than max possible
  wire suminf;    // X*Y+Z larger than max possible
  wire inputinf;  // Some source operand is Inf

  // If any input is NaN, propagate the NaN
  assign nan = xnan || ynan || znan;

  // Generate infinity checks; NaN inputs take precedence over overflow.
  assign prodinf  = prodof && ~xnan && ~ynan;
  assign suminf   = sumof && ~xnan && ~ynan && ~znan;
  assign inputinf = xinf || yinf || zinf;

  // Same with infinity (inf - inf and 0 * inf don't propagate inf,
  // but that is fine because the invalid operation takes higher precedence).
  // suminf is included so an overflowed sum is also reported as infinite.
  assign inf = inputinf || suminf;

  // Set invalid flag for the following cases:
  //   1) Inf - Inf
  //   2) 0 * Inf
  assign FmaFlagsM[4] = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
                        xzero && yinf || yzero && xinf;

  assign FmaFlagsM[3] = 0; // divide by zero flag

  // Set the overflow flag when the rounded result exponent is out of bounds
  // and the infinity did not simply come from an infinite input.
  assign FmaFlagsM[2] = suminf && ~inputinf;

  // Set the underflow flag for the following cases:
  //   1) Result exponent underflows with no infinity/NaN involved
  //   2) Product killed, Z is zero and neither X nor Y is zero
  assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinf && ~nan) ||
                        (killprod & zzero & ~(yzero | xzero));

  // Set the inexact flag if the R or S bit or the sticky bit is set, or on
  // overflow, provided no input is infinite or NaN.
  assign FmaFlagsM[0] = (vbits[0] || vbits[1] || sticky || suminf) &&
                        ~(inputinf || nan);

endmodule

View File

@ -1,34 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: flag.v
// Author: David Harris
// Date: 12/6/1995
//
// Block Description:
// This block generates the early flag terms nanE (some input is NaN) and
// prodinfE (the X*Y exponent overflowed and neither X nor Y is NaN).
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// flag1: early flag terms.
//   nanE     - at least one of the three operands is NaN.
//   prodinfE - the product exponent overflowed and neither multiplicand is NaN.
module flag1(
    input  logic xnanE,     // X is NaN
    input  logic ynanE,     // Y is NaN
    input  logic znanE,     // Z is NaN
    input  logic prodof,    // X*Y overflows exponent
    output logic prodinfE,  // X*Y larger than max possible
    output logic nanE);     // Some source is NaN

  // Any NaN operand makes the result NaN.
  assign nanE = |{xnanE, ynanE, znanE};

  // An overflowing product counts as infinite only if no multiplicand is NaN.
  assign prodinfE = prodof & ~(xnanE | ynanE);

endmodule

View File

@ -1,80 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: flag.v
// Author: David Harris
// Date: 12/6/1995
//
// Block Description:
// This block generates the flags: invalid, overflow, underflow, inexact.
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// flag2: generates the FMA status flags and the infinity indication.
//   FmaFlagsM[4] invalid, [3] divide-by-zero (always 0), [2] overflow,
//   FmaFlagsM[1] underflow, [0] inexact.
//
// Fix relative to the previous version: the overflow and inexact flags were
// gated with ~inf, but inf itself includes suminf (exponent overflow of the
// sum). That made "suminf && ~inf" constant 0, so overflow could never be
// flagged and the suminf term of the inexact flag was always masked. Both
// flags are now gated on *input* infinities only. The inf output is unchanged.
module flag2(xsign, ysign, zsign, xnanM, ynanM, znanM, xinfM, yinfM, zinfM, sumof, sumuf,
             xzeroM, yzeroM, zzeroM, vbits, killprodM,
             inf, nanM, FmaFlagsM, sticky, prodinfM);

  input  logic       xnanM;      // X is NaN
  input  logic       ynanM;      // Y is NaN
  input  logic       znanM;      // Z is NaN
  input  logic       xsign;      // Sign of x
  input  logic       ysign;      // Sign of y
  input  logic       zsign;      // Sign of z
  input  logic       sticky;     // Sticky bit of the result
  input  logic       prodinfM;   // X*Y larger than max possible
  input  logic       xinfM;      // X is Inf
  input  logic       yinfM;      // Y is Inf
  input  logic       zinfM;      // Z is Inf
  input  logic       sumof;      // X*Y + Z overflows exponent
  input  logic       sumuf;      // X*Y + Z underflows exponent
  input  logic       xzeroM;     // x = 0
  input  logic       yzeroM;     // y = 0
  input  logic       zzeroM;     // z = 0
  input  logic       killprodM;  // Product-kill indication (used in the underflow term)
  input  logic [1:0] vbits;      // R and S bits of result
  output logic       inf;        // Some source is Inf, or the sum overflowed
  input  logic       nanM;       // Some source is NaN
  output logic [4:0] FmaFlagsM;

  // Internal nodes
  logic suminf;    // X*Y+Z larger than max possible
  logic inputinf;  // Some source operand is Inf

  // An overflowing sum counts as infinite only when no operand is NaN.
  assign suminf   = sumof && ~xnanM && ~ynanM && ~znanM;
  assign inputinf = xinfM || yinfM || zinfM;

  // Infinity indication (inf - inf and 0 * inf don't propagate inf,
  // but that is fine because the invalid operation takes higher precedence).
  // suminf is included so an overflowed sum is also reported as infinite.
  assign inf = inputinf || suminf;

  // Set the overflow flag when the rounded result exponent is out of bounds
  // and the infinity did not simply come from an infinite input.
  assign FmaFlagsM[2] = suminf && ~inputinf;

  // Set the underflow flag for the following cases:
  //   1) Result exponent underflows with no infinity/NaN involved
  //   2) Product killed, Z is zero and neither X nor Y is zero
  assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) ||
                        (killprodM & zzeroM & ~(yzeroM | xzeroM));

  // Set the inexact flag if the R or S bit or the sticky bit is set, or on
  // overflow, provided no input is infinite or NaN.
  assign FmaFlagsM[0] = (vbits[0] || vbits[1] || sticky || suminf) &&
                        ~(inputinf || nanM);

  // Set invalid flag for the following cases:
  //   1) Inf - Inf (product sign xsign^ysign differs from zsign)
  //   2) 0 * Inf
  assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) ||
                        xzeroM && yinfM || yzeroM && xinfM;

  assign FmaFlagsM[3] = 0; // divide by zero flag

endmodule

View File

@ -1,132 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Block Name: fmac.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This is the top level block of a floating-point multiply/accumulate
// unit(FMAC). It instantiates the following sub-blocks:
//
// array Booth encoding, partial product generation, product summation
// expgen Exponent summation, compare, and adjust
// align Alignment shifter
// add Carry-save adder for accumulate, carry propagate adder
// lza Leading zero anticipator to control normalization shifter
// normalize Normalization shifter
// round Rounding of result
// exception Handles exceptional cases
// bypass Handles bypass of result to ReadData1E or ReadData3E inputs
// sign One bit sign handling block
// special Catch special cases (inputs = 0 / infinity / etc.)
//
// The FMAC computes FmaResultM=ReadData1E*ReadData2E+ReadData3E, rounded with the mode specified by
// RN, RZ, RM, or RP. The result is optionally bypassed back to
// the ReadData1E or ReadData3E inputs for use on the next cycle. In addition, four signals
// are produced: trap, overflow, underflow, and inexact. Trap indicates
// an infinity, NaN, or denormalized number to be handled in software;
// the other three signals are IEEE flags.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// Top level of the floating-point multiply/accumulate unit. It only wires
// sub-blocks together: the fraction datapath (multiply, align, add, lza,
// normalize, round), the exponent datapath (expgen), special-case detection
// (special) and the control logic (sign, flag). Apart from the operand and
// result field slices bound explicitly below, every port is connected
// implicitly by name through ".*", so each internal signal declared here must
// keep its exact name and width.
module fma(ReadData1E, ReadData2E, ReadData3E, FrmE,
FmaResultM, FmaFlagsM, aligncnt);
/////////////////////////////////////////////////////////////////////////////
input [63:0] ReadData1E; // input 1 (multiplicand; fields sliced as [63] sign, [62:52] exponent, [51:0] fraction)
input [63:0] ReadData2E; // input 2 (multiplicand; same field slicing)
input [63:0] ReadData3E; // input 3 (addend; same field slicing)
input [2:0] FrmE; // Rounding mode
output [63:0] FmaResultM; // output FmaResultM=ReadData1E*ReadData2E+ReadData3E
output [4:0] FmaFlagsM; // status flags
output [12:0] aligncnt; // shift count for alignment (exposed as a module output; presumably for debug - TODO confirm)
// Internal nodes
logic [105:0] r; // one result of partial product sum
logic [105:0] s; // other result of partial products
logic [163:0] t; // output of alignment shifter
logic [163:0] sum; // output of carry prop adder
logic [53:0] v; // normalized sum, R, S bits
// logic [12:0] aligncnt; // shift count for alignment (now declared as an output above)
logic [8:0] normcnt; // shift count for normalizer
logic [12:0] ae; // multiplier exponent
logic bs; // sticky bit of addend
logic ps; // sticky bit of product
logic killprod; // ReadData3E >> product
logic negsum; // negate sum
logic invz; // invert addend
logic selsum1; // select +1 mode of sum
logic negsum0; // sum +0 < 0
logic negsum1; // sum +1 < 0
logic sumzero; // sum = 0
logic infinity; // generate infinity on overflow
logic prodof; // ReadData1E*ReadData2E out of range
logic sumof; // result out of range
// The signals below carry no comments in the original; the notes are inferred
// from their names and from how the sub-blocks visible elsewhere use ports of
// the same name - NOTE(review): confirm against the submodule definitions.
logic xzero; // presumably: X is zero
logic yzero; // presumably: Y is zero
logic zzero; // presumably: Z is zero
logic xdenorm; // presumably: X is denormal
logic ydenorm; // presumably: Y is denormal
logic zdenorm; // presumably: Z is denormal
logic proddenorm; // presumably: product is denormal
logic zexpsel;
logic denorm0;
logic resultdenorm;
logic inf; // presumably: some source is Inf
logic xinf; // presumably: X is Inf
logic yinf; // presumably: Y is Inf
logic zinf; // presumably: Z is Inf
logic xnan; // presumably: X is NaN
logic ynan; // presumably: Y is NaN
logic znan; // presumably: Z is NaN
logic specialsel; // presumably: select special result
logic expplus1;
logic nan; // presumably: some source is NaN
logic sumuf; // presumably: result exponent underflows
logic psign; // presumably: sign of product
logic sticky;
logic [8:0] sumshift;
logic sumshiftzero;
logic [12:0] de0;
logic isAdd;
// isAdd is tied high here; its consumer is one of the .* connections below
// and is not visible in this module.
assign isAdd = 1;
// Instantiate fraction datapath
// Operand fraction fields are bound explicitly; all other ports use ".*".
multiply multiply(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]), .*);
align align(.zman(ReadData3E[51:0]),.*);
add add(.*);
lza lza(.*);
normalize normalize(.zexp(ReadData3E[62:52]),.*);
// round's wman/wsign ports are bound to the fraction and sign fields of the result
round round(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]),.zman(ReadData3E[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*);
// Instantiate exponent datapath
// expgen's wexp port is bound to the exponent field of the result
expgen expgen(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.wexp(FmaResultM[62:52]),.*);
// Instantiate special case detection across datapath & exponent path
special special(.*);
// Instantiate control logic
// NOTE(review): FmaResultM[63] is bound to .wsign on both round (above) and
// sign; confirm that only one of the two ports is an output.
sign sign(.xsign(ReadData1E[63]),.ysign(ReadData2E[63]),.zsign(ReadData3E[63]),.wsign(FmaResultM[63]),.*);
flag flag(.zsign(ReadData3E[63]),.vbits(v[1:0]),.*);
endmodule

View File

@ -1,165 +0,0 @@
// fma1: first stage of the fused multiply-add.
//   - Unpacks X, Y and Z (double precision, or single precision held in the
//     upper half of the 64-bit inputs) and classifies each operand as
//     zero / denormal / infinity / NaN.
//   - Computes the product mantissa and the product exponent.
//   - Aligns the addend Z to the product and produces the aligned addend,
//     the addend sticky bit and the "kill product" indication.
//
// Changes relative to the previous version:
//   - Shift now gets a default value at the top of the always_comb block. It
//     used to be assigned on only two of the four paths, which infers a latch.
//   - The unused local sign signals (XSgn, YSgn, ZSgn) were removed.
module fma1(
    input  logic [63:0]  FInput1E,       // X
    input  logic [63:0]  FInput2E,       // Y
    input  logic [63:0]  FInput3E,       // Z
    input  logic [2:0]   FOpCtrlE,       // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
    input  logic         FmtE,           // precision 1 = double 0 = single
    output logic [105:0] ProdManE,       // 1.X frac * 1.Y frac
    output logic [161:0] AlignedAddendE, // Z aligned for addition
    output logic [12:0]  ProdExpE,       // X exponent + Y exponent - bias
    output logic         AddendStickyE,  // sticky bit that is calculated during alignment
    output logic         KillProdE,      // set the product to zero before addition if the product is too small to matter
    output logic         XZeroE, YZeroE, ZZeroE,    // inputs are zero
    output logic         XInfE, YInfE, ZInfE,       // inputs are infinity
    output logic         XNaNE, YNaNE, ZNaNE);      // inputs are NaN

    logic [51:0]  XFrac, YFrac, ZFrac;              // input fraction
    logic [52:0]  XMan, YMan, ZMan;                 // input mantissa (with leading one)
    logic [12:0]  XExp, YExp, ZExp;                 // input exponents
    logic [12:0]  AlignCnt;                         // how far to shift the addend to align with the product
    logic [211:0] Shift;                            // output of the alignment shifter including sticky bits
    logic         XDenormE, YDenormE, ZDenormE;     // inputs are denormal
    logic [63:0]  FInput3E2;                        // value to add (Z or zero)
    logic [12:0]  Bias;                             // 1023 for double, 127 for single
    logic         XExpZero, YExpZero, ZExpZero;     // input exponent zero
    logic         XFracZero, YFracZero, ZFracZero;  // input fraction zero
    logic         XExpMax, YExpMax, ZExpMax;        // input exponent all 1s

    // Set addend to zero if FMUL instruction
    assign FInput3E2 = FOpCtrlE[2] ? 64'b0 : FInput3E;

    // Split inputs into fraction and exponent and handle single or double precision
    //   - single precision is in the top half of the inputs
    assign XExp  = FmtE ? {2'b0, FInput1E[62:52]}  : {5'b0, FInput1E[62:55]};
    assign YExp  = FmtE ? {2'b0, FInput2E[62:52]}  : {5'b0, FInput2E[62:55]};
    assign ZExp  = FmtE ? {2'b0, FInput3E2[62:52]} : {5'b0, FInput3E2[62:55]};
    assign XFrac = FmtE ? FInput1E[51:0]  : {FInput1E[54:32], 29'b0};
    assign YFrac = FmtE ? FInput2E[51:0]  : {FInput2E[54:32], 29'b0};
    assign ZFrac = FmtE ? FInput3E2[51:0] : {FInput3E2[54:32], 29'b0};

    // Add the assumed one. A zero exponent (denormal or zero) has no assumed one.
    assign XMan = {~XExpZero, XFrac};
    assign YMan = {~YExpZero, YFrac};
    assign ZMan = {~ZExpZero, ZFrac};

    assign Bias = FmtE ? 13'h3ff : 13'h7f;

    // Determine if an input is a special value
    assign XExpZero  = ~|XExp;
    assign YExpZero  = ~|YExp;
    assign ZExpZero  = ~|ZExp;
    assign XFracZero = ~|XFrac;
    assign YFracZero = ~|YFrac;
    assign ZFracZero = ~|ZFrac;
    assign XExpMax   = FmtE ? &XExp[10:0] : &XExp[7:0];
    assign YExpMax   = FmtE ? &YExp[10:0] : &YExp[7:0];
    assign ZExpMax   = FmtE ? &ZExp[10:0] : &ZExp[7:0];

    assign XNaNE     = XExpMax & ~XFracZero;
    assign YNaNE     = YExpMax & ~YFracZero;
    assign ZNaNE     = ZExpMax & ~ZFracZero;
    assign XDenormE  = XExpZero & ~XFracZero;
    assign YDenormE  = YExpZero & ~YFracZero;
    assign ZDenormE  = ZExpZero & ~ZFracZero;
    assign XInfE     = XExpMax & XFracZero;
    assign YInfE     = YExpMax & YFracZero;
    assign ZInfE     = ZExpMax & ZFracZero;
    assign XZeroE    = XExpZero & XFracZero;
    assign YZeroE    = YExpZero & YFracZero;
    assign ZZeroE    = ZExpZero & ZFracZero;

    // Calculate the product's exponent
    //   - When multiplying two fp numbers, add the exponents
    //   - Subtract the bias (XExp + YExp has two biases, one from each exponent)
    //   - Denormal numbers have an exponent value of 1, however they are
    //     represented with an exponent of 0. Add one if there is a denormal number
    assign ProdExpE = (XZeroE | YZeroE) ? 13'b0 :
                      XExp + YExp - Bias + XDenormE + YDenormE;

    // Calculate the product's mantissa
    assign ProdManE = XMan * YMan;

    // Determine the shift count for alignment (interpreted as a signed value)
    //   - negative means Z is larger, so shift Z left
    //   - positive means the product is larger, so shift Z right
    //   - Denormal numbers have an exponent value of 1, however they are
    //     represented with an exponent of 0. Add one to the exponent if it is a denormal number
    assign AlignCnt = ProdExpE - ZExp - ZDenormE;

    // Alignment shifter
    // Default addition without shifting:
    //        | 55'b0 | 106'b(product) | 2'b0 |
    //        |1'b0| addend |
    // The 1'b0 before the addend is because the product's mantissa has two
    // bits before the binary point (xx.xxxxxxxxxx...)
    always_comb begin
        // Defaults. Shift is only meaningful in the two shifting branches but
        // must be assigned on every path to keep this block combinational.
        AddendStickyE = 0;
        KillProdE     = 0;
        Shift         = 212'b0;

        if ($signed(AlignCnt) <= $signed(-13'd56)) begin
            // The product is too small to affect the sum: kill the product
            KillProdE      = 1;
            AlignedAddendE = {107'b0, ZMan, 2'b0};
            AddendStickyE  = ~(XZeroE | YZeroE);

        end else if ($signed(AlignCnt) <= $signed(13'd0)) begin
            // The addend is shifted left (negative AlignCnt)
            Shift          = {55'b0, ZMan, 104'b0} << -AlignCnt;
            AlignedAddendE = Shift[211:50];
            AddendStickyE  = |(Shift[49:0]);

        end else if ($signed(AlignCnt) <= $signed(13'd105)) begin
            // The addend is shifted right (positive AlignCnt)
            Shift          = {55'b0, ZMan, 104'b0} >> AlignCnt;
            AlignedAddendE = Shift[211:50];
            AddendStickyE  = |(Shift[49:0]);

        end else begin
            // The addend is too small to affect the addition
            //   - The addend has to shift two past the end of the product to be considered too small
            //   - The 2 extra bits are needed for rounding
            AlignedAddendE = 162'b0;
            AddendStickyE  = ~ZZeroE;
        end
    end

endmodule

View File

@ -1,282 +0,0 @@
module fma2(
input logic [63:0] FInput1M,
input logic [63:0] FInput2M,
input logic [63:0] FInput3M,
input logic [2:0] FrmM,
input logic [105:0] ProdManM,
input logic [161:0] AlignedAddendM,
input logic [12:0] ProdExpM,
input logic FmtM,
input logic AddendStickyM,
input logic KillProdM,
input logic [2:0] FOpCtrlM,
input logic XZeroM, YZeroM, ZZeroM,
input logic XInfM, YInfM, ZInfM,
input logic XNaNM, YNaNM, ZNaNM,
output logic [63:0] FmaResultM,
output logic [4:0] FmaFlagsM);
logic [51:0] XMan, YMan, ZMan, WMan;
logic [10:0] XExp, YExp, ZExp, WExp;
logic XSgn, YSgn, ZSgn, WSgn, PSgn;
logic [105:0] ProdMan2;
logic [162:0] AlignedAddend2;
logic [161:0] Sum;
logic [162:0] SumTmp;
logic [12:0] SumExp;
logic [12:0] SumExpMinus1;
logic [12:0] SumExpTmp, SumExpTmpMinus1, WExpTmp;
logic [53:0] NormSum;
logic [161:0] NormSumTmp;
logic [8:0] NormCnt;
logic NormSumSticky;
logic SumZero;
logic NegSum;
logic InvZ;
logic ResultDenorm;
logic Sticky;
logic Plus1, Minus1, Plus1Tmp, Minus1Tmp;
logic Invalid,Underflow,Overflow,Inexact;
logic [8:0] DenormShift;
logic ProdInf, ProdOf, ProdUf;
logic [63:0] FmaResultTmp;
logic SubBySmallNum;
logic [63:0] FInput3M2;
logic ZeroSgn, ResultSgn;
// Set addend to zero if FMUL instruction
assign FInput3M2 = FOpCtrlM[2] ? 64'b0 : FInput3M;
// split inputs into the sign bit, mantissa, and exponent for readability
assign XSgn = FInput1M[63];
assign YSgn = FInput2M[63];
assign ZSgn = FInput3M2[63]^FOpCtrlM[0]; //Negate Z if subtraction
assign XExp = FmtM ? FInput1M[62:52] : {3'b0, FInput1M[62:55]};
assign YExp = FmtM ? FInput2M[62:52] : {3'b0, FInput2M[62:55]};
assign ZExp = FmtM ? FInput3M2[62:52] : {3'b0, FInput3M2[62:55]};
assign XMan = FmtM ? FInput1M[51:0] : {FInput1M[54:32], 29'b0};
assign YMan = FmtM ? FInput2M[51:0] : {FInput2M[54:32], 29'b0};
assign ZMan = FmtM ? FInput3M2[51:0] : {FInput3M2[54:32], 29'b0};
// Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB
assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];
// Addition
// Negate Z when doing one of the following opperations:
// -prod + Z
// prod - Z
assign InvZ = ZSgn ^ PSgn;
// Choose an inverted or non-inverted addend - the one is added later
assign AlignedAddend2 = InvZ ? ~{1'b0,AlignedAddendM} : {1'b0,AlignedAddendM};
// Kill the product if the product is too small to effect the addition (determined in fma1.sv)
assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;
// Do the addition
// - add one to negate if the added was inverted
// - the 2 extra bits at the begining and end are needed for rounding
assign SumTmp = AlignedAddend2 + {55'b0, ProdMan2,2'b0} + {162'b0, InvZ};
// Is the sum negitive
assign NegSum = SumTmp[162];
// If the sum is negitive, negate the sum.
assign Sum = NegSum ? -SumTmp[161:0] : SumTmp[161:0];
// Leading one detector
logic [8:0] i;
always_comb begin
i = 0;
while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one
NormCnt = i+1; // compute shift count
end
// Normalization
// Determine if the sum is zero
assign SumZero = ~(|Sum);
logic [12:0] ManLen;
assign ManLen = FmtM ? 13'd52 : 13'd23;
// Determine if the result is denormal
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-ManLen));
// Determine the shift needed for denormal results
assign SumExpTmpMinus1 = SumExpTmp-1;
assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0;
// Normalize the sum
assign NormSumTmp = SumZero ? 162'b0 : Sum << NormCnt+DenormShift;
assign NormSum = NormSumTmp[161:108];
// Calculate the sticky bit
assign NormSumSticky = FmtM ? (|NormSumTmp[107:0]) : (|NormSumTmp[136:0]);
assign Sticky = AddendStickyM | NormSumSticky;
// Determine sum's exponent
assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
assign SumExp = SumZero ? 13'b0 :
ResultDenorm ? 13'b0 :
SumExpTmp;
// Rounding
// round to nearest even
// {Gaurd, Round, Sticky}
// 0xx - do nothing
// 100 - tie - Plus1 if NormSum[2] = 1
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
// 101/110/111 - Plus1
// round to zero - do nothing
// - subtract 1 if a small number was supposed to be subtracted from the positive result
// round to -infinity - Plus1 if negitive
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
// - subtract 1 if a small number was supposed to be subtracted from the positive result
// round to infinity - Plus1 if positive
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
// - subtract 1 if a small number was supposed to be subtracted from the negitive result
// round to nearest max magnitude
// {Gaurd, Round, Sticky}
// 0xx - do nothing
// 100 - tie - Plus1
// - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
// 101/110/111 - Plus1
// Deterimine if the result was supposed to be subtrated by a small number
logic Gaurd, Round;
assign Gaurd = FmtM ? NormSum[1] : NormSum[30];
assign Round = FmtM ? NormSum[0] : NormSum[29];
assign SubBySmallNum = AddendStickyM&InvZ&~NormSumSticky;
always_comb begin
// Determine if you add 1
case (FrmM)
3'b000: Plus1Tmp = Gaurd & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&NormSum[2]));//round to nearest even
3'b001: Plus1Tmp = 0;//round to zero
3'b010: Plus1Tmp = WSgn & ~(SubBySmallNum);//round down
3'b011: Plus1Tmp = ~WSgn & ~(SubBySmallNum);//round up
3'b100: Plus1Tmp = (Gaurd & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky)));//round to nearest max magnitude
default: Plus1Tmp = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: Minus1Tmp = 0;//round to nearest even
3'b001: Minus1Tmp = SubBySmallNum;//round to zero
3'b010: Minus1Tmp = ~WSgn & SubBySmallNum;//round down
3'b011: Minus1Tmp = WSgn & SubBySmallNum;//round up
3'b100: Minus1Tmp = 0;//round to nearest max magnitude
default: Minus1Tmp = 1'bx;
endcase
end
// If an answer is exact don't round
assign Plus1 = Sticky | (Gaurd|Round) ? Plus1Tmp : 1'b0;
assign Minus1 = Sticky | (Gaurd|Round) ? Minus1Tmp : 1'b0;
// Compute rounded result
assign {WExpTmp, WMan} = FmtM ? {SumExp, NormSum[53:2]} - {64'b0, Minus1} + {64'b0, Plus1} : {{SumExp, NormSum[53:31]} - {35'b0, Minus1} + {35'b0, Plus1}, 28'b0};
assign WExp = WExpTmp[10:0];
// Sign calculation
// Determine the sign if the sum is zero
// if product underflows then use psign
// otherwise
// if cancelation then 0 unless round to -inf
// otherwise psign
assign ZeroSgn = Underflow & ~ResultDenorm ? PSgn :
(PSgn^ZSgn ? FrmM == 3'b010 : PSgn);
// is the result negitive
// if p - z is the Sum negitive
// if -p + z is the Sum positive
// if -p - z then the Sum is negitive
assign ResultSgn = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
assign WSgn = SumZero ? ZeroSgn : ResultSgn;
// Select the result
assign FmaResultM = XNaNM ? (FmtM ? {XSgn, FInput1M[62:52], 1'b1,FInput1M[50:0]} : {XSgn, FInput1M[62:55], 1'b1,FInput1M[53:0]}) :
YNaNM ? (FmtM ? {YSgn, FInput2M[62:52], 1'b1,FInput2M[50:0]} : {YSgn, FInput2M[62:55], 1'b1,FInput2M[53:0]}) :
ZNaNM ? (FmtM ? {ZSgn, FInput3M2[62:52], 1'b1,FInput3M2[50:0]} : {ZSgn, FInput3M2[62:55], 1'b1,FInput3M2[53:0]}) :
Invalid ? (FmtM ? {WSgn, 11'h7ff, 1'b1, 51'b0} : {WSgn, 8'h7f8, 1'b1, 54'b0}) : // has to be before inf
XInfM ? {PSgn, FInput1M[62:0]} :
YInfM ? {PSgn, FInput2M[62:0]} :
ZInfM ? {ZSgn, FInput3M2[62:0]} :
Overflow ? (FmtM ? {WSgn, 11'h7ff, 52'b0} : {WSgn, 8'h7f8, 55'b0}) :
Underflow & ~ResultDenorm ? (FmtM ? {WSgn, 63'b0} - {63'b0, (Minus1&AddendStickyM)} + {63'b0, (Plus1&AddendStickyM)} : {{WSgn, 31'b0} - {31'b0, (Minus1&AddendStickyM)} + {31'b0, (Plus1&AddendStickyM)}, 32'b0}) : //***do you need minus1?
KillProdM ? (FmtM ? FInput3M2 - {63'b0, (Minus1&AddendStickyM)} + {63'b0, (Plus1&AddendStickyM)} : {FInput3M2[63:32] - {31'b0, (Minus1&AddendStickyM)} + {31'b0, (Plus1&AddendStickyM)}, 32'b0}) : // has to be after Underflow
FmtM ? {WSgn,WExp,WMan} : {WSgn,WExp[6:0],WMan,4'b0};
logic [63:0] tmp;
assign tmp = {WSgn,WExp[6:0],WMan,4'b0};
// Set Invalid flag for following cases:
// 1) Inf - Inf
// 2) 0 * Inf
// 3) any input is a signaling NaN
logic [12:0] MaxExp;
assign MaxExp = FmtM ? 13'd2047 : 13'd255;
assign ProdOf = (ProdExpM >= MaxExp && ~ProdExpM[12]);
assign ProdInf = ProdOf && ~XNaNM && ~YNaNM;
assign SigNaN = FmtM ? (XNaNM&~FInput1M[51]) | (YNaNM&~FInput2M[51]) | (ZNaNM&~FInput3M2[51]) : (XNaNM&~FInput1M[54]) | (YNaNM&~FInput2M[54]) | (ZNaNM&~FInput3M2[54]);
assign Invalid = SigNaN | ((XInfM || YInfM || ProdInf) & ZInfM & (XSgn ^ YSgn ^ ZSgn)) | (XZeroM & YInfM) | (YZeroM & XInfM);
// Set Overflow flag if the number is too big to be represented
assign Overflow = WExpTmp >= MaxExp & ~WExpTmp[12];
// Set Underflow flag if the number is too small to be represented in normal numbers
assign ProdUf = KillProdM & ZZeroM;
assign Underflow = SumExp[12] | ProdUf;
// Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
assign Inexact = (Sticky|Overflow| (Gaurd|Round))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Combine flags
// - FMA can't set the Divide by zero flag
// - Don't set the underflow flag if the result is exact
assign FmaFlagsM = {Invalid, 1'b0, Overflow, Underflow & Inexact, Inexact};
endmodule

View File

@ -1,40 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: lop.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block implements a Leading One Predictor used to determine
// the normalization shift count.
///////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module lza(
  input  logic [163:0] sum,      // adder result to be normalized
  output logic [8:0]   normcnt,  // count of zero bits above the leading one (164 if none)
  output logic         sumzero   // high when every bit of sum is zero
);
  // Behavioral leading-zero counter.
  //
  // The original author's note still applies: a real leading-one predictor
  // would use a parallel-prefix structure and would look at the adder's
  // operands rather than its result. This model simply scans the result
  // from bit 163 downward and counts zeros until the first bit that is
  // not a known 0.

  logic [8:0] zeros;   // running count of leading zeros

  always_comb begin
    zeros = '0;
    for (int pos = 163; pos >= 0; pos--) begin
      if (~sum[pos]) zeros = zeros + 9'd1;  // still in the run of leading zeros
      else           break;                 // first non-zero bit ends the scan
    end
    normcnt = zeros;                        // shift amount for normalization
  end

  // Zero detect is independent of the scan above.
  assign sumzero = ~(|sum);
endmodule

View File

@ -1,136 +0,0 @@
module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
/////////////////////////////////////////////////////////////////////////////
// Significand multiplier.
//
// Builds 27 partial-product rows from `booth` instances and compresses them
// through a tree of add3comp2 / add4comp2 instances down to two 106-bit
// vectors, rE (carry) and sE (sum). The commented-out behavioral reference at
// the bottom of this module suggests that rE + sE is intended to equal
// {implied bit, xman} * {implied bit, yman}; confirm against the adder that
// consumes rE and sE. The booth, add3comp2 and add4comp2 modules are defined
// elsewhere, so statements about their internals below are assumptions.
/////////////////////////////////////////////////////////////////////////////
input logic [51:0] xman; // Fraction of multiplicand x
input logic [51:0] yman; // Fraction of multiplicand y
input logic xdenormE; // is x denormalized
input logic ydenormE; // is y denormalized
input logic xzeroE; // is x zero (suppresses x's implied leading 1, see xExt)
input logic yzeroE; // is y zero (suppresses y's implied leading 1, see yExt)
output logic [105:0] rE; // partial product 1 (final carry vector, shifted left by one)
output logic [105:0] sE; // partial product 2 (final sum vector)
wire [54:0] yExt; // y with a leading 0, the implied 1 and an appended 0 below the LSB
wire [53:0] xExt; // x with a leading 0 and the implied 1
wire [26:0][1:0] add1; // per-row 2-bit output of booth, inserted below the NEXT row's pp
wire [26:0][54:0] pp; // per-row partial product from booth
wire [26:0] e; // per-row 1-bit output of booth, used in the bits above pp (presumably sign handling - TODO confirm)
logic [106:0] tmpsE; // 107-bit sum output of the final compressor
logic [17:0][106:0] lv1add; // level-1 outputs: 9 compressors x {carry<<1, sum}
logic [11:0][106:0] lv2add; // level-2 outputs: 6 compressors x {carry<<1, sum}
logic [7:0][106:0] lv3add; // level-3 outputs: 4 compressors x {carry<<1, sum}
logic [3:0][106:0] lv4add; // level-4 outputs: 2 compressors x {carry<<1, sum}
logic [21:0][107:0] carryTmp; // raw carry outputs of all 22 compressors (9+6+4+2+1)
wire [26:0][106:0] acc; // the 27 aligned rows fed to the compressor tree
// wire [105:0] acc
genvar i;
// The implied leading bit is 1 only for operands that are neither denormal nor zero.
assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
// One booth instance per overlapping 3-bit window of yExt, stepping 2 bits at
// a time (windows [2:0], [4:2], ... [54:52]).
generate
for(i=0; i<27; i=i+1) begin
booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
end
endgenerate
// Row alignment: every acc[i] is 107 bits wide. For i >= 1, pp[i] sits 2*i
// bits above the LSB, add1[i-1] occupies the two bits directly below it, and
// ~e[i] followed by a constant "...01" prefix fills the bits above it.
// acc[0] uses {~e[0], e[0], e[0]} above pp[0] instead. The top two rows drop
// the prefix because there is no room left for it.
// NOTE(review): this looks like the usual sign-extension-elimination encoding
// for Booth rows - confirm against the booth module.
assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
assign acc[1] = {49'b01,~e[1],pp[1],add1[0]};
assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
assign acc[26] = {pp[26],add1[25], 50'b0};
//*** resize adders
// Level 1: 27 rows -> 18. Each compressor takes three rows; its sum goes to
// the odd slot and its carry, shifted left one bit, to the even slot.
// (The instance name add1 lives in the generate scope and is distinct from
// the module-level wire add1.)
generate
for(i=0; i<9; i=i+1) begin
add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
.carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
end
endgenerate
// Level 2: 18 rows -> 12, same wiring pattern (carry rows 9..14).
generate
for(i=0; i<6; i=i+1) begin
add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
.carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
end
endgenerate
// Level 3: 12 rows -> 8 (carry rows 15..18).
generate
for(i=0; i<4; i=i+1) begin
add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
.carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
end
endgenerate
// Level 4: 8 rows -> 4 using four-input compressors (carry rows 19..20).
// Unlike the add3comp2 levels, the full 108-bit carryTmp row is connected to
// the carry port here.
generate
for(i=0; i<2; i=i+1) begin
add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
.carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
end
endgenerate
// Final stage: 4 rows -> sum (tmpsE) and carry (carryTmp[21]).
add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
.carry(carryTmp[21]), .sum(tmpsE));
// Outputs are truncated to 106 bits; the carry vector is shifted left by one
// so that rE and sE carry the same bit weights.
assign sE = tmpsE[105:0];
assign rE = {carryTmp[21][104:0], 1'b0};
// Behavioral reference models kept from the original source (disabled):
//   (a) flat sum of all rows:
//         assign rE = 0;
//         assign sE = acc[0] + acc[1] + acc[2] + ... + acc[25] + acc[26];
//   (b) direct product of the extended significands:
//         assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman};
//         assign rE = 0;
endmodule

View File

@ -1,147 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: normalize.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block performs the normalization shift. It also
// generates the R and S bits for rounding. Finally, it
// handles the special case of a zero sum.
//
// v[53:2] is the fraction component of the prerounded result.
// It can be bypassed back to the X or Z inputs of the FMAC
// for back-to-back operations.
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module normalize(sum, zexp, normcnt, aeM, aligncntM, sumshiftM, sumshiftzeroM, sumzero,
                 xzeroM, zzeroM, yzeroM, bsM, xdenormM, ydenormM, zdenormM, sticky, de0, resultdenorm, v);
  /////////////////////////////////////////////////////////////////////////////
  // Normalization shifter for the FMA sum.
  //
  // Selects a 54-bit window v out of the (shifted) 164-bit sum, ORs all bits
  // below that window together with bsM to form sticky, and produces the
  // exponent de0 and the resultdenorm flag. Two regimes are distinguished by
  // the signed value of aligncntM:
  //   aligncntM <= 2 : shift by the leading-zero count (normcnt), or by a
  //                    fixed amount when the exponent falls below the minimum
  //   aligncntM >  2 : shift by sumshiftM-2 and pick the window according to
  //                    which of the top bits of the shifted sum is set
  //
  // Changes relative to the previous revision (outputs are unaffected):
  //   - sumshifttmp now has a default assignment, so always_comb no longer
  //     infers a latch for it in the aligncntM <= 2 path
  //   - the debug nets tmp, tmp1..tmp5 and the unused sumshiftedtmp were
  //     removed; tmp1..tmp5 were written in only one branch (latches) and
  //     never read
  /////////////////////////////////////////////////////////////////////////////
  input  logic [163:0] sum;           // sum from the adder
  input  logic [62:52] zexp;          // exponent field of the addend Z
  input  logic [8:0]   normcnt;       // leading-zero count of sum
  input  logic [12:0]  aeM;           // product exponent ("ea + eb" in the notes below)
  input  logic [12:0]  aligncntM;     // alignment shift count ("d" in the notes below), signed
  input  logic [8:0]   sumshiftM;     // shift amount used when aligncntM > 2
  input  logic         sumshiftzeroM; // when set in the aligncntM > 2 path, sum is used unshifted
  input  logic         sumzero;       // sum is zero
  input  logic         bsM;           // sticky bit for addend
  input  logic         xdenormM;      // input X is denormalized
  input  logic         ydenormM;      // input Y is denormalized
  input  logic         zdenormM;      // input Z is denormalized
  input  logic         xzeroM;        // input X is zero
  input  logic         yzeroM;        // input Y is zero
  input  logic         zzeroM;        // input Z is zero
  output logic         sticky;        // sticky bit
  output logic [12:0]  de0;           // result exponent before rounding
  output logic         resultdenorm;  // result is denormalized
  output logic [53:0]  v;             // normalized sum, R, S bits

  // Internal nodes
  logic [163:0] sumshifted;           // shifted sum
  logic [9:0]   sumshifttmp;          // sumshiftM - 2; bit 9 set means the subtraction went negative
  logic         isShiftLeft1;         // extra one-bit left shift for the denormal-result case

  // When the sum is zero, normalization does not apply and only the
  // sticky bit must be computed. Otherwise, the sum is shifted
  // and the R and S bits (v[1] and v[0], respectively) are assigned.
  // The R bit is also set on denormalized numbers where the exponent
  // was computed to be exactly -1023 and the L bit was set. This
  // is required for correct rounding up of multiplication results.
  // The sticky bit calculation is actually built into the shifter and
  // does not require a true subtraction shown in the model.

  // aligncntM in {-1, 0, 1} together with a Z exponent field of exactly 2
  assign isShiftLeft1 = (aligncntM == 13'b1 ||aligncntM == 13'b0 || $signed(aligncntM) == $signed(-(13'b1)))&& zexp == 11'h2;

  always_comb
    begin
      // d  = aligncntM
      // l  = normcnt
      // p  = 53
      // ea + eb = aeM

      // Default so that every path through this block assigns sumshifttmp.
      sumshifttmp = '0;

      // set d<=2 to d<=0
      if ($signed(aligncntM)<=$signed(13'd2)) begin //d<=2
        // product anchored or cancellation
        if ($signed(aeM-{{4{normcnt[8]}},normcnt}+13'd2) >= $signed(-(13'd1022))) begin //ea+eb-l+2 >= emin
          // normal result
          de0 = xzeroM|yzeroM ? {2'b0,zexp} : aeM-{{4{normcnt[8]}},normcnt}+{12'b0,xdenormM}+{12'b0,ydenormM}+13'd57;
          resultdenorm = |sum & ~|de0 | de0[12];
          // if z is zero then there was a 56 bit shift of the product
          sumshifted = resultdenorm ? sum << sumshiftM-{8'b0,zzeroM}+{8'b0,isShiftLeft1} : sum << normcnt; // p+2+l
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
        end else begin
          // exponent below emin: fixed shift derived from aeM, result is denormal
          sumshifted = sum << (13'd1080+aeM);
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
          resultdenorm = 1;
          de0 = 0;
        end
      end else begin // extract normalized bits
        sumshifttmp = {1'b0,sumshiftM} - 2;
        // A negative sumshiftM-2 (bit 9 set) means no left shift is applied.
        sumshifted = sumshifttmp[9] ? sum : sum << sumshifttmp;
        // for some reason use exp = zexp + {0,1,2}
        // the book says exp = zexp + {-1,0,1}
        if(sumshiftzeroM) begin
          v = sum[162:109];
          sticky = (|sum[108:0]) | bsM;
          de0 = {2'b0,zexp};
        end else if(sumshifted[163] & ~sumshifttmp[9])begin
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
          de0 = {2'b0,zexp} +13'd2;
        end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin
          v = sumshifted[161:108];
          sticky = (|sumshifted[107:0]) | bsM;
          de0 = {2'b0,zexp}+13'd1;
        end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin
          v = sumshifted[160:107];
          sticky = (|sumshifted[106:0]) | bsM;
          de0 = {2'b0,zexp}+{12'b0,zdenormM};
        end else if(sumshifted[160]& ~zdenormM) begin
          de0 = {2'b0,zexp}-13'b1;
          // exponent hit zero with a non-zero sum: keep the window one bit higher
          v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
          sticky = (|sumshifted[105:0]) | bsM;
        end else if(sumshifted[159]& ~zdenormM) begin
          de0 = {2'b0,zexp}-13'd2;
          // exponent zero or negative with a non-zero sum: window moves up
          v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
          sticky = (|sumshifted[104:0]) | bsM;
        end else if(zdenormM) begin
          v = sumshifted[160:107];
          sticky = (|sumshifted[106:0]) | bsM;
          de0 = {{2{zexp[62]}},zexp};
        end else begin
          de0 = 0;
          sumshifted = sum << sumshiftM-1; // p+2+l
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
        end
        // zero or negative exponent marks a denormalized result
        resultdenorm = (~|de0 | de0[12]);
      end
    end
endmodule

View File

@ -1,124 +0,0 @@
/////////////////////////////////////////////////////////////////////////////
// Block Name: round.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block is responsible for rounding the normalized result of
// the FMAC. Because prenormalized results may be bypassed back to
// the FMAC X and Z inputs, rounding does not appear in the critical
// path of most floating point code. This is good because rounding
// requires an entire 52 bit carry-propagate half-adder delay.
//
// The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also
// muxed in to form the actual result for register file writeback. This
// saves a mux from the writeback path.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module round(v, sticky, FrmM, wsign,
             FmaFlagsM, inf, nanM, xnanM, ynanM, znanM,
             xman, yman, zman,
             wman, infinity, specialsel,expplus1);
  /////////////////////////////////////////////////////////////////////////////
  // Rounds the normalized FMA significand and muxes in special results.
  //
  // v[53:2] is the pre-rounded 52-bit fraction, v[1] the guard bit, v[0] the
  // round bit; sticky is the OR of everything below. FrmM uses the encoding
  // visible in the case statement: 000 nearest-even, 001 toward zero,
  // 010 down, 011 up, 100 nearest-max-magnitude.
  //
  // Fixes relative to the previous revision:
  //   1. Directed modes (010/011) only increment when the result is inexact.
  //      Previously an exact result was still bumped by one ulp.
  //   2. Nearest-max-magnitude rounds ties away from zero for both signs.
  //      Previously a tie on a negative result was truncated.
  //   3. The overflow result follows the rounding mode per IEEE 754: infinity
  //      for the nearest modes and for the directed mode pointing away from
  //      zero, largest finite otherwise. Previously `|FrmM` returned the
  //      largest finite number for nearest-even and infinity for toward-zero.
  /////////////////////////////////////////////////////////////////////////////
  input  logic [53:0] v;          // normalized sum, R, S bits
  input  logic        sticky;     // sticky bit
  input  logic [2:0]  FrmM;       // rounding mode
  input  logic        wsign;      // sign of result
  input  logic [4:0]  FmaFlagsM;  // [4] invalid, [2] overflow, [1] underflow
  input  logic        inf;        // some input is infinity
  input  logic        nanM;       // some input is NaN
  input  logic        xnanM;      // X is NaN
  input  logic        ynanM;      // Y is NaN
  input  logic        znanM;      // Z is NaN
  input  logic [51:0] xman;       // fraction of X
  input  logic [51:0] yman;       // fraction of Y
  input  logic [51:0] zman;       // fraction of Z
  output logic [51:0] wman;       // rounded result of FMAC
  output logic        infinity;   // generate infinity (not max finite) on overflow
  output logic        specialsel; // select special result
  output logic        expplus1;   // rounding carried out of the fraction

  // Internal nodes
  logic        plus1;        // round by adding one
  logic        inexact;      // any discarded bit is non-zero
  logic [52:0] v1;           // fraction + 1, with carry-out in bit 52
  logic [51:0] specialres;   // result of exceptional case
  logic [51:0] infinityres;  // infinity or largest finite fraction
  logic [51:0] nanres;       // propagated or generated NaN fraction

  assign inexact = v[1] | v[0] | sticky;

  // Rounding decision. The fraction is a magnitude, so "plus1" always moves
  // the result away from zero.
  //   nearest even  : {v[1], v[0], sticky}
  //                     0xx         - truncate
  //                     100         - tie, plus1 only if v[2] (LSB) is 1
  //                     101/110/111 - plus1
  //   toward zero   : always truncate
  //   down          : plus1 for inexact negative results
  //   up            : plus1 for inexact positive results
  //   max magnitude : plus1 whenever the guard bit is set (ties go away from zero)
  always_comb begin
    case (FrmM)
      3'b000:  plus1 = v[1] & (v[0] | sticky | v[2]);
      3'b001:  plus1 = 1'b0;
      3'b010:  plus1 = wsign & inexact;
      3'b011:  plus1 = ~wsign & inexact;
      3'b100:  plus1 = v[1];
      default: plus1 = 1'bx;
    endcase
  end

  // Compute rounded result; bit 52 captures the carry out of the fraction.
  assign v1 = {1'b0, v[53:2]} + 53'd1;

  // Special result selection (another unit's result, infinity, NaN, or
  // underflow). This mux is not in the critical path. It is written as a
  // priority chain here but the cases are intended to be mutually exclusive.
  assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || // overflow underflow invalid
                      nanM || inf;
  assign specialres = FmaFlagsM[4] | nanM ? nanres :      // invalid
                      FmaFlagsM[2]        ? infinityres : // overflow
                      inf                 ? 52'b0 :
                      FmaFlagsM[1]        ? 52'b0 : 52'bx; // underflow

  // Overflow goes to infinity unless the rounding mode points back toward
  // zero for this sign, in which case the largest finite number is produced.
  assign infinity = (FrmM == 3'b000) | (FrmM == 3'b100) |
                    ((FrmM == 3'b011) & ~wsign) |
                    ((FrmM == 3'b010) &  wsign);
  assign infinityres = infinity ? 52'b0 : {52{1'b1}};

  // Invalid operations produce a quiet NaN. If an input is NaN its payload is
  // propagated (X first, then Y, then Z) with the top fraction bit forced to
  // 1 so the result is quiet. IEEE 754-2008 section 6.2.3 leaves the choice
  // of payload among several NaN inputs to the implementation.
  assign nanres = xnanM ? {1'b1, xman[50:0]} :
                  ynanM ? {1'b1, yman[50:0]} :
                  znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0};

  // Final select. expplus1 tells the exponent logic that the increment
  // carried out of the fraction.
  assign expplus1 = v1[52] & ~specialsel & plus1;
  assign wman = specialsel ? specialres : (plus1 ? v1[51:0] : v[53:2]);
endmodule

View File

@ -1,111 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: sign.v
// Author: David Harris
// Date: 12/1/1995
//
// Block Description:
// This block manages the signs of the numbers.
// 1 = negative
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
            sumzero, zinfM, inf, wsign, invz, negsum, selsum1, isAdd);
  /////////////////////////////////////////////////////////////////////////////
  // Sign management for the FMA (1 = negative).
  //
  // Derives the product sign, decides whether the addend must be inverted,
  // selects which adder-sign indication to use, and produces the result sign
  // wsign including the zero-result, infinity and invalid special cases.
  //
  // Changes relative to the previous revision (outputs are unaffected):
  //   - psign is now declared explicitly; it used to be an implicit net,
  //     which fails to compile under `default_nettype none
  //   - the unused debug net tmp and a disabled always block were removed
  /////////////////////////////////////////////////////////////////////////////
  input  logic       xsign;     // sign of X
  input  logic       ysign;     // sign of Y
  input  logic       zsign;     // sign of Z
  input  logic       isAdd;     // selects which zero-sign rule applies (see zerosign)
  input  logic       negsum0;   // sum in +0 mode is negative
  input  logic       negsum1;   // sum in +1 mode is negative
  input  logic       bsM;       // sticky bit from addend (not used by the current logic)
  input  logic [2:0] FrmM;      // rounding mode; 3'b010 is round toward minus infinity
  input  logic [4:0] FmaFlagsM; // [4] forces the sign to 0, [1] selects psign for a zero result
  input  logic       sumzero;   // sum = 0
  input  logic       zinfM;     // Z = Inf
  input  logic       inf;       // some input = Inf
  output logic       wsign;     // sign of W
  output logic       invz;      // invert addend into adder
  output logic       negsum;    // negate result of adder
  output logic       selsum1;   // select +1 mode from compound adder

  // Internal nodes
  logic psign;     // sign of the product X*Y
  logic zerosign;  // sign if result = 0
  logic sumneg;    // sign of a non-zero, finite result
  logic infsign;   // sign if result = Inf

  // Compute sign of product
  assign psign = xsign ^ ysign;

  // Invert addend if sign of Z is different from sign of product
  assign invz = (zsign ^ psign);
  assign selsum1 = invz;

  // negate sum if it is negative, using the indication that matches selsum1
  assign negsum = (selsum1&negsum1) | (~selsum1&negsum0);

  // is the sum negative
  //    p - z : follows the sign of the sum
  //   -p + z : negative when the sum is positive
  //   -p - z : always negative
  assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign);

  // Sign of an exactly-zero result. IEEE 754-2008 section 6.3:
  //   "When the sum/difference of two operands with opposite signs is exactly
  //    zero, the sign of that sum/difference shall be +0 in all rounding
  //    attributes EXCEPT roundTowardNegative. Under that attribute, the sign
  //    of an exact zero sum/difference shall be -0. However, x+x = x-(-x)
  //    retains the same sign as x even when x is zero."
  //
  //   if FmaFlagsM[1] is set then use psign
  //   otherwise
  //     addition    : opposite signs -> 0 unless round to -inf, else psign
  //     subtraction : opposite signs -> psign, else 0 unless round to -inf
  assign zerosign = FmaFlagsM[1] ? psign :
                    (isAdd ? (psign^zsign ? FrmM == 3'b010 : psign) :
                             (psign^zsign ? psign : FrmM == 3'b010));

  // An infinite result takes Z's sign when Z is the infinity, otherwise the
  // product's sign.
  assign infsign = zinfM ? zsign : psign; //KEP 210112 keep the correct sign when result is infinity

  // Result sign priority: FmaFlagsM[4] (sign forced to 0), infinity,
  // zero sum, ordinary sum.
  assign wsign = FmaFlagsM[4] ? 1'b0 : (inf ? infsign :(sumzero ? zerosign : sumneg));
endmodule

View File

@ -1,67 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: special.v
// Author: David Harris
// Date: 12/2/1995
//
// Block Description:
// This block implements special case handling for unusual operands (e.g.
// 0, NaN, denormalize, infinity). The block consists of zero/one detectors.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module special(
  input  logic [63:0] ReadData1E,  // operand X, double-precision layout
  input  logic [63:0] ReadData2E,  // operand Y
  input  logic [63:0] ReadData3E,  // operand Z
  output logic        xzeroE,      // X exponent and fraction are all zero
  output logic        yzeroE,      // Y exponent and fraction are all zero
  output logic        zzeroE,      // Z exponent and fraction are all zero
  output logic        xnanE,       // X is NaN
  output logic        ynanE,       // Y is NaN
  output logic        znanE,       // Z is NaN
  output logic        xdenormE,    // X is denormalized
  output logic        ydenormE,    // Y is denormalized
  output logic        zdenormE,    // Z is denormalized
  output logic        xinfE,       // X is infinity
  output logic        yinfE,       // Y is infinity
  output logic        zinfE        // Z is infinity
);
  // Special-operand detection (zero, NaN, denormal, infinity).
  //
  // Each class is a combination of two observations on bits 62:52 (exponent)
  // and one on bits 51:0 (fraction); the sign bit 63 is ignored. The three
  // detectors are computed once per operand and shared, packed as {Z, Y, X}.
  //
  //   exponent   fraction    class
  //   all ones   non-zero    NaN
  //   all ones   zero        infinity
  //   all zeros  non-zero    denormal
  //   all zeros  zero        zero
  //
  // Denormals are deliberately NOT reported as zero, so that arithmetic on a
  // denormal operand does not collapse to a zero result.

  logic [2:0] expones;  // exponent field is all ones
  logic [2:0] expzero;  // exponent field is all zeros
  logic [2:0] fracnz;   // fraction field has at least one bit set

  assign expones = { &ReadData3E[62:52],    &ReadData2E[62:52],    &ReadData1E[62:52]};
  assign expzero = {~(|ReadData3E[62:52]), ~(|ReadData2E[62:52]), ~(|ReadData1E[62:52])};
  assign fracnz  = { |ReadData3E[51:0],     |ReadData2E[51:0],     |ReadData1E[51:0]};

  assign {znanE,    ynanE,    xnanE}    = expones &  fracnz;
  assign {zinfE,    yinfE,    xinfE}    = expones & ~fracnz;
  assign {zdenormE, ydenormE, xdenormE} = expzero &  fracnz;
  assign {zzeroE,   yzeroE,   xzeroE}   = expzero & ~fracnz;
endmodule

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -29,23 +29,27 @@ module convert_inputs(Float1, Float2, op1, op2, op_type, P);
// Test if the input exponent is zero, because if it is then the
// exponent of the converted number should be zero.
assign Zexp1 = ~(op1[62] | op1[61] | op1[60] | op1[59] |
op1[58] | op1[57] | op1[56] | op1[55]);
assign Zexp2 = ~(op2[62] | op2[61] | op2[60] | op2[59] |
op2[58] | op2[57] | op2[56] | op2[55]);
assign Oexp1 = (op1[62] & op1[61] & op1[60] & op1[59] &
op1[58] & op1[57] & op1[56] & op1[55]);
assign Oexp2 = (op2[62] & op2[61] & op2[60] & op2[59] &
op2[58] & op2[57] & op2[56] &op2[55]);
assign Zexp1 = ~(|op1[30:23]);
assign Zexp2 = ~(|op2[30:23]);
assign Oexp1 = (&op1[30:23]);
assign Oexp2 = (&op2[30:23]);
// assign Zexp1 = ~(op1[62] | op1[61] | op1[60] | op1[59] |
// op1[58] | op1[57] | op1[56] | op1[55]);
// assign Zexp2 = ~(op2[62] | op2[61] | op2[60] | op2[59] |
// op2[58] | op2[57] | op2[56] | op2[55]);
// assign Oexp1 = (op1[62] & op1[61] & op1[60] & op1[59] &
// op1[58] & op1[57] & op1[56] & op1[55]);
// assign Oexp2 = (op2[62] & op2[61] & op2[60] & op2[59] &
// op2[58] & op2[57] & op2[56] &op2[55]);
// Conditionally convert op1. Lower 29 bits are zero for single precision.
assign Float1[62:29] = conv_SP ? {op1[62], {3{(~op1[62]&~Zexp1)|Oexp1}}, op1[61:32]}
assign Float1[62:29] = conv_SP ? {op1[30], {3{(~op1[30]&~Zexp1)|Oexp1}}, op1[29:0]}
: op1[62:29];
assign Float1[28:0] = op1[28:0] & {29{~conv_SP}};
// Conditionally convert op2. Lower 29 bits are zero for single precision.
assign Float2[62:29] = conv_SP ? {op2[62],
{3{(~op2[62]&~Zexp2)|Oexp2}}, op2[61:32]}
assign Float2[62:29] = conv_SP ? {op2[30],
{3{(~op2[30]&~Zexp2)|Oexp2}}, op2[29:0]}
: op2[62:29];
assign Float2[28:0] = op2[28:0] & {29{~conv_SP}};
@ -54,8 +58,8 @@ module convert_inputs(Float1, Float2, op1, op2, op_type, P);
assign negate = op_type[2] & ~op_type[1] & op_type[0];
assign abs_val = op_type[2] & ~op_type[1] & ~op_type[0];
assign Float1[63] = (op1[63] ^ negate) & ~abs_val;
assign Float2[63] = op2[63];
assign Float1[63] = conv_SP ? (op1[31] ^ negate) & ~abs_val : (op1[63] ^ negate) & ~abs_val;
assign Float2[63] = conv_SP ? op2[31] : op2[63];
endmodule // convert_inputs

View File

@ -3,22 +3,21 @@
// it conditionally converts single precision values to double
// precision values and modifies the sign of op1.
// The converted operands are Float1 and Float2.
module convert_inputs_div (Float1, Float2b, op1, op2, op_type, P);
input [63:0] op1; // 1st input operand (A)
input [63:0] op2; // 2nd input operand (B)
input P; // Result Precision (0 for double, 1 for single)
input op_type; // Operation
input logic [63:0] op1; // 1st input operand (A)
input logic [63:0] op2; // 2nd input operand (B)
input logic P; // Result Precision (0 for double, 1 for single)
input logic op_type; // Operation
output [63:0] Float1; // Converted 1st input operand
output [63:0] Float2b; // Converted 2nd input operand
output logic [63:0] Float1; // Converted 1st input operand
output logic [63:0] Float2b; // Converted 2nd input operand
wire [63:0] Float2;
wire Zexp1; // One if the exponent of op1 is zero
wire Zexp2; // One if the exponent of op2 is zero
wire Oexp1; // One if the exponent of op1 is all ones
wire Oexp2; // One if the exponent of op2 is all ones
logic [63:0] Float2;
logic Zexp1; // One if the exponent of op1 is zero
logic Zexp2; // One if the exponent of op2 is zero
logic Oexp1; // One if the exponent of op1 is all ones
logic Oexp2; // One if the exponent of op2 is all ones
// Test if the input exponent is zero, because if it is then the
// exponent of the converted number should be zero.

76
wally-pipelined/src/fpu/divconv.sv Normal file → Executable file
View File

@ -1,11 +1,6 @@
// `timescale 1ps/1ps
module divconv (q1, qm1, qp1, q0, qm0, qp0,
rega_out, regb_out, regc_out, regd_out,
regr_out, d, n,
sel_muxa, sel_muxb, sel_muxr,
reset, clk,
load_rega, load_regb, load_regc, load_regd,
load_regr, load_regs, P, op_type, exp_odd);
module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk, load_rega, load_regb,
load_regc, load_regd, load_regr, load_regs, P, op_type, exp_odd);
input logic [52:0] d, n;
input logic [2:0] sel_muxa, sel_muxb;
@ -40,9 +35,7 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
logic [127:0] constant, constant2;
logic [63:0] q_const, qp_const, qm_const;
logic [63:0] d2, n2;
logic [11:0] d3;
logic cout1, cout2, cout3, cout4, cout5, cout6, cout7, muxr_out;
logic [11:0] d3;
// Check if exponent is odd for sqrt
// If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA
@ -68,9 +61,9 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
mux2 #(64) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier);
mux2 #(64) mx6 (muxa_out, mcand_q, sel_muxr, mcand);
// TDM multiplier (carry/save)
multiplier mult1 (mcand, mplier, Sum, Carry); // ***multiply
multiplier mult1 (mcand, mplier, Sum, Carry);
// Q*D - N (reversed but changed in rounder.v to account for sign reversal)
csa #(128) csa1 (Sum, Carry, constant, Sum2, Carry2); //***adder
csa #(128) csa1 (Sum, Carry, constant, Sum2, Carry2);
// Add ulp for subtraction in remainder
mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out);
@ -80,15 +73,17 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
mux2 #(64) mxA ({64'hFFFF_FFFF_FFFF_F9FF}, {64'hFFFF_FF3F_FFFF_FFFF}, P, qm_const);
// CPA (from CSA)/Remainder addition/subtraction
ldf128 cpa1 (cout1, mul_out, Sum2, Carry2, muxr_out); //***adder
adder #(128) cpa1 (Sum2, Carry2, muxr_out, mul_out, cout1);
// Assuming [1,2) - q1
ldf64 cpa2 (cout2, q_out1, regb_out, q_const, 1'b0); //***adder
ldf64 cpa3 (cout3, qp_out1, regb_out, qp_const, 1'b0); //***adder
ldf64 cpa4 (cout4, qm_out1, regb_out, qm_const, 1'b1); //***adder
// Assuming [0.5,1) - q0
ldf64 cpa5 (cout5, q_out0, {regb_out[62:0], vss}, q_const, 1'b0); //***adder
ldf64 cpa6 (cout6, qp_out0, {regb_out[62:0], vss}, qp_const, 1'b0); //***adder
ldf64 cpa7 (cout7, qm_out0, {regb_out[62:0], vss}, qm_const, 1'b1); //***adder
adder #(64) cpa2 (regb_out, q_const, 1'b0, q_out1, cout2);
adder #(64) cpa3 (regb_out, qp_const, 1'b0, qp_out1, cout3);
adder #(64) cpa4 (regb_out, qm_const, 1'b1, qm_out1, cout4);
// Assuming [0.5,1) - q0
adder #(64) cpa5 ({regb_out[62:0], vss}, q_const, 1'b0, q_out0, cout5);
adder #(64) cpa6 ({regb_out[62:0], vss}, qp_const, 1'b0, qp_out0, cout6);
adder #(64) cpa7 ({regb_out[62:0], vss}, qm_const, 1'b1, qm_out0, cout7);
// One's complement instead of two's complement (for hw efficiency)
assign three = {~mul_out[126], mul_out[126], ~mul_out[125:63]};
mux2 #(64) mxTC (~mul_out[126:63], three[64:1], op_type, twocmp_out);
@ -112,9 +107,11 @@ endmodule // divconv
// module adder #(parameter WIDTH=8)
// (input logic [WIDTH-1:0] a, b,
// output logic [WIDTH-1:0] y);
// input logic cin,
// output logic [WIDTH-1:0] y,
// output logic cout);
// assign y = a + b;
// assign {cout, y} = a + b + cin;
// endmodule // adder
@ -226,10 +223,33 @@ endmodule // divconv
// endmodule // mux6
// module eqcmp #(parameter WIDTH = 8)
// (input logic [WIDTH-1:0] a, b,
// output logic y);
module eqcmp #(parameter WIDTH = 8)
(input logic [WIDTH-1:0] a, b,
output logic y);
// assign y = (a == b);
assign y = (a == b);
// endmodule // eqcmp
endmodule // eqcmp
// module fa (input logic a, b, c, output logic sum, carry);
// assign sum = a^b^c;
// assign carry = a&b|a&c|b&c;
// endmodule // fa
// module csa #(parameter WIDTH=8)
// (input logic [WIDTH-1:0] a, b, c,
// output logic [WIDTH-1:0] sum, carry);
// logic [WIDTH:0] carry_temp;
// genvar i;
// generate
// for (i=0;i<WIDTH;i=i+1)
// begin : genbit
// fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
// end
// endgenerate
// assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
// endmodule // csa

View File

@ -1,38 +1,36 @@
// Exception logic for the floating point adder. Note: We may
// actually want to move to where the result is computed.
module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
input [63:0] A; // 1st input operand (op1)
input [63:0] B; // 2nd input operand (op2)
input op_type; // Determine operation
input logic [63:0] A; // 1st input operand (op1)
input logic [63:0] B; // 2nd input operand (op2)
input logic op_type; // Determine operation
output [2:0] Ztype; // Indicates type of result (Z)
output Invalid; // Invalid operation exception
output Denorm; // Denormalized input
output ANorm; // A is not zero or Denorm
output BNorm; // B is not zero or Denorm
output logic [2:0] Ztype; // Indicates type of result (Z)
output logic Invalid; // Invalid operation exception
output logic Denorm; // Denormalized input
output logic ANorm; // A is not zero or Denorm
output logic BNorm; // B is not zero or Denorm
wire AzeroM; // '1' if the mantissa of A is zero
wire BzeroM; // '1' if the mantissa of B is zero
wire AzeroE; // '1' if the exponent of A is zero
wire BzeroE; // '1' if the exponent of B is zero
wire AonesE; // '1' if the exponent of A is all ones
wire BonesE; // '1' if the exponent of B is all ones
wire ADenorm; // '1' if A is a denomalized number
wire BDenorm; // '1' if B is a denomalized number
wire AInf; // '1' if A is infinite
wire BInf; // '1' if B is infinite
wire AZero; // '1' if A is 0
wire BZero; // '1' if B is 0
wire ANaN; // '1' if A is a not-a-number
wire BNaN; // '1' if B is a not-a-number
wire ASNaN; // '1' if A is a signalling not-a-number
wire BSNaN; // '1' if B is a signalling not-a-number
wire ZQNaN; // '1' if result Z is a quiet NaN
wire ZInf; // '1' if result Z is an infnity
wire square_root; // '1' if square root operation
wire Zero; // '1' if result is zero
logic AzeroM; // '1' if the mantissa of A is zero
logic BzeroM; // '1' if the mantissa of B is zero
logic AzeroE; // '1' if the exponent of A is zero
logic BzeroE; // '1' if the exponent of B is zero
logic AonesE; // '1' if the exponent of A is all ones
logic BonesE; // '1' if the exponent of B is all ones
logic ADenorm; // '1' if A is a denomalized number
logic BDenorm; // '1' if B is a denomalized number
logic AInf; // '1' if A is infinite
logic BInf; // '1' if B is infinite
logic AZero; // '1' if A is 0
logic BZero; // '1' if B is 0
logic ANaN; // '1' if A is a not-a-number
logic BNaN; // '1' if B is a not-a-number
logic ASNaN; // '1' if A is a signalling not-a-number
logic BSNaN; // '1' if B is a signalling not-a-number
logic ZQNaN; // '1' if result Z is a quiet NaN
logic ZInf; // '1' if result Z is an infnity
logic Zero; // '1' if result is zero
parameter [51:0] fifty_two_zeros = 52'h0; // Use parameter?
@ -93,4 +91,3 @@ module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
assign Ztype[2] = BZero&~op_type;
endmodule // exception

View File

@ -0,0 +1,417 @@
//
// File name : fpadd
// Title : Floating-Point Adder/Subtractor
// project : FPU
// Library : fpadd
// Author(s) : James E. Stine, Jr., Brett Mathis
// Purpose : definition of main unit to floating-point add/sub
// notes :
//
// Copyright Oklahoma State University
// Copyright AFRL
//
// Basic and Denormalized Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Compare exponents. Swap the operands of exp1 < exp2
// or of (exp1 = exp2 AND mnt1 < mnt2)
// Step 4: Shift the mantissa corresponding to the smaller exponent,
// and extend precision by three bits to the right.
// Step 5: Add or subtract the mantissas.
// Step 6: Normalize the result.
// Shift left until normalized. Normalized when the value to the
// left of the binary point is 1.
// Step 7: Round the result.//
// Step 8: Put sum onto output.
//
// faddcvt: two-stage floating-point add wrapper.
//   E stage  : fpuaddcvt1 produces the intermediate sums, exponents and flags.
//   E/M regs : flopenrc instances carry every intermediate into the M stage.
//   M stage  : fpuaddcvt2 consumes the registered values and drives
//              FAddResM / FAddFlgM.
module faddcvt(
   input  logic        clk,
   input  logic        reset,
   input  logic        FlushM,     // forwarded to every E/M register (presumably its clear input - confirm against flopenrc)
   input  logic        StallM,     // E/M registers receive ~StallM (presumably their enable - confirm against flopenrc)
   input  logic [63:0] FSrcXE,     // 1st input operand (A)
   input  logic [63:0] FSrcYE,     // 2nd input operand (B)
   input  logic [3:0]  FOpCtrlE,   // Function opcode, E stage
   input  logic [3:0]  FOpCtrlM,   // Function opcode, M stage
   input  logic        FmtE,       // Precision select, E stage (inverted inside fpuaddcvt1 to form P)
   input  logic        FmtM,       // Precision select, M stage (inverted inside fpuaddcvt2 to form P)
   input  logic [2:0]  FrmM,       // Rounding mode
   output logic [63:0] FAddResM,   // Result of operation
   output logic [4:0]  FAddFlgM);  // IEEE exception flags

   // Execute-stage values produced by fpuaddcvt1
   logic [63:0] AddSumE, AddSumTcE;
   logic [63:0] AddFloat1E, AddFloat2E;
   logic [11:0] AddExp1DenormE, AddExp2DenormE;
   logic [10:0] AddExponentE, AddExpPostSumE;
   logic [3:0]  AddSelInvE;
   logic        AddCorrSignE, AddSignAE;
   logic        AddOp1NormE, AddOp2NormE;
   logic        AddOpANormE, AddOpBNormE;
   logic        AddInvalidE, AddDenormInE;
   logic        AddConvertE, AddSwapE;
   logic        AddNormOvflowE;

   // Memory-stage copies consumed by fpuaddcvt2
   logic [63:0] AddSumM, AddSumTcM;
   logic [63:0] AddFloat1M, AddFloat2M;
   logic [11:0] AddExp1DenormM, AddExp2DenormM;
   logic [10:0] AddExponentM, AddExpPostSumM;
   logic [3:0]  AddSelInvM;
   logic        AddCorrSignM, AddSignAM;
   logic        AddOp1NormM, AddOp2NormM;
   logic        AddOpANormM, AddOpBNormM;
   logic        AddInvalidM, AddDenormInM;
   logic        AddConvertM, AddSwapM;
   logic        AddNormOvflowM;   // registered here but not connected to fpuaddcvt2

   // Shared register control and the packed single-bit/4-bit control bundle
   logic        EnM;
   logic [14:0] AddCtrlE, AddCtrlM;

   assign EnM = ~StallM;

   // ---------------------------------------------------------------
   // E stage
   // ---------------------------------------------------------------
   fpuaddcvt1 fpadd1 (
      .FSrcXE         (FSrcXE),
      .FSrcYE         (FSrcYE),
      .FOpCtrlE       (FOpCtrlE),
      .FmtE           (FmtE),
      .AddFloat1E     (AddFloat1E),
      .AddFloat2E     (AddFloat2E),
      .AddExponentE   (AddExponentE),
      .AddExpPostSumE (AddExpPostSumE),
      .AddExp1DenormE (AddExp1DenormE),
      .AddExp2DenormE (AddExp2DenormE),
      .AddSumE        (AddSumE),
      .AddSumTcE      (AddSumTcE),
      .AddSelInvE     (AddSelInvE),
      .AddCorrSignE   (AddCorrSignE),
      .AddSignAE      (AddSignAE),
      .AddOp1NormE    (AddOp1NormE),
      .AddOp2NormE    (AddOp2NormE),
      .AddOpANormE    (AddOpANormE),
      .AddOpBNormE    (AddOpBNormE),
      .AddInvalidE    (AddInvalidE),
      .AddDenormInE   (AddDenormInE),
      .AddConvertE    (AddConvertE),
      .AddSwapE       (AddSwapE),
      .AddNormOvflowE (AddNormOvflowE));

   // ---------------------------------------------------------------
   // E/M pipeline registers
   // ---------------------------------------------------------------
   flopenrc #(64) EMRegAdd1(clk, reset, FlushM, EnM, AddSumE,        AddSumM);
   flopenrc #(64) EMRegAdd2(clk, reset, FlushM, EnM, AddSumTcE,      AddSumTcM);
   flopenrc #(11) EMRegAdd3(clk, reset, FlushM, EnM, AddExpPostSumE, AddExpPostSumM);
   flopenrc #(64) EMRegAdd4(clk, reset, FlushM, EnM, AddFloat1E,     AddFloat1M);
   flopenrc #(64) EMRegAdd5(clk, reset, FlushM, EnM, AddFloat2E,     AddFloat2M);
   flopenrc #(12) EMRegAdd6(clk, reset, FlushM, EnM, AddExp1DenormE, AddExp1DenormM);
   flopenrc #(12) EMRegAdd7(clk, reset, FlushM, EnM, AddExp2DenormE, AddExp2DenormM);
   flopenrc #(11) EMRegAdd8(clk, reset, FlushM, EnM, AddExponentE,   AddExponentM);

   // 4-bit select plus eleven single-bit flags = 15 bits; the pack and
   // unpack orders below must stay identical.
   assign AddCtrlE = {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE,
                      AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE,
                      AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE};
   flopenrc #(15) EMRegAdd9(clk, reset, FlushM, EnM, AddCtrlE, AddCtrlM);
   assign {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM,
           AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM,
           AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM} = AddCtrlM;

   // ---------------------------------------------------------------
   // M stage
   // ---------------------------------------------------------------
   fpuaddcvt2 fpadd2 (
      .FrmM           (FrmM),
      .FOpCtrlM       (FOpCtrlM),
      .FmtM           (FmtM),
      .AddSumM        (AddSumM),
      .AddSumTcM      (AddSumTcM),
      .AddFloat1M     (AddFloat1M),
      .AddFloat2M     (AddFloat2M),
      .AddExp1DenormM (AddExp1DenormM),
      .AddExp2DenormM (AddExp2DenormM),
      .AddExponentM   (AddExponentM),
      .AddExpPostSumM (AddExpPostSumM),
      .AddSelInvM     (AddSelInvM),
      .AddOp1NormM    (AddOp1NormM),
      .AddOp2NormM    (AddOp2NormM),
      .AddOpANormM    (AddOpANormM),
      .AddOpBNormM    (AddOpBNormM),
      .AddInvalidM    (AddInvalidM),
      .AddDenormInM   (AddDenormInM),
      .AddSignAM      (AddSignAM),
      .AddCorrSignM   (AddCorrSignM),
      .AddConvertM    (AddConvertM),
      .AddSwapM       (AddSwapM),
      .FAddResM       (FAddResM),
      .FAddFlgM       (FAddFlgM));

endmodule // faddcvt
// fpuaddcvt1: execute-stage half of the floating-point adder.
// Converts/unpacks the two operands, detects special inputs, computes the
// exponent difference, swaps and aligns the mantissas, and produces both the
// mantissa sum (AddSumE) and the reversed difference (AddSumTcE) together
// with the exponent and flag side-band signals needed by fpuaddcvt2.
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FSrcXE, FSrcYE, FOpCtrlE, FmtE);

   input logic [63:0] FSrcXE;   // 1st input operand (A)
   input logic [63:0] FSrcYE;   // 2nd input operand (B)
   input logic [3:0]  FOpCtrlE; // Function opcode
   input logic        FmtE;     // Result Precision (1 for double, 0 for single)

   // P is the inverted format bit handed to convert_inputs.
   wire        P;
   assign P = ~FmtE;

   wire [63:0] IntValue;
   wire [11:0] exp1, exp2;
   wire [11:0] exp_diff1, exp_diff2;
   wire [11:0] exp_shift;
   wire [51:0] mantissaA;
   wire [56:0] mantissaA1;
   wire [63:0] mantissaA3;
   wire [51:0] mantissaB;
   wire [56:0] mantissaB1, mantissaB2;
   wire [63:0] mantissaB3;
   wire        exp_gt63;
   wire        Sticky_out;
   wire        sub;
   wire        zeroB;
   wire [5:0]  align_shift;

   output logic [63:0] AddFloat1E;
   output logic [63:0] AddFloat2E;
   output logic [10:0] AddExponentE;
   output logic [10:0] AddExpPostSumE;
   output logic [11:0] AddExp1DenormE, AddExp2DenormE; //KEP used to be [10:0]
   output logic [63:0] AddSumE, AddSumTcE;
   output logic [3:0]  AddSelInvE;
   output logic        AddCorrSignE;
   output logic        AddSignAE;
   output logic        AddOp1NormE, AddOp2NormE;
   output logic        AddOpANormE, AddOpBNormE;
   output logic        AddInvalidE;
   output logic        AddDenormInE;
   output logic        AddConvertE;
   output logic        AddSwapE;
   output logic        AddNormOvflowE;

   wire [5:0]  ZP_mantissaA;
   wire [5:0]  ZP_mantissaB;
   wire        ZV_mantissaA;
   wire        ZV_mantissaB;

   // Convert the input operands to their appropriate forms based on
   // the original operands, the FOpCtrlE, and their precision P.
   // Single precision inputs are converted to double precision
   // and the sign of the first operand is set appropriately based on
   // if the operation is absolute value or negation.
   convert_inputs conv1 (AddFloat1E, AddFloat2E, FSrcXE, FSrcYE, FOpCtrlE, P);

   // Test for exceptions and return the "Invalid Operation" and
   // "Denormalized" Input Flags. The "AddSelInvE" is used in
   // the third pipeline stage to select the result. Also, AddOp1NormE
   // and AddOp2NormE are one if FSrcXE and FSrcYE are not zero or denormalized.
   // sub is one if the effective operation is subtraction.
   exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
                   AddFloat1E, AddFloat2E, FOpCtrlE);

   // Perform Exponent Subtraction (used for alignment). For performance
   // both exponent subtractions are performed in parallel. This was
   // changed to a behavior level to allow the tools to try to optimize
   // the two parallel additions. The input values are zero-extended to 12
   // bits prior to performing the addition.
   assign exp1 = {1'b0, AddFloat1E[62:52]};
   assign exp2 = {1'b0, AddFloat2E[62:52]};
   assign exp_diff1 = exp1 - exp2;
   // With a denormalized input the sign bit is prepended in place of the
   // zero extension before subtracting.
   assign exp_diff2 = AddDenormInE ? ({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]}): exp2 - exp1;

   // The second operand (B) should be set to zero, if FOpCtrlE does not
   // specify addition or subtraction
   assign zeroB = FOpCtrlE[2] | FOpCtrlE[1];

   // Swapped operands if zeroB is not one and exp1 < exp2.
   // Swapping causes exp2 to be used for the result exponent.
   // Only the exponent of the larger operand is used to determine
   // the final result.
   assign AddSwapE = exp_diff1[11] & ~zeroB;
   assign AddExponentE = AddSwapE ? exp2[10:0] : exp1[10:0];
   assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0];
   assign mantissaA = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0];
   assign mantissaB = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0];
   assign AddSignAE = AddSwapE ? AddFloat2E[63] : AddFloat1E[63];

   // Leading-zero detectors on the two (post-swap) 52-bit mantissas.
   // The ZP_* counts are subtracted from the exponents below to form the
   // denormalized exponents; the ZV_* outputs are not used in this module.
   lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
   lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);

   // Denormalized exponents created by subtracting the leading zeroes from the original exponents
   assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
   assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});

   // Determine the alignment shift and limit it to 63. If any bit from
   // exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
   assign exp_shift = AddSwapE ? exp_diff2 : exp_diff1;
   assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
                     | exp_shift[8] | exp_shift[7] | exp_shift[6];
   assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift

   // Unpack the 52-bit mantissas to 57-bit numbers of the form.
   //    001.M[51]M[50] ... M[1]M[0]00
   // Unless the number has an exponent of zero, in which case it
   // is unpacked as
   //    000.00 ... 00
   // This effectively flushes denormalized values to zero.
   // The three bits to the left of the binary point prevent overflow
   // and loss of sign information. The two bits to the right of the
   // original mantissa form the "guard" and "round" bits that are used
   // to round the result.
   assign AddOpANormE = AddSwapE ? AddOp2NormE : AddOp1NormE;
   assign AddOpBNormE = AddSwapE ? AddOp1NormE : AddOp2NormE;
   assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0};
   assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0};

   // Perform mantissa alignment using a 57-bit barrel shifter
   // If any of the bits shifted out are one, Sticky_out is set.
   // The size of the barrel shifter could be reduced by two bits
   // by not adding the leading two zeros until after the shift.
   barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);

   // Place either the sign-extended 32-bit value or the original 64-bit value
   // into IntValue (to be used for integer to floating point conversion).
   // These drivers were previously commented out, which left IntValue
   // undriven even though mantissaA3 still selects it when AddConvertE is set.
   // NOTE(review): if conversions are handled by a separate unit this path is
   // never selected; confirm FOpCtrlE[0] is still the intended 32/64-bit select.
   assign IntValue [31:0] = FSrcXE[31:0];
   assign IntValue [63:32] = FOpCtrlE[0] ? {32{FSrcXE[31]}} : FSrcXE[63:32];

   // If doing an integer to floating point conversion, mantissaA3 is set to
   // IntValue and the prenormalized exponent is set to 1084. Otherwise,
   // mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero,
   // and the exponent value is left unchanged.
   // Under denormalized cases, the exponent before the rounder is set to 1
   // if the normal shift value is 11.
   assign AddConvertE = ~FOpCtrlE[2] & FOpCtrlE[1];
   assign mantissaA3 = (FOpCtrlE[3]) ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E) : (AddDenormInE ? ({12'h0, mantissaA}) : (AddConvertE ? IntValue : {mantissaA1, 7'h0}));

   // Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
   // 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
   // zeros.
   assign mantissaB3[63:7] = (FOpCtrlE[3]) ? (57'h0) : (AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
   assign mantissaB3[6] = (FOpCtrlE[3]) ? (1'b0) : (AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB);
   assign mantissaB3[5:0] = (FOpCtrlE[3]) ? (6'h01) : (AddDenormInE ? mantissaB[5:0] : 6'h0);

   // The sign of the result needs to be corrected if the true
   // operation is subtraction and the input operands were swapped.
   assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;

   // 64-bit Mantissa Adder/Subtractor
   cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder

   // 64-bit Mantissa Subtractor - to get the two's complement of the
   // result when the sign from the adder/subtractor is negative.
   cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder

   // Finds normal underflow result to determine whether to round final exponent down
   //***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
   assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);

endmodule // fpuaddcvt1
//
// File name : fpadd
// Title : Floating-Point Adder/Subtractor
// project : FPU
// Library : fpadd
// Author(s) : James E. Stine, Jr., Brett Mathis
// Purpose : definition of main unit to floating-point add/sub
// notes :
//
// Copyright Oklahoma State University
// Copyright AFRL
//
// Basic and Denormalized Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
// or if (exp1 = exp2 AND mnt1 < mnt2)
// Step 4: Shift the mantissa corresponding to the smaller exponent,
// and extend precision by three bits to the right.
// Step 5: Add or subtract the mantissas.
// Step 6: Normalize the result.
// Shift left until normalized. Normalized when the value to the
// left of the binary point is 1.
// Step 7: Round the result.
// Step 8: Put sum onto output.
//
// fpuaddcvt2: memory-stage half of the floating-point adder.
// Takes the registered sums, exponents and flags from the execute stage,
// selects the correctly signed sum, normalizes it with a leading-zero
// detector and left barrel shifter, and hands everything to the rounder,
// which produces the final result (FAddResM) and exception flags (FAddFlgM).
module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);

   input [2:0]  FrmM;     // Rounding mode - specify values
   input [3:0]  FOpCtrlM; // Function opcode
   // NOTE(review): the polarity stated here conflicts with the FmtE comment
   // in fpuaddcvt1 (1 for double, 0 for single); confirm which is correct.
   input        FmtM;     // Result Precision (0 for double, 1 for single)
   input [63:0] AddSumM, AddSumTcM;
   input [63:0] AddFloat1M;
   input [63:0] AddFloat2M;
   input [11:0] AddExp1DenormM, AddExp2DenormM;
   input [10:0] AddExponentM, AddExpPostSumM;
   input [3:0]  AddSelInvM;
   input        AddOp1NormM, AddOp2NormM;
   input        AddOpANormM, AddOpBNormM;
   input        AddInvalidM;
   input        AddDenormInM;
   input        AddSignAM;
   input        AddCorrSignM;
   input        AddConvertM;
   input        AddSwapM;

   output [63:0] FAddResM; // Result of operation
   output [4:0]  FAddFlgM; // IEEE exception flags

   wire         AddDenormM; // Denormalized on input or output (assigned below, not exported)

   // P is the inverted format bit handed to the rounder.
   wire         P;
   assign P = ~FmtM;

   wire [10:0]  exp_pre;
   wire [63:0]  Result;
   wire [63:0]  sum_norm, sum_norm_w_bypass;
   wire [5:0]   norm_shift, norm_shift_denorm;
   wire         exp_valid;
   wire         DenormIO;
   wire [4:0]   FlagsIn;
   wire         sign_corr;
   wire         mantissa_comp;
   wire         mantissa_comp_sum;
   wire         mantissa_comp_sum_tc;
   wire         Float1_sum_comp;
   wire         Float2_sum_comp;
   wire         Float1_sum_tc_comp;
   wire         Float2_sum_tc_comp;
   wire         normal_underflow;
   wire [63:0]  sum_corr;

   // AddNormOvflowM is recomputed locally below rather than received as a port.
   logic        AddNormOvflowM;
   // The overflow/underflow trap enables are not ports; both are tied high.
   logic        AddOvEnM; // Overflow trap enabled
   logic        AddUnEnM; // Underflow trap enabled
   assign AddOvEnM = 1'b1;
   assign AddUnEnM = 1'b1;

   // Exponent value pre-rounding with considerations for denormalized
   // cases/conversion cases
   assign exp_pre = AddDenormInM ?
                    ((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
                    : (AddConvertM ? 11'b10000111100 : AddExponentM);

   // Finds normal underflow result to determine whether to round final exponent down
   // Comparison between each float and the resulting sum of the primary cla adder/subtractor and cla subtractor
   assign Float1_sum_comp = (AddFloat1M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
   assign Float2_sum_comp = (AddFloat2M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
   assign Float1_sum_tc_comp = (AddFloat1M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
   assign Float2_sum_tc_comp = (AddFloat2M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;

   // Determines the correct Float value to compare based on AddSwapM result
   assign mantissa_comp_sum = AddSwapM ? Float2_sum_comp : Float1_sum_comp;
   assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp;

   // Determines the correct comparison result based on operation and sign of resulting sum
   assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;

   // normal_underflow takes mantissa_comp when the two sign bits are EQUAL
   // (XNOR) and at least one of the swapped operands is normalized;
   // otherwise it is zero.
   // NOTE(review): the previous comment said "signs are different and both
   // operands aren't denormalized", which does not match this expression -
   // confirm the intended condition.
   assign normal_underflow = ((AddFloat1M[63] ~^ AddFloat2M[63]) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0;

   // Determine the correct sign of the result
   assign sign_corr = ((AddCorrSignM ^ AddSignAM) & ~AddConvertM) ^ AddSumM[63];

   // If the sum is negative, use its two complement instead.
   // This value has to be 64-bits to correctly handle the
   // case 10...00
   assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & FOpCtrlM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~FOpCtrlM[0]) ))
                     ? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (FOpCtrlM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));

   // Finds normal underflow result to determine whether to round final exponent down
   //KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
   assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);

   // Leading-Zero Detector. Determine the size of the shift needed for
   // normalization. If sum_corr is all zeros, the exp_valid is
   // zero; otherwise, it is one.
   lz64 lzd1 (norm_shift, exp_valid, sum_corr);

   // Suppress the normalization shift for denormalized inputs when neither
   // operand is normalized or when normal_underflow is set.
   assign norm_shift_denorm = (AddDenormInM & ( (~AddOpANormM & ~AddOpBNormM) | normal_underflow)) ? (6'h00) : (norm_shift);

   // Barrel shifter used for normalization. It takes as inputs the
   // corrected sum and the amount by which the sum should be shifted
   // (a left shifter, per the instantiated barrel_shifter_l64).
   // It outputs the normalized sum.
   barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);

   // When FOpCtrlM[3] is set the shifter is bypassed and sum_corr (or its
   // bitwise complement when FOpCtrlM[0] is set) is passed straight through.
   assign sum_norm_w_bypass = (FOpCtrlM[3]) ? (FOpCtrlM[0] ? ~sum_corr : sum_corr) : (sum_norm);

   // Round the mantissa to a 52-bit value, with the leading one
   // removed. If the result is a single precision number, the actual
   // mantissa is in the upper 23 bits and the lower 29 bits are zero.
   // At this point, normalization has already been performed, so we know
   // exactly where the rounding point is. The rounding units also
   // handles special cases and set the exception flags.

   // Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to
   // help in processor reservation station detection of load/stores. In
   // other words, the processor would like to know ahead of time that
   // if the result is an exception then don't load or store.
   rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid,
                   AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
                   AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52],
                   AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);

   // Drive the module outputs from the rounder (combinational; no registers here).
   assign FAddResM = Result;
   assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn};

endmodule // fpuaddcvt2

View File

@ -2,49 +2,52 @@
`include "wally-config.vh"
module fclassify (
input logic [63:0] SrcXE,
input logic FmtE, // 0-Single 1-Double
input logic XSgnE,
input logic [51:0] XFracE,
input logic XNaNE,
input logic XSNaNE,
input logic XNormE,
input logic XDenormE,
input logic XZeroE,
input logic XInfE,
// input logic FmtE, // 0-Single 1-Double
output logic [63:0] ClassResE
);
logic [31:0] Single;
logic [63:0] Double;
logic Sgn;
logic Inf, NaN, Zero, Norm, Denorm;
logic PInf, QNaN, PZero, PNorm, PDenorm;
logic NInf, SNaN, NZero, NNorm, NDenorm;
logic MaxExp, ExpZero, ManZero, FirstBitFrac;
// logic XSgnE;
// logic Inf, NaN, Zero, Norm, Denorm;
logic PInf, PZero, PNorm, PDenorm;
logic NInf, NZero, NNorm, NDenorm;
// logic MaxExp, ExpZero, ManZero, FirstBitFrac;
// Single and Double precision layouts
assign Single = SrcXE[63:32];
assign Double = SrcXE;
assign Sgn = SrcXE[63];
// assign XSgnE = FmtE ? FSrcXE[63] : FSrcXE[31];
// basic calculations for readabillity
assign ExpZero = FmtE ? ~|Double[62:52] : ~|Single[30:23];
assign MaxExp = FmtE ? &Double[62:52] : &Single[30:23];
assign ManZero = FmtE ? ~|Double[51:0] : ~|Single[22:0];
assign FirstBitFrac = FmtE ? Double[51] : Single[22];
// assign ExpZero = FmtE ? ~|FSrcXE[62:52] : ~|FSrcXE[30:23];
// assign MaxExp = FmtE ? &FSrcXE[62:52] : &FSrcXE[30:23];
// assign ManZero = FmtE ? ~|FSrcXE[51:0] : ~|FSrcXE[22:0];
// assign FirstBitFrac = FmtE ? FSrcXE[51] : FSrcXE[22];
// determine the type of number
assign NaN = MaxExp & ~ManZero;
assign Inf = MaxExp & ManZero;
assign Zero = ExpZero & ManZero;
assign Denorm= ExpZero & ~ManZero;
assign Norm = ~ExpZero;
// assign NaN = MaxExp & ~ManZero;
// assign Inf = MaxExp & ManZero;
// assign Zero = ExpZero & ManZero;
// assign Denorm= ExpZero & ~ManZero;
// assign Norm = ~ExpZero;
// determine the sub categories
assign QNaN = FirstBitFrac&NaN;
assign SNaN = ~FirstBitFrac&NaN;
assign PInf = ~Sgn&Inf;
assign NInf = Sgn&Inf;
assign PNorm = ~Sgn&Norm;
assign NNorm = Sgn&Norm;
assign PDenorm = ~Sgn&Denorm;
assign NDenorm = Sgn&Denorm;
assign PZero = ~Sgn&Zero;
assign NZero = Sgn&Zero;
// assign QNaN = FirstBitFrac&NaN;
// assign SNaN = ~FirstBitFrac&NaN;
assign PInf = ~XSgnE&XInfE;
assign NInf = XSgnE&XInfE;
assign PNorm = ~XSgnE&XNormE;
assign NNorm = XSgnE&XNormE;
assign PDenorm = ~XSgnE&XDenormE;
assign NDenorm = XSgnE&XDenormE;
assign PZero = ~XSgnE&XZeroE;
assign NZero = XSgnE&XZeroE;
// determine sub category and combine into the result
// bit 0 - -Inf
@ -57,6 +60,6 @@ module fclassify (
// bit 7 - +Inf
// bit 8 - signaling NaN
// bit 9 - quiet NaN
assign ClassResE = {{54{1'b0}}, QNaN, SNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
assign ClassResE = {{54{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
endmodule

View File

@ -42,28 +42,32 @@
module fcmp (
input logic [63:0] op1,
input logic [63:0] op2,
input logic XNaNE, YNaNE,
input logic XZeroE, YZeroE,
input logic [63:0] FSrcXE,
input logic [63:0] FSrcYE,
input logic [2:0] FOpCtrlE,
input logic FmtE,
output logic Invalid, // Invalid Operation
// output logic [1:0] FCC, // Condition Codes
output logic [63:0] CmpResE);
// Perform magnitude comparison between the 63 least signficant bits
// of the input operands. Only LT and EQ are returned, since GT can
// be determined from these values.
logic [1:0] FCC; // Condition Codes
logic [7:0] w, x;
logic ANaN, BNaN;
logic Azero, Bzero;
// logic ANaN, BNaN;
// logic Azero, Bzero;
logic LT; // magnitude op1 < magnitude op2
logic EQ; // magnitude op1 = magnitude op2
magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]});
// Determine final values based on output of magnitude comparison,
// sign bits, and special case testing.
exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE);
// Perform magnitude comparison between the 63 least signficant bits
// of the input operands. Only LT and EQ are returned, since GT can
@ -72,24 +76,10 @@ module fcmp (
// Determine final values based on output of magnitude comparison,
// sign bits, and special case testing.
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*);
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(XNaNE), .BNaN(YNaNE), .Azero(XZeroE), .Bzero(YZeroE), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .FSrcXE, .FSrcYE, .*);
endmodule // fpcomp
// module magcompare2b (LT, GT, A, B);
// input logic [1:0] A;
// input logic [1:0] B;
// output logic LT;
// output logic GT;
// // Determine if A < B using a minimized sum-of-products expression
// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// // Determine if A > B using a minimized sum-of-products expression
// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
// endmodule // magcompare2b
// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
@ -195,135 +185,6 @@ module magcompare64b_1 (w, x, A, B);
endmodule // magcompare64b
// This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 bfloat precision numbers
//
// The comparator produces a 2-bit signal fcc, which
// indicates the result of the comparison as follows:
// fcc decscription
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
// It also produces a invalid operation flag, which is one
// if either of the input operands is a signaling NaN.
// exception_cmp_1: classifies the two comparator operands.
// Outputs one NaN flag and one zero flag per operand. The operand format is
// taken from FOpCtrlE[1:0] (00 = double, 01 = single, 10 = half);
// FOpCtrlE[2] is not used here. Operands are examined left-aligned in the
// 64-bit word, i.e. the exponent always starts at bit 62.
module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE);

   input logic [63:0] A;
   input logic [63:0] B;
   input logic [2:0]  FOpCtrlE;

   output logic ANaN;
   output logic BNaN;
   output logic Azero;
   output logic Bzero;

   // One-hot style format decode
   logic dp, sp, hp;

   assign dp = ~(FOpCtrlE[1] | FOpCtrlE[0]);
   assign sp = ~FOpCtrlE[1] &  FOpCtrlE[0];
   assign hp =  FOpCtrlE[1] & ~FOpCtrlE[0];

   // NaN test: bits 62:58 are exponent bits in every format and must all be
   // one; the remaining exponent bits and the two top fraction bits depend
   // on the format.
   // NOTE(review): only the two most-significant fraction bits are examined,
   // so an all-ones exponent whose only set fraction bits lie lower is not
   // flagged as NaN - confirm this is intended.
   assign ANaN = (&A[62:58]) &
                 ( (sp & (&A[57:55]) & (|A[54:53]))
                 | (dp & (&A[57:52]) & (|A[51:50]))
                 | (hp & (|A[57:56])) );

   assign BNaN = (&B[62:58]) &
                 ( (sp & (&B[57:55]) & (|B[54:53]))
                 | (dp & (&B[57:52]) & (|B[51:50]))
                 | (hp & (|B[57:56])) );

   // +0 / -0 test: every bit below the sign bit is zero.
   assign Azero = ~|A[62:0];
   assign Bzero = ~|B[62:0];

endmodule // exception_cmp_1
//
// File name : fpcomp.v
// Title : Floating-Point Comparator
// project : FPU
// Library : fpcomp
// Author(s) : James E. Stine
// Purpose : definition of main unit to floating-point comparator
// notes :
//
// Copyright Oklahoma State University
//
// Floating Point Comparator (Algorithm)
//
// 1.) Performs sign-extension if the inputs are 32-bit integers.
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
// and correct for sign bits
//
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 (unused)
//
// The comparator produces a 2-bit signal FCC, which
// indicates the result of the comparison:
//
// fcc decscription
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
//
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754
/*module magcompare2b (LT, GT, A, B);
input logic [1:0] A;
input logic [1:0] B;
output logic LT;
output logic GT;
// Determine if A < B using a minimized sum-of-products expression
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// Determine if A > B using a minimized sum-of-products expression
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
endmodule*/ // magcompare2b
// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
// and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
// this version actually incorporates don't cares into the equation to
// simplify the optimization
// module magcompare2c (LT, GT, A, B);
// input logic [1:0] A;
// input logic [1:0] B;
// output logic LT;
// output logic GT;
// assign LT = B[1] | (!A[1]&B[0]);
// assign GT = A[1] | (!B[1]&A[0]);
// endmodule // magcompare2b
// This module compares two 64-bit values A and B. LT is '1' if A < B
// and EQ is '1' if A = B. LT and EQ are both '0' if A > B.
// This structure was modified so
@ -385,6 +246,8 @@ endmodule // magcompare64b
module exception_cmp_2 (
input logic [63:0] A,
input logic [63:0] B,
input logic [63:0] FSrcXE,
input logic [63:0] FSrcYE,
input logic FmtE,
input logic LT_mag,
input logic EQ_mag,
@ -453,8 +316,8 @@ module exception_cmp_2 (
always_comb begin
case (FOpCtrlE[2:0])
3'b111: CmpResE = LT ? A : B;//min
3'b101: CmpResE = GT ? A : B;//max
3'b111: CmpResE = LT ? FSrcXE : FSrcYE;//min
3'b101: CmpResE = GT ? FSrcXE : FSrcYE;//max
3'b010: CmpResE = {63'b0, EQ};//equal
3'b001: CmpResE = {63'b0, LT};//less than
3'b011: CmpResE = {63'b0, LT|EQ};//less than or equal

View File

@ -6,7 +6,7 @@ module fctrl (
input logic [2:0] Funct3D,
input logic [2:0] FRM_REGW,
output logic IllegalFPUInstrD,
output logic FWriteEnD,
output logic FRegWriteD,
output logic FDivStartD,
output logic [2:0] FResultSelD,
output logic [3:0] FOpCtrlD,
@ -21,7 +21,7 @@ module fctrl (
// FPU Instruction Decoder
always_comb
case(OpD)
// FWriteEn_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
// FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
7'b0000111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b1_0_000_0000_00_00_0_0; // flw
3'b011: ControlsD = `FCTRLW'b1_0_000_0001_00_00_0_0; // fld
@ -64,44 +64,44 @@ module fctrl (
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d
else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
7'b1100000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.s.w
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.s.wu
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.s.l
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.s.lu
7'b1101000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.s.w
2'b01: ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.s.wu
2'b10: ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.s.l
2'b11: ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.s.lu
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1101000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_1_100_0010_00_00_0_0; // fcvt.w.s
2'b01: ControlsD = `FCTRLW'b1_1_100_0110_00_00_0_0; // fcvt.wu.s
2'b10: ControlsD = `FCTRLW'b1_1_100_1010_00_00_0_0; // fcvt.l.s
2'b11: ControlsD = `FCTRLW'b1_1_100_1110_00_00_0_0; // fcvt.lu.s
7'b1100000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.s
2'b01: ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.s
2'b10: ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.s
2'b11: ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.s
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fcvt.s.d
7'b1100001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.d.w
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.d.wu
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.d.l
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.d.lu
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.s.d
7'b1101001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.d.w
2'b01: ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.d.wu
2'b10: ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.d.l
2'b11: ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.d.lu
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1101001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_100_0010_00_00_0_0; // fcvt.w.d
2'b01: ControlsD = `FCTRLW'b1_0_100_0110_00_00_0_0; // fcvt.wu.d
2'b10: ControlsD = `FCTRLW'b1_0_100_1010_00_00_0_0; // fcvt.l.d
2'b11: ControlsD = `FCTRLW'b1_0_100_1110_00_00_0_0; // fcvt.lu.d
7'b1100001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.d
2'b01: ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.d
2'b10: ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.d
2'b11: ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.d
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x
7'b0100001: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fcvt.d.s
7'b0100001: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
// unswizzle control bits
assign {FWriteEnD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
assign {FRegWriteD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
// if dynamic rounding, choose FRM_REGW
assign FrmD = &Funct3D ? FRM_REGW : Funct3D;
@ -109,7 +109,7 @@ module fctrl (
// Precision
// 0-single
// 1-double
assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : Funct7D[0];
assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
// div/sqrt
// fdiv = ???0
// fsqrt = ???1

View File

@ -0,0 +1,190 @@
`include "wally-config.vh"
// fcvt: combinational converter between integers and floating point.
//   - FOpCtrlE = {long, unsigned, to int, from int} selects the operation (table below).
//   - FmtE selects double (1) or single (0) precision.
//   - CvtResE is the floating-point result when FOpCtrlE[0] is set, otherwise the integer result.
//   - CvtFlgE is {invalid, divide by zero, overflow, underflow, inexact}; only the invalid
//     and inexact positions are ever driven non-zero here.
module fcvt (
// floating-point input X, already split into fields and classification flags
input logic XSgnE,
input logic [10:0] XExpE,
input logic [51:0] XFracE,
input logic XAssumed1E,
input logic XZeroE,
input logic XNaNE,
input logic XInfE,
input logic XDenormE,
input logic [10:0] BiasE, // exponent bias (subtracted from XExpE, added to the FP result exponent)
input logic [`XLEN-1:0] SrcAE, // integer input
input logic [3:0] FOpCtrlE, // chooses which instruction is done (full list below)
input logic [2:0] FrmE, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic FmtE, // precision 1 = double 0 = single
output logic [63:0] CvtResE, // convert final result
output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact}
logic ResSgn; // FP result's sign
logic [10:0] ResExp,TmpExp; // FP result's exponent
logic [51:0] ResFrac; // FP result's fraction
logic [5:0] LZResP; // shift count from the leading-one search (leading zeros + 1)
logic [7:0] Bits; // how many bits are in the integer result
logic [7:0] SubBits; // width of the integer input (64 or 32), used when computing the FP result's exponent
logic [64+51:0] ShiftedManTmp; // Shifted mantissa
logic [64+51:0] ShiftVal; // value being shifted (to int - XMan, to FP - |integer input|)
logic [64+1:0] ShiftedMan; // shifted mantissa truncated
logic [64:0] RoundedTmp; // full size rounded result - in case of overflow
logic [63:0] Rounded; // rounded result
logic [12:0] ExpVal; // unbiased X exponent
logic [12:0] ShiftCnt; // how much is the mantissa shifted
logic [64-1:0] IntIn; // trimmed integer input
logic [64-1:0] PosInt; // absolute value of the integer input
logic [63:0] CvtIntRes; // integer result from the fp -> int instructions
logic [63:0] CvtFPRes; // floating point result from the int -> fp instructions
logic Of, Uf; // did the integer result underflow or overflow
logic Guard, Round, LSB, Sticky; // bits used to determine rounding
logic Plus1,CalcPlus1; // do you add one for rounding
logic SgnRes; // set for to-int conversions that are not "long"; selects the 32-bit saturation values
logic Res64, In64; // is the result or input 64 bits
logic RoundMSB; // most significant bit of the fraction
logic RoundSgn; // sign of the rounded result
// FOpCtrlE:
// fcvt.w.s = 0010
// fcvt.wu.s = 0110
// fcvt.s.w = 0001
// fcvt.s.wu = 0101
// fcvt.l.s = 1010
// fcvt.lu.s = 1110
// fcvt.s.l = 1001
// fcvt.s.lu = 1101
// fcvt.w.d = 0010
// fcvt.wu.d = 0110
// fcvt.d.w = 0001
// fcvt.d.wu = 0101
// fcvt.l.d = 1010
// fcvt.lu.d = 1110
// fcvt.d.l = 1001
// fcvt.d.lu = 1101
// {long, unsigned, to int, from int}
// calculate signals based off the input and output's size
// assign Bias = FmtE ? 12'h3ff : 12'h7f;
assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101)));
// NOTE(review): the last term (FOpCtrlE==4'b1101 & ~FmtE) is already covered by the first term
assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE)));
assign SubBits = In64 ? 8'd64 : 8'd32;
assign Bits = Res64 ? 8'd64 : 8'd32;
// calculate the unbiased exponent
assign ExpVal = XExpE - BiasE + XDenormE;
////////////////////////////////////////////////////////
// position the input in the most significant bits
assign IntIn = FOpCtrlE[3] ? {SrcAE, {64-`XLEN{1'b0}}} : {SrcAE[31:0], 32'b0};
// make the integer positive
assign PosInt = IntIn[64-1]&~FOpCtrlE[2] ? -IntIn : IntIn;
// determine the integer's sign
assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;
// generate
// if(`XLEN == 64)
// lz64 lz(LZResP, LZResV, PosInt);
// else if(`XLEN == 32) begin
// assign LZResP[5] = 1'b0;
// lz32 lz(LZResP[4:0], LZResV, PosInt);
// end
// endgenerate
// Leading one detector
// i counts the zero bits above the leading one of PosInt, capped at `XLEN
logic [8:0] i;
always_comb begin
i = 0;
while (~PosInt[64-1-i] && i < `XLEN) i = i+1; // search for leading one
LZResP = i+1; // compute shift count
end
// if no one was found set to zero otherwise calculate the exponent
assign TmpExp = i==`XLEN ? 0 : BiasE + SubBits - LZResP;
////////////////////////////////////////////
// select the shift value and amount based on operation (to fp or int)
assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP;
assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, XAssumed1E, XFracE} : {PosInt, 52'b0};
// if shift = -1 then shift one bit right for guard bit (right shifting twice never rounds)
// if the shift is negative add a bit for sticky bit calculation
// otherwise shift left
assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, XAssumed1E, XFracE[51:1]} : ShiftCnt[12] ? {{64+51{1'b0}}, ~XZeroE} : ShiftVal << ShiftCnt;
// truncate the shifted mantissa
assign ShiftedMan = ShiftedManTmp[64+51:50];
// calculate sticky bit
// - take into account the possible right shift from before
// - the sticky bit calculation covers three different sizes depending on the operation
assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFracE[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
// determine guard, round, and least significant bit of the result
// (bit positions differ for to-int, double FP and single FP results)
assign Guard = FOpCtrlE[1] ? ShiftedMan[1] : FmtE ? ShiftedMan[13] : ShiftedMan[42];
assign Round = FOpCtrlE[1] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41];
assign LSB = FOpCtrlE[1] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43];
always_comb begin
// Determine if you add 1
case (FrmE)
3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down
3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up
3'b100: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky));//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
end
// don't round if the result is exact
assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[1]);
// round the shifted mantissa
assign RoundedTmp = ShiftedMan[64+1:2] + Plus1;
// FP result: the rounding increment is added to {exponent, fraction} as one value,
// so a carry out of the fraction increments the exponent
assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 : {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ;
// fit the rounded result into the appropriate size and take the 2's complement if needed
assign Rounded = Res64 ? XSgnE&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] :
XSgnE ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
// extract the MSB and Sign for later use (will be used to determine underflow and overflow)
assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32];
assign RoundSgn = Res64 ? Rounded[63] : Rounded[31];
// check if the result overflows
assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgnE&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgnE&XInfE) | XNaNE;
// check if the result underflows (this calculation changes if the result is signed or unsigned)
assign Uf = FOpCtrlE[2] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (XSgnE&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgnE | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
// flag to-int conversions that are not "long" (FOpCtrlE[3]=0, FOpCtrlE[1]=1)
assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1];
// select the integer result
// - on overflow/underflow the result saturates: all ones / zero for unsigned,
//   otherwise the largest / smallest value of the 32- or 64-bit signed range
assign CvtIntRes = Of ? FOpCtrlE[2] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} :
Uf ? FOpCtrlE[2] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
Rounded[64-1:0];
// select the floating point result
// - a single precision result sits in the lower 32 bits with the upper 32 bits set to all ones
assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {{32{1'b1}}, ResSgn, ResExp[7:0], ResFrac[51:29]};
// select the result
assign CvtResE = FOpCtrlE[0] ? CvtFPRes : CvtIntRes;
// calculate the flags
// - to int only sets the invalid flag
// - from int only sets the inexact flag
assign CvtFlgE = {(Of | Uf)&FOpCtrlE[1], 3'b0, (Guard|Round|Sticky)&FOpCtrlE[0]};
endmodule // fcvt

View File

@ -27,40 +27,40 @@
module fhazard(
input logic [4:0] Adr1E, Adr2E, Adr3E,
input logic FWriteEnM, FWriteEnW,
input logic FRegWriteM, FRegWriteW,
input logic [4:0] RdM, RdW,
input logic [2:0] FResultSelM,
output logic FStallD,
output logic [1:0] ForwardXE, ForwardYE, ForwardZE
output logic [1:0] FForwardXE, FForwardYE, FForwardZE
);
always_comb begin
// set ReadData as default
ForwardXE = 2'b00; // choose FRD1E
ForwardYE = 2'b00; // choose FRD2E
ForwardZE = 2'b00; // choose FRD3E
FForwardXE = 2'b00; // choose FRD1E
FForwardYE = 2'b00; // choose FRD2E
FForwardZE = 2'b00; // choose FRD3E
FStallD = 0;
if ((Adr1E == RdM) & FWriteEnM)
if ((Adr1E == RdM) & FRegWriteM)
// if the result will be FResM
if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM
if(FResultSelM == 3'b100) FForwardXE = 2'b10; // choose FResM
else FStallD = 1; // if the result won't be ready stall
else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
if ((Adr2E == RdM) & FWriteEnM)
if ((Adr2E == RdM) & FRegWriteM)
// if the result will be FResM
if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM
if(FResultSelM == 3'b100) FForwardYE = 2'b10; // choose FResM
else FStallD = 1; // if the result won't be ready stall
else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
if ((Adr3E == RdM) & FWriteEnM)
if ((Adr3E == RdM) & FRegWriteM)
// if the result will be FResM
if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM
if(FResultSelM == 3'b100) FForwardZE = 2'b10; // choose FResM
else FStallD = 1; // if the result won't be ready stall
else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
end

View File

@ -1,10 +1,177 @@
// fma: top level of the two-stage fused multiply-add.
//   - fma1 works on the E-suffixed inputs and produces the product mantissa/exponent,
//     the aligned addend, the alignment sticky bit and the kill-product indicator.
//   - Four flopenrc instances carry those fma1 outputs to their M-suffixed copies.
//   - fma2 consumes the M-suffixed signals and produces FMAResM and FMAFlgM.
// The M-suffixed operand fields and flags are inputs to this module; they are not
// registered here.
module fma(
input logic clk,
input logic reset,
input logic FlushM,
input logic StallM,
input logic FmtE, FmtM, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [2:0] FrmM, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic XSgnE, YSgnE, ZSgnE, // not connected to any instance inside this module
input logic [10:0] XExpE, YExpE, ZExpE,
input logic [51:0] XFracE, YFracE, ZFracE,
input logic XSgnM, YSgnM, ZSgnM,
input logic [10:0] XExpM, YExpM, ZExpM,
input logic [51:0] XFracM, YFracM, ZFracM,
input logic XAssumed1E, YAssumed1E, ZAssumed1E,
input logic XDenormE, YDenormE, ZDenormE,
input logic XZeroE, YZeroE, ZZeroE,
input logic XNaNM, YNaNM, ZNaNM,
input logic XSNaNM, YSNaNM, ZSNaNM,
input logic XZeroM, YZeroM, ZZeroM,
input logic XInfM, YInfM, ZInfM,
input logic [10:0] BiasE,
output logic [63:0] FMAResM,
output logic [4:0] FMAFlgM);
// fma1 results (E) and their registered copies (M)
logic [105:0] ProdManE, ProdManM;
logic [161:0] AlignedAddendE, AlignedAddendM;
logic [12:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
// first stage: product and addend alignment
fma1 fma1 (.XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE,
.BiasE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .ProdManE, .AlignedAddendE,
.ProdExpE, .AddendStickyE, .KillProdE);
// registers between the two stages; each is passed FlushM and ~StallM
// (flopenrc is defined elsewhere - presumably an enable/reset/clear flop; confirm)
flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #(2) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE},
{AddendStickyM, KillProdM});
// second stage: produces the final result and flags
fma2 fma2(.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM,
.FOpCtrlM, .FrmM, .FmtM,
.ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM,
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FMAResM, .FMAFlgM);
endmodule
// fma1: first stage of the fused multiply-add (combinational).
//   - computes the product exponent (ProdExpE) and product mantissa (ProdManE) of X and Y
//   - shifts the Z mantissa so it lines up with the product (AlignedAddendE)
//   - reports the bits lost during alignment as a sticky bit (AddendStickyE)
//   - raises KillProdE when the product is too small to matter against Z
module fma1(
// input logic XSgnE, YSgnE, ZSgnE,
input logic [10:0] XExpE, YExpE, ZExpE,
input logic [51:0] XFracE, YFracE, ZFracE,
input logic XAssumed1E, YAssumed1E, ZAssumed1E,
input logic XDenormE, YDenormE, ZDenormE,
input logic XZeroE, YZeroE, ZZeroE,
input logic [10:0] BiasE,
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtE, // precision 1 = double 0 = single
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
output logic [161:0] AlignedAddendE, // Z aligned for addition
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
output logic AddendStickyE, // sticky bit that is calculated during alignment
output logic KillProdE // set the product to zero before addition if the product is too small to matter
);
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit
logic [213:0] ZManPreShifted; // input to the alignment shifter
///////////////////////////////////////////////////////////////////////////////
// Calculate the product
// - When multiplying two fp numbers, add the exponents
// - Subtract the bias (XExp + YExp has two biases, one from each exponent)
// - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. add one if there is a denormal number
///////////////////////////////////////////////////////////////////////////////
// verilator lint_off WIDTH
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
XExpE + YExpE - BiasE + XDenormE + YDenormE;
// Calculate the product's mantissa
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
assign ProdManE = {XAssumed1E, XFracE} * {YAssumed1E, YFracE};
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
// determine the shift count for alignment
// - negative means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. add one to the exponent if it is a denormal number
assign AlignCnt = ProdExpE - ZExpE - ZDenormE;
// verilator lint_on WIDTH
// Default Addition without shifting
// | 55'b0 | 106'b(product) | 2'b0 |
// |1'b0| addend |
// the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
assign ZManPreShifted = {55'b0, {ZAssumed1E, ZFracE}, 106'b0};
always_comb
begin
// If the product is too small to affect the sum, kill the product
// | 54'b0 | 106'b(product) | 2'b0 |
// | addend |
if ($signed(AlignCnt) <= $signed(-13'd56)) begin
KillProdE = 1;
ZManShifted = ZManPreShifted;//{107'b0, {~ZAssumed1E, ZFrac}, 54'b0};
// the killed product still counts as sticky unless it is exactly zero
AddendStickyE = ~(XZeroE|YZeroE);
// If the Addend is shifted left (negative AlignCnt)
// | 54'b0 | 106'b(product) | 2'b0 |
// | addend |
end else if($signed(AlignCnt) <= $signed(13'd0)) begin
KillProdE = 0;
ZManShifted = ZManPreShifted << -AlignCnt;
AddendStickyE = |(ZManShifted[51:0]);
// If the Addend is shifted right (positive AlignCnt)
// | 54'b0 | 106'b(product) | 2'b0 |
// | addend |
end else if ($signed(AlignCnt)<=$signed(13'd106)) begin
KillProdE = 0;
ZManShifted = ZManPreShifted >> AlignCnt;
AddendStickyE = |(ZManShifted[51:0]);
// If the addend is too small to affect the addition
// - The addend has to shift two past the end of the addend to be considered too small
// - The 2 extra bits are needed for rounding
// | 54'b0 | 106'b(product) | 2'b0 |
// | addend |
end else begin
KillProdE = 0;
ZManShifted = 0;
// the whole addend is shifted out, so it is sticky unless Z is zero
AddendStickyE = ~ZZeroE;
end
end
// the low 52 bits of the shifter output only feed the sticky bit; drop them here
assign AlignedAddendE = ZManShifted[213:52];
endmodule
module fma2(
input logic [63:0] X, // X
input logic [63:0] Y, // Y
input logic [63:0] Z, // Z
input logic XSgnM, YSgnM, ZSgnM,
input logic [10:0] XExpM, YExpM, ZExpM,
input logic [51:0] XFracM, YFracM, ZFracM,
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtM, // precision 1 = double 0 = single
@ -16,6 +183,7 @@ module fma2(
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
output logic [63:0] FMAResM, // FMA final result
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
@ -24,8 +192,6 @@ module fma2(
logic [51:0] ResultFrac; // Result fraction
logic [10:0] ResultExp; // Result exponent
logic ResultSgn; // Result sign
logic [10:0] ZExp; // input exponent
logic XSgn, YSgn, ZSgn; // input sign
logic PSgn; // product sign
logic [105:0] ProdMan2; // product being added
logic [162:0] AlignedAddend2; // possibly inverted aligned Z
@ -61,28 +227,10 @@ module fma2(
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
///////////////////////////////////////////////////////////////////////////////
// Select input fields
// The following logic duplicates fma1 because it's cheaper to recompute than provide registers
///////////////////////////////////////////////////////////////////////////////
// Set addend to zero if FMUL instruction
assign Addend = FOpCtrlM[2] ? 64'b0 : Z;
// split inputs into the sign bit, and exponent to handle single or double precision
// - single precision is in the top half of the inputs
assign XSgn = X[63];
assign YSgn = Y[63];
assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction
assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]};
// Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB
assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];
assign PSgn = XSgnM ^ YSgnM ^ FOpCtrlM[1];
@ -93,7 +241,7 @@ module fma2(
// Negate Z when doing one of the following opperations:
// -prod + Z
// prod - Z
assign InvZ = ZSgn ^ PSgn;
assign InvZ = ZSgnM ^ PSgn;
// Choose an inverted or non-inverted addend - the one is added later
assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
@ -148,7 +296,7 @@ module fma2(
assign FracLen = FmtM ? 13'd52 : 13'd23;
// Determine if the result is denormal
assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
// Determine the shift needed for denormal results
@ -273,13 +421,13 @@ module fma2(
// Determine the sign if the sum is zero
// if cancelation then 0 unless round to -infinity
// otherwise psign
assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn;
assign ZeroSgn = (PSgn^ZSgnM)&~Underflow ? FrmM == 3'b010 : PSgn;
// is the result negitive
// if p - z is the Sum negitive
// if -p + z is the Sum positive
// if -p - z then the Sum is negitive
assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
assign ResultSgnTmp = InvZ&(ZSgnM)&NegSum | InvZ&PSgn&~NegSum | ((ZSgnM)&PSgn);
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
@ -297,9 +445,8 @@ module fma2(
// 2) Inf - Inf (unless x or y is NaN)
// 3) 0 * Inf
assign MaxExp = FmtM ? 13'd2047 : 13'd255;
assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) :
(XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]);
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
// Set Overflow flag if the number is too big to be represented
// - Don't set the overflow flag if an overflowed result isn't outputed
@ -327,28 +474,28 @@ module fma2(
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]};
assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]};
assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]};
assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XFracM[50:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XFracM[50:29]};
assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YFracM[50:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YFracM[50:29]};
assign ZNaNResult = FmtM ? {ZSgnM, ZExpM, 1'b1, ZFracM[50:0]} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], 1'b1, ZFracM[50:29]};
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} :
{ResultSgn, 11'h7ff, 52'b0} :
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} :
{ResultSgn, 8'hff, 55'b0};
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0};
assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0};
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0};
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
{{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZFracM} - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[7:0], ZFracM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}};
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
assign FMAResM = XNaNM ? XNaNResult :
YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult :
Invalid ? InvalidResult : // has to be before inf
XInfM ? {PSgn, X[62:0]} :
YInfM ? {PSgn, Y[62:0]} :
ZInfM ? {ZSgn, Addend[62:0]} :
// Infinite operand: forward that operand's own fields. Each arm must test its own
// operand's Inf flag; testing XInfM on all three made the Y and Z arms unreachable.
XInfM ? (FmtM ? {PSgn, XExpM, XFracM} : {{32{1'b1}}, PSgn, XExpM[7:0], XFracM[51:29]}) :
YInfM ? (FmtM ? {PSgn, YExpM, YFracM} : {{32{1'b1}}, PSgn, YExpM[7:0], YFracM[51:29]}) :
ZInfM ? (FmtM ? {ZSgnM, ZExpM, ZFracM} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], ZFracM[51:29]}) :
Overflow ? OverflowResult :
KillProdM ? KillProdResult : // has to be after Underflow
Underflow & ~ResultDenorm ? UnderflowResult :
FmtM ? {ResultSgn, ResultExp, ResultFrac} :
{ResultSgn, ResultExp[7:0], ResultFrac, 3'b0};
{{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};

View File

@ -1,184 +0,0 @@
module fma1(
input logic [63:0] X, // X
input logic [63:0] Y, // Y
input logic [63:0] Z, // Z
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtE, // precision 1 = double 0 = single
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
output logic [161:0] AlignedAddendE, // Z aligned for addition
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
output logic AddendStickyE, // sticky bit that is calculated during alignment
output logic KillProdE, // set the product to zero before addition if the product is too small to matter
output logic XZeroE, YZeroE, ZZeroE, // inputs are zero
output logic XInfE, YInfE, ZInfE, // inputs are infinity
output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN
logic [51:0] XFrac,YFrac,ZFrac; // input fraction
logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one)
logic [12:0] XExp,YExp,ZExp; // input exponents
logic XSgn,YSgn,ZSgn; // input signs
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit
logic [213:0] ZManPreShifted; // input to the alignment shifter
logic XDenorm, YDenorm, ZDenorm; // inputs are denormal
logic [63:0] Addend; // value to add (Z or zero)
logic [12:0] Bias; // 1023 for double, 127 for single
logic XExpZero, YExpZero, ZExpZero; // input exponent zero
logic XFracZero, YFracZero, ZFracZero; // input fraction zero
logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s
///////////////////////////////////////////////////////////////////////////////
// split inputs into the sign bit, fraction, and exponent to handle single or double precision
// - single precision is in the top half of the inputs
///////////////////////////////////////////////////////////////////////////////
// Set addend to zero if FMUL instruction
assign Addend = FOpCtrlE[2] ? 64'b0 : Z;
assign XSgn = X[63];
assign YSgn = Y[63];
assign ZSgn = Addend[63];
assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]};
assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]};
assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0};
assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};
assign XMan = {~XExpZero, XFrac};
assign YMan = {~YExpZero, YFrac};
assign ZMan = {~ZExpZero, ZFrac};
assign Bias = FmtE ? 13'h3ff : 13'h7f;
///////////////////////////////////////////////////////////////////////////////
// determine if an input is a special value
///////////////////////////////////////////////////////////////////////////////
assign XExpZero = ~|XExp;
assign YExpZero = ~|YExp;
assign ZExpZero = ~|ZExp;
assign XFracZero = ~|XFrac;
assign YFracZero = ~|YFrac;
assign ZFracZero = ~|ZFrac;
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];
assign XNaNE = XExpMax & ~XFracZero;
assign YNaNE = YExpMax & ~YFracZero;
assign ZNaNE = ZExpMax & ~ZFracZero;
assign XDenorm = XExpZero & ~XFracZero;
assign YDenorm = YExpZero & ~YFracZero;
assign ZDenorm = ZExpZero & ~ZFracZero;
assign XInfE = XExpMax & XFracZero;
assign YInfE = YExpMax & YFracZero;
assign ZInfE = ZExpMax & ZFracZero;
assign XZeroE = XExpZero & XFracZero;
assign YZeroE = YExpZero & YFracZero;
assign ZZeroE = ZExpZero & ZFracZero;
///////////////////////////////////////////////////////////////////////////////
// Calculate the product
// - When multiplying two fp numbers, add the exponents
// - Subtract the bias (XExp + YExp has two biases, one from each exponent)
// - Denormal numbers have an exponent value of 1; however, they are
// represented with an exponent of 0. Add one if there is a denormal number
///////////////////////////////////////////////////////////////////////////////
// verilator lint_off WIDTH
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
XExp + YExp - Bias + XDenorm + YDenorm;
// Calculate the product's mantissa
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
assign ProdManE = XMan * YMan;
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
// determine the shift count for alignment
// - negative means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// - Denormal numbers have an exponent value of 1; however, they are
// represented with an exponent of 0. Add one to the exponent if it is a denormal number
assign AlignCnt = ProdExpE - ZExp - ZDenorm;
// verilator lint_on WIDTH
// Default addition without shifting
// | 55'b0 | 106'b(product) | 2'b0 |
// |1'b0| addend |
// the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
assign ZManPreShifted = {55'b0, ZMan, 106'b0};
always_comb
begin
// If the product is too small to affect the sum, kill the product
// | 54'b0 | 106'b(product) | 2'b0 |
// | addend |
if ($signed(AlignCnt) <= $signed(-13'd56)) begin
KillProdE = 1;
ZManShifted = ZManPreShifted;//{107'b0, ZMan, 54'b0};
AddendStickyE = ~(XZeroE|YZeroE);
// If the Addend is shifted left (negative AlignCnt)
// | 54'b0 | 106'b(product) | 2'b0 |
// | addend |
end else if($signed(AlignCnt) <= $signed(13'd0)) begin
KillProdE = 0;
ZManShifted = ZManPreShifted << -AlignCnt;
AddendStickyE = |(ZManShifted[51:0]);
// If the Addend is shifted right (positive AlignCnt)
// | 54'b0 | 106'b(product) | 2'b0 |
// | addend |
end else if ($signed(AlignCnt)<=$signed(13'd106)) begin
KillProdE = 0;
ZManShifted = ZManPreShifted >> AlignCnt;
AddendStickyE = |(ZManShifted[51:0]);
// If the addend is too small to affect the addition
// - The addend has to shift two past the end of the addend to be considered too small
// - The 2 extra bits are needed for rounding
// | 54'b0 | 106'b(product) | 2'b0 |
// | addend |
end else begin
KillProdE = 0;
ZManShifted = 0;
AddendStickyE = ~ZZeroE;
end
end
assign AlignedAddendE = ZManShifted[213:52];
endmodule

152
wally-pipelined/src/fpu/fpdiv.sv Executable file
View File

@ -0,0 +1,152 @@
//
// File name : fpdiv
// Title : Floating-Point Divider/Square-Root
// project : FPU
// Library : fpdiv
// Author(s) : James E. Stine, Jr.
// Purpose : definition of main unit to floating-point div/sqrt
// notes :
//
// Copyright Oklahoma State University
//
// Basic Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Exponent Logic
// Step 4: Divide/Sqrt using Goldschmidt
// Step 5: Normalize the result.
// Shift left until normalized. Normalized when the value to the
// left of the binary point is 1.
// Step 6: Round the result.
// Step 7: Put quotient/remainder onto output.
//
// `timescale 1ps/1ps
// fpdiv: top level of the floating-point divide / square-root unit.
//
// Wires together operand conversion, exception detection, exponent logic,
// the iterative divconv datapath with its controlling FSM, the rounder, and
// the output registers.
//
// Ports
//   op1, op2   : 64-bit input operands (A and B)
//   rm         : rounding mode, passed to rounder_div
//   op_type    : operation select. 1 selects the square-root exponent and the
//                sign of op1; 0 selects the divide exponent and the XOR of
//                the two operand signs
//   P          : result precision (0 for double, 1 for single)
//   OvEn, UnEn : overflow / underflow trap enables, passed to rounder_div
//   start      : passed to fsm_div
//   reset, clk : reset and clock for the FSM, datapath and output registers
//   AS_Result  : result, registered when the FSM asserts done
//   Flags      : 5-bit exception flags, registered when done is asserted
//   Denorm     : denormal indication, registered when done is asserted
module fpdiv (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn,
              start, reset, clk);

   input [63:0]   op1;        // 1st input operand (A)
   input [63:0]   op2;        // 2nd input operand (B)
   input [1:0]    rm;         // Rounding mode - specify values
   input          op_type;    // Function opcode
   input          P;          // Result Precision (0 for double, 1 for single)
   input          OvEn;       // Overflow trap enabled
   input          UnEn;       // Underflow trap enabled
   input          start;
   input          reset;
   input          clk;

   output [63:0]  AS_Result;  // Result of operation
   output [4:0]   Flags;      // IEEE exception flags
   output         Denorm;     // Denorm on input or output

   logic          done;       // FSM output; enables the output registers

   supply1        vdd;
   supply0        vss;

   // Converted operands and rounded result
   wire [63:0]    Float1;
   wire [63:0]    Float2;
   wire [63:0]    Result;

   // Exponent datapath
   wire [12:0]    exp1, exp2, expF;
   wire [12:0]    exp_diff, bias;
   wire [13:0]    exp_sqrt;
   wire [12:0]    exp_s;
   wire [12:0]    exp_c;

   // Mantissas with the leading one attached
   wire [52:0]    mantissaA;
   wire [52:0]    mantissaB;

   // Exception / flag signals
   wire [2:0]     sel_inv;
   wire           op1_Norm, op2_Norm;   // produced by exception_div, not consumed here
   wire           Invalid;
   wire           DenormIn, DenormIO;
   wire [4:0]     FlagsIn;
   wire           signResult;

   // divconv datapath outputs
   wire [63:0]    q1, qm1, qp1, q0, qm0, qp0;
   wire [63:0]    rega_out, regb_out, regc_out, regd_out;
   wire [127:0]   regr_out;

   // FSM -> divconv control
   wire [2:0]     sel_muxa, sel_muxb;
   wire           sel_muxr;
   wire           load_rega, load_regb, load_regc, load_regd, load_regr;

   // These six nets were previously created implicitly by their first use.
   // Declaring them keeps the same 1-bit nets but avoids implicit-net lint
   // warnings and lets the module elaborate under `default_nettype none.
   wire           load_regs;
   wire           exp_odd;
   wire           exp_cout1, exp_cout2;  // adder carry-outs, unused
   wire           open;                  // discarded top sum bit of explogic1
   // NOTE(review): error is only connected to fsm_div and has no driver in
   // this module - confirm whether fsm_div drives it or it should be tied off.
   wire           error;

   // Convert the input operands based on the original operands, op_type and
   // the precision P. The converted operands are Float1 and Float2.
   convert_inputs_div conv1 (Float1, Float2, op1, op2, op_type, P);

   // Test for exceptions and return the "Invalid Operation" and
   // "Denormalized" Input Flags. sel_inv is forwarded to the rounder to
   // select the result. op1_Norm and op2_Norm are one if op1 and op2 are
   // not zero or denormalized.
   exception_div exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
                       Float1, Float2, op_type);

   // Sign: XOR of the operand signs when op_type is 0, sign of Float1 when
   // op_type is 1.
   assign signResult = ((Float1[63]^Float2[63])&~op_type) | Float1[63]&op_type;

   // Mantissas: 52 fraction bits with a leading one prepended.
   assign mantissaA = {vdd, Float1[51:0]};
   assign mantissaB = {vdd, Float2[51:0]};

   // Exponents zero-extended to 13 bits.
   assign exp1 = {2'b0, Float1[62:52]};
   assign exp2 = {2'b0, Float2[62:52]};
   // bias : DP = 2^{11-1}-1 = 1023
   assign bias = {3'h0, 10'h3FF};

   // Divide exponent: expA - expB + Bias. ~exp2 together with the carry-in of
   // one forms the negation of exp2.
   // NOTE(review): assumes csa returns sum/carry vectors whose plain sum is
   // the three-input total - confirm against the csa module.
   csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c);
   adder #(14) explogic1 ({vss, exp_s}, {vss, exp_c}, 1'b1, {open, exp_diff}, exp_cout1);

   // Sqrt exponent: exp_odd is one when Float1[52] (the exponent LSB) is zero.
   // exp_sqrt = exp1 + 1023 + exp_odd; bits [13:1] (the sum halved) are used.
   assign exp_odd = Float1[52] ? vss : vdd;
   adder #(14) explogic2 ({vss, exp1}, {4'h0, 10'h3ff}, exp_odd, exp_sqrt, exp_cout2);

   // Choose correct exponent
   assign expF = op_type ? exp_sqrt[13:1] : exp_diff;

   // Main Goldschmidt/Division Routine (mantissaB is d, mantissaA is n)
   divconv goldy (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
                  regr_out, mantissaB, mantissaA, sel_muxa, sel_muxb, sel_muxr,
                  reset, clk, load_rega, load_regb, load_regc, load_regd,
                  load_regr, load_regs, P, op_type, exp_odd);

   // FSM : control divider
   fsm_div control (done, load_rega, load_regb, load_regc, load_regd,
                    load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
                    clk, reset, start, error, op_type);

   // Round the mantissa to a 52-bit value, with the leading one
   // removed. The rounding unit also handles special cases and
   // sets the exception flags.
   rounder_div round1 (Result, DenormIO, FlagsIn,
                       rm, P, OvEn, UnEn, expF,
                       sel_inv, Invalid, DenormIn, signResult,
                       q1, qm1, qp1, q0, qm0, qp0, regr_out);

   // Store the final result and the exception flags in registers,
   // enabled by done.
   flopenr #(64) rega (clk, reset, done, Result, AS_Result);
   flopenr #(1)  regb (clk, reset, done, DenormIO, Denorm);
   flopenr #(5)  regc (clk, reset, done, FlagsIn, Flags);

endmodule // fpdiv

View File

@ -1,9 +0,0 @@
// adder_ip: WIDTH-bit adder with carry-in and carry-out.
//   a, b : WIDTH-bit operands
//   cin  : carry-in
//   y    : low WIDTH bits of a + b + cin
//   cout : carry-out (bit WIDTH of the sum)
module adder_ip #(parameter WIDTH=8)
   (input  logic [WIDTH-1:0] a, b,
    input  logic             cin,
    output logic [WIDTH-1:0] y,
    output logic             cout);

   // Sum held one bit wider than the operands so the carry is retained.
   logic [WIDTH:0] total;

   assign total = a + b + cin;
   assign y     = total[WIDTH-1:0];
   assign cout  = total[WIDTH];

endmodule // adder_ip

View File

@ -3,8 +3,7 @@
// it conditionally converts single precision values to double
// precision values and modifies the sign of op1.
// The converted operands are Float1 and Float2.
module convert_inputs(Float1, Float2b, op1, op2, op_type, P);
module convert_inputs_div (Float1, Float2b, op1, op2, op_type, P);
input logic [63:0] op1; // 1st input operand (A)
input logic [63:0] op2; // 2nd input operand (B)

View File

@ -1,19 +1,13 @@
`timescale 1ps/1ps
module divconv (q1, qm1, qp1, q0, qm0, qp0,
rega_out, regb_out, regc_out, regd_out,
regr_out, d, n,
sel_muxa, sel_muxb, sel_muxr,
reset, clk,
load_rega, load_regb, load_regc, load_regd,
load_regr, load_regs, load_regp,
P, op_type, exp_odd);
module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk, load_rega, load_regb,
load_regc, load_regd, load_regr, load_regs, P, op_type, exp_odd);
input logic [52:0] d, n;
input logic [2:0] sel_muxa, sel_muxb;
input logic sel_muxr;
input logic load_rega, load_regb, load_regc, load_regd;
input logic load_regr, load_regs;
input logic load_regp;
input logic P;
input logic op_type;
input logic exp_odd;
@ -78,86 +72,47 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
mux2 #(64) mx8 ({64'h0000_0000_0000_0200}, {64'h0000_0040_0000_0000}, P, q_const);
mux2 #(64) mx9 ({64'h0000_0000_0000_0A00}, {64'h0000_0140_0000_0000}, P, qp_const);
mux2 #(64) mxA ({64'hFFFF_FFFF_FFFF_F9FF}, {64'hFFFF_FF3F_FFFF_FFFF}, P, qm_const);
logic [127:0] Sum_pipe;
logic [127:0] Carry_pipe;
logic muxr_pipe;
logic rega_pipe;
logic regb_pipe;
logic regc_pipe;
logic regd_pipe;
logic regs_pipe;
logic regr_pipe;
logic P_pipe;
logic op_type_pipe;
logic [63:0] q_const_pipe;
logic [63:0] qm_const_pipe;
logic [63:0] qp_const_pipe;
// Pipeline Stage 2 of iteration for Goldschmidt's algorithm
flopenr #(128) regp1 (clk, reset, load_regp, Sum2, Sum_pipe);
flopenr #(128) regp2 (clk, reset, load_regp, Carry2, Carry_pipe);
flopenr #(1) regp3 (clk, reset, load_regp, muxr_out, muxr_pipe);
flopenr #(1) regp4 (clk, reset, load_regp, load_rega, rega_pipe);
flopenr #(1) regp5 (clk, reset, load_regp, load_regb, regb_pipe);
flopenr #(1) regp6 (clk, reset, load_regp, load_regc, regc_pipe);
flopenr #(1) regp7 (clk, reset, load_regp, load_regd, regd_pipe);
flopenr #(1) regp8 (clk, reset, load_regp, load_regs, regs_pipe);
flopenr #(1) regp9 (clk, reset, load_regp, load_regr, regr_pipe);
flopenr #(1) regpA (clk, reset, load_regp, P, P_pipe);
flopenr #(1) regpB (clk, reset, load_regp, op_type, op_type_pipe);
flopenr #(64) regpC (clk, reset, load_regp, q_const, q_const_pipe);
flopenr #(64) regpD (clk, reset, load_regp, qp_const, qp_const_pipe);
flopenr #(64) regpE (clk, reset, load_regp, qm_const, qm_const_pipe);
// CPA (from CSA)/Remainder addition/subtraction
adder_ip #(128) cpa1 (Sum_pipe, Carry_pipe, muxr_pipe, mul_out, cout1);
// ldf128 cpa1 (cout1, mul_out, Sum_pipe, Carry_pipe, muxr_pipe);
// One's complement instead of two's complement (for hw efficiency)
assign three = {~mul_out[126] , mul_out[126], ~mul_out[125:63]};
mux2 #(64) mxTC (~mul_out[126:63], three[64:1], op_type_pipe, twocmp_out);
adder #(128) cpa1 (Sum2, Carry2, muxr_out, mul_out, cout1);
// Assuming [1,2) - q1
adder_ip #(64) cpa2 (regb_out, q_const_pipe, 1'b0, q_out1, cout2);
adder_ip #(64) cpa3 (regb_out, qp_const_pipe, 1'b0, qp_out1, cout3);
adder_ip #(64) cpa4 (regb_out, qm_const_pipe, 1'b1, qm_out1, cout4);
adder_ip #(64) cpa5 ({regb_out[62:0], vss}, q_const_pipe, 1'b0, q_out0, cout5);
adder_ip #(64) cpa6 ({regb_out[62:0], vss}, qp_const_pipe, 1'b0, qp_out0, cout6);
adder_ip #(64) cpa7 ({regb_out[62:0], vss}, qm_const_pipe, 1'b1, qm_out0, cout7);
//ldf64 cpa2 (cout2, q_out1, regb_out, q_const_pipe, 1'b0);
//ldf64 cpa3 (cout3, qp_out1, regb_out, qp_const_pipe, 1'b0);
//ldf64 cpa4 (cout4, qm_out1, regb_out, qm_const_pipe, 1'b1);
// Assuming [0.5,1) - q0
//ldf64 cpa5 (cout5, q_out0, {regb_out[62:0], vss}, q_const_pipe, 1'b0);
//ldf64 cpa6 (cout6, qp_out0, {regb_out[62:0], vss}, qp_const_pipe, 1'b0);
//ldf64 cpa7 (cout7, qm_out0, {regb_out[62:0], vss}, qm_const_pipe, 1'b1);
adder #(64) cpa2 (regb_out, q_const, 1'b0, q_out1, cout2);
adder #(64) cpa3 (regb_out, qp_const, 1'b0, qp_out1, cout3);
adder #(64) cpa4 (regb_out, qm_const, 1'b1, qm_out1, cout4);
// Assuming [0.5,1) - q0
adder #(64) cpa5 ({regb_out[62:0], vss}, q_const, 1'b0, q_out0, cout5);
adder #(64) cpa6 ({regb_out[62:0], vss}, qp_const, 1'b0, qp_out0, cout6);
adder #(64) cpa7 ({regb_out[62:0], vss}, qm_const, 1'b1, qm_out0, cout7);
// One's complement instead of two's complement (for hw efficiency)
assign three = {~mul_out[126], mul_out[126], ~mul_out[125:63]};
mux2 #(64) mxTC (~mul_out[126:63], three[64:1], op_type, twocmp_out);
// regs
flopenr #(64) regc (clk, reset, regc_pipe, twocmp_out, regc_out);
flopenr #(64) regb (clk, reset, regb_pipe, mul_out[126:63], regb_out);
flopenr #(64) rega (clk, reset, rega_pipe, mul_out[126:63], rega_out);
flopenr #(64) regd (clk, reset, regd_pipe, mul_out[126:63], regd_out);
// remainder
flopenr #(128) regr (clk, reset, regr_pipe, mul_out, regr_out);
flopenr #(64) regc (clk, reset, load_regc, twocmp_out, regc_out);
flopenr #(64) regb (clk, reset, load_regb, mul_out[126:63], regb_out);
flopenr #(64) rega (clk, reset, load_rega, mul_out[126:63], rega_out);
flopenr #(64) regd (clk, reset, load_regd, mul_out[126:63], regd_out);
flopenr #(128) regr (clk, reset, load_regr, mul_out, regr_out);
// Assuming [1,2)
flopenr #(64) rege (clk, reset, regs_pipe, {q_out1[63:39], (q_out1[38:10] & {29{~P_pipe}}), 10'h0}, q1);
flopenr #(64) regf (clk, reset, regs_pipe, {qm_out1[63:39], (qm_out1[38:10] & {29{~P_pipe}}), 10'h0}, qm1);
flopenr #(64) regg (clk, reset, regs_pipe, {qp_out1[63:39], (qp_out1[38:10] & {29{~P_pipe}}), 10'h0}, qp1);
flopenr #(64) rege (clk, reset, load_regs, {q_out1[63:39], (q_out1[38:10] & {29{~P}}), 10'h0}, q1);
flopenr #(64) regf (clk, reset, load_regs, {qm_out1[63:39], (qm_out1[38:10] & {29{~P}}), 10'h0}, qm1);
flopenr #(64) regg (clk, reset, load_regs, {qp_out1[63:39], (qp_out1[38:10] & {29{~P}}), 10'h0}, qp1);
// Assuming [0,1)
flopenr #(64) regh (clk, reset, regs_pipe, {q_out0[63:39], (q_out0[38:10] & {29{~P_pipe}}), 10'h0}, q0);
flopenr #(64) regj (clk, reset, regs_pipe, {qm_out0[63:39], (qm_out0[38:10] & {29{~P_pipe}}), 10'h0}, qm0);
flopenr #(64) regk (clk, reset, regs_pipe, {qp_out0[63:39], (qp_out0[38:10] & {29{~P_pipe}}), 10'h0}, qp0);
flopenr #(64) regh (clk, reset, load_regs, {q_out0[63:39], (q_out0[38:10] & {29{~P}}), 10'h0}, q0);
flopenr #(64) regj (clk, reset, load_regs, {qm_out0[63:39], (qm_out0[38:10] & {29{~P}}), 10'h0}, qm0);
flopenr #(64) regk (clk, reset, load_regs, {qp_out0[63:39], (qp_out0[38:10] & {29{~P}}), 10'h0}, qp0);
endmodule // divconv
// adder: WIDTH-bit adder with carry-in and carry-out.
//   a, b : WIDTH-bit operands
//   cin  : carry-in
//   y    : low WIDTH bits of a + b + cin
//   cout : carry-out of the addition
//
// The listing previously carried both the two-input form (y = a + b) and the
// carry form, which closed the port list twice and drove y from two
// assignments. Only the carry form is kept: it is the one the five-port
// adder #(...) instantiations use.
module adder #(parameter WIDTH=8)
   (input  logic [WIDTH-1:0] a, b,
    input  logic             cin,
    output logic [WIDTH-1:0] y,
    output logic             cout);

   assign {cout, y} = a + b + cin;

endmodule // adder

Some files were not shown because too many files have changed in this diff Show More