mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Merge branch 'main' into dcache
This commit is contained in:
commit
4549a9f1c9
.gitignore
riscv-coremark
coremark
README.md
barebones
core_list_join.ccore_main.ccore_matrix.ccore_state.ccore_util.ccoremark.hcoremark.md5cygwin
freebsd
linux
macos
posix
rtems
simple
riscv64-baremetal
wally-pipelined
config
coremark_bare
rv32icfd
rv64icfd
linux-testgen
WALLY-README.txt
buildroot-config-src
fix_mem.pygdbinit_qemuloglinux-testvectors
logAllBuildroot.shlogBuildrootMem.shtestvector-generation
regression
sim-wally-batch-rv32icfdsim-wally-rv32icfdwally-pipelined-batch-rv32icfd.dowally-pipelined-rv32icfd.dowave-all.do
wave-dos
src/fpu
FMA
add.svalign.svbooth.svcompressors.svexpgen.svexpgen1.svexpgen2.svflag.svflag1.svflag2.svfma.svfma1.svfma2.svlza.svmultiply.svnormalize.svround.svsign.svspecial.sv
convert_inputs.svconvert_inputs_div.svdivconv.svexception_div.svfaddcvt.svfclassify.svfcmp.svfctrl.svfcvt.svfhazard.svfma.svfma1.svfpdiv.svtbgen
fpdivsqrt
7
.gitignore
vendored
7
.gitignore
vendored
@ -28,5 +28,12 @@ wally-pipelined/linux-testgen/nohup*
|
||||
wally-pipelined/linux-testgen/x*
|
||||
!wally-pipelined/linux-testgen/linux-testvectors/tvCopier.py
|
||||
!wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh
|
||||
!wally-pipelined/linux-testgen/linux-testvectors/tvUnlinker.sh
|
||||
!wally-pipelined/linux-testgen/linux-testvectors/intermediate-outputs
|
||||
wally-pipelined/linux-testgen/linux-testvectors/intermediate-outputs/*
|
||||
!wally-pipelined/linux-testgen/linux-testvectors/intermediate-outputs/git_create_dir.txt
|
||||
wally-pipelined/linux-testgen/buildroot/
|
||||
wally-pipelined/linux-testgen/buildroot-image-output
|
||||
wally-pipelined/linux-testgen/buildroot-config-src/main.config.old
|
||||
wally-pipelined/regression/slack-notifier/slack-webhook-url.txt
|
||||
|
||||
|
@ -3,6 +3,8 @@
|
||||
|
||||
CoreMark's primary goals are simplicity and providing a method for testing only a processor's core features. For more information about EEMBC's comprehensive embedded benchmark suites, please see www.eembc.org.
|
||||
|
||||
For a more compute-intensive version of CoreMark that uses larger datasets and execution loops taken from common applications, please check out EEMBC's [CoreMark-PRO](https://www.github.com/eembc/coremark-pro) benchmark, also on GitHub.
|
||||
|
||||
# Building and Running
|
||||
|
||||
To build and run the benchmark, type
|
||||
@ -83,7 +85,9 @@ Use `XCFLAGS=-DMULTITHREAD=N` where N is number of threads to run in parallel. S
|
||||
% make XCFLAGS="-DMULTITHREAD=4 -DUSE_PTHREAD"
|
||||
~~~
|
||||
|
||||
Above will compile the benchmark for execution on 4 cores, using POSIX Threads API.
|
||||
The above will compile the benchmark for execution on 4 cores, using POSIX Threads API.
|
||||
|
||||
Note: linking may fail on the previous command if your linker does not automatically add the `pthread` library. If you encounter `undefined reference` errors, please modify the `core_portme.mak` file for your platform (e.g. `linux/core_portme.mak`) and add `-lpthread` to the `LFLAGS_END` parameter.
|
||||
|
||||
# Run Parameters for the Benchmark Executable
|
||||
CoreMark's executable takes several parameters as follows (but only if `main()` accepts arguments):
|
||||
@ -109,7 +113,7 @@ The default for such a target when testing different configurations could be:
|
||||
|
||||
# Submitting Results
|
||||
|
||||
CoreMark results can be submitted on the web. Open a web browser and go to https://www.eembc.org/coremark/login.php?url=enter_score.php. After registering an account you may enter a score.
|
||||
CoreMark results can be submitted on the web. Open a web browser and go to the [submission page](https://www.eembc.org/coremark/submit.php). After registering an account you may enter a score.
|
||||
|
||||
# Run Rules
|
||||
What is and is not allowed.
|
||||
|
159
riscv-coremark/coremark/barebones/core_portme.c
Executable file → Normal file
159
riscv-coremark/coremark/barebones/core_portme.c
Executable file → Normal file
@ -19,110 +19,135 @@ Original Author: Shay Gal-on
|
||||
#include "core_portme.h"
|
||||
|
||||
#if VALIDATION_RUN
|
||||
volatile ee_s32 seed1_volatile=0x3415;
|
||||
volatile ee_s32 seed2_volatile=0x3415;
|
||||
volatile ee_s32 seed3_volatile=0x66;
|
||||
volatile ee_s32 seed1_volatile = 0x3415;
|
||||
volatile ee_s32 seed2_volatile = 0x3415;
|
||||
volatile ee_s32 seed3_volatile = 0x66;
|
||||
#endif
|
||||
#if PERFORMANCE_RUN
|
||||
volatile ee_s32 seed1_volatile=0x0;
|
||||
volatile ee_s32 seed2_volatile=0x0;
|
||||
volatile ee_s32 seed3_volatile=0x66;
|
||||
volatile ee_s32 seed1_volatile = 0x0;
|
||||
volatile ee_s32 seed2_volatile = 0x0;
|
||||
volatile ee_s32 seed3_volatile = 0x66;
|
||||
#endif
|
||||
#if PROFILE_RUN
|
||||
volatile ee_s32 seed1_volatile=0x8;
|
||||
volatile ee_s32 seed2_volatile=0x8;
|
||||
volatile ee_s32 seed3_volatile=0x8;
|
||||
volatile ee_s32 seed1_volatile = 0x8;
|
||||
volatile ee_s32 seed2_volatile = 0x8;
|
||||
volatile ee_s32 seed3_volatile = 0x8;
|
||||
#endif
|
||||
volatile ee_s32 seed4_volatile=ITERATIONS;
|
||||
volatile ee_s32 seed5_volatile=0;
|
||||
volatile ee_s32 seed4_volatile = ITERATIONS;
|
||||
volatile ee_s32 seed5_volatile = 0;
|
||||
/* Porting : Timing functions
|
||||
How to capture time and convert to seconds must be ported to whatever is supported by the platform.
|
||||
e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc.
|
||||
Sample implementation for standard time.h and windows.h definitions included.
|
||||
How to capture time and convert to seconds must be ported to whatever is
|
||||
supported by the platform. e.g. Read value from on board RTC, read value from
|
||||
cpu clock cycles performance counter etc. Sample implementation for standard
|
||||
time.h and windows.h definitions included.
|
||||
*/
|
||||
CORETIMETYPE barebones_clock() {
|
||||
#error "You must implement a method to measure time in barebones_clock()! This function should return current time.\n"
|
||||
CORETIMETYPE
|
||||
barebones_clock()
|
||||
{
|
||||
#error \
|
||||
"You must implement a method to measure time in barebones_clock()! This function should return current time.\n"
|
||||
}
|
||||
/* Define : TIMER_RES_DIVIDER
|
||||
Divider to trade off timer resolution and total time that can be measured.
|
||||
Divider to trade off timer resolution and total time that can be
|
||||
measured.
|
||||
|
||||
Use lower values to increase resolution, but make sure that overflow does not occur.
|
||||
If there are issues with the return value overflowing, increase this value.
|
||||
*/
|
||||
#define GETMYTIME(_t) (*_t=barebones_clock())
|
||||
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
|
||||
#define TIMER_RES_DIVIDER 1
|
||||
Use lower values to increase resolution, but make sure that overflow
|
||||
does not occur. If there are issues with the return value overflowing,
|
||||
increase this value.
|
||||
*/
|
||||
#define GETMYTIME(_t) (*_t = barebones_clock())
|
||||
#define MYTIMEDIFF(fin, ini) ((fin) - (ini))
|
||||
#define TIMER_RES_DIVIDER 1
|
||||
#define SAMPLE_TIME_IMPLEMENTATION 1
|
||||
#define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER)
|
||||
#define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER)
|
||||
|
||||
/** Define Host specific (POSIX), or target specific global time variables. */
|
||||
static CORETIMETYPE start_time_val, stop_time_val;
|
||||
|
||||
/* Function : start_time
|
||||
This function will be called right before starting the timed portion of the benchmark.
|
||||
This function will be called right before starting the timed portion of
|
||||
the benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the example code)
|
||||
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
|
||||
Implementation may be capturing a system timer (as implemented in the
|
||||
example code) or zeroing some system parameters - e.g. setting the cpu clocks
|
||||
cycles to 0.
|
||||
*/
|
||||
void start_time(void) {
|
||||
GETMYTIME(&start_time_val );
|
||||
void
|
||||
start_time(void)
|
||||
{
|
||||
GETMYTIME(&start_time_val);
|
||||
}
|
||||
/* Function : stop_time
|
||||
This function will be called right after ending the timed portion of the benchmark.
|
||||
This function will be called right after ending the timed portion of the
|
||||
benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the example code)
|
||||
or other system parameters - e.g. reading the current value of cpu cycles counter.
|
||||
Implementation may be capturing a system timer (as implemented in the
|
||||
example code) or other system parameters - e.g. reading the current value of
|
||||
cpu cycles counter.
|
||||
*/
|
||||
void stop_time(void) {
|
||||
GETMYTIME(&stop_time_val );
|
||||
void
|
||||
stop_time(void)
|
||||
{
|
||||
GETMYTIME(&stop_time_val);
|
||||
}
|
||||
/* Function : get_time
|
||||
Return an abstract "ticks" number that signifies time on the system.
|
||||
|
||||
Actual value returned may be cpu cycles, milliseconds or any other value,
|
||||
as long as it can be converted to seconds by <time_in_secs>.
|
||||
This methodology is taken to accommodate any hardware or simulated platform.
|
||||
The sample implementation returns millisecs by default,
|
||||
and the resolution is controlled by <TIMER_RES_DIVIDER>
|
||||
Return an abstract "ticks" number that signifies time on the system.
|
||||
|
||||
Actual value returned may be cpu cycles, milliseconds or any other
|
||||
value, as long as it can be converted to seconds by <time_in_secs>. This
|
||||
methodology is taken to accommodate any hardware or simulated platform. The
|
||||
sample implementation returns millisecs by default, and the resolution is
|
||||
controlled by <TIMER_RES_DIVIDER>
|
||||
*/
|
||||
CORE_TICKS get_time(void) {
|
||||
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
|
||||
return elapsed;
|
||||
CORE_TICKS
|
||||
get_time(void)
|
||||
{
|
||||
CORE_TICKS elapsed
|
||||
= (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
|
||||
return elapsed;
|
||||
}
|
||||
/* Function : time_in_secs
|
||||
Convert the value returned by get_time to seconds.
|
||||
Convert the value returned by get_time to seconds.
|
||||
|
||||
The <secs_ret> type is used to accommodate systems with no support for floating point.
|
||||
Default implementation implemented by the EE_TICKS_PER_SEC macro above.
|
||||
The <secs_ret> type is used to accommodate systems with no support for
|
||||
floating point. Default implementation implemented by the EE_TICKS_PER_SEC
|
||||
macro above.
|
||||
*/
|
||||
secs_ret time_in_secs(CORE_TICKS ticks) {
|
||||
secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
|
||||
return retval;
|
||||
secs_ret
|
||||
time_in_secs(CORE_TICKS ticks)
|
||||
{
|
||||
secs_ret retval = ((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
|
||||
return retval;
|
||||
}
|
||||
|
||||
ee_u32 default_num_contexts=1;
|
||||
ee_u32 default_num_contexts = 1;
|
||||
|
||||
/* Function : portable_init
|
||||
Target specific initialization code
|
||||
Test for some common mistakes.
|
||||
Target specific initialization code
|
||||
Test for some common mistakes.
|
||||
*/
|
||||
void portable_init(core_portable *p, int *argc, char *argv[])
|
||||
void
|
||||
portable_init(core_portable *p, int *argc, char *argv[])
|
||||
{
|
||||
#error "Call board initialization routines in portable init (if needed), in particular initialize UART!\n"
|
||||
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) {
|
||||
ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n");
|
||||
}
|
||||
if (sizeof(ee_u32) != 4) {
|
||||
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
|
||||
}
|
||||
p->portable_id=1;
|
||||
#error \
|
||||
"Call board initialization routines in portable init (if needed), in particular initialize UART!\n"
|
||||
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *))
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR! Please define ee_ptr_int to a type that holds a "
|
||||
"pointer!\n");
|
||||
}
|
||||
if (sizeof(ee_u32) != 4)
|
||||
{
|
||||
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
|
||||
}
|
||||
p->portable_id = 1;
|
||||
}
|
||||
/* Function : portable_fini
|
||||
Target specific final code
|
||||
Target specific final code
|
||||
*/
|
||||
void portable_fini(core_portable *p)
|
||||
void
|
||||
portable_fini(core_portable *p)
|
||||
{
|
||||
p->portable_id=0;
|
||||
p->portable_id = 0;
|
||||
}
|
||||
|
||||
|
||||
|
179
riscv-coremark/coremark/barebones/core_portme.h
Executable file → Normal file
179
riscv-coremark/coremark/barebones/core_portme.h
Executable file → Normal file
@ -16,178 +16,189 @@ limitations under the License.
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
/* Topic : Description
|
||||
This file contains configuration constants required to execute on different platforms
|
||||
This file contains configuration constants required to execute on
|
||||
different platforms
|
||||
*/
|
||||
#ifndef CORE_PORTME_H
|
||||
#define CORE_PORTME_H
|
||||
/************************/
|
||||
/* Data types and settings */
|
||||
/************************/
|
||||
/* Configuration : HAS_FLOAT
|
||||
Define to 1 if the platform supports floating point.
|
||||
/* Configuration : HAS_FLOAT
|
||||
Define to 1 if the platform supports floating point.
|
||||
*/
|
||||
#ifndef HAS_FLOAT
|
||||
#ifndef HAS_FLOAT
|
||||
#define HAS_FLOAT 1
|
||||
#endif
|
||||
/* Configuration : HAS_TIME_H
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef HAS_TIME_H
|
||||
#define HAS_TIME_H 1
|
||||
#endif
|
||||
/* Configuration : USE_CLOCK
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef USE_CLOCK
|
||||
#define USE_CLOCK 1
|
||||
#endif
|
||||
/* Configuration : HAS_STDIO
|
||||
Define to 1 if the platform has stdio.h.
|
||||
Define to 1 if the platform has stdio.h.
|
||||
*/
|
||||
#ifndef HAS_STDIO
|
||||
#define HAS_STDIO 0
|
||||
#endif
|
||||
/* Configuration : HAS_PRINTF
|
||||
Define to 1 if the platform has stdio.h and implements the printf function.
|
||||
Define to 1 if the platform has stdio.h and implements the printf
|
||||
function.
|
||||
*/
|
||||
#ifndef HAS_PRINTF
|
||||
#define HAS_PRINTF 0
|
||||
#endif
|
||||
|
||||
|
||||
/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
|
||||
Initialize these strings per platform
|
||||
Initialize these strings per platform
|
||||
*/
|
||||
#ifndef COMPILER_VERSION
|
||||
#ifdef __GNUC__
|
||||
#define COMPILER_VERSION "GCC"__VERSION__
|
||||
#else
|
||||
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
|
||||
#endif
|
||||
#ifndef COMPILER_VERSION
|
||||
#ifdef __GNUC__
|
||||
#define COMPILER_VERSION "GCC"__VERSION__
|
||||
#else
|
||||
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
|
||||
#endif
|
||||
#ifndef COMPILER_FLAGS
|
||||
#define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
|
||||
#endif
|
||||
#ifndef MEM_LOCATION
|
||||
#define MEM_LOCATION "STACK"
|
||||
#ifndef COMPILER_FLAGS
|
||||
#define COMPILER_FLAGS \
|
||||
FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
|
||||
#endif
|
||||
#ifndef MEM_LOCATION
|
||||
#define MEM_LOCATION "STACK"
|
||||
#endif
|
||||
|
||||
/* Data Types :
|
||||
To avoid compiler issues, define the data types that need to be used for 8b, 16b and 32b in <core_portme.h>.
|
||||
|
||||
*Important* :
|
||||
ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!!
|
||||
To avoid compiler issues, define the data types that need to be used for
|
||||
8b, 16b and 32b in <core_portme.h>.
|
||||
|
||||
*Important* :
|
||||
ee_ptr_int needs to be the data type used to hold pointers, otherwise
|
||||
coremark may fail!!!
|
||||
*/
|
||||
typedef signed short ee_s16;
|
||||
typedef signed short ee_s16;
|
||||
typedef unsigned short ee_u16;
|
||||
typedef signed int ee_s32;
|
||||
typedef double ee_f32;
|
||||
typedef unsigned char ee_u8;
|
||||
typedef unsigned int ee_u32;
|
||||
typedef ee_u32 ee_ptr_int;
|
||||
typedef size_t ee_size_t;
|
||||
typedef signed int ee_s32;
|
||||
typedef double ee_f32;
|
||||
typedef unsigned char ee_u8;
|
||||
typedef unsigned int ee_u32;
|
||||
typedef ee_u32 ee_ptr_int;
|
||||
typedef size_t ee_size_t;
|
||||
#define NULL ((void *)0)
|
||||
/* align_mem :
|
||||
This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks.
|
||||
This macro is used to align an offset to point to a 32b value. It is
|
||||
used in the Matrix algorithm to initialize the input memory blocks.
|
||||
*/
|
||||
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3))
|
||||
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3))
|
||||
|
||||
/* Configuration : CORE_TICKS
|
||||
Define type of return from the timing functions.
|
||||
Define type of return from the timing functions.
|
||||
*/
|
||||
#define CORETIMETYPE ee_u32
|
||||
#define CORETIMETYPE ee_u32
|
||||
typedef ee_u32 CORE_TICKS;
|
||||
|
||||
/* Configuration : SEED_METHOD
|
||||
Defines method to get seed values that cannot be computed at compile time.
|
||||
|
||||
Valid values :
|
||||
SEED_ARG - from command line.
|
||||
SEED_FUNC - from a system function.
|
||||
SEED_VOLATILE - from volatile variables.
|
||||
Defines method to get seed values that cannot be computed at compile
|
||||
time.
|
||||
|
||||
Valid values :
|
||||
SEED_ARG - from command line.
|
||||
SEED_FUNC - from a system function.
|
||||
SEED_VOLATILE - from volatile variables.
|
||||
*/
|
||||
#ifndef SEED_METHOD
|
||||
#define SEED_METHOD SEED_VOLATILE
|
||||
#endif
|
||||
|
||||
/* Configuration : MEM_METHOD
|
||||
Defines method to get a block of memory.
|
||||
|
||||
Valid values :
|
||||
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
|
||||
MEM_STATIC - to use a static memory array.
|
||||
MEM_STACK - to allocate the data block on the stack (NYI).
|
||||
Defines method to get a block of memory.
|
||||
|
||||
Valid values :
|
||||
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
|
||||
MEM_STATIC - to use a static memory array.
|
||||
MEM_STACK - to allocate the data block on the stack (NYI).
|
||||
*/
|
||||
#ifndef MEM_METHOD
|
||||
#define MEM_METHOD MEM_STACK
|
||||
#endif
|
||||
|
||||
/* Configuration : MULTITHREAD
|
||||
Define for parallel execution
|
||||
|
||||
Valid values :
|
||||
1 - only one context (default).
|
||||
N>1 - will execute N copies in parallel.
|
||||
|
||||
Note :
|
||||
If this flag is defined to more than 1, an implementation for launching parallel contexts must be defined.
|
||||
|
||||
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK> to enable them.
|
||||
|
||||
It is valid to have a different implementation of <core_start_parallel> and <core_end_parallel> in <core_portme.c>,
|
||||
to fit a particular architecture.
|
||||
Define for parallel execution
|
||||
|
||||
Valid values :
|
||||
1 - only one context (default).
|
||||
N>1 - will execute N copies in parallel.
|
||||
|
||||
Note :
|
||||
If this flag is defined to more than 1, an implementation for launching
|
||||
parallel contexts must be defined.
|
||||
|
||||
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK>
|
||||
to enable them.
|
||||
|
||||
It is valid to have a different implementation of <core_start_parallel>
|
||||
and <core_end_parallel> in <core_portme.c>, to fit a particular architecture.
|
||||
*/
|
||||
#ifndef MULTITHREAD
|
||||
#define MULTITHREAD 1
|
||||
#define USE_PTHREAD 0
|
||||
#define USE_FORK 0
|
||||
#define USE_SOCKET 0
|
||||
#define USE_FORK 0
|
||||
#define USE_SOCKET 0
|
||||
#endif
|
||||
|
||||
/* Configuration : MAIN_HAS_NOARGC
|
||||
Needed if platform does not support getting arguments to main.
|
||||
|
||||
Valid values :
|
||||
0 - argc/argv to main is supported
|
||||
1 - argc/argv to main is not supported
|
||||
|
||||
Note :
|
||||
This flag only matters if MULTITHREAD has been defined to a value greater than 1.
|
||||
Needed if platform does not support getting arguments to main.
|
||||
|
||||
Valid values :
|
||||
0 - argc/argv to main is supported
|
||||
1 - argc/argv to main is not supported
|
||||
|
||||
Note :
|
||||
This flag only matters if MULTITHREAD has been defined to a value
|
||||
greater than 1.
|
||||
*/
|
||||
#ifndef MAIN_HAS_NOARGC
|
||||
#ifndef MAIN_HAS_NOARGC
|
||||
#define MAIN_HAS_NOARGC 0
|
||||
#endif
|
||||
|
||||
/* Configuration : MAIN_HAS_NORETURN
|
||||
Needed if platform does not support returning a value from main.
|
||||
|
||||
Valid values :
|
||||
0 - main returns an int, and return value will be 0.
|
||||
1 - platform does not support returning a value from main
|
||||
Needed if platform does not support returning a value from main.
|
||||
|
||||
Valid values :
|
||||
0 - main returns an int, and return value will be 0.
|
||||
1 - platform does not support returning a value from main
|
||||
*/
|
||||
#ifndef MAIN_HAS_NORETURN
|
||||
#define MAIN_HAS_NORETURN 0
|
||||
#endif
|
||||
|
||||
/* Variable : default_num_contexts
|
||||
Not used for this simple port, must contain the value 1.
|
||||
Not used for this simple port, must contain the value 1.
|
||||
*/
|
||||
extern ee_u32 default_num_contexts;
|
||||
|
||||
typedef struct CORE_PORTABLE_S {
|
||||
ee_u8 portable_id;
|
||||
typedef struct CORE_PORTABLE_S
|
||||
{
|
||||
ee_u8 portable_id;
|
||||
} core_portable;
|
||||
|
||||
/* target specific init/fini */
|
||||
void portable_init(core_portable *p, int *argc, char *argv[]);
|
||||
void portable_fini(core_portable *p);
|
||||
|
||||
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) && !defined(VALIDATION_RUN)
|
||||
#if (TOTAL_DATA_SIZE==1200)
|
||||
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) \
|
||||
&& !defined(VALIDATION_RUN)
|
||||
#if (TOTAL_DATA_SIZE == 1200)
|
||||
#define PROFILE_RUN 1
|
||||
#elif (TOTAL_DATA_SIZE==2000)
|
||||
#elif (TOTAL_DATA_SIZE == 2000)
|
||||
#define PERFORMANCE_RUN 1
|
||||
#else
|
||||
#define VALIDATION_RUN 1
|
||||
|
164
riscv-coremark/coremark/barebones/cvt.c
Executable file → Normal file
164
riscv-coremark/coremark/barebones/cvt.c
Executable file → Normal file
@ -17,101 +17,111 @@ limitations under the License.
|
||||
#define CVTBUFSIZE 80
|
||||
static char CVTBUF[CVTBUFSIZE];
|
||||
|
||||
static char *cvt(double arg, int ndigits, int *decpt, int *sign, char *buf, int eflag)
|
||||
static char *
|
||||
cvt(double arg, int ndigits, int *decpt, int *sign, char *buf, int eflag)
|
||||
{
|
||||
int r2;
|
||||
double fi, fj;
|
||||
char *p, *p1;
|
||||
int r2;
|
||||
double fi, fj;
|
||||
char * p, *p1;
|
||||
|
||||
if (ndigits < 0) ndigits = 0;
|
||||
if (ndigits >= CVTBUFSIZE - 1) ndigits = CVTBUFSIZE - 2;
|
||||
r2 = 0;
|
||||
*sign = 0;
|
||||
p = &buf[0];
|
||||
if (arg < 0)
|
||||
{
|
||||
*sign = 1;
|
||||
arg = -arg;
|
||||
}
|
||||
arg = modf(arg, &fi);
|
||||
p1 = &buf[CVTBUFSIZE];
|
||||
if (ndigits < 0)
|
||||
ndigits = 0;
|
||||
if (ndigits >= CVTBUFSIZE - 1)
|
||||
ndigits = CVTBUFSIZE - 2;
|
||||
r2 = 0;
|
||||
*sign = 0;
|
||||
p = &buf[0];
|
||||
if (arg < 0)
|
||||
{
|
||||
*sign = 1;
|
||||
arg = -arg;
|
||||
}
|
||||
arg = modf(arg, &fi);
|
||||
p1 = &buf[CVTBUFSIZE];
|
||||
|
||||
if (fi != 0)
|
||||
{
|
||||
p1 = &buf[CVTBUFSIZE];
|
||||
while (fi != 0)
|
||||
if (fi != 0)
|
||||
{
|
||||
fj = modf(fi / 10, &fi);
|
||||
*--p1 = (int)((fj + .03) * 10) + '0';
|
||||
r2++;
|
||||
p1 = &buf[CVTBUFSIZE];
|
||||
while (fi != 0)
|
||||
{
|
||||
fj = modf(fi / 10, &fi);
|
||||
*--p1 = (int)((fj + .03) * 10) + '0';
|
||||
r2++;
|
||||
}
|
||||
while (p1 < &buf[CVTBUFSIZE])
|
||||
*p++ = *p1++;
|
||||
}
|
||||
while (p1 < &buf[CVTBUFSIZE]) *p++ = *p1++;
|
||||
}
|
||||
else if (arg > 0)
|
||||
{
|
||||
while ((fj = arg * 10) < 1)
|
||||
else if (arg > 0)
|
||||
{
|
||||
arg = fj;
|
||||
r2--;
|
||||
while ((fj = arg * 10) < 1)
|
||||
{
|
||||
arg = fj;
|
||||
r2--;
|
||||
}
|
||||
}
|
||||
}
|
||||
p1 = &buf[ndigits];
|
||||
if (eflag == 0) p1 += r2;
|
||||
*decpt = r2;
|
||||
if (p1 < &buf[0])
|
||||
{
|
||||
buf[0] = '\0';
|
||||
p1 = &buf[ndigits];
|
||||
if (eflag == 0)
|
||||
p1 += r2;
|
||||
*decpt = r2;
|
||||
if (p1 < &buf[0])
|
||||
{
|
||||
buf[0] = '\0';
|
||||
return buf;
|
||||
}
|
||||
while (p <= p1 && p < &buf[CVTBUFSIZE])
|
||||
{
|
||||
arg *= 10;
|
||||
arg = modf(arg, &fj);
|
||||
*p++ = (int)fj + '0';
|
||||
}
|
||||
if (p1 >= &buf[CVTBUFSIZE])
|
||||
{
|
||||
buf[CVTBUFSIZE - 1] = '\0';
|
||||
return buf;
|
||||
}
|
||||
p = p1;
|
||||
*p1 += 5;
|
||||
while (*p1 > '9')
|
||||
{
|
||||
*p1 = '0';
|
||||
if (p1 > buf)
|
||||
++*--p1;
|
||||
else
|
||||
{
|
||||
*p1 = '1';
|
||||
(*decpt)++;
|
||||
if (eflag == 0)
|
||||
{
|
||||
if (p > buf)
|
||||
*p = '0';
|
||||
p++;
|
||||
}
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
return buf;
|
||||
}
|
||||
while (p <= p1 && p < &buf[CVTBUFSIZE])
|
||||
{
|
||||
arg *= 10;
|
||||
arg = modf(arg, &fj);
|
||||
*p++ = (int) fj + '0';
|
||||
}
|
||||
if (p1 >= &buf[CVTBUFSIZE])
|
||||
{
|
||||
buf[CVTBUFSIZE - 1] = '\0';
|
||||
return buf;
|
||||
}
|
||||
p = p1;
|
||||
*p1 += 5;
|
||||
while (*p1 > '9')
|
||||
{
|
||||
*p1 = '0';
|
||||
if (p1 > buf)
|
||||
++*--p1;
|
||||
else
|
||||
{
|
||||
*p1 = '1';
|
||||
(*decpt)++;
|
||||
if (eflag == 0)
|
||||
{
|
||||
if (p > buf) *p = '0';
|
||||
p++;
|
||||
}
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
return buf;
|
||||
}
|
||||
|
||||
char *ecvt(double arg, int ndigits, int *decpt, int *sign)
|
||||
char *
|
||||
ecvt(double arg, int ndigits, int *decpt, int *sign)
|
||||
{
|
||||
return cvt(arg, ndigits, decpt, sign, CVTBUF, 1);
|
||||
return cvt(arg, ndigits, decpt, sign, CVTBUF, 1);
|
||||
}
|
||||
|
||||
char *ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
|
||||
char *
|
||||
ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
|
||||
{
|
||||
return cvt(arg, ndigits, decpt, sign, buf, 1);
|
||||
return cvt(arg, ndigits, decpt, sign, buf, 1);
|
||||
}
|
||||
|
||||
char *fcvt(double arg, int ndigits, int *decpt, int *sign)
|
||||
char *
|
||||
fcvt(double arg, int ndigits, int *decpt, int *sign)
|
||||
{
|
||||
return cvt(arg, ndigits, decpt, sign, CVTBUF, 0);
|
||||
return cvt(arg, ndigits, decpt, sign, CVTBUF, 0);
|
||||
}
|
||||
|
||||
char *fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
|
||||
char *
|
||||
fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
|
||||
{
|
||||
return cvt(arg, ndigits, decpt, sign, buf, 0);
|
||||
return cvt(arg, ndigits, decpt, sign, buf, 0);
|
||||
}
|
||||
|
1067
riscv-coremark/coremark/barebones/ee_printf.c
Executable file → Normal file
1067
riscv-coremark/coremark/barebones/ee_printf.c
Executable file → Normal file
File diff suppressed because it is too large
Load Diff
@ -17,8 +17,8 @@ Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
//#include <stdlib.h>
|
||||
//#include <string.h>
|
||||
/*
|
||||
Topic: Description
|
||||
Benchmark using a linked list.
|
||||
@ -118,7 +118,7 @@ ee_s32 cmp_idx(list_data *a, list_data *b, core_results *res) {
|
||||
return a->idx - b->idx;
|
||||
}
|
||||
|
||||
void ehitoa(int value, char *str, int base){
|
||||
/*void ehitoa(int value, char *str, int base){
|
||||
if (value>100000) strcpy(str,"too big");
|
||||
else{
|
||||
int places[6] = {100000, 10000, 1000, 100, 10, 1};
|
||||
@ -135,7 +135,7 @@ void ehitoa(int value, char *str, int base){
|
||||
}
|
||||
str[6]=0;
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
void copy_info(list_data *to,list_data *from) {
|
||||
to->data16=from->data16;
|
||||
@ -158,22 +158,22 @@ ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) {
|
||||
list_head *finder, *remover;
|
||||
list_data info;
|
||||
ee_s16 i;
|
||||
ee_printf("entered corebenchlist \n");
|
||||
//ee_printf("entered corebenchlist \n");
|
||||
info.idx=finder_idx;
|
||||
/* find <find_num> values in the list, and change the list each time (reverse and cache if value found) */
|
||||
for (i=0; i<find_num; i++) {
|
||||
ee_printf("for loop \n");
|
||||
//ee_printf("for loop \n");
|
||||
info.data16= (i & 0xff) ;
|
||||
this_find=core_list_find(list,&info);
|
||||
list=core_list_reverse(list);
|
||||
if (this_find==NULL) {
|
||||
missed++;
|
||||
retval+=(list->next->info->data16 >> 8) & 1;
|
||||
ee_printf("if statement \n");
|
||||
//ee_printf("if statement \n");
|
||||
}
|
||||
else {
|
||||
found++;
|
||||
ee_printf("else statement \n");
|
||||
//ee_printf("else statement \n");
|
||||
if (this_find->info->data16 & 0x1) /* use found value */
|
||||
retval+=(this_find->info->data16 >> 9) & 1;
|
||||
/* and cache next item at the head of the list (if any) */
|
||||
@ -187,7 +187,7 @@ ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) {
|
||||
if (info.idx>=0)
|
||||
info.idx++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List find %d: [%d,%d,%d]\n",i,retval,missed,found);
|
||||
//ee_printf("List find %d: [%d,%d,%d]\n",i,retval,missed,found);
|
||||
#endif
|
||||
}
|
||||
retval+=found*4-missed;
|
||||
@ -204,7 +204,7 @@ ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) {
|
||||
finder=finder->next;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List sort 1: %04x\n",retval);
|
||||
//ee_printf("List sort 1: %04x\n",retval);
|
||||
#endif
|
||||
remover=core_list_undo_remove(remover,list->next);
|
||||
/* sort the list by index, in effect returning the list to original state */
|
||||
@ -216,7 +216,7 @@ ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) {
|
||||
finder=finder->next;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List sort 2: %04x\n",retval);
|
||||
//ee_printf("List sort 2: %04x\n",retval);
|
||||
#endif
|
||||
return retval;
|
||||
}
|
||||
@ -235,26 +235,26 @@ ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) {
|
||||
*/
|
||||
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed) {
|
||||
/* calculated pointers for the list */
|
||||
ee_printf("%d \n blksize", blksize);
|
||||
//ee_printf("%d \n blksize", blksize);
|
||||
ee_u32 per_item=16+sizeof(struct list_data_s);
|
||||
ee_printf("%d \n sizeof", sizeof(struct list_data_s));
|
||||
ee_printf("%d \n per_item", per_item);
|
||||
//ee_printf("%d \n sizeof", sizeof(struct list_data_s));
|
||||
//ee_printf("%d \n per_item", per_item);
|
||||
ee_u32 size=(blksize/per_item)-2;
|
||||
char bufftwo[200];
|
||||
ehitoa(size, bufftwo, 10);
|
||||
ee_printf(" size = %s done \n", bufftwo);
|
||||
ee_printf("%d", size);/* to accomodate systems with 64b pointers, and make sure same code is executed, set max list elements */
|
||||
//char bufftwo[200];
|
||||
//ehitoa(size, bufftwo, 10);
|
||||
//ee_printf(" size = %s done \n", bufftwo);
|
||||
//ee_printf("%d", size);/* to accomodate systems with 64b pointers, and make sure same code is executed, set max list elements */
|
||||
list_head *memblock_end=memblock+size;
|
||||
|
||||
list_data *datablock=(list_data *)(memblock_end);
|
||||
list_data *datablock_end=datablock+size;
|
||||
ee_printf("datablock_end");
|
||||
//ee_printf("datablock_end");
|
||||
/* some useful variables */
|
||||
ee_u32 i;
|
||||
list_head *finder,*list=memblock;
|
||||
list_data info;
|
||||
ehitoa(size, bufftwo, 10);
|
||||
ee_printf(" size2 = %s done \n", bufftwo);
|
||||
//ehitoa(size, bufftwo, 10);
|
||||
//ee_printf(" size2 = %s done \n", bufftwo);
|
||||
|
||||
/* create fake items for the list head and tail */
|
||||
list->next=NULL;
|
||||
@ -265,58 +265,58 @@ list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed) {
|
||||
datablock++;
|
||||
info.idx=0x7fff;
|
||||
info.data16=(ee_s16)0xffff;
|
||||
ehitoa(size, bufftwo, 10);
|
||||
ee_printf(" size3 = %s done \n", bufftwo);
|
||||
//ehitoa(size, bufftwo, 10);
|
||||
//ee_printf(" size3 = %s done \n", bufftwo);
|
||||
core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end);
|
||||
ehitoa(size, bufftwo, 10);
|
||||
ee_printf(" size4 = %s done \n", bufftwo);;
|
||||
//ehitoa(size, bufftwo, 10);
|
||||
//ee_printf(" size4 = %s done \n", bufftwo);;
|
||||
/* then insert size items */
|
||||
for (i=0; i<size; i++) {
|
||||
ee_u16 datpat=((ee_u16)(seed^i) & 0xf);
|
||||
ee_u16 dat=(datpat<<3) | (i&0x7); /* alternate between algorithms */
|
||||
info.data16=(dat<<8) | dat; /* fill the data with actual data and upper bits with rebuild value */
|
||||
core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end);
|
||||
ehitoa(i, bufftwo, 10);
|
||||
ee_printf(" i = %s done \n", bufftwo);
|
||||
//ehitoa(i, bufftwo, 10);
|
||||
//ee_printf(" i = %s done \n", bufftwo);
|
||||
//ee_printf("%d \n", i);
|
||||
/*char grow[200];
|
||||
char growtwo[200];
|
||||
itoa(i, growtwo, 10);
|
||||
sprintf(grow, "test %u buff2 %s goodbyeadd \n", i, growtwo);*/
|
||||
}
|
||||
ee_printf("exited for \n");
|
||||
//ee_printf("exited for \n");
|
||||
/* and now index the list so we know initial seed order of the list */
|
||||
finder=list->next;
|
||||
i=1;
|
||||
ehitoa(i, bufftwo, 10);
|
||||
ee_printf(" i = %s done \n", bufftwo);
|
||||
//ehitoa(i, bufftwo, 10);
|
||||
//ee_printf(" i = %s done \n", bufftwo);
|
||||
while (finder->next!=NULL) {
|
||||
ee_printf("enter while statement \n");
|
||||
//ee_printf("enter while statement \n");
|
||||
if (i<size/5){ /* first 20% of the list in order */
|
||||
finder->info->idx=i++;
|
||||
ehitoa(i, bufftwo, 10);
|
||||
ee_printf(" if i = %s done \n", bufftwo);
|
||||
//ehitoa(i, bufftwo, 10);
|
||||
//ee_printf(" if i = %s done \n", bufftwo);
|
||||
}
|
||||
|
||||
else {
|
||||
ee_u16 pat=(ee_u16)(i++ ^ seed); /* get a pseudo random number */
|
||||
finder->info->idx=0x3fff & (((i & 0x07) << 8) | pat); /* make sure the mixed items end up after the ones in sequence */
|
||||
ehitoa(i, bufftwo, 10);
|
||||
ee_printf(" else i = %s done \n", bufftwo);
|
||||
//ehitoa(i, bufftwo, 10);
|
||||
//ee_printf(" else i = %s done \n", bufftwo);
|
||||
}
|
||||
finder=finder->next;
|
||||
}
|
||||
ehitoa(i, bufftwo, 10);
|
||||
ee_printf(" i2 = %s done \n", bufftwo);
|
||||
//ehitoa(i, bufftwo, 10);
|
||||
//ee_printf(" i2 = %s done \n", bufftwo);
|
||||
list = core_list_mergesort(list,cmp_idx,NULL);
|
||||
#if CORE_DEBUG
|
||||
ee_printf("Initialized list:\n");
|
||||
//ee_printf("Initialized list:\n");
|
||||
finder=list;
|
||||
while (finder) {
|
||||
ee_printf("[%04x,%04x]",finder->info->idx,(ee_u16)finder->info->data16);
|
||||
//ee_printf("[%04x,%04x]",finder->info->idx,(ee_u16)finder->info->data16);
|
||||
finder=finder->next;
|
||||
}
|
||||
ee_printf("\n");
|
||||
//ee_printf("\n");
|
||||
#endif
|
||||
return list;
|
||||
}
|
||||
@ -424,20 +424,22 @@ list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modifi
|
||||
Found item, or NULL if not found.
|
||||
*/
|
||||
list_head *core_list_find(list_head *list,list_data *info) {
|
||||
ee_printf("entered core_list_find \n");
|
||||
//ee_printf("entered core_list_find \n");
|
||||
if (info->idx>=0) {
|
||||
ee_printf("find if \n");
|
||||
//ee_printf("find if \n");
|
||||
while (list && (list->info->idx != info->idx)){
|
||||
list=list->next;
|
||||
ee_printf("find while if \n");}
|
||||
ee_printf("core_list_find end \n");
|
||||
//ee_printf("find while if \n");
|
||||
}
|
||||
//ee_printf("core_list_find end \n");
|
||||
return list;
|
||||
} else {
|
||||
ee_printf("find else");
|
||||
//ee_printf("find else");
|
||||
while (list && ((list->info->data16 & 0xff) != info->data16)){
|
||||
list=list->next;
|
||||
ee_printf("find while else \n");}
|
||||
ee_printf("core list find end \n");
|
||||
//ee_printf("find while else \n");
|
||||
}
|
||||
//ee_printf("core list find end \n");
|
||||
return list;
|
||||
}
|
||||
}
|
||||
@ -456,7 +458,7 @@ list_head *core_list_find(list_head *list,list_data *info) {
|
||||
*/
|
||||
|
||||
list_head *core_list_reverse(list_head *list) {
|
||||
ee_printf("entered core_list_reverse");
|
||||
// ee_printf("entered core_list_reverse");
|
||||
list_head *next=NULL, *tmp;
|
||||
while (list) {
|
||||
tmp=list->next;
|
||||
@ -464,7 +466,7 @@ list_head *core_list_reverse(list_head *list) {
|
||||
next=list;
|
||||
list=tmp;
|
||||
}
|
||||
ee_printf("core_list_reverse done");
|
||||
//ee_printf("core_list_reverse done");
|
||||
return next;
|
||||
}
|
||||
/* Function: core_list_mergesort
|
||||
@ -493,27 +495,27 @@ list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res)
|
||||
ee_s32 insize, nmerges, psize, qsize, i;
|
||||
|
||||
insize = 1;
|
||||
char bufftwo[200];
|
||||
//char bufftwo[200];
|
||||
while (1) {
|
||||
p = list;
|
||||
list = NULL;
|
||||
tail = NULL;
|
||||
|
||||
nmerges = 0; /* count number of merges we do in this pass */
|
||||
ehitoa(nmerges, bufftwo, 10);
|
||||
ee_printf(" nmerges default value = %s done \n", bufftwo);
|
||||
//ehitoa(nmerges, bufftwo, 10);
|
||||
//ee_printf(" nmerges default value = %s done \n", bufftwo);
|
||||
while (p) {
|
||||
nmerges++; /* there exists a merge to be done */
|
||||
ehitoa(nmerges, bufftwo, 10);
|
||||
ee_printf(" current nmerges = %s done \n", bufftwo);
|
||||
//ehitoa(nmerges, bufftwo, 10);
|
||||
//ee_printf(" current nmerges = %s done \n", bufftwo);
|
||||
/* step `insize' places along from p */
|
||||
q = p;
|
||||
psize = 0;
|
||||
ehitoa(insize, bufftwo, 10);
|
||||
ee_printf(" insize = %s done \n", bufftwo);
|
||||
//ehitoa(insize, bufftwo, 10);
|
||||
//ee_printf(" insize = %s done \n", bufftwo);
|
||||
for (i = 0; i < insize; i++) {
|
||||
ehitoa(i, bufftwo, 10);
|
||||
ee_printf(" i = %s done \n", bufftwo);
|
||||
//ehitoa(i, bufftwo, 10);
|
||||
//ee_printf(" i = %s done \n", bufftwo);
|
||||
psize++;
|
||||
q = q->next;
|
||||
if (!q) break;
|
||||
@ -521,37 +523,37 @@ list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res)
|
||||
|
||||
/* if q hasn't fallen off end, we have two lists to merge */
|
||||
qsize = insize;
|
||||
ehitoa(qsize, bufftwo, 10);
|
||||
ee_printf(" qsize = %s done \n", bufftwo);
|
||||
//ehitoa(qsize, bufftwo, 10);
|
||||
//ee_printf(" qsize = %s done \n", bufftwo);
|
||||
|
||||
/* now we have two lists; merge them */
|
||||
while (psize > 0 || (qsize > 0 && q)) {
|
||||
|
||||
/* decide whether next element of merge comes from p or q */
|
||||
if (psize == 0) {
|
||||
ee_printf("if \n");
|
||||
//ee_printf("if \n");
|
||||
/* p is empty; e must come from q. */
|
||||
e = q; q = q->next; qsize--;
|
||||
} else if (qsize == 0 || !q) {
|
||||
ee_printf("else if \n");
|
||||
//ee_printf("else if \n");
|
||||
/* q is empty; e must come from p. */
|
||||
e = p; p = p->next; psize--;
|
||||
} else if (cmp(p->info,q->info,res) <= 0) {
|
||||
ee_printf("else if 2 \n");
|
||||
//ee_printf("else if 2 \n");
|
||||
/* First element of p is lower (or same); e must come from p. */
|
||||
e = p; p = p->next; psize--;
|
||||
} else {
|
||||
ee_printf("else \n");
|
||||
//ee_printf("else \n");
|
||||
/* First element of q is lower; e must come from q. */
|
||||
e = q; q = q->next; qsize--;
|
||||
}
|
||||
|
||||
/* add the next element to the merged list */
|
||||
if (tail) {
|
||||
ee_printf("tail if \n");
|
||||
//ee_printf("tail if \n");
|
||||
tail->next = e;
|
||||
} else {
|
||||
ee_printf("tail else \n");
|
||||
//ee_printf("tail else \n");
|
||||
list = e;
|
||||
}
|
||||
tail = e;
|
||||
@ -569,8 +571,8 @@ list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res)
|
||||
|
||||
/* Otherwise repeat, merging lists twice the size */
|
||||
insize *= 2;
|
||||
ehitoa(insize, bufftwo, 10);
|
||||
ee_printf(" insize2 = %s done \n", bufftwo);
|
||||
//ehitoa(insize, bufftwo, 10);
|
||||
//ee_printf(" insize2 = %s done \n", bufftwo);
|
||||
}
|
||||
#if COMPILER_REQUIRES_SORT_RETURN
|
||||
return list;
|
||||
|
@ -17,396 +17,431 @@ Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
/* File: core_main.c
|
||||
This file contains the framework to acquire a block of memory, seed initial parameters, run the benchmark and report the results.
|
||||
This file contains the framework to acquire a block of memory, seed
|
||||
initial parameters, run the benchmark and report the results.
|
||||
*/
|
||||
#include "coremark.h"
|
||||
|
||||
/* Function: iterate
|
||||
Run the benchmark for a specified number of iterations.
|
||||
Run the benchmark for a specified number of iterations.
|
||||
|
||||
Operation:
|
||||
For each type of benchmarked algorithm:
|
||||
a - Initialize the data block for the algorithm.
|
||||
b - Execute the algorithm N times.
|
||||
Operation:
|
||||
For each type of benchmarked algorithm:
|
||||
a - Initialize the data block for the algorithm.
|
||||
b - Execute the algorithm N times.
|
||||
|
||||
Returns:
|
||||
NULL.
|
||||
Returns:
|
||||
NULL.
|
||||
*/
|
||||
static ee_u16 list_known_crc[] = {(ee_u16)0xd4b0,(ee_u16)0x3340,(ee_u16)0x6a79,(ee_u16)0xe714,(ee_u16)0xe3c1};
|
||||
static ee_u16 matrix_known_crc[] = {(ee_u16)0xbe52,(ee_u16)0x1199,(ee_u16)0x5608,(ee_u16)0x1fd7,(ee_u16)0x0747};
|
||||
static ee_u16 state_known_crc[] = {(ee_u16)0x5e47,(ee_u16)0x39bf,(ee_u16)0xe5a4,(ee_u16)0x8e3a,(ee_u16)0x8d84};
|
||||
int gg_printf(const char *fmt, ...);
|
||||
int sendstring(const char *p);
|
||||
void _send_char(char c);
|
||||
void *iterate(void *pres) {
|
||||
ee_u32 i;
|
||||
ee_u16 crc;
|
||||
core_results *res=(core_results *)pres;
|
||||
ee_u32 iterations=res->iterations;
|
||||
res->crc=0;
|
||||
res->crclist=0;
|
||||
res->crcmatrix=0;
|
||||
res->crcstate=0;
|
||||
static ee_u16 list_known_crc[] = { (ee_u16)0xd4b0,
|
||||
(ee_u16)0x3340,
|
||||
(ee_u16)0x6a79,
|
||||
(ee_u16)0xe714,
|
||||
(ee_u16)0xe3c1 };
|
||||
static ee_u16 matrix_known_crc[] = { (ee_u16)0xbe52,
|
||||
(ee_u16)0x1199,
|
||||
(ee_u16)0x5608,
|
||||
(ee_u16)0x1fd7,
|
||||
(ee_u16)0x0747 };
|
||||
static ee_u16 state_known_crc[] = { (ee_u16)0x5e47,
|
||||
(ee_u16)0x39bf,
|
||||
(ee_u16)0xe5a4,
|
||||
(ee_u16)0x8e3a,
|
||||
(ee_u16)0x8d84 };
|
||||
void *
|
||||
iterate(void *pres)
|
||||
{
|
||||
ee_u32 i;
|
||||
ee_u16 crc;
|
||||
core_results *res = (core_results *)pres;
|
||||
ee_u32 iterations = res->iterations;
|
||||
res->crc = 0;
|
||||
res->crclist = 0;
|
||||
res->crcmatrix = 0;
|
||||
res->crcstate = 0;
|
||||
|
||||
for (i=0; i<iterations; i++) {
|
||||
crc=core_bench_list(res,1);
|
||||
res->crc=crcu16(crc,res->crc);
|
||||
crc=core_bench_list(res,-1);
|
||||
res->crc=crcu16(crc,res->crc);
|
||||
if (i==0) res->crclist=res->crc;
|
||||
}
|
||||
return NULL;
|
||||
for (i = 0; i < iterations; i++)
|
||||
{
|
||||
crc = core_bench_list(res, 1);
|
||||
res->crc = crcu16(crc, res->crc);
|
||||
crc = core_bench_list(res, -1);
|
||||
res->crc = crcu16(crc, res->crc);
|
||||
if (i == 0)
|
||||
res->crclist = res->crc;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if (SEED_METHOD==SEED_ARG)
|
||||
#if (SEED_METHOD == SEED_ARG)
|
||||
ee_s32 get_seed_args(int i, int argc, char *argv[]);
|
||||
#define get_seed(x) (ee_s16)get_seed_args(x,argc,argv)
|
||||
#define get_seed_32(x) get_seed_args(x,argc,argv)
|
||||
#define get_seed(x) (ee_s16) get_seed_args(x, argc, argv)
|
||||
#define get_seed_32(x) get_seed_args(x, argc, argv)
|
||||
#else /* via function or volatile */
|
||||
ee_s32 get_seed_32(int i);
|
||||
#define get_seed(x) (ee_s16)get_seed_32(x)
|
||||
#define get_seed(x) (ee_s16) get_seed_32(x)
|
||||
#endif
|
||||
|
||||
#if (MEM_METHOD==MEM_STATIC)
|
||||
#if (MEM_METHOD == MEM_STATIC)
|
||||
ee_u8 static_memblk[TOTAL_DATA_SIZE];
|
||||
#endif
|
||||
char *mem_name[3] = {"Static","Heap","Stack"};
|
||||
char *mem_name[3] = { "Static", "Heap", "Stack" };
|
||||
/* Function: main
|
||||
Main entry routine for the benchmark.
|
||||
This function is responsible for the following steps:
|
||||
Main entry routine for the benchmark.
|
||||
This function is responsible for the following steps:
|
||||
|
||||
1 - Initialize input seeds from a source that cannot be determined at compile time.
|
||||
2 - Initialize memory block for use.
|
||||
3 - Run and time the benchmark.
|
||||
4 - Report results, testing the validity of the output if the seeds are known.
|
||||
1 - Initialize input seeds from a source that cannot be determined at
|
||||
compile time. 2 - Initialize memory block for use. 3 - Run and time the
|
||||
benchmark. 4 - Report results, testing the validity of the output if the
|
||||
seeds are known.
|
||||
|
||||
Arguments:
|
||||
1 - first seed : Any value
|
||||
2 - second seed : Must be identical to first for iterations to be identical
|
||||
3 - third seed : Any value, should be at least an order of magnitude less than the input size, but bigger than 32.
|
||||
4 - Iterations : Special, if set to 0, iterations will be automatically determined such that the benchmark will run between 10 to 100 secs
|
||||
Arguments:
|
||||
1 - first seed : Any value
|
||||
2 - second seed : Must be identical to first for iterations to be
|
||||
identical 3 - third seed : Any value, should be at least an order of
|
||||
magnitude less than the input size, but bigger than 32. 4 - Iterations :
|
||||
Special, if set to 0, iterations will be automatically determined such that
|
||||
the benchmark will run between 10 to 100 secs
|
||||
|
||||
*/
|
||||
|
||||
#if MAIN_HAS_NOARGC
|
||||
MAIN_RETURN_TYPE main(void) {
|
||||
int argc=0;
|
||||
char *argv[1];
|
||||
MAIN_RETURN_TYPE
|
||||
main(void)
|
||||
{
|
||||
int argc = 0;
|
||||
char *argv[1];
|
||||
#else
|
||||
MAIN_RETURN_TYPE main(int argc, char *argv[]) {
|
||||
MAIN_RETURN_TYPE
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
#endif
|
||||
//const char s[] = "Elizabeth";
|
||||
//ee_printf("eeprint");
|
||||
//ee_printf("Trying to print: %d", 0);
|
||||
/*gg_printf("Elizabeth");*/
|
||||
//sendstring("Elizabeth");
|
||||
|
||||
//sendstring(s);
|
||||
//return(0);
|
||||
ee_u16 i,j=0,num_algorithms=0;
|
||||
ee_s16 known_id=-1,total_errors=0;
|
||||
ee_u16 seedcrc=0;
|
||||
CORE_TICKS total_time;
|
||||
core_results results[MULTITHREAD];
|
||||
#if (MEM_METHOD==MEM_STACK)
|
||||
ee_u8 stack_memblock[TOTAL_DATA_SIZE*MULTITHREAD];
|
||||
ee_printf("SHOWTIME\n");
|
||||
ee_u16 i, j = 0, num_algorithms = 0;
|
||||
ee_s16 known_id = -1, total_errors = 0;
|
||||
ee_u16 seedcrc = 0;
|
||||
CORE_TICKS total_time;
|
||||
core_results results[MULTITHREAD];
|
||||
#if (MEM_METHOD == MEM_STACK)
|
||||
ee_u8 stack_memblock[TOTAL_DATA_SIZE * MULTITHREAD];
|
||||
#endif
|
||||
/* first call any initializations needed */
|
||||
portable_init(&(results[0].port), &argc, argv);
|
||||
/* First some checks to make sure benchmark will run ok */
|
||||
if (sizeof(struct list_head_s)>128) {
|
||||
ee_printf("list_head structure too big for comparable data!\n");
|
||||
return MAIN_RETURN_VAL;
|
||||
}
|
||||
results[0].seed1=get_seed(1);
|
||||
results[0].seed2=get_seed(2);
|
||||
results[0].seed3=get_seed(3);
|
||||
results[0].iterations=get_seed_32(4);
|
||||
/* first call any initializations needed */
|
||||
portable_init(&(results[0].port), &argc, argv);
|
||||
/* First some checks to make sure benchmark will run ok */
|
||||
if (sizeof(struct list_head_s) > 128)
|
||||
{
|
||||
ee_printf("list_head structure too big for comparable data!\n");
|
||||
return MAIN_RETURN_VAL;
|
||||
}
|
||||
results[0].seed1 = get_seed(1);
|
||||
results[0].seed2 = get_seed(2);
|
||||
results[0].seed3 = get_seed(3);
|
||||
results[0].iterations = get_seed_32(4);
|
||||
#if CORE_DEBUG
|
||||
results[0].iterations=1;
|
||||
results[0].iterations = 1;
|
||||
#endif
|
||||
results[0].execs=get_seed_32(5);
|
||||
if (results[0].execs==0) { /* if not supplied, execute all algorithms */
|
||||
results[0].execs=ALL_ALGORITHMS_MASK;
|
||||
}
|
||||
/* put in some default values based on one seed only for easy testing */
|
||||
if ((results[0].seed1==0) && (results[0].seed2==0) && (results[0].seed3==0)) { /* validation run */
|
||||
results[0].seed1=0;
|
||||
results[0].seed2=0;
|
||||
results[0].seed3=0x66;
|
||||
}
|
||||
if ((results[0].seed1==1) && (results[0].seed2==0) && (results[0].seed3==0)) { /* perfromance run */
|
||||
results[0].seed1=0x3415;
|
||||
results[0].seed2=0x3415;
|
||||
results[0].seed3=0x66;
|
||||
}
|
||||
#if (MEM_METHOD==MEM_STATIC)
|
||||
results[0].memblock[0]=(void *)static_memblk;
|
||||
results[0].size=TOTAL_DATA_SIZE;
|
||||
ee_printf("%d \n total data size", TOTAL_DATA_SIZE);
|
||||
results[0].err=0;
|
||||
#if (MULTITHREAD>1)
|
||||
#error "Cannot use a static data area with multiple contexts!"
|
||||
#endif
|
||||
#elif (MEM_METHOD==MEM_MALLOC)
|
||||
for (i=0 ; i<MULTITHREAD; i++) {
|
||||
ee_s32 malloc_override=get_seed(7);
|
||||
if (malloc_override != 0)
|
||||
results[i].size=malloc_override;
|
||||
ee_printf("%d \n malloc datasize", malloc_override);
|
||||
else
|
||||
results[i].size=TOTAL_DATA_SIZE;
|
||||
results[i].memblock[0]=portable_malloc(results[i].size);
|
||||
results[i].seed1=results[0].seed1;
|
||||
results[i].seed2=results[0].seed2;
|
||||
results[i].seed3=results[0].seed3;
|
||||
results[i].err=0;
|
||||
results[i].execs=results[0].execs;
|
||||
}
|
||||
#elif (MEM_METHOD==MEM_STACK)
|
||||
for (i=0 ; i<MULTITHREAD; i++) {
|
||||
results[i].memblock[0]=stack_memblock+i*TOTAL_DATA_SIZE;
|
||||
results[i].size=TOTAL_DATA_SIZE;
|
||||
results[i].seed1=results[0].seed1;
|
||||
results[i].seed2=results[0].seed2;
|
||||
results[i].seed3=results[0].seed3;
|
||||
results[i].err=0;
|
||||
results[i].execs=results[0].execs;
|
||||
}
|
||||
results[0].execs = get_seed_32(5);
|
||||
if (results[0].execs == 0)
|
||||
{ /* if not supplied, execute all algorithms */
|
||||
results[0].execs = ALL_ALGORITHMS_MASK;
|
||||
}
|
||||
/* put in some default values based on one seed only for easy testing */
|
||||
if ((results[0].seed1 == 0) && (results[0].seed2 == 0)
|
||||
&& (results[0].seed3 == 0))
|
||||
{ /* performance run */
|
||||
results[0].seed1 = 0;
|
||||
results[0].seed2 = 0;
|
||||
results[0].seed3 = 0x66;
|
||||
}
|
||||
if ((results[0].seed1 == 1) && (results[0].seed2 == 0)
|
||||
&& (results[0].seed3 == 0))
|
||||
{ /* validation run */
|
||||
results[0].seed1 = 0x3415;
|
||||
results[0].seed2 = 0x3415;
|
||||
results[0].seed3 = 0x66;
|
||||
}
|
||||
#if (MEM_METHOD == MEM_STATIC)
|
||||
results[0].memblock[0] = (void *)static_memblk;
|
||||
results[0].size = TOTAL_DATA_SIZE;
|
||||
results[0].err = 0;
|
||||
#if (MULTITHREAD > 1)
|
||||
#error "Cannot use a static data area with multiple contexts!"
|
||||
#endif
|
||||
#elif (MEM_METHOD == MEM_MALLOC)
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
{
|
||||
ee_s32 malloc_override = get_seed(7);
|
||||
if (malloc_override != 0)
|
||||
results[i].size = malloc_override;
|
||||
else
|
||||
results[i].size = TOTAL_DATA_SIZE;
|
||||
results[i].memblock[0] = portable_malloc(results[i].size);
|
||||
results[i].seed1 = results[0].seed1;
|
||||
results[i].seed2 = results[0].seed2;
|
||||
results[i].seed3 = results[0].seed3;
|
||||
results[i].err = 0;
|
||||
results[i].execs = results[0].execs;
|
||||
}
|
||||
#elif (MEM_METHOD == MEM_STACK)
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
{
|
||||
results[i].memblock[0] = stack_memblock + i * TOTAL_DATA_SIZE;
|
||||
results[i].size = TOTAL_DATA_SIZE;
|
||||
results[i].seed1 = results[0].seed1;
|
||||
results[i].seed2 = results[0].seed2;
|
||||
results[i].seed3 = results[0].seed3;
|
||||
results[i].err = 0;
|
||||
results[i].execs = results[0].execs;
|
||||
}
|
||||
#else
|
||||
#error "Please define a way to initialize a memory block."
|
||||
#endif
|
||||
/* Data init */
|
||||
/* Find out how much space we have based on number of algorithms */
|
||||
for (i=0; i<NUM_ALGORITHMS; i++) {
|
||||
if ((1<<(ee_u32)i) & results[0].execs)
|
||||
num_algorithms++;
|
||||
}
|
||||
for (i=0 ; i<MULTITHREAD; i++)
|
||||
results[i].size=results[i].size/num_algorithms;
|
||||
/* Assign pointers */
|
||||
for (i=0; i<NUM_ALGORITHMS; i++) {
|
||||
ee_u32 ctx;
|
||||
if ((1<<(ee_u32)i) & results[0].execs) {
|
||||
for (ctx=0 ; ctx<MULTITHREAD; ctx++)
|
||||
results[ctx].memblock[i+1]=(char *)(results[ctx].memblock[0])+results[0].size*j;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
/* call inits */
|
||||
for (i=0 ; i<MULTITHREAD; i++) {
|
||||
if (results[i].execs & ID_LIST) {
|
||||
ee_printf("loop");
|
||||
ee_printf("%d \n", MULTITHREAD);
|
||||
ee_printf("%d \n sizethread ", results[0].size);
|
||||
/* Data init */
|
||||
/* Find out how much space we have based on number of algorithms */
|
||||
for (i = 0; i < NUM_ALGORITHMS; i++)
|
||||
{
|
||||
if ((1 << (ee_u32)i) & results[0].execs)
|
||||
num_algorithms++;
|
||||
}
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
results[i].size = results[i].size / num_algorithms;
|
||||
/* Assign pointers */
|
||||
for (i = 0; i < NUM_ALGORITHMS; i++)
|
||||
{
|
||||
ee_u32 ctx;
|
||||
if ((1 << (ee_u32)i) & results[0].execs)
|
||||
{
|
||||
for (ctx = 0; ctx < MULTITHREAD; ctx++)
|
||||
results[ctx].memblock[i + 1]
|
||||
= (char *)(results[ctx].memblock[0]) + results[0].size * j;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
/* call inits */
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
{
|
||||
if (results[i].execs & ID_LIST)
|
||||
{
|
||||
results[i].list = core_list_init(
|
||||
results[0].size, results[i].memblock[1], results[i].seed1);
|
||||
}
|
||||
if (results[i].execs & ID_MATRIX)
|
||||
{
|
||||
core_init_matrix(results[0].size,
|
||||
results[i].memblock[2],
|
||||
(ee_s32)results[i].seed1
|
||||
| (((ee_s32)results[i].seed2) << 16),
|
||||
&(results[i].mat));
|
||||
}
|
||||
if (results[i].execs & ID_STATE)
|
||||
{
|
||||
core_init_state(
|
||||
results[0].size, results[i].seed1, results[i].memblock[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
results[i].list=core_list_init(results[0].size,results[i].memblock[1],results[i].seed1);
|
||||
|
||||
}
|
||||
if (results[i].execs & ID_MATRIX) {
|
||||
core_init_matrix(results[0].size, results[i].memblock[2], (ee_s32)results[i].seed1 | (((ee_s32)results[i].seed2) << 16), &(results[i].mat) );
|
||||
}
|
||||
if (results[i].execs & ID_STATE) {
|
||||
core_init_state(results[0].size,results[i].seed1,results[i].memblock[3]);
|
||||
}
|
||||
}
|
||||
|
||||
/*int foreverLoop = 1;
|
||||
secs_ret timing = 0;
|
||||
int timingInt;
|
||||
ee_printf("\nENTERING FOREVER WHILE LOOP\n");
|
||||
while(foreverLoop == 1)
|
||||
{
|
||||
start_time();
|
||||
//filler
|
||||
stop_time();
|
||||
timing += time_in_secs(get_time());
|
||||
timingInt = (int)timing;
|
||||
ee_printf("Timing is %d\n", timingInt);
|
||||
}/*
|
||||
|
||||
/* automatically determine number of iterations if not set */
|
||||
if (results[0].iterations==0) {
|
||||
secs_ret secs_passed=0;
|
||||
ee_u32 divisor;
|
||||
results[0].iterations=1;
|
||||
int iterationInc = 0;
|
||||
ee_printf("\n\nENTERING ITERATION WHILE LOOP\n");
|
||||
while (secs_passed < (secs_ret)1) {
|
||||
if(iterationInc != 0)
|
||||
{
|
||||
results[0].iterations++;
|
||||
}
|
||||
ee_printf("iterations is %d\n", results[0].iterations);
|
||||
start_time();
|
||||
iterate(&results[0]);
|
||||
stop_time();
|
||||
secs_passed = time_in_secs(get_time());
|
||||
int secs_passed_int = (int)secs_passed;
|
||||
ee_printf("secs passed is %d\n", secs_passed_int);
|
||||
iterationInc++;
|
||||
}
|
||||
ee_printf("LEAVING ITERATION WHILE LOOP!\n\n");
|
||||
/* now we know it executes for at least 1 sec, set actual run time at about 10 secs */
|
||||
divisor=(ee_u32)secs_passed;
|
||||
ee_printf("divisor is %lu\n", divisor);
|
||||
if (divisor==0) /* some machines cast float to int as 0 since this conversion is not defined by ANSI, but we know at least one second passed */
|
||||
divisor=1;
|
||||
results[0].iterations*=1+10/divisor;
|
||||
ee_printf("iterations is %d\n", results[0].iterations);
|
||||
}
|
||||
/* perform actual benchmark */
|
||||
ee_printf("Starting benchmark\n");
|
||||
start_time();
|
||||
#if (MULTITHREAD>1)
|
||||
if (default_num_contexts>MULTITHREAD) {
|
||||
default_num_contexts=MULTITHREAD;
|
||||
}
|
||||
for (i=0 ; i<default_num_contexts; i++) {
|
||||
results[i].iterations=results[0].iterations;
|
||||
results[i].execs=results[0].execs;
|
||||
core_start_parallel(&results[i]);
|
||||
}
|
||||
for (i=0 ; i<default_num_contexts; i++) {
|
||||
core_stop_parallel(&results[i]);
|
||||
}
|
||||
/* automatically determine number of iterations if not set */
|
||||
if (results[0].iterations == 0)
|
||||
{
|
||||
secs_ret secs_passed = 0;
|
||||
ee_u32 divisor;
|
||||
results[0].iterations = 1;
|
||||
while (secs_passed < (secs_ret)1)
|
||||
{
|
||||
results[0].iterations *= 10;
|
||||
start_time();
|
||||
iterate(&results[0]);
|
||||
stop_time();
|
||||
secs_passed = time_in_secs(get_time());
|
||||
}
|
||||
/* now we know it executes for at least 1 sec, set actual run time at
|
||||
* about 10 secs */
|
||||
divisor = (ee_u32)secs_passed;
|
||||
if (divisor == 0) /* some machines cast float to int as 0 since this
|
||||
conversion is not defined by ANSI, but we know at
|
||||
least one second passed */
|
||||
divisor = 1;
|
||||
results[0].iterations *= 1 + 10 / divisor;
|
||||
}
|
||||
/* perform actual benchmark */
|
||||
start_time();
|
||||
#if (MULTITHREAD > 1)
|
||||
if (default_num_contexts > MULTITHREAD)
|
||||
{
|
||||
default_num_contexts = MULTITHREAD;
|
||||
}
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
{
|
||||
results[i].iterations = results[0].iterations;
|
||||
results[i].execs = results[0].execs;
|
||||
core_start_parallel(&results[i]);
|
||||
}
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
{
|
||||
core_stop_parallel(&results[i]);
|
||||
}
|
||||
#else
|
||||
iterate(&results[0]);
|
||||
iterate(&results[0]);
|
||||
#endif
|
||||
stop_time();
|
||||
total_time=get_time();
|
||||
ee_printf("total time is %u\n", total_time);
|
||||
ee_printf("ending benchmark\n");
|
||||
/* get a function of the input to report */
|
||||
seedcrc=crc16(results[0].seed1,seedcrc);
|
||||
seedcrc=crc16(results[0].seed2,seedcrc);
|
||||
seedcrc=crc16(results[0].seed3,seedcrc);
|
||||
seedcrc=crc16(results[0].size,seedcrc);
|
||||
|
||||
switch (seedcrc) { /* test known output for common seeds */
|
||||
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
|
||||
known_id=0;
|
||||
ee_printf("6k performance run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per algorithm */
|
||||
known_id=1;
|
||||
ee_printf("6k validation run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm */
|
||||
known_id=2;
|
||||
ee_printf("Profile generation run parameters for coremark.\n");
|
||||
break;
|
||||
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
|
||||
known_id=3;
|
||||
ee_printf("2K performance run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per algorithm */
|
||||
known_id=4;
|
||||
ee_printf("2K validation run parameters for coremark.\n");
|
||||
break;
|
||||
default:
|
||||
total_errors=-1;
|
||||
break;
|
||||
}
|
||||
if (known_id>=0) {
|
||||
for (i=0 ; i<default_num_contexts; i++) {
|
||||
results[i].err=0;
|
||||
if ((results[i].execs & ID_LIST) &&
|
||||
(results[i].crclist!=list_known_crc[known_id])) {
|
||||
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",i,results[i].crclist,list_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
if ((results[i].execs & ID_MATRIX) &&
|
||||
(results[i].crcmatrix!=matrix_known_crc[known_id])) {
|
||||
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",i,results[i].crcmatrix,matrix_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
if ((results[i].execs & ID_STATE) &&
|
||||
(results[i].crcstate!=state_known_crc[known_id])) {
|
||||
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",i,results[i].crcstate,state_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
total_errors+=results[i].err;
|
||||
}
|
||||
}
|
||||
total_errors+=check_data_types();
|
||||
/* and report results */
|
||||
//ee_printf("CoreMark Size : %lu\n", (long unsigned) results[0].size);
|
||||
ee_printf("CoreMark Size : %lu\n", (long unsigned) results[0].size);
|
||||
ee_printf("Total ticks : %lu\n", (long unsigned) total_time);
|
||||
#if HAS_FLOAT
|
||||
ee_printf("Total time (secs): %f\n",time_in_secs(total_time));
|
||||
if (time_in_secs(total_time) > 0)
|
||||
ee_printf("Iterations/Sec : %f\n",default_num_contexts*results[0].iterations/time_in_secs(total_time));
|
||||
#else
|
||||
ee_printf("Total time (secs): %d\n,time_in_secs(total_time)");
|
||||
if (time_in_secs(total_time) > 0)
|
||||
ee_printf("Iterations/Sec : %d\n",default_num_contexts*results[0].iterations/time_in_secs(total_time));
|
||||
#endif
|
||||
if (time_in_secs(total_time) < 10) {
|
||||
ee_printf("ERROR! Must execute for at least 10 secs for a valid result!\n");
|
||||
total_errors++;
|
||||
}
|
||||
stop_time();
|
||||
total_time = get_time();
|
||||
/* get a function of the input to report */
|
||||
seedcrc = crc16(results[0].seed1, seedcrc);
|
||||
seedcrc = crc16(results[0].seed2, seedcrc);
|
||||
seedcrc = crc16(results[0].seed3, seedcrc);
|
||||
seedcrc = crc16(results[0].size, seedcrc);
|
||||
|
||||
ee_printf("Iterations : %lu\n", (long unsigned) default_num_contexts*results[0].iterations);
|
||||
ee_printf("Compiler version : %s\n",COMPILER_VERSION);
|
||||
ee_printf("Compiler flags : %s\n",COMPILER_FLAGS);
|
||||
#if (MULTITHREAD>1)
|
||||
ee_printf("Parallel %s : %d\n",PARALLEL_METHOD,default_num_contexts);
|
||||
#endif
|
||||
ee_printf("Memory location : %s\n",MEM_LOCATION);
|
||||
/* output for verification */
|
||||
ee_printf("seedcrc : 0x%04x\n",seedcrc);
|
||||
if (results[0].execs & ID_LIST)
|
||||
for (i=0 ; i<default_num_contexts; i++)
|
||||
ee_printf("[%d]crclist : 0x%04x\n",i,results[i].crclist);
|
||||
if (results[0].execs & ID_MATRIX)
|
||||
for (i=0 ; i<default_num_contexts; i++)
|
||||
ee_printf("[%d]crcmatrix : 0x%04x\n",i,results[i].crcmatrix);
|
||||
if (results[0].execs & ID_STATE)
|
||||
for (i=0 ; i<default_num_contexts; i++)
|
||||
ee_printf("[%d]crcstate : 0x%04x\n",i,results[i].crcstate);
|
||||
for (i=0 ; i<default_num_contexts; i++)
|
||||
ee_printf("[%d]crcfinal : 0x%04x\n",i,results[i].crc);
|
||||
if (total_errors==0) {
|
||||
ee_printf("Correct operation validated. See README.md for run and reporting rules.\n");
|
||||
switch (seedcrc)
|
||||
{ /* test known output for common seeds */
|
||||
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
|
||||
known_id = 0;
|
||||
ee_printf("6k performance run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per
|
||||
algorithm */
|
||||
known_id = 1;
|
||||
ee_printf("6k validation run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm
|
||||
*/
|
||||
known_id = 2;
|
||||
ee_printf("Profile generation run parameters for coremark.\n");
|
||||
break;
|
||||
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
|
||||
known_id = 3;
|
||||
ee_printf("2K performance run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per
|
||||
algorithm */
|
||||
known_id = 4;
|
||||
ee_printf("2K validation run parameters for coremark.\n");
|
||||
break;
|
||||
default:
|
||||
total_errors = -1;
|
||||
break;
|
||||
}
|
||||
if (known_id >= 0)
|
||||
{
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
{
|
||||
results[i].err = 0;
|
||||
if ((results[i].execs & ID_LIST)
|
||||
&& (results[i].crclist != list_known_crc[known_id]))
|
||||
{
|
||||
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",
|
||||
i,
|
||||
results[i].crclist,
|
||||
list_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
if ((results[i].execs & ID_MATRIX)
|
||||
&& (results[i].crcmatrix != matrix_known_crc[known_id]))
|
||||
{
|
||||
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",
|
||||
i,
|
||||
results[i].crcmatrix,
|
||||
matrix_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
if ((results[i].execs & ID_STATE)
|
||||
&& (results[i].crcstate != state_known_crc[known_id]))
|
||||
{
|
||||
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",
|
||||
i,
|
||||
results[i].crcstate,
|
||||
state_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
total_errors += results[i].err;
|
||||
}
|
||||
}
|
||||
total_errors += check_data_types();
|
||||
/* and report results */
|
||||
ee_printf("CoreMark Size : %lu\n", (long unsigned)results[0].size);
|
||||
ee_printf("Total ticks : %lu\n", (long unsigned)total_time);
|
||||
#if HAS_FLOAT
|
||||
if (known_id==3) {
|
||||
unsigned long long tmp = (unsigned long long) 1000.0*default_num_contexts*results[0].iterations/time_in_secs(total_time);
|
||||
ee_printf("Total time (secs): %f\n", time_in_secs(total_time));
|
||||
if (time_in_secs(total_time) > 0)
|
||||
ee_printf("Iterations/Sec : %f\n",
|
||||
default_num_contexts * results[0].iterations
|
||||
/ time_in_secs(total_time));
|
||||
#else
|
||||
ee_printf("Total time (secs): %d\n", time_in_secs(total_time));
|
||||
if (time_in_secs(total_time) > 0)
|
||||
ee_printf("Iterations/Sec : %d\n",
|
||||
default_num_contexts * results[0].iterations
|
||||
/ time_in_secs(total_time));
|
||||
#endif
|
||||
if (time_in_secs(total_time) < 10)
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR! Must execute for at least 10 secs for a valid result!\n");
|
||||
total_errors++;
|
||||
}
|
||||
|
||||
ee_printf("Iterations : %lu\n",
|
||||
(long unsigned)default_num_contexts * results[0].iterations);
|
||||
ee_printf("Compiler version : %s\n", COMPILER_VERSION);
|
||||
ee_printf("Compiler flags : %s\n", COMPILER_FLAGS);
|
||||
#if (MULTITHREAD > 1)
|
||||
ee_printf("Parallel %s : %d\n", PARALLEL_METHOD, default_num_contexts);
|
||||
#endif
|
||||
ee_printf("Memory location : %s\n", MEM_LOCATION);
|
||||
/* output for verification */
|
||||
ee_printf("seedcrc : 0x%04x\n", seedcrc);
|
||||
if (results[0].execs & ID_LIST)
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crclist : 0x%04x\n", i, results[i].crclist);
|
||||
if (results[0].execs & ID_MATRIX)
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crcmatrix : 0x%04x\n", i, results[i].crcmatrix);
|
||||
if (results[0].execs & ID_STATE)
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crcstate : 0x%04x\n", i, results[i].crcstate);
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crcfinal : 0x%04x\n", i, results[i].crc);
|
||||
if (total_errors == 0)
|
||||
{
|
||||
ee_printf(
|
||||
"Correct operation validated. See README.md for run and reporting "
|
||||
"rules.\n");
|
||||
#if HAS_FLOAT
|
||||
if (known_id == 3)
|
||||
{
|
||||
unsigned long long tmp = (unsigned long long) 1000.0*default_num_contexts*results[0].iterations/time_in_secs(total_time);
|
||||
secs_ret totalmsecs = time_in_secs(total_time);
|
||||
int totalmint = (int) totalmsecs;
|
||||
ee_printf("ELAPSED S: %d\n", totalmint);
|
||||
ee_printf("ELAPSED TIME: %d\n", totalmint);
|
||||
|
||||
ee_printf("CoreMark 1.0 : %d / %s %s\n",tmp,COMPILER_VERSION,COMPILER_FLAGS);
|
||||
ee_printf("CoreMark 1.0 : %d / %s %s",
|
||||
tmp,
|
||||
COMPILER_VERSION,
|
||||
COMPILER_FLAGS);
|
||||
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
|
||||
ee_printf(" / %s",MEM_LOCATION);
|
||||
ee_printf(" / %s", MEM_LOCATION);
|
||||
#else
|
||||
ee_printf(" / %s",mem_name[MEM_METHOD]);
|
||||
ee_printf(" / %s", mem_name[MEM_METHOD]);
|
||||
#endif
|
||||
|
||||
#if (MULTITHREAD>1)
|
||||
ee_printf(" / %d:%s",default_num_contexts,PARALLEL_METHOD);
|
||||
#if (MULTITHREAD > 1)
|
||||
ee_printf(" / %d:%s", default_num_contexts, PARALLEL_METHOD);
|
||||
#endif
|
||||
ee_printf("\n");
|
||||
}
|
||||
ee_printf("\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (total_errors>0)
|
||||
ee_printf("Errors detected\n");
|
||||
if (total_errors<0)
|
||||
ee_printf("Cannot validate operation for these seed values, please compare with results on a known platform.\n");
|
||||
}
|
||||
if (total_errors > 0)
|
||||
ee_printf("Errors detected\n");
|
||||
if (total_errors < 0)
|
||||
ee_printf(
|
||||
"Cannot validate operation for these seed values, please compare "
|
||||
"with results on a known platform.\n");
|
||||
|
||||
#if (MEM_METHOD==MEM_MALLOC)
|
||||
for (i=0 ; i<MULTITHREAD; i++)
|
||||
portable_free(results[i].memblock[0]);
|
||||
#if (MEM_METHOD == MEM_MALLOC)
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
portable_free(results[i].memblock[0]);
|
||||
#endif
|
||||
/* And last call any target specific code for finalizing */
|
||||
portable_fini(&(results[0].port));
|
||||
/* And last call any target specific code for finalizing */
|
||||
portable_fini(&(results[0].port));
|
||||
|
||||
return MAIN_RETURN_VAL;
|
||||
return MAIN_RETURN_VAL;
|
||||
}
|
||||
|
||||
//pls
|
||||
|
||||
|
||||
|
@ -19,290 +19,341 @@ Original Author: Shay Gal-on
|
||||
#include "coremark.h"
|
||||
/*
|
||||
Topic: Description
|
||||
Matrix manipulation benchmark
|
||||
|
||||
This very simple algorithm forms the basis of many more complex algorithms.
|
||||
|
||||
The tight inner loop is the focus of many optimizations (compiler as well as hardware based)
|
||||
and is thus relevant for embedded processing.
|
||||
|
||||
The total available data space will be divided to 3 parts:
|
||||
NxN Matrix A - initialized with small values (upper 3/4 of the bits all zero).
|
||||
NxN Matrix B - initialized with medium values (upper half of the bits all zero).
|
||||
NxN Matrix C - used for the result.
|
||||
Matrix manipulation benchmark
|
||||
|
||||
The actual values for A and B must be derived based on input that is not available at compile time.
|
||||
This very simple algorithm forms the basis of many more complex
|
||||
algorithms.
|
||||
|
||||
The tight inner loop is the focus of many optimizations (compiler as
|
||||
well as hardware based) and is thus relevant for embedded processing.
|
||||
|
||||
The total available data space will be divided to 3 parts:
|
||||
NxN Matrix A - initialized with small values (upper 3/4 of the bits all zero).
|
||||
NxN Matrix B - initialized with medium values (upper half of the bits all zero).
|
||||
NxN Matrix C - used for the result.
|
||||
|
||||
The actual values for A and B must be derived based on input that is not
|
||||
available at compile time.
|
||||
*/
|
||||
/* Forward declarations for the matrix kernels implemented in this file. */
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val);
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval);
void   matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val);
void   matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void   matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void   matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void   matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val);
|
||||
|
||||
/*
 * Helper macros for the matrix kernels.
 *
 * matrix_test_next(x)      - x plus one.
 * matrix_clip(x, y)        - keep the low 8 bits of x when y is non-zero,
 *                            otherwise keep the low 16 bits.
 * matrix_big(x)            - x with the bits of 0xf000 forced on.
 * bit_extract(x, from, to) - shift x right by `from` and keep the low `to`
 *                            bits of the result.
 *
 * Every argument is parenthesized so that an expression argument such as
 * `a & b` or `a << b` is evaluated as a unit before the macro's own operator
 * is applied.
 */
#define matrix_test_next(x)      ((x) + 1)
#define matrix_clip(x, y)        ((y) ? ((x) & 0x0ff) : ((x) & 0x0ffff))
#define matrix_big(x)            (0xf000 | (x))
#define bit_extract(x, from, to) (((x) >> (from)) & (~(0xffffffff << (to))))
|
||||
|
||||
#if CORE_DEBUG
|
||||
/* Debug helper: print the NxN input matrix A, one comma-separated row per
 * line, preceded by a header line carrying `name` and the dimensions. */
void
printmat(MATDAT *A, ee_u32 N, char *name)
{
    ee_u32 row, col;

    ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            /* separator goes before every element except the first */
            if (col > 0)
                ee_printf(",");
            ee_printf("%d", A[row * N + col]);
        }
        ee_printf("\n");
    }
}
|
||||
/* Debug helper: print the NxN result matrix C, one comma-separated row per
 * line, preceded by a header line carrying `name` and the dimensions. */
void
printmatC(MATRES *C, ee_u32 N, char *name)
{
    ee_u32 row, col;

    ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            /* separator goes before every element except the first */
            if (col > 0)
                ee_printf(",");
            ee_printf("%d", C[row * N + col]);
        }
        ee_printf("\n");
    }
}
|
||||
#endif
|
||||
/* Function: core_bench_matrix
|
||||
Benchmark function
|
||||
Benchmark function
|
||||
|
||||
Iterate <matrix_test> N times,
|
||||
changing the matrix values slightly by a constant amount each time.
|
||||
Iterate <matrix_test> N times,
|
||||
changing the matrix values slightly by a constant amount each time.
|
||||
*/
|
||||
/* Run one pass of <matrix_test> on the matrices described by `p`, using
 * `seed` as the constant operand, and fold its result into `crc`. */
ee_u16
core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc)
{
    ee_s16 pass_result = matrix_test(p->N, p->C, p->A, p->B, (MATDAT)seed);

    return crc16(pass_result, crc);
}
|
||||
|
||||
/* Function: matrix_test
|
||||
Perform matrix manipulation.
|
||||
Perform matrix manipulation.
|
||||
|
||||
Parameters:
|
||||
N - Dimensions of the matrix.
|
||||
C - memory for result matrix.
|
||||
A - input matrix
|
||||
B - operator matrix (not changed during operations)
|
||||
Parameters:
|
||||
N - Dimensions of the matrix.
|
||||
C - memory for result matrix.
|
||||
A - input matrix
|
||||
B - operator matrix (not changed during operations)
|
||||
|
||||
Returns:
|
||||
A CRC value that captures all results calculated in the function.
|
||||
In particular, crc of the value calculated on the result matrix
|
||||
after each step by <matrix_sum>.
|
||||
Returns:
|
||||
A CRC value that captures all results calculated in the function.
|
||||
In particular, crc of the value calculated on the result matrix
|
||||
after each step by <matrix_sum>.
|
||||
|
||||
Operation:
|
||||
|
||||
1 - Add a constant value to all elements of a matrix.
|
||||
2 - Multiply a matrix by a constant.
|
||||
3 - Multiply a matrix by a vector.
|
||||
4 - Multiply a matrix by a matrix.
|
||||
5 - Add a constant value to all elements of a matrix.
|
||||
Operation:
|
||||
|
||||
After the last step, matrix A is back to original contents.
|
||||
1 - Add a constant value to all elements of a matrix.
|
||||
2 - Multiply a matrix by a constant.
|
||||
3 - Multiply a matrix by a vector.
|
||||
4 - Multiply a matrix by a matrix.
|
||||
5 - Add a constant value to all elements of a matrix.
|
||||
|
||||
After the last step, matrix A is back to original contents.
|
||||
*/
|
||||
/* Apply the sequence of matrix operations to A (and B), summarising the
 * result matrix C with <matrix_sum> after every multiply step and chaining
 * those summaries through crc16. `val` is added to A up front and subtracted
 * again before returning. */
ee_s16
matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val)
{
    ee_u16 running_crc = 0;
    MATDAT limit       = matrix_big(val);

    /* make sure data changes */
    matrix_add_const(N, A, val);
#if CORE_DEBUG
    printmat(A, N, "matrix_add_const");
#endif

    /* matrix * constant */
    matrix_mul_const(N, C, A, val);
    running_crc = crc16(matrix_sum(N, C, limit), running_crc);
#if CORE_DEBUG
    printmatC(C, N, "matrix_mul_const");
#endif

    /* matrix * vector */
    matrix_mul_vect(N, C, A, B);
    running_crc = crc16(matrix_sum(N, C, limit), running_crc);
#if CORE_DEBUG
    printmatC(C, N, "matrix_mul_vect");
#endif

    /* matrix * matrix */
    matrix_mul_matrix(N, C, A, B);
    running_crc = crc16(matrix_sum(N, C, limit), running_crc);
#if CORE_DEBUG
    printmatC(C, N, "matrix_mul_matrix");
#endif

    /* matrix * matrix with bit extraction on each product */
    matrix_mul_matrix_bitextract(N, C, A, B);
    running_crc = crc16(matrix_sum(N, C, limit), running_crc);
#if CORE_DEBUG
    printmatC(C, N, "matrix_mul_matrix_bitextract");
#endif

    /* return matrix to initial value */
    matrix_add_const(N, A, -val);
    return running_crc;
}
|
||||
|
||||
/* Function: core_init_matrix
|
||||
Initialize the memory block for matrix benchmarking.
|
||||
Initialize the memory block for matrix benchmarking.
|
||||
|
||||
Parameters:
|
||||
blksize - Size of memory to be initialized.
|
||||
memblk - Pointer to memory block.
|
||||
seed - Actual values chosen depend on the seed parameter.
|
||||
p - pointers to <mat_params> containing initialized matrixes.
|
||||
Parameters:
|
||||
blksize - Size of memory to be initialized.
|
||||
memblk - Pointer to memory block.
|
||||
seed - Actual values chosen depend on the seed parameter.
|
||||
p - pointers to <mat_params> containing initialized matrixes.
|
||||
|
||||
Returns:
|
||||
Matrix dimensions.
|
||||
|
||||
Note:
|
||||
The seed parameter MUST be supplied from a source that cannot be determined at compile time
|
||||
Returns:
|
||||
Matrix dimensions.
|
||||
|
||||
Note:
|
||||
The seed parameter MUST be supplied from a source that cannot be
|
||||
determined at compile time
|
||||
*/
|
||||
/* Carve matrices A, B and C out of `memblk` and fill A and B with values
 * derived from `seed`.
 *
 * Parameters:
 *   blksize - size of the memory block available for the matrices.
 *   memblk  - start of that memory block.
 *   seed    - drives the generated values; a seed of 0 is replaced by 1.
 *   p       - receives the A, B, C pointers and the dimension N.
 *
 * Returns:
 *   The matrix dimension N (0 when blksize is 0).
 */
ee_u32
core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p)
{
    ee_u32  N = 0;
    MATDAT *A;
    MATDAT *B;
    ee_s32  order = 1;
    MATDAT  val;
    ee_u32  i = 0, j = 0;

    if (seed == 0)
        seed = 1;

    /* Stop at the first i for which i*i*2*4 reaches blksize; N is one less. */
    while (j < blksize)
    {
        i++;
        j = i * i * 2 * 4;
    }
    /* With blksize == 0 the loop never runs and i stays 0; an unguarded
     * `i - 1` would wrap to the largest ee_u32 value and the fill loops
     * below would write far outside memblk. */
    N = (i > 0) ? (i - 1) : 0;

    A = (MATDAT *)align_mem(memblk);
    B = A + N * N;

    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
        {
            seed = ((order * seed) % 65536);
            /* B takes the value clipped with selector 0 */
            val          = (seed + order);
            val          = matrix_clip(val, 0);
            B[i * N + j] = val;
            /* A takes the next value clipped with selector 1 */
            val          = (val + order);
            val          = matrix_clip(val, 1);
            A[i * N + j] = val;
            order++;
        }
    }

    p->A = A;
    p->B = B;
    p->C = (MATRES *)align_mem(B + N * N);
    p->N = N;
#if CORE_DEBUG
    printmat(A, N, "A");
    printmat(B, N, "B");
#endif
    return N;
}
|
||||
|
||||
/* Function: matrix_sum
|
||||
Calculate a function that depends on the values of elements in the matrix.
|
||||
Calculate a function that depends on the values of elements in the
|
||||
matrix.
|
||||
|
||||
For each element, accumulate into a temporary variable.
|
||||
|
||||
As long as this value is under the parameter clipval,
|
||||
add 1 to the result if the element is bigger than the previous.
|
||||
|
||||
Otherwise, reset the accumulator and add 10 to the result.
|
||||
For each element, accumulate into a temporary variable.
|
||||
|
||||
As long as this value is under the parameter clipval,
|
||||
add 1 to the result if the element is bigger than the previous.
|
||||
|
||||
Otherwise, reset the accumulator and add 10 to the result.
|
||||
*/
|
||||
/* Reduce the NxN matrix C to a single score.
 *
 * Elements are accumulated in row-major order. While the running total stays
 * at or below `clipval`, the score grows by 1 whenever the current element is
 * larger than the one before it; once the total exceeds `clipval`, the score
 * grows by 10 and the running total restarts from zero. */
ee_s16
matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval)
{
    MATRES running = 0, last = 0, elem = 0;
    ee_s16 score = 0;
    ee_u32 row, col;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            elem = C[row * N + col];
            running += elem;
            if (running <= clipval)
            {
                if (elem > last)
                    score += 1;
            }
            else
            {
                score += 10;
                running = 0;
            }
            last = elem;
        }
    }
    return score;
}
|
||||
|
||||
/* Function: matrix_mul_const
|
||||
Multiply a matrix by a constant.
|
||||
This could be used as a scaler for instance.
|
||||
Multiply a matrix by a constant.
|
||||
This could be used as a scaler for instance.
|
||||
*/
|
||||
/* Store A scaled by `val` into C, element by element; both operands are
 * widened to MATRES before multiplying. */
void
matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val)
{
    ee_u32 row, col;
    MATRES factor = (MATRES)val;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            ee_u32 idx = row * N + col;

            C[idx] = (MATRES)A[idx] * factor;
        }
    }
}
|
||||
|
||||
/* Function: matrix_add_const
|
||||
Add a constant value to all elements of a matrix.
|
||||
Add a constant value to all elements of a matrix.
|
||||
*/
|
||||
/* Add `val` in place to every element of the NxN matrix A. */
void
matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val)
{
    ee_u32 row, col;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            ee_u32 idx = row * N + col;

            A[idx] += val;
        }
    }
}
|
||||
|
||||
/* Function: matrix_mul_vect
|
||||
Multiply a matrix by a vector.
|
||||
This is common in many simple filters (e.g. fir where a vector of coefficients is applied to the matrix.)
|
||||
Multiply a matrix by a vector.
|
||||
This is common in many simple filters (e.g. fir where a vector of
|
||||
coefficients is applied to the matrix.)
|
||||
*/
|
||||
/* Multiply the NxN matrix A by the vector held in the first N elements of B,
 * writing the N results to C[0..N-1]. */
void
matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
{
    ee_u32 row, col;

    for (row = 0; row < N; row++)
    {
        /* dot product of row `row` of A with the vector */
        C[row] = 0;
        for (col = 0; col < N; col++)
        {
            ee_u32 src = row * N + col;

            C[row] += (MATRES)A[src] * (MATRES)B[col];
        }
    }
}
|
||||
|
||||
/* Function: matrix_mul_matrix
|
||||
Multiply a matrix by a matrix.
|
||||
Basic code is used in many algorithms, mostly with minor changes such as scaling.
|
||||
Multiply a matrix by a matrix.
|
||||
Basic code is used in many algorithms, mostly with minor changes such as
|
||||
scaling.
|
||||
*/
|
||||
/* C = A * B for NxN matrices stored in row-major order; operands are widened
 * to MATRES before each multiply. */
void
matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
{
    ee_u32 row, col, inner;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            ee_u32 dst = row * N + col;

            C[dst] = 0;
            for (inner = 0; inner < N; inner++)
            {
                C[dst] += (MATRES)A[row * N + inner]
                          * (MATRES)B[inner * N + col];
            }
        }
    }
}
|
||||
|
||||
/* Function: matrix_mul_matrix_bitextract
|
||||
Multiply a matrix by a matrix, and extract some bits from the result.
|
||||
Basic code is used in many algorithms, mostly with minor changes such as scaling.
|
||||
Multiply a matrix by a matrix, and extract some bits from the result.
|
||||
Basic code is used in many algorithms, mostly with minor changes such as
|
||||
scaling.
|
||||
*/
|
||||
/* Like a matrix multiply, but each A*B product is not summed directly:
 * two bit fields are extracted from it with bit_extract and their product is
 * what gets accumulated into C. */
void
matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
{
    ee_u32 row, col, inner;

    for (row = 0; row < N; row++)
    {
        for (col = 0; col < N; col++)
        {
            ee_u32 dst = row * N + col;

            C[dst] = 0;
            for (inner = 0; inner < N; inner++)
            {
                MATRES product
                    = (MATRES)A[row * N + inner] * (MATRES)B[inner * N + col];

                C[dst] += bit_extract(product, 2, 4)
                          * bit_extract(product, 5, 7);
            }
        }
    }
}
|
||||
|
@ -18,260 +18,313 @@ Original Author: Shay Gal-on
|
||||
|
||||
#include "coremark.h"
|
||||
/* local functions */
|
||||
enum CORE_STATE core_state_transition(ee_u8 **instr,
                                      ee_u32 *transition_count);
|
||||
|
||||
/*
|
||||
Topic: Description
|
||||
Simple state machines like this one are used in many embedded products.
|
||||
|
||||
For more complex state machines, sometimes a state transition table implementation is used instead,
|
||||
trading speed of direct coding for ease of maintenance.
|
||||
|
||||
Since the main goal of using a state machine in CoreMark is to exercise the switch/if behaviour,
|
||||
we are using a small Moore machine.
|
||||
|
||||
In particular, this machine tests type of string input,
|
||||
trying to determine whether the input is a number or something else.
|
||||
(see core_state.png).
|
||||
Simple state machines like this one are used in many embedded products.
|
||||
|
||||
For more complex state machines, sometimes a state transition table
|
||||
implementation is used instead, trading speed of direct coding for ease of
|
||||
maintenance.
|
||||
|
||||
Since the main goal of using a state machine in CoreMark is to exercise
|
||||
the switch/if behaviour, we are using a small Moore machine.
|
||||
|
||||
In particular, this machine tests type of string input,
|
||||
trying to determine whether the input is a number or something else.
|
||||
(see core_state.png).
|
||||
*/
|
||||
|
||||
/* Function: core_bench_state
|
||||
Benchmark function
|
||||
Benchmark function
|
||||
|
||||
Go over the input twice, once direct, and once after introducing some corruption.
|
||||
Go over the input twice, once direct, and once after introducing some
|
||||
corruption.
|
||||
*/
|
||||
/* Run the state machine over `memblock` twice and fold the collected
 * counters into `crc`.
 *
 * Pass 1 scans the input as-is. Every `step`-th byte that is not a ',' is
 * then XORed with seed1 and the input is scanned again. Finally the same
 * bytes are XORed with seed2, which restores the original contents when
 * seed1 and seed2 are equal. */
ee_u16
core_bench_state(ee_u32 blksize,
                 ee_u8 *memblock,
                 ee_s16 seed1,
                 ee_s16 seed2,
                 ee_s16 step,
                 ee_u16 crc)
{
    ee_u32 final_counts[NUM_CORE_STATES];
    ee_u32 track_counts[NUM_CORE_STATES];
    ee_u8 *cursor;
    ee_u8 *block_end = memblock + blksize;
    ee_u32 s;

#if CORE_DEBUG
    ee_printf("State Bench: %d,%d,%d,%04x\n", seed1, seed2, step, crc);
#endif
    for (s = 0; s < NUM_CORE_STATES; s++)
    {
        track_counts[s] = 0;
        final_counts[s] = 0;
    }

    /* run the state machine over the input */
    cursor = memblock;
    while (*cursor != 0)
    {
        enum CORE_STATE fstate = core_state_transition(&cursor, track_counts);
        final_counts[fstate]++;
#if CORE_DEBUG
        ee_printf("%d,", fstate);
#endif
    }
#if CORE_DEBUG
    ee_printf("\n");
#endif

    /* insert some corruption */
    for (cursor = memblock; cursor < block_end; cursor += step)
    {
        if (*cursor != ',')
            *cursor ^= (ee_u8)seed1;
    }

    /* run the state machine over the input again */
    cursor = memblock;
    while (*cursor != 0)
    {
        enum CORE_STATE fstate = core_state_transition(&cursor, track_counts);
        final_counts[fstate]++;
#if CORE_DEBUG
        ee_printf("%d,", fstate);
#endif
    }
#if CORE_DEBUG
    ee_printf("\n");
#endif

    /* undo corruption if seed1 and seed2 are equal */
    for (cursor = memblock; cursor < block_end; cursor += step)
    {
        if (*cursor != ',')
            *cursor ^= (ee_u8)seed2;
    }

    /* end timing */
    for (s = 0; s < NUM_CORE_STATES; s++)
    {
        crc = crcu32(final_counts[s], crc);
        crc = crcu32(track_counts[s], crc);
    }
    return crc;
}
|
||||
|
||||
/* Default initialization patterns */
|
||||
static ee_u8 *intpat[4] ={(ee_u8 *)"5012",(ee_u8 *)"1234",(ee_u8 *)"-874",(ee_u8 *)"+122"};
|
||||
static ee_u8 *floatpat[4]={(ee_u8 *)"35.54400",(ee_u8 *)".1234500",(ee_u8 *)"-110.700",(ee_u8 *)"+0.64400"};
|
||||
static ee_u8 *scipat[4] ={(ee_u8 *)"5.500e+3",(ee_u8 *)"-.123e-2",(ee_u8 *)"-87e+832",(ee_u8 *)"+0.6e-12"};
|
||||
static ee_u8 *errpat[4] ={(ee_u8 *)"T0.3e-1F",(ee_u8 *)"-T.T++Tq",(ee_u8 *)"1T3.4e4z",(ee_u8 *)"34.0e-T^"};
|
||||
static ee_u8 *intpat[4]
|
||||
= { (ee_u8 *)"5012", (ee_u8 *)"1234", (ee_u8 *)"-874", (ee_u8 *)"+122" };
|
||||
static ee_u8 *floatpat[4] = { (ee_u8 *)"35.54400",
|
||||
(ee_u8 *)".1234500",
|
||||
(ee_u8 *)"-110.700",
|
||||
(ee_u8 *)"+0.64400" };
|
||||
static ee_u8 *scipat[4] = { (ee_u8 *)"5.500e+3",
|
||||
(ee_u8 *)"-.123e-2",
|
||||
(ee_u8 *)"-87e+832",
|
||||
(ee_u8 *)"+0.6e-12" };
|
||||
static ee_u8 *errpat[4] = { (ee_u8 *)"T0.3e-1F",
|
||||
(ee_u8 *)"-T.T++Tq",
|
||||
(ee_u8 *)"1T3.4e4z",
|
||||
(ee_u8 *)"34.0e-T^" };
|
||||
|
||||
/* Function: core_init_state
|
||||
Initialize the input data for the state machine.
|
||||
Initialize the input data for the state machine.
|
||||
|
||||
Populate the input with several predetermined strings, interspersed.
|
||||
Actual patterns chosen depend on the seed parameter.
|
||||
|
||||
Note:
|
||||
The seed parameter MUST be supplied from a source that cannot be determined at compile time
|
||||
Populate the input with several predetermined strings, interspersed.
|
||||
Actual patterns chosen depend on the seed parameter.
|
||||
|
||||
Note:
|
||||
The seed parameter MUST be supplied from a source that cannot be
|
||||
determined at compile time
|
||||
*/
|
||||
void
core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p)
{
    ee_u32 filled  = 0; /* number of bytes of p written so far */
    ee_u32 patlen  = 0; /* length of the pending pattern; 0 until one is picked */
    ee_u32 k;
    ee_u8 *pattern = 0; /* pending pattern, copied on the next pass */
#if CORE_DEBUG
    ee_u8 *start = p;
    ee_printf("State: %d,%d\n", size, seed);
#endif
    /* Reserve the final byte so the buffer always ends in a 0. */
    size--;
    while ((filled + patlen + 1) < size)
    {
        /* Emit the pattern chosen on the previous pass, followed by ','. */
        if (patlen > 0)
        {
            for (k = 0; k < patlen; k++)
            {
                p[filled + k] = pattern[k];
            }
            p[filled + patlen] = ',';
            filled += patlen + 1;
        }
        /* Choose the next pattern: the low 3 bits of the seed select the
           category, the next 2 bits select the entry within the table. */
        seed++;
        {
            int category = seed & 0x7;
            int entry    = (seed >> 3) & 0x3;

            if (category <= 2) /* int */
            {
                pattern = intpat[entry];
                patlen  = 4;
            }
            else if (category <= 4) /* float */
            {
                pattern = floatpat[entry];
                patlen  = 8;
            }
            else if (category <= 6) /* scientific */
            {
                pattern = scipat[entry];
                patlen  = 8;
            }
            else /* invalid */
            {
                pattern = errpat[entry];
                patlen  = 8;
            }
        }
    }
    /* A pattern that no longer fits is dropped; fill the rest with 0. */
    size++;
    for (; filled < size; filled++)
    {
        p[filled] = 0;
    }
#if CORE_DEBUG
    ee_printf("State Input: %s\n", start);
#endif
}
|
||||
|
||||
static ee_u8 ee_isdigit(ee_u8 c) {
|
||||
ee_u8 retval;
|
||||
retval = ((c>='0') & (c<='9')) ? 1 : 0;
|
||||
return retval;
|
||||
static ee_u8
|
||||
ee_isdigit(ee_u8 c)
|
||||
{
|
||||
ee_u8 retval;
|
||||
retval = ((c >= '0') & (c <= '9')) ? 1 : 0;
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* Function: core_state_transition
|
||||
Actual state machine.
|
||||
Actual state machine.
|
||||
|
||||
The state machine will continue scanning until either:
|
||||
1 - an invalid input is detected.
|
||||
2 - a valid number has been detected.
|
||||
|
||||
The input pointer is updated to point to the end of the token, and the end state is returned (either specific format determined or invalid).
|
||||
The state machine will continue scanning until either:
|
||||
1 - an invalid input is detected.
|
||||
2 - a valid number has been detected.
|
||||
|
||||
The input pointer is updated to point to the end of the token, and the
|
||||
end state is returned (either specific format determined or invalid).
|
||||
*/
|
||||
|
||||
enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count) {
|
||||
ee_u8 *str=*instr;
|
||||
ee_u8 NEXT_SYMBOL;
|
||||
enum CORE_STATE state=CORE_START;
|
||||
for( ; *str && state != CORE_INVALID; str++ ) {
|
||||
NEXT_SYMBOL = *str;
|
||||
if (NEXT_SYMBOL==',') /* end of this input */ {
|
||||
str++;
|
||||
break;
|
||||
}
|
||||
switch(state) {
|
||||
case CORE_START:
|
||||
if(ee_isdigit(NEXT_SYMBOL)) {
|
||||
state = CORE_INT;
|
||||
}
|
||||
else if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) {
|
||||
state = CORE_S1;
|
||||
}
|
||||
else if( NEXT_SYMBOL == '.' ) {
|
||||
state = CORE_FLOAT;
|
||||
}
|
||||
else {
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INVALID]++;
|
||||
}
|
||||
transition_count[CORE_START]++;
|
||||
break;
|
||||
case CORE_S1:
|
||||
if(ee_isdigit(NEXT_SYMBOL)) {
|
||||
state = CORE_INT;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
else if( NEXT_SYMBOL == '.' ) {
|
||||
state = CORE_FLOAT;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
else {
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
break;
|
||||
case CORE_INT:
|
||||
if( NEXT_SYMBOL == '.' ) {
|
||||
state = CORE_FLOAT;
|
||||
transition_count[CORE_INT]++;
|
||||
}
|
||||
else if(!ee_isdigit(NEXT_SYMBOL)) {
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_FLOAT:
|
||||
if( NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e' ) {
|
||||
state = CORE_S2;
|
||||
transition_count[CORE_FLOAT]++;
|
||||
}
|
||||
else if(!ee_isdigit(NEXT_SYMBOL)) {
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_FLOAT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_S2:
|
||||
if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) {
|
||||
state = CORE_EXPONENT;
|
||||
transition_count[CORE_S2]++;
|
||||
}
|
||||
else {
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_S2]++;
|
||||
}
|
||||
break;
|
||||
case CORE_EXPONENT:
|
||||
if(ee_isdigit(NEXT_SYMBOL)) {
|
||||
state = CORE_SCIENTIFIC;
|
||||
transition_count[CORE_EXPONENT]++;
|
||||
}
|
||||
else {
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_EXPONENT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_SCIENTIFIC:
|
||||
if(!ee_isdigit(NEXT_SYMBOL)) {
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INVALID]++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
*instr=str;
|
||||
return state;
|
||||
enum CORE_STATE
|
||||
core_state_transition(ee_u8 **instr, ee_u32 *transition_count)
|
||||
{
|
||||
ee_u8 * str = *instr;
|
||||
ee_u8 NEXT_SYMBOL;
|
||||
enum CORE_STATE state = CORE_START;
|
||||
for (; *str && state != CORE_INVALID; str++)
|
||||
{
|
||||
NEXT_SYMBOL = *str;
|
||||
if (NEXT_SYMBOL == ',') /* end of this input */
|
||||
{
|
||||
str++;
|
||||
break;
|
||||
}
|
||||
switch (state)
|
||||
{
|
||||
case CORE_START:
|
||||
if (ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INT;
|
||||
}
|
||||
else if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
|
||||
{
|
||||
state = CORE_S1;
|
||||
}
|
||||
else if (NEXT_SYMBOL == '.')
|
||||
{
|
||||
state = CORE_FLOAT;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INVALID]++;
|
||||
}
|
||||
transition_count[CORE_START]++;
|
||||
break;
|
||||
case CORE_S1:
|
||||
if (ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INT;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
else if (NEXT_SYMBOL == '.')
|
||||
{
|
||||
state = CORE_FLOAT;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
break;
|
||||
case CORE_INT:
|
||||
if (NEXT_SYMBOL == '.')
|
||||
{
|
||||
state = CORE_FLOAT;
|
||||
transition_count[CORE_INT]++;
|
||||
}
|
||||
else if (!ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_FLOAT:
|
||||
if (NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e')
|
||||
{
|
||||
state = CORE_S2;
|
||||
transition_count[CORE_FLOAT]++;
|
||||
}
|
||||
else if (!ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_FLOAT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_S2:
|
||||
if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
|
||||
{
|
||||
state = CORE_EXPONENT;
|
||||
transition_count[CORE_S2]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_S2]++;
|
||||
}
|
||||
break;
|
||||
case CORE_EXPONENT:
|
||||
if (ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_SCIENTIFIC;
|
||||
transition_count[CORE_EXPONENT]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_EXPONENT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_SCIENTIFIC:
|
||||
if (!ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INVALID]++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
*instr = str;
|
||||
return state;
|
||||
}
|
||||
|
@ -18,193 +18,232 @@ Original Author: Shay Gal-on
|
||||
|
||||
#include "coremark.h"
|
||||
/* Function: get_seed
|
||||
Get a value that cannot be determined at compile time.
|
||||
Get a value that cannot be determined at compile time.
|
||||
|
||||
Since different embedded systems and compilers are used, 3 different methods are provided:
|
||||
1 - Using a volatile variable. This method is only valid if the compiler is forced to generate code that
|
||||
reads the value of a volatile variable from memory at run time.
|
||||
Please note, if using this method, you would need to modify core_portme.c to generate training profile.
|
||||
2 - Command line arguments. This is the preferred method if command line arguments are supported.
|
||||
3 - System function. If none of the first 2 methods is available on the platform,
|
||||
a system function which is not a stub can be used.
|
||||
|
||||
e.g. read the value on GPIO pins connected to switches, or invoke special simulator functions.
|
||||
Since different embedded systems and compilers are used, 3 different
|
||||
methods are provided: 1 - Using a volatile variable. This method is only
|
||||
valid if the compiler is forced to generate code that reads the value of a
|
||||
volatile variable from memory at run time. Please note, if using this method,
|
||||
you would need to modify core_portme.c to generate training profile. 2 -
|
||||
Command line arguments. This is the preferred method if command line
|
||||
arguments are supported. 3 - System function. If none of the first 2 methods
|
||||
is available on the platform, a system function which is not a stub can be
|
||||
used.
|
||||
|
||||
e.g. read the value on GPIO pins connected to switches, or invoke
|
||||
special simulator functions.
|
||||
*/
|
||||
#if (SEED_METHOD==SEED_VOLATILE)
|
||||
extern volatile ee_s32 seed1_volatile;
|
||||
extern volatile ee_s32 seed2_volatile;
|
||||
extern volatile ee_s32 seed3_volatile;
|
||||
extern volatile ee_s32 seed4_volatile;
|
||||
extern volatile ee_s32 seed5_volatile;
|
||||
ee_s32 get_seed_32(int i) {
|
||||
ee_s32 retval;
|
||||
switch (i) {
|
||||
case 1:
|
||||
retval=seed1_volatile;
|
||||
break;
|
||||
case 2:
|
||||
retval=seed2_volatile;
|
||||
break;
|
||||
case 3:
|
||||
retval=seed3_volatile;
|
||||
break;
|
||||
case 4:
|
||||
retval=seed4_volatile;
|
||||
break;
|
||||
case 5:
|
||||
retval=seed5_volatile;
|
||||
break;
|
||||
default:
|
||||
retval=0;
|
||||
break;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
#elif (SEED_METHOD==SEED_ARG)
|
||||
ee_s32 parseval(char *valstring) {
|
||||
ee_s32 retval=0;
|
||||
ee_s32 neg=1;
|
||||
int hexmode=0;
|
||||
if (*valstring == '-') {
|
||||
neg=-1;
|
||||
valstring++;
|
||||
}
|
||||
if ((valstring[0] == '0') && (valstring[1] == 'x')) {
|
||||
hexmode=1;
|
||||
valstring+=2;
|
||||
}
|
||||
/* first look for digits */
|
||||
if (hexmode) {
|
||||
while (((*valstring >= '0') && (*valstring <= '9')) || ((*valstring >= 'a') && (*valstring <= 'f'))) {
|
||||
ee_s32 digit=*valstring-'0';
|
||||
if (digit>9)
|
||||
digit=10+*valstring-'a';
|
||||
retval*=16;
|
||||
retval+=digit;
|
||||
valstring++;
|
||||
}
|
||||
} else {
|
||||
while ((*valstring >= '0') && (*valstring <= '9')) {
|
||||
ee_s32 digit=*valstring-'0';
|
||||
retval*=10;
|
||||
retval+=digit;
|
||||
valstring++;
|
||||
}
|
||||
}
|
||||
/* now add qualifiers */
|
||||
if (*valstring=='K')
|
||||
retval*=1024;
|
||||
if (*valstring=='M')
|
||||
retval*=1024*1024;
|
||||
#if (SEED_METHOD == SEED_VOLATILE)
|
||||
extern volatile ee_s32 seed1_volatile;
|
||||
extern volatile ee_s32 seed2_volatile;
|
||||
extern volatile ee_s32 seed3_volatile;
|
||||
extern volatile ee_s32 seed4_volatile;
|
||||
extern volatile ee_s32 seed5_volatile;
|
||||
ee_s32
|
||||
get_seed_32(int i)
|
||||
{
|
||||
ee_s32 retval;
|
||||
switch (i)
|
||||
{
|
||||
case 1:
|
||||
retval = seed1_volatile;
|
||||
break;
|
||||
case 2:
|
||||
retval = seed2_volatile;
|
||||
break;
|
||||
case 3:
|
||||
retval = seed3_volatile;
|
||||
break;
|
||||
case 4:
|
||||
retval = seed4_volatile;
|
||||
break;
|
||||
case 5:
|
||||
retval = seed5_volatile;
|
||||
break;
|
||||
default:
|
||||
retval = 0;
|
||||
break;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
#elif (SEED_METHOD == SEED_ARG)
|
||||
ee_s32
|
||||
parseval(char *valstring)
|
||||
{
|
||||
ee_s32 retval = 0;
|
||||
ee_s32 neg = 1;
|
||||
int hexmode = 0;
|
||||
if (*valstring == '-')
|
||||
{
|
||||
neg = -1;
|
||||
valstring++;
|
||||
}
|
||||
if ((valstring[0] == '0') && (valstring[1] == 'x'))
|
||||
{
|
||||
hexmode = 1;
|
||||
valstring += 2;
|
||||
}
|
||||
/* first look for digits */
|
||||
if (hexmode)
|
||||
{
|
||||
while (((*valstring >= '0') && (*valstring <= '9'))
|
||||
|| ((*valstring >= 'a') && (*valstring <= 'f')))
|
||||
{
|
||||
ee_s32 digit = *valstring - '0';
|
||||
if (digit > 9)
|
||||
digit = 10 + *valstring - 'a';
|
||||
retval *= 16;
|
||||
retval += digit;
|
||||
valstring++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while ((*valstring >= '0') && (*valstring <= '9'))
|
||||
{
|
||||
ee_s32 digit = *valstring - '0';
|
||||
retval *= 10;
|
||||
retval += digit;
|
||||
valstring++;
|
||||
}
|
||||
}
|
||||
/* now add qualifiers */
|
||||
if (*valstring == 'K')
|
||||
retval *= 1024;
|
||||
if (*valstring == 'M')
|
||||
retval *= 1024 * 1024;
|
||||
|
||||
retval*=neg;
|
||||
return retval;
|
||||
retval *= neg;
|
||||
return retval;
|
||||
}
|
||||
|
||||
ee_s32 get_seed_args(int i, int argc, char *argv[]) {
|
||||
if (argc>i)
|
||||
return parseval(argv[i]);
|
||||
return 0;
|
||||
ee_s32
|
||||
get_seed_args(int i, int argc, char *argv[])
|
||||
{
|
||||
if (argc > i)
|
||||
return parseval(argv[i]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif (SEED_METHOD==SEED_FUNC)
|
||||
/* If using OS based function, you must define and implement the functions below in core_portme.h and core_portme.c ! */
|
||||
ee_s32 get_seed_32(int i) {
|
||||
ee_s32 retval;
|
||||
switch (i) {
|
||||
case 1:
|
||||
retval=portme_sys1();
|
||||
break;
|
||||
case 2:
|
||||
retval=portme_sys2();
|
||||
break;
|
||||
case 3:
|
||||
retval=portme_sys3();
|
||||
break;
|
||||
case 4:
|
||||
retval=portme_sys4();
|
||||
break;
|
||||
case 5:
|
||||
retval=portme_sys5();
|
||||
break;
|
||||
default:
|
||||
retval=0;
|
||||
break;
|
||||
}
|
||||
return retval;
|
||||
#elif (SEED_METHOD == SEED_FUNC)
|
||||
/* If using OS based function, you must define and implement the functions below
|
||||
* in core_portme.h and core_portme.c ! */
|
||||
ee_s32
|
||||
get_seed_32(int i)
|
||||
{
|
||||
ee_s32 retval;
|
||||
switch (i)
|
||||
{
|
||||
case 1:
|
||||
retval = portme_sys1();
|
||||
break;
|
||||
case 2:
|
||||
retval = portme_sys2();
|
||||
break;
|
||||
case 3:
|
||||
retval = portme_sys3();
|
||||
break;
|
||||
case 4:
|
||||
retval = portme_sys4();
|
||||
break;
|
||||
case 5:
|
||||
retval = portme_sys5();
|
||||
break;
|
||||
default:
|
||||
retval = 0;
|
||||
break;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Function: crc*
|
||||
Service functions to calculate 16b CRC code.
|
||||
Service functions to calculate 16b CRC code.
|
||||
|
||||
*/
|
||||
/* Fold one byte into a running 16b CRC, least significant bit first, and
   return the updated CRC. */
ee_u16
crcu8(ee_u8 data, ee_u16 crc)
{
    ee_u8 bit;

    for (bit = 0; bit < 8; bit++)
    {
        /* feedback is 1 when the low bits of data and crc differ */
        ee_u8 feedback = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1));
        data >>= 1;

        if (feedback == 1)
        {
            crc ^= 0x4002;
        }
        crc >>= 1;
        /* the feedback bit becomes the new top bit of the CRC */
        if (feedback == 1)
        {
            crc |= 0x8000;
        }
        else
        {
            crc &= 0x7fff;
        }
    }
    return crc;
}
|
||||
ee_u16 crcu16(ee_u16 newval, ee_u16 crc) {
|
||||
crc=crcu8( (ee_u8) (newval) ,crc);
|
||||
crc=crcu8( (ee_u8) ((newval)>>8) ,crc);
|
||||
return crc;
|
||||
return crc;
|
||||
}
|
||||
ee_u16 crcu32(ee_u32 newval, ee_u16 crc) {
|
||||
crc=crc16((ee_s16) newval ,crc);
|
||||
crc=crc16((ee_s16) (newval>>16) ,crc);
|
||||
return crc;
|
||||
ee_u16
|
||||
crcu16(ee_u16 newval, ee_u16 crc)
|
||||
{
|
||||
crc = crcu8((ee_u8)(newval), crc);
|
||||
crc = crcu8((ee_u8)((newval) >> 8), crc);
|
||||
return crc;
|
||||
}
|
||||
ee_u16 crc16(ee_s16 newval, ee_u16 crc) {
|
||||
return crcu16((ee_u16)newval, crc);
|
||||
ee_u16
|
||||
crcu32(ee_u32 newval, ee_u16 crc)
|
||||
{
|
||||
crc = crc16((ee_s16)newval, crc);
|
||||
crc = crc16((ee_s16)(newval >> 16), crc);
|
||||
return crc;
|
||||
}
|
||||
ee_u16
|
||||
crc16(ee_s16 newval, ee_u16 crc)
|
||||
{
|
||||
return crcu16((ee_u16)newval, crc);
|
||||
}
|
||||
|
||||
ee_u8 check_data_types() {
|
||||
ee_u8 retval=0;
|
||||
if (sizeof(ee_u8) != 1) {
|
||||
ee_printf("ERROR: ee_u8 is not an 8b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_u16) != 2) {
|
||||
ee_printf("ERROR: ee_u16 is not a 16b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_s16) != 2) {
|
||||
ee_printf("ERROR: ee_s16 is not a 16b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_s32) != 4) {
|
||||
ee_printf("ERROR: ee_s32 is not a 32b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_u32) != 4) {
|
||||
ee_printf("ERROR: ee_u32 is not a 32b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_ptr_int) != sizeof(int *)) {
|
||||
ee_printf("ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n");
|
||||
retval++;
|
||||
}
|
||||
if (retval>0) {
|
||||
ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n");
|
||||
}
|
||||
return retval;
|
||||
ee_u8
|
||||
check_data_types()
|
||||
{
|
||||
ee_u8 retval = 0;
|
||||
if (sizeof(ee_u8) != 1)
|
||||
{
|
||||
ee_printf("ERROR: ee_u8 is not an 8b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_u16) != 2)
|
||||
{
|
||||
ee_printf("ERROR: ee_u16 is not a 16b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_s16) != 2)
|
||||
{
|
||||
ee_printf("ERROR: ee_s16 is not a 16b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_s32) != 4)
|
||||
{
|
||||
ee_printf("ERROR: ee_s32 is not a 32b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_u32) != 4)
|
||||
{
|
||||
ee_printf("ERROR: ee_u32 is not a 32b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_ptr_int) != sizeof(int *))
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n");
|
||||
retval++;
|
||||
}
|
||||
if (retval > 0)
|
||||
{
|
||||
ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n");
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
@ -17,23 +17,23 @@ Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
/* Topic: Description
|
||||
This file contains declarations of the various benchmark functions.
|
||||
This file contains declarations of the various benchmark functions.
|
||||
*/
|
||||
|
||||
/* Configuration: TOTAL_DATA_SIZE
|
||||
Define total size for data algorithms will operate on
|
||||
Define total size for data algorithms will operate on
|
||||
*/
|
||||
#ifndef TOTAL_DATA_SIZE
|
||||
#define TOTAL_DATA_SIZE 2*1000
|
||||
#ifndef TOTAL_DATA_SIZE
/* Default data size. Parenthesized so the value is preserved when the
   macro is used inside a larger expression (e.g. x / TOTAL_DATA_SIZE). */
#define TOTAL_DATA_SIZE (2 * 1000)
#endif
|
||||
|
||||
#define SEED_ARG 0
|
||||
#define SEED_FUNC 1
|
||||
#define SEED_ARG 0
|
||||
#define SEED_FUNC 1
|
||||
#define SEED_VOLATILE 2
|
||||
|
||||
#define MEM_STATIC 0
|
||||
#define MEM_MALLOC 1
|
||||
#define MEM_STACK 2
|
||||
#define MEM_STACK 2
|
||||
|
||||
#include "core_portme.h"
|
||||
|
||||
@ -48,8 +48,8 @@ Original Author: Shay Gal-on
|
||||
void *iterate(void *pres);
|
||||
|
||||
/* Typedef: secs_ret
|
||||
For machines that have floating point support, get number of seconds as a double.
|
||||
Otherwise an unsigned int.
|
||||
For machines that have floating point support, get number of seconds as
|
||||
a double. Otherwise an unsigned int.
|
||||
*/
|
||||
#if HAS_FLOAT
|
||||
typedef double secs_ret;
|
||||
@ -58,47 +58,48 @@ typedef ee_u32 secs_ret;
|
||||
#endif
|
||||
|
||||
#if MAIN_HAS_NORETURN
|
||||
#define MAIN_RETURN_VAL
|
||||
#define MAIN_RETURN_VAL
|
||||
#define MAIN_RETURN_TYPE void
|
||||
#else
|
||||
#define MAIN_RETURN_VAL 0
|
||||
#define MAIN_RETURN_VAL 0
|
||||
#define MAIN_RETURN_TYPE int
|
||||
#endif
|
||||
#endif
|
||||
|
||||
void start_time(void);
|
||||
void stop_time(void);
|
||||
void start_time(void);
|
||||
void stop_time(void);
|
||||
CORE_TICKS get_time(void);
|
||||
secs_ret time_in_secs(CORE_TICKS ticks);
|
||||
secs_ret time_in_secs(CORE_TICKS ticks);
|
||||
|
||||
/* Misc useful functions */
|
||||
ee_u16 crcu8(ee_u8 data, ee_u16 crc);
|
||||
ee_u16 crc16(ee_s16 newval, ee_u16 crc);
|
||||
ee_u16 crcu16(ee_u16 newval, ee_u16 crc);
|
||||
ee_u16 crcu32(ee_u32 newval, ee_u16 crc);
|
||||
ee_u8 check_data_types();
|
||||
void *portable_malloc(ee_size_t size);
|
||||
void portable_free(void *p);
|
||||
ee_u8 check_data_types(void);
|
||||
void * portable_malloc(ee_size_t size);
|
||||
void portable_free(void *p);
|
||||
ee_s32 parseval(char *valstring);
|
||||
|
||||
/* Algorithm IDS */
|
||||
#define ID_LIST (1<<0)
|
||||
#define ID_MATRIX (1<<1)
|
||||
#define ID_STATE (1<<2)
|
||||
#define ALL_ALGORITHMS_MASK (ID_LIST|ID_MATRIX|ID_STATE)
|
||||
#define NUM_ALGORITHMS 3
|
||||
#define ID_LIST (1 << 0)
|
||||
#define ID_MATRIX (1 << 1)
|
||||
#define ID_STATE (1 << 2)
|
||||
#define ALL_ALGORITHMS_MASK (ID_LIST | ID_MATRIX | ID_STATE)
|
||||
#define NUM_ALGORITHMS 3
|
||||
|
||||
/* list data structures */
|
||||
typedef struct list_data_s {
|
||||
ee_s16 data16;
|
||||
ee_s16 idx;
|
||||
typedef struct list_data_s
|
||||
{
|
||||
ee_s16 data16;
|
||||
ee_s16 idx;
|
||||
} list_data;
|
||||
|
||||
typedef struct list_head_s {
|
||||
struct list_head_s *next;
|
||||
struct list_data_s *info;
|
||||
typedef struct list_head_s
|
||||
{
|
||||
struct list_head_s *next;
|
||||
struct list_data_s *info;
|
||||
} list_head;
|
||||
|
||||
|
||||
/*matrix benchmark related stuff */
|
||||
#define MATDAT_INT 1
|
||||
#if MATDAT_INT
|
||||
@ -109,66 +110,74 @@ typedef ee_f16 MATDAT;
|
||||
typedef ee_f32 MATRES;
|
||||
#endif
|
||||
|
||||
typedef struct MAT_PARAMS_S {
|
||||
int N;
|
||||
MATDAT *A;
|
||||
MATDAT *B;
|
||||
MATRES *C;
|
||||
typedef struct MAT_PARAMS_S
|
||||
{
|
||||
int N;
|
||||
MATDAT *A;
|
||||
MATDAT *B;
|
||||
MATRES *C;
|
||||
} mat_params;
|
||||
|
||||
/* state machine related stuff */
|
||||
/* List of all the possible states for the FSM */
|
||||
typedef enum CORE_STATE {
|
||||
CORE_START=0,
|
||||
CORE_INVALID,
|
||||
CORE_S1,
|
||||
CORE_S2,
|
||||
CORE_INT,
|
||||
CORE_FLOAT,
|
||||
CORE_EXPONENT,
|
||||
CORE_SCIENTIFIC,
|
||||
NUM_CORE_STATES
|
||||
} core_state_e ;
|
||||
typedef enum CORE_STATE
|
||||
{
|
||||
CORE_START = 0,
|
||||
CORE_INVALID,
|
||||
CORE_S1,
|
||||
CORE_S2,
|
||||
CORE_INT,
|
||||
CORE_FLOAT,
|
||||
CORE_EXPONENT,
|
||||
CORE_SCIENTIFIC,
|
||||
NUM_CORE_STATES
|
||||
} core_state_e;
|
||||
|
||||
|
||||
/* Helper structure to hold results */
|
||||
typedef struct RESULTS_S {
|
||||
/* inputs */
|
||||
ee_s16 seed1; /* Initializing seed */
|
||||
ee_s16 seed2; /* Initializing seed */
|
||||
ee_s16 seed3; /* Initializing seed */
|
||||
void *memblock[4]; /* Pointer to safe memory location */
|
||||
ee_u32 size; /* Size of the data */
|
||||
ee_u32 iterations; /* Number of iterations to execute */
|
||||
ee_u32 execs; /* Bitmask of operations to execute */
|
||||
struct list_head_s *list;
|
||||
mat_params mat;
|
||||
/* outputs */
|
||||
ee_u16 crc;
|
||||
ee_u16 crclist;
|
||||
ee_u16 crcmatrix;
|
||||
ee_u16 crcstate;
|
||||
ee_s16 err;
|
||||
/* Multithread specific */
|
||||
core_portable port;
|
||||
typedef struct RESULTS_S
|
||||
{
|
||||
/* inputs */
|
||||
ee_s16 seed1; /* Initializing seed */
|
||||
ee_s16 seed2; /* Initializing seed */
|
||||
ee_s16 seed3; /* Initializing seed */
|
||||
void * memblock[4]; /* Pointer to safe memory location */
|
||||
ee_u32 size; /* Size of the data */
|
||||
ee_u32 iterations; /* Number of iterations to execute */
|
||||
ee_u32 execs; /* Bitmask of operations to execute */
|
||||
struct list_head_s *list;
|
||||
mat_params mat;
|
||||
/* outputs */
|
||||
ee_u16 crc;
|
||||
ee_u16 crclist;
|
||||
ee_u16 crcmatrix;
|
||||
ee_u16 crcstate;
|
||||
ee_s16 err;
|
||||
/* Multithread specific */
|
||||
core_portable port;
|
||||
} core_results;
|
||||
|
||||
/* Multicore execution handling */
|
||||
#if (MULTITHREAD>1)
|
||||
#if (MULTITHREAD > 1)
|
||||
ee_u8 core_start_parallel(core_results *res);
|
||||
ee_u8 core_stop_parallel(core_results *res);
|
||||
#endif
|
||||
|
||||
/* list benchmark functions */
|
||||
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed);
|
||||
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx);
|
||||
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx);
|
||||
|
||||
/* state benchmark functions */
|
||||
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p);
|
||||
ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock,
|
||||
ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc);
|
||||
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p);
|
||||
ee_u16 core_bench_state(ee_u32 blksize,
|
||||
ee_u8 *memblock,
|
||||
ee_s16 seed1,
|
||||
ee_s16 seed2,
|
||||
ee_s16 step,
|
||||
ee_u16 crc);
|
||||
|
||||
/* matrix benchmark functions */
|
||||
ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p);
|
||||
ee_u32 core_init_matrix(ee_u32 blksize,
|
||||
void * memblk,
|
||||
ee_s32 seed,
|
||||
mat_params *p);
|
||||
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc);
|
||||
|
||||
|
6
riscv-coremark/coremark/coremark.md5
Normal file
6
riscv-coremark/coremark/coremark.md5
Normal file
@ -0,0 +1,6 @@
|
||||
8d082dc4a9676c02731a8cf209339072 core_list_join.c
|
||||
c984863b84b59185d8b5fb81c1ca7535 core_main.c
|
||||
5fa21a0f7c3964167c9691db531ca652 core_matrix.c
|
||||
edcfc7a0b146a50028014f06e6826aa3 core_state.c
|
||||
45540ba2145adea1ec7ea2c72a1fbbcb core_util.c
|
||||
8ca974c013b380dc7f0d6d1afb76eb2d coremark.h
|
126
riscv-coremark/coremark/cygwin/core_portme.mak
Executable file → Normal file
126
riscv-coremark/coremark/cygwin/core_portme.mak
Executable file → Normal file
@ -14,128 +14,4 @@
|
||||
#
|
||||
# Original Author: Shay Gal-on
|
||||
|
||||
#File: core_portme.mak
|
||||
|
||||
# Flag: OUTFLAG
|
||||
# Use this flag to define how to get an executable (e.g -o)
|
||||
OUTFLAG= -o
|
||||
# Flag: CC
|
||||
# Use this flag to define compiler to use
|
||||
CC = gcc
|
||||
# Flag: CFLAGS
|
||||
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
|
||||
PORT_CFLAGS = -O2
|
||||
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
|
||||
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
|
||||
#Flag: LFLAGS_END
|
||||
# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
|
||||
# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
|
||||
LFLAGS_END =
|
||||
# Flag: PORT_SRCS
|
||||
# Port specific source files can be added here
|
||||
PORT_SRCS = $(PORT_DIR)/core_portme.c
|
||||
# Flag: LOAD
|
||||
# Define this flag if you need to load to a target, as in a cross compile environment.
|
||||
|
||||
# Flag: RUN
|
||||
# Define this flag if running does not consist of simple invocation of the binary.
|
||||
# In a cross compile environment, you need to define this.
|
||||
|
||||
#For flashing and using a tera term macro, you could use
|
||||
#LOAD = flash ADDR
|
||||
#RUN = ttpmacro coremark.ttl
|
||||
|
||||
#For copying to target and executing via SSH connection, you could use
|
||||
#LOAD = scp $(OUTFILE) user@target:~
|
||||
#RUN = ssh user@target -c
|
||||
|
||||
#For native compilation and execution
|
||||
LOAD = echo Loading done
|
||||
RUN =
|
||||
|
||||
OEXT = .o
|
||||
EXE = .exe
|
||||
|
||||
# Flag: SEPARATE_COMPILE
|
||||
# Define if you need to separate compilation from link stage.
|
||||
# In this case, you also need to define below how to create an object file, and how to link.
|
||||
ifdef SEPARATE_COMPILE
|
||||
|
||||
LD = gcc
|
||||
OBJOUT = -o
|
||||
LFLAGS =
|
||||
OFLAG = -o
|
||||
COUT = -c
|
||||
# Flag: PORT_OBJS
|
||||
# Port specific object files can be added here
|
||||
PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT)
|
||||
PORT_CLEAN = *$(OEXT)
|
||||
|
||||
$(OPATH)%$(OEXT) : %.c
|
||||
$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
|
||||
|
||||
endif
|
||||
|
||||
# Target: port_prebuild
|
||||
# Generate any files that are needed before actual build starts.
|
||||
# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1
|
||||
# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line.
|
||||
# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it.
|
||||
# Note - Using REBUILD=1
|
||||
#
|
||||
# Use make PGO=1 to invoke this sample processing.
|
||||
|
||||
ifdef PGO
|
||||
ifeq (,$(findstring $(PGO),gen))
|
||||
PGO_STAGE=build_pgo_gcc
|
||||
CFLAGS+=-fprofile-use
|
||||
endif
|
||||
PORT_CLEAN+=*.gcda *.gcno gmon.out
|
||||
endif
|
||||
|
||||
.PHONY: port_prebuild
|
||||
port_prebuild: $(PGO_STAGE)
|
||||
|
||||
.PHONY: build_pgo_gcc
|
||||
build_pgo_gcc:
|
||||
$(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1
|
||||
|
||||
# Target: port_postbuild
|
||||
# Generate any files that are needed after actual build end.
|
||||
# E.g. change format to srec, bin, zip in order to be able to load into flash
|
||||
.PHONY: port_postbuild
|
||||
port_postbuild:
|
||||
|
||||
# Target: port_postrun
|
||||
# Do platform specific after run stuff.
|
||||
# E.g. reset the board, backup the logfiles etc.
|
||||
.PHONY: port_postrun
|
||||
port_postrun:
|
||||
|
||||
# Target: port_prerun
|
||||
# Do platform specific before run stuff.
|
||||
# E.g. reset the board, backup the logfiles etc.
|
||||
.PHONY: port_prerun
|
||||
port_prerun:
|
||||
|
||||
# Target: port_postload
|
||||
# Do platform specific after load stuff.
|
||||
# E.g. reset the reset power to the flash eraser
|
||||
.PHONY: port_postload
|
||||
port_postload:
|
||||
|
||||
# Target: port_preload
|
||||
# Do platform specific before load stuff.
|
||||
# E.g. reset the reset power to the flash eraser
|
||||
.PHONY: port_preload
|
||||
port_preload:
|
||||
|
||||
|
||||
# FLAG: OPATH
|
||||
# Path to the output folder. Default - current folder.
|
||||
OPATH = ./
|
||||
MKDIR = mkdir -p
|
||||
|
||||
# FLAG: PERL
|
||||
# Define perl executable to calculate the geomean if running separate.
|
||||
PERL=perl
|
||||
include posix/core_portme.mak
|
||||
|
17
riscv-coremark/coremark/freebsd/core_portme.mak
Normal file
17
riscv-coremark/coremark/freebsd/core_portme.mak
Normal file
@ -0,0 +1,17 @@
|
||||
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Original Author: Shay Gal-on
|
||||
|
||||
include posix/core_portme.mak
|
125
riscv-coremark/coremark/linux/core_portme.mak
Executable file → Normal file
125
riscv-coremark/coremark/linux/core_portme.mak
Executable file → Normal file
@ -14,127 +14,4 @@
|
||||
#
|
||||
# Original Author: Shay Gal-on
|
||||
|
||||
#File: core_portme.mak
|
||||
|
||||
# Flag: OUTFLAG
|
||||
# Use this flag to define how to get an executable (e.g -o)
|
||||
OUTFLAG= -o
|
||||
# Flag: CC
|
||||
# Use this flag to define compiler to use
|
||||
CC = gcc
|
||||
# Flag: CFLAGS
|
||||
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
|
||||
PORT_CFLAGS = -O2
|
||||
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
|
||||
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
|
||||
#Flag: LFLAGS_END
|
||||
# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
|
||||
# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
|
||||
LFLAGS_END += -lrt
|
||||
# Flag: PORT_SRCS
|
||||
# Port specific source files can be added here
|
||||
PORT_SRCS = $(PORT_DIR)/core_portme.c
|
||||
# Flag: LOAD
|
||||
# Define this flag if you need to load to a target, as in a cross compile environment.
|
||||
|
||||
# Flag: RUN
|
||||
# Define this flag if running does not consist of simple invocation of the binary.
|
||||
# In a cross compile environment, you need to define this.
|
||||
|
||||
#For flashing and using a tera term macro, you could use
|
||||
#LOAD = flash ADDR
|
||||
#RUN = ttpmacro coremark.ttl
|
||||
|
||||
#For copying to target and executing via SSH connection, you could use
|
||||
#LOAD = scp $(OUTFILE) user@target:~
|
||||
#RUN = ssh user@target -c
|
||||
|
||||
#For native compilation and execution
|
||||
LOAD = echo Loading done
|
||||
RUN =
|
||||
|
||||
OEXT = .o
|
||||
EXE = .exe
|
||||
|
||||
# Flag: SEPARATE_COMPILE
|
||||
# Define if you need to separate compilation from link stage.
|
||||
# In this case, you also need to define below how to create an object file, and how to link.
|
||||
ifdef SEPARATE_COMPILE
|
||||
|
||||
LD = gcc
|
||||
OBJOUT = -o
|
||||
LFLAGS =
|
||||
OFLAG = -o
|
||||
COUT = -c
|
||||
# Flag: PORT_OBJS
|
||||
# Port specific object files can be added here
|
||||
PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT)
|
||||
PORT_CLEAN = *$(OEXT)
|
||||
|
||||
$(OPATH)%$(OEXT) : %.c
|
||||
$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
|
||||
|
||||
endif
|
||||
|
||||
# Target: port_prebuild
|
||||
# Generate any files that are needed before actual build starts.
|
||||
# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1
|
||||
# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line.
|
||||
# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it.
|
||||
# Note - Using REBUILD=1
|
||||
#
|
||||
# Use make PGO=1 to invoke this sample processing.
|
||||
|
||||
ifdef PGO
|
||||
ifeq (,$(findstring $(PGO),gen))
|
||||
PGO_STAGE=build_pgo_gcc
|
||||
CFLAGS+=-fprofile-use
|
||||
endif
|
||||
PORT_CLEAN+=*.gcda *.gcno gmon.out
|
||||
endif
|
||||
|
||||
.PHONY: port_prebuild
|
||||
port_prebuild: $(PGO_STAGE)
|
||||
|
||||
.PHONY: build_pgo_gcc
|
||||
build_pgo_gcc:
|
||||
$(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1
|
||||
|
||||
# Target: port_postbuild
|
||||
# Generate any files that are needed after actual build end.
|
||||
# E.g. change format to srec, bin, zip in order to be able to load into flash
|
||||
.PHONY: port_postbuild
|
||||
port_postbuild:
|
||||
|
||||
# Target: port_postrun
|
||||
# Do platform specific after run stuff.
|
||||
# E.g. reset the board, backup the logfiles etc.
|
||||
.PHONY: port_postrun
|
||||
port_postrun:
|
||||
|
||||
# Target: port_prerun
|
||||
# Do platform specific before run stuff.
|
||||
# E.g. reset the board, backup the logfiles etc.
|
||||
.PHONY: port_prerun
|
||||
port_prerun:
|
||||
|
||||
# Target: port_postload
|
||||
# Do platform specific after load stuff.
|
||||
# E.g. reset the reset power to the flash eraser
|
||||
.PHONY: port_postload
|
||||
port_postload:
|
||||
|
||||
# Target: port_preload
|
||||
# Do platform specific before load stuff.
|
||||
# E.g. reset the reset power to the flash eraser
|
||||
.PHONY: port_preload
|
||||
port_preload:
|
||||
|
||||
# FLAG: OPATH
|
||||
# Path to the output folder. Default - current folder.
|
||||
OPATH = ./
|
||||
MKDIR = mkdir -p
|
||||
|
||||
# FLAG: PERL
|
||||
# Define perl executable to calculate the geomean if running separate.
|
||||
PERL=/usr/bin/perl
|
||||
include posix/core_portme.mak
|
||||
|
18
riscv-coremark/coremark/macos/core_portme.mak
Normal file
18
riscv-coremark/coremark/macos/core_portme.mak
Normal file
@ -0,0 +1,18 @@
|
||||
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Original Author: Shay Gal-on
|
||||
|
||||
NO_LIBRT = 1
|
||||
include posix/core_portme.mak
|
419
riscv-coremark/coremark/posix/core_portme.c
Normal file
419
riscv-coremark/coremark/posix/core_portme.c
Normal file
@ -0,0 +1,419 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "coremark.h"
|
||||
#if CALLGRIND_RUN
|
||||
#include <valgrind/callgrind.h>
|
||||
#endif
|
||||
|
||||
#if (MEM_METHOD == MEM_MALLOC)
|
||||
/* Function: portable_malloc
|
||||
Provide malloc() functionality in a platform specific way.
|
||||
*/
|
||||
void *
portable_malloc(size_t size)
{
    /* Thin wrapper: the request goes straight to the C library allocator
       and its result (NULL on failure) is handed back unchanged. */
    void *block = malloc(size);

    return block;
}
|
||||
/* Function: portable_free
|
||||
Provide free() functionality in a platform specific way.
|
||||
*/
|
||||
void
portable_free(void *p)
{
    /* Counterpart of portable_malloc: release the block through the
       C library. free() accepts NULL, so no guard is needed. */
    free(p);
}
|
||||
#else
|
||||
void *
portable_malloc(size_t size)
{
    /* Stub used when MEM_METHOD is not MEM_MALLOC: nothing is ever
       allocated through this hook, so the request is ignored. */
    (void)size;
    return NULL;
}
|
||||
void
portable_free(void *p)
{
    /* Stub used when MEM_METHOD is not MEM_MALLOC: the pointer is not
       touched and nothing is released. */
    (void)p;
}
|
||||
#endif
|
||||
|
||||
#if (SEED_METHOD == SEED_VOLATILE)
|
||||
#if VALIDATION_RUN
|
||||
volatile ee_s32 seed1_volatile = 0x3415;
|
||||
volatile ee_s32 seed2_volatile = 0x3415;
|
||||
volatile ee_s32 seed3_volatile = 0x66;
|
||||
#endif
|
||||
#if PERFORMANCE_RUN
|
||||
volatile ee_s32 seed1_volatile = 0x0;
|
||||
volatile ee_s32 seed2_volatile = 0x0;
|
||||
volatile ee_s32 seed3_volatile = 0x66;
|
||||
#endif
|
||||
#if PROFILE_RUN
|
||||
volatile ee_s32 seed1_volatile = 0x8;
|
||||
volatile ee_s32 seed2_volatile = 0x8;
|
||||
volatile ee_s32 seed3_volatile = 0x8;
|
||||
#endif
|
||||
volatile ee_s32 seed4_volatile = ITERATIONS;
|
||||
volatile ee_s32 seed5_volatile = 0;
|
||||
#endif
|
||||
/* Porting: Timing functions
|
||||
How to capture time and convert to seconds must be ported to whatever is
|
||||
supported by the platform. e.g. Read value from on board RTC, read value from
|
||||
cpu clock cycles performance counter etc. Sample implementation for standard
|
||||
time.h and windows.h definitions included.
|
||||
*/
|
||||
/* Define: TIMER_RES_DIVIDER
|
||||
Divider to trade off timer resolution and total time that can be
|
||||
measured.
|
||||
|
||||
Use lower values to increase resolution, but make sure that overflow
|
||||
does not occur. If there are issues with the return value overflowing,
|
||||
increase this value.
|
||||
*/
|
||||
/* Select the timer back end. Each branch provides:
     NSECS_PER_SEC     - raw timer units per second
     CORETIMETYPE      - type holding one raw timestamp
     GETMYTIME(_t)     - store the current time into *(_t)
     MYTIMEDIFF(f, i)  - (f - i) expressed in ticks
     TIMER_RES_DIVIDER - raw units per tick
   Macro arguments are parenthesized so that expressions such as *ptr or
   arr[n] can be passed safely. */
#if USE_CLOCK
/* clock() back end: ticks are clock_t units, CLOCKS_PER_SEC per second. */
#define NSECS_PER_SEC              CLOCKS_PER_SEC
#define EE_TIMER_TICKER_RATE       1000
#define CORETIMETYPE               clock_t
#define GETMYTIME(_t)              (*(_t) = clock())
#define MYTIMEDIFF(fin, ini)       ((fin) - (ini))
#define TIMER_RES_DIVIDER          1
#define SAMPLE_TIME_IMPLEMENTATION 1
#elif defined(_MSC_VER)
/* FILETIME back end: 10,000,000 raw units per second. */
#define NSECS_PER_SEC        10000000
#define EE_TIMER_TICKER_RATE 1000
#define CORETIMETYPE         FILETIME
#define GETMYTIME(_t)        GetSystemTimeAsFileTime((_t))
#define MYTIMEDIFF(fin, ini) \
    (((*(__int64 *)&(fin)) - (*(__int64 *)&(ini))) / TIMER_RES_DIVIDER)
/* default divider with MSDEV: 1000 raw units per tick */
#ifndef TIMER_RES_DIVIDER
#define TIMER_RES_DIVIDER 1000
#endif
#define SAMPLE_TIME_IMPLEMENTATION 1
#elif HAS_TIME_H
/* clock_gettime() back end: raw units are nanoseconds.
   NOTE(review): CLOCK_REALTIME follows wall-clock adjustments; consider
   CLOCK_MONOTONIC where the platform provides it. */
#define NSECS_PER_SEC        1000000000
#define EE_TIMER_TICKER_RATE 1000
#define CORETIMETYPE         struct timespec
#define GETMYTIME(_t)        clock_gettime(CLOCK_REALTIME, (_t))
#define MYTIMEDIFF(fin, ini)                                          \
    (((fin).tv_sec - (ini).tv_sec) * (NSECS_PER_SEC / TIMER_RES_DIVIDER) \
     + ((fin).tv_nsec - (ini).tv_nsec) / TIMER_RES_DIVIDER)
/* setting to 1/1000 of a second resolution by default with linux */
#ifndef TIMER_RES_DIVIDER
#define TIMER_RES_DIVIDER 1000000
#endif
#define SAMPLE_TIME_IMPLEMENTATION 1
#else
/* No known timer: the port must supply its own timing functions. */
#define SAMPLE_TIME_IMPLEMENTATION 0
#endif
|
||||
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
|
||||
|
||||
#if SAMPLE_TIME_IMPLEMENTATION
|
||||
/** Define Host specific (POSIX), or target specific global time variables. */
|
||||
static CORETIMETYPE start_time_val, stop_time_val;
|
||||
|
||||
/* Function: start_time
|
||||
This function will be called right before starting the timed portion of
|
||||
the benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the
|
||||
example code) or zeroing some system parameters - e.g. setting the cpu clocks
|
||||
cycles to 0.
|
||||
*/
|
||||
void
|
||||
start_time(void)
|
||||
{
|
||||
GETMYTIME(&start_time_val);
|
||||
#if CALLGRIND_RUN
|
||||
CALLGRIND_START_INSTRUMENTATION
|
||||
#endif
|
||||
#if MICA
|
||||
asm volatile("int3"); /*1 */
|
||||
#endif
|
||||
}
|
||||
/* Function: stop_time
|
||||
This function will be called right after ending the timed portion of the
|
||||
benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the
|
||||
example code) or other system parameters - e.g. reading the current value of
|
||||
cpu cycles counter.
|
||||
*/
|
||||
void
|
||||
stop_time(void)
|
||||
{
|
||||
#if CALLGRIND_RUN
|
||||
CALLGRIND_STOP_INSTRUMENTATION
|
||||
#endif
|
||||
#if MICA
|
||||
asm volatile("int3"); /*1 */
|
||||
#endif
|
||||
GETMYTIME(&stop_time_val);
|
||||
}
|
||||
/* Function: get_time
|
||||
Return an abstract "ticks" number that signifies time on the system.
|
||||
|
||||
Actual value returned may be cpu cycles, milliseconds or any other
|
||||
value, as long as it can be converted to seconds by <time_in_secs>. This
|
||||
methodology is taken to accommodate any hardware or simulated platform. The
|
||||
sample implementation returns millisecs by default, and the resolution is
|
||||
controlled by <TIMER_RES_DIVIDER>
|
||||
*/
|
||||
CORE_TICKS
|
||||
get_time(void)
|
||||
{
|
||||
CORE_TICKS elapsed
|
||||
= (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
|
||||
return elapsed;
|
||||
}
|
||||
/* Function: time_in_secs
|
||||
Convert the value returned by get_time to seconds.
|
||||
|
||||
The <secs_ret> type is used to accommodate systems with no support for
|
||||
floating point. Default implementation implemented by the EE_TICKS_PER_SEC
|
||||
macro above.
|
||||
*/
|
||||
secs_ret
|
||||
time_in_secs(CORE_TICKS ticks)
|
||||
{
|
||||
secs_ret retval = ((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
|
||||
return retval;
|
||||
}
|
||||
#else
|
||||
#error "Please implement timing functionality in core_portme.c"
|
||||
#endif /* SAMPLE_TIME_IMPLEMENTATION */
|
||||
|
||||
ee_u32 default_num_contexts = MULTITHREAD;
|
||||
|
||||
/* Function: portable_init
|
||||
Target specific initialization code
|
||||
Test for some common mistakes.
|
||||
*/
|
||||
void
|
||||
portable_init(core_portable *p, int *argc, char *argv[])
|
||||
{
|
||||
#if PRINT_ARGS
|
||||
int i;
|
||||
for (i = 0; i < *argc; i++)
|
||||
{
|
||||
ee_printf("Arg[%d]=%s\n", i, argv[i]);
|
||||
}
|
||||
#endif
|
||||
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *))
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR! Please define ee_ptr_int to a type that holds a "
|
||||
"pointer!\n");
|
||||
}
|
||||
if (sizeof(ee_u32) != 4)
|
||||
{
|
||||
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
|
||||
}
|
||||
#if (MAIN_HAS_NOARGC && (SEED_METHOD == SEED_ARG))
|
||||
ee_printf(
|
||||
"ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n");
|
||||
#endif
|
||||
|
||||
#if (MULTITHREAD > 1) && (SEED_METHOD == SEED_ARG)
|
||||
int nargs = *argc, i;
|
||||
if ((nargs > 1) && (*argv[1] == 'M'))
|
||||
{
|
||||
default_num_contexts = parseval(argv[1] + 1);
|
||||
if (default_num_contexts > MULTITHREAD)
|
||||
default_num_contexts = MULTITHREAD;
|
||||
/* Shift args since first arg is directed to the portable part and not
|
||||
* to coremark main */
|
||||
--nargs;
|
||||
for (i = 1; i < nargs; i++)
|
||||
argv[i] = argv[i + 1];
|
||||
*argc = nargs;
|
||||
}
|
||||
#endif /* sample of potential platform specific init via command line, reset \
|
||||
the number of contexts being used if first argument is M<n>*/
|
||||
p->portable_id = 1;
|
||||
}
|
||||
/* Function: portable_fini
|
||||
Target specific final code
|
||||
*/
|
||||
void
|
||||
portable_fini(core_portable *p)
|
||||
{
|
||||
p->portable_id = 0;
|
||||
}
|
||||
|
||||
#if (MULTITHREAD > 1)
|
||||
|
||||
/* Function: core_start_parallel
|
||||
Start benchmarking in a parallel context.
|
||||
|
||||
Three implementations are provided, one using pthreads, one using fork
|
||||
and shared mem, and one using fork and sockets. Other implementations using
|
||||
MCAPI or other standards can easily be devised.
|
||||
*/
|
||||
/* Function: core_stop_parallel
|
||||
Stop a parallel context execution of coremark, and gather the results.
|
||||
|
||||
Three implementations are provided, one using pthreads, one using fork
|
||||
and shared mem, and one using fork and sockets. Other implementations using
|
||||
MCAPI or other standards can easily be devised.
|
||||
*/
|
||||
#if USE_PTHREAD
|
||||
ee_u8
|
||||
core_start_parallel(core_results *res)
|
||||
{
|
||||
return (ee_u8)pthread_create(
|
||||
&(res->port.thread), NULL, iterate, (void *)res);
|
||||
}
|
||||
ee_u8
|
||||
core_stop_parallel(core_results *res)
|
||||
{
|
||||
void *retval;
|
||||
return (ee_u8)pthread_join(res->port.thread, &retval);
|
||||
}
|
||||
#elif USE_FORK
|
||||
static int key_id = 0;
|
||||
ee_u8
|
||||
core_start_parallel(core_results *res)
|
||||
{
|
||||
key_t key = 4321 + key_id;
|
||||
key_id++;
|
||||
res->port.pid = fork();
|
||||
res->port.shmid = shmget(key, 8, IPC_CREAT | 0666);
|
||||
if (res->port.shmid < 0)
|
||||
{
|
||||
ee_printf("ERROR in shmget!\n");
|
||||
}
|
||||
if (res->port.pid == 0)
|
||||
{
|
||||
iterate(res);
|
||||
res->port.shm = shmat(res->port.shmid, NULL, 0);
|
||||
/* copy the validation values to the shared memory area and quit*/
|
||||
if (res->port.shm == (char *)-1)
|
||||
{
|
||||
ee_printf("ERROR in child shmat!\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(res->port.shm, &(res->crc), 8);
|
||||
shmdt(res->port.shm);
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
ee_u8
|
||||
core_stop_parallel(core_results *res)
|
||||
{
|
||||
int status;
|
||||
pid_t wpid = waitpid(res->port.pid, &status, WUNTRACED);
|
||||
if (wpid != res->port.pid)
|
||||
{
|
||||
ee_printf("ERROR waiting for child.\n");
|
||||
if (errno == ECHILD)
|
||||
ee_printf("errno=No such child %d\n", res->port.pid);
|
||||
if (errno == EINTR)
|
||||
ee_printf("errno=Interrupted\n");
|
||||
return 0;
|
||||
}
|
||||
/* after process is done, get the values from the shared memory area */
|
||||
res->port.shm = shmat(res->port.shmid, NULL, 0);
|
||||
if (res->port.shm == (char *)-1)
|
||||
{
|
||||
ee_printf("ERROR in parent shmat!\n");
|
||||
return 0;
|
||||
}
|
||||
memcpy(&(res->crc), res->port.shm, 8);
|
||||
shmdt(res->port.shm);
|
||||
return 1;
|
||||
}
|
||||
#elif USE_SOCKET
|
||||
static int key_id = 0;
|
||||
ee_u8
|
||||
core_start_parallel(core_results *res)
|
||||
{
|
||||
int bound, buffer_length = 8;
|
||||
res->port.sa.sin_family = AF_INET;
|
||||
res->port.sa.sin_addr.s_addr = htonl(0x7F000001);
|
||||
res->port.sa.sin_port = htons(7654 + key_id);
|
||||
key_id++;
|
||||
res->port.pid = fork();
|
||||
if (res->port.pid == 0)
|
||||
{ /* benchmark child */
|
||||
iterate(res);
|
||||
res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
|
||||
if (-1 == res->port.sock) /* if socket failed to initialize, exit */
|
||||
{
|
||||
ee_printf("Error Creating Socket");
|
||||
}
|
||||
else
|
||||
{
|
||||
int bytes_sent = sendto(res->port.sock,
|
||||
&(res->crc),
|
||||
buffer_length,
|
||||
0,
|
||||
(struct sockaddr *)&(res->port.sa),
|
||||
sizeof(struct sockaddr_in));
|
||||
if (bytes_sent < 0)
|
||||
ee_printf("Error sending packet: %s\n", strerror(errno));
|
||||
close(res->port.sock); /* close the socket */
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
/* parent process, open the socket */
|
||||
res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
|
||||
bound = bind(res->port.sock,
|
||||
(struct sockaddr *)&(res->port.sa),
|
||||
sizeof(struct sockaddr));
|
||||
if (bound < 0)
|
||||
ee_printf("bind(): %s\n", strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
ee_u8
|
||||
core_stop_parallel(core_results *res)
|
||||
{
|
||||
int status;
|
||||
int fromlen = sizeof(struct sockaddr);
|
||||
int recsize = recvfrom(res->port.sock,
|
||||
&(res->crc),
|
||||
8,
|
||||
0,
|
||||
(struct sockaddr *)&(res->port.sa),
|
||||
&fromlen);
|
||||
if (recsize < 0)
|
||||
{
|
||||
ee_printf("Error in receive: %s\n", strerror(errno));
|
||||
return 0;
|
||||
}
|
||||
pid_t wpid = waitpid(res->port.pid, &status, WUNTRACED);
|
||||
if (wpid != res->port.pid)
|
||||
{
|
||||
ee_printf("ERROR waiting for child.\n");
|
||||
if (errno == ECHILD)
|
||||
ee_printf("errno=No such child %d\n", res->port.pid);
|
||||
if (errno == EINTR)
|
||||
ee_printf("errno=Interrupted\n");
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#else /* no standard multicore implementation */
|
||||
#error \
|
||||
"Please implement multicore functionality in core_portme.c to use multiple contexts."
|
||||
#endif /* multithread implementations */
|
||||
#endif
|
314
riscv-coremark/coremark/posix/core_portme.h
Normal file
314
riscv-coremark/coremark/posix/core_portme.h
Normal file
@ -0,0 +1,314 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
/* Topic: Description
|
||||
This file contains configuration constants required to execute on
|
||||
different platforms
|
||||
*/
|
||||
#ifndef CORE_PORTME_H
|
||||
#define CORE_PORTME_H
|
||||
|
||||
#include "core_portme_posix_overrides.h"
|
||||
|
||||
/************************/
|
||||
/* Data types and settings */
|
||||
/************************/
|
||||
/* Configuration: HAS_FLOAT
|
||||
Define to 1 if the platform supports floating point.
|
||||
*/
|
||||
#ifndef HAS_FLOAT
|
||||
#define HAS_FLOAT 1
|
||||
#endif
|
||||
/* Configuration: HAS_TIME_H
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef HAS_TIME_H
|
||||
#define HAS_TIME_H 1
|
||||
#endif
|
||||
/* Configuration: USE_CLOCK
|
||||
Define to 1 to measure time with clock() (CLOCKS_PER_SEC ticks per second)
|
||||
instead of the platform default timer selected in core_portme.c.
|
||||
*/
|
||||
#ifndef USE_CLOCK
|
||||
#define USE_CLOCK 0
|
||||
#endif
|
||||
/* Configuration: HAS_STDIO
|
||||
Define to 1 if the platform has stdio.h.
|
||||
*/
|
||||
#ifndef HAS_STDIO
|
||||
#define HAS_STDIO 1
|
||||
#endif
|
||||
/* Configuration: HAS_PRINTF
|
||||
Define to 1 if the platform has stdio.h and implements the printf
|
||||
function.
|
||||
*/
|
||||
#ifndef HAS_PRINTF
|
||||
#define HAS_PRINTF 1
|
||||
#endif
|
||||
|
||||
/* Configuration: CORE_TICKS
|
||||
Define type of return from the timing functions.
|
||||
*/
|
||||
#if defined(_MSC_VER)
|
||||
#include <windows.h>
|
||||
typedef size_t CORE_TICKS;
|
||||
#elif HAS_TIME_H
|
||||
#include <time.h>
|
||||
typedef clock_t CORE_TICKS;
|
||||
#else
|
||||
#error \
|
||||
"Please define type of CORE_TICKS and implement start_time, end_time get_time and time_in_secs functions!"
|
||||
#endif
|
||||
|
||||
/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
|
||||
Initialize these strings per platform
|
||||
*/
|
||||
#ifndef COMPILER_VERSION
|
||||
#ifdef __GNUC__
|
||||
#define COMPILER_VERSION "GCC"__VERSION__
|
||||
#else
|
||||
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
|
||||
#endif
|
||||
#endif
|
||||
#ifndef COMPILER_FLAGS
|
||||
#define COMPILER_FLAGS \
|
||||
FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
|
||||
#endif
|
||||
#ifndef MEM_LOCATION
|
||||
#define MEM_LOCATION \
|
||||
"Please put data memory location here\n\t\t\t(e.g. code in flash, data " \
|
||||
"on heap etc)"
|
||||
#define MEM_LOCATION_UNSPEC 1
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Data Types:
|
||||
To avoid compiler issues, define the data types that need to be used for
|
||||
8b, 16b and 32b in <core_portme.h>.
|
||||
|
||||
*Important*:
|
||||
ee_ptr_int needs to be the data type used to hold pointers, otherwise
|
||||
coremark may fail!!!
|
||||
*/
|
||||
typedef signed short ee_s16;
|
||||
typedef unsigned short ee_u16;
|
||||
typedef signed int ee_s32;
|
||||
typedef double ee_f32;
|
||||
typedef unsigned char ee_u8;
|
||||
typedef unsigned int ee_u32;
|
||||
typedef uintptr_t ee_ptr_int;
|
||||
typedef size_t ee_size_t;
|
||||
/* align an offset to point to a 32b value */
|
||||
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3))
|
||||
|
||||
/* Configuration: SEED_METHOD
|
||||
Defines method to get seed values that cannot be computed at compile
|
||||
time.
|
||||
|
||||
Valid values:
|
||||
SEED_ARG - from command line.
|
||||
SEED_FUNC - from a system function.
|
||||
SEED_VOLATILE - from volatile variables.
|
||||
*/
|
||||
#ifndef SEED_METHOD
|
||||
#define SEED_METHOD SEED_ARG
|
||||
#endif
|
||||
|
||||
/* Configuration: MEM_METHOD
|
||||
Defines method to get a block of memory.
|
||||
|
||||
Valid values:
|
||||
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
|
||||
MEM_STATIC - to use a static memory array.
|
||||
MEM_STACK - to allocate the data block on the stack (NYI).
|
||||
*/
|
||||
#ifndef MEM_METHOD
|
||||
#define MEM_METHOD MEM_MALLOC
|
||||
#endif
|
||||
|
||||
/* Configuration: MULTITHREAD
|
||||
Define for parallel execution
|
||||
|
||||
Valid values:
|
||||
1 - only one context (default).
|
||||
N>1 - will execute N copies in parallel.
|
||||
|
||||
Note:
|
||||
If this flag is defined to more than 1, an implementation for launching
|
||||
parallel contexts must be defined.
|
||||
|
||||
Three sample implementations are provided. Use <USE_PTHREAD>, <USE_FORK> or <USE_SOCKET>
|
||||
to enable them.
|
||||
|
||||
It is valid to have a different implementation of <core_start_parallel>
|
||||
and <core_stop_parallel> in <core_portme.c>, to fit a particular architecture.
|
||||
*/
|
||||
#ifndef MULTITHREAD
|
||||
#define MULTITHREAD 1
|
||||
#endif
|
||||
|
||||
/* Configuration: USE_PTHREAD
|
||||
Sample implementation for launching parallel contexts
|
||||
This implementation uses pthread_create and pthread_join.
|
||||
|
||||
Valid values:
|
||||
0 - Do not use pthreads API.
|
||||
1 - Use pthreads API
|
||||
|
||||
Note:
|
||||
This flag only matters if MULTITHREAD has been defined to a value
|
||||
greater than 1.
|
||||
*/
|
||||
#ifndef USE_PTHREAD
|
||||
#define USE_PTHREAD 0
|
||||
#endif
|
||||
|
||||
/* Configuration: USE_FORK
|
||||
Sample implementation for launching parallel contexts
|
||||
This implementation uses fork, waitpid, shmget,shmat and shmdt.
|
||||
|
||||
Valid values:
|
||||
0 - Do not use fork API.
|
||||
1 - Use fork API
|
||||
|
||||
Note:
|
||||
This flag only matters if MULTITHREAD has been defined to a value
|
||||
greater than 1.
|
||||
*/
|
||||
#ifndef USE_FORK
|
||||
#define USE_FORK 0
|
||||
#endif
|
||||
|
||||
/* Configuration: USE_SOCKET
|
||||
Sample implementation for launching parallel contexts
|
||||
This implementation uses fork, socket, sendto and recvfrom
|
||||
|
||||
Valid values:
|
||||
0 - Do not use fork and sockets API.
|
||||
1 - Use fork and sockets API
|
||||
|
||||
Note:
|
||||
This flag only matters if MULTITHREAD has been defined to a value
|
||||
greater than 1.
|
||||
*/
|
||||
#ifndef USE_SOCKET
|
||||
#define USE_SOCKET 0
|
||||
#endif
|
||||
|
||||
/* Configuration: MAIN_HAS_NOARGC
|
||||
Needed if platform does not support getting arguments to main.
|
||||
|
||||
Valid values:
|
||||
0 - argc/argv to main is supported
|
||||
1 - argc/argv to main is not supported
|
||||
*/
|
||||
#ifndef MAIN_HAS_NOARGC
|
||||
#define MAIN_HAS_NOARGC 0
|
||||
#endif
|
||||
|
||||
/* Configuration: MAIN_HAS_NORETURN
|
||||
Needed if platform does not support returning a value from main.
|
||||
|
||||
Valid values:
|
||||
0 - main returns an int, and return value will be 0.
|
||||
1 - platform does not support returning a value from main
|
||||
*/
|
||||
#ifndef MAIN_HAS_NORETURN
|
||||
#define MAIN_HAS_NORETURN 0
|
||||
#endif
|
||||
|
||||
/* Variable: default_num_contexts
|
||||
Number of contexts to spawn in multicore context.
|
||||
Override this global value to change number of contexts used.
|
||||
|
||||
Note:
|
||||
This value may not be set higher than the <MULTITHREAD> define.
|
||||
|
||||
To experiment, you can set the <MULTITHREAD> define to the highest value
|
||||
expected, and use argc/argv in the <portable_init> to set this value from the
|
||||
command line.
|
||||
*/
|
||||
extern ee_u32 default_num_contexts;
|
||||
|
||||
#if (MULTITHREAD > 1)
|
||||
#if USE_PTHREAD
|
||||
#include <pthread.h>
|
||||
#define PARALLEL_METHOD "PThreads"
|
||||
#elif USE_FORK
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/shm.h>
|
||||
#include <string.h> /* for memcpy */
|
||||
#define PARALLEL_METHOD "Fork"
|
||||
#elif USE_SOCKET
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <sys/wait.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#define PARALLEL_METHOD "Sockets"
|
||||
#else
|
||||
#define PARALLEL_METHOD "Proprietary"
|
||||
#error \
|
||||
"Please implement multicore functionality in core_portme.c to use multiple contexts."
|
||||
#endif /* Method for multithreading */
|
||||
#endif /* MULTITHREAD > 1 */
|
||||
|
||||
/* Per-context port state. Which members exist depends on the parallel method selected at build time. */
typedef struct CORE_PORTABLE_S
|
||||
{
|
||||
#if (MULTITHREAD > 1)
|
||||
#if USE_PTHREAD
|
||||
pthread_t thread; /* thread created in core_start_parallel, joined in core_stop_parallel */
|
||||
#elif USE_FORK
|
||||
pid_t pid; /* fork() result stored by core_start_parallel */
|
||||
int shmid; /* shmget() id of the segment used to return results */
|
||||
void *shm; /* shmat() address while the segment is attached */
|
||||
#elif USE_SOCKET
|
||||
pid_t pid; /* fork() result stored by core_start_parallel */
|
||||
int sock; /* UDP socket descriptor */
|
||||
struct sockaddr_in sa; /* loopback address and per-context port for the result datagram */
|
||||
#endif /* Method for multithreading */
|
||||
#endif /* MULTITHREAD>1 */
|
||||
ee_u8 portable_id; /* set to 1 by portable_init, 0 by portable_fini */
|
||||
} core_portable;
|
||||
|
||||
/* target specific init/fini */
|
||||
void portable_init(core_portable *p, int *argc, char *argv[]);
|
||||
void portable_fini(core_portable *p);
|
||||
|
||||
#if (SEED_METHOD == SEED_VOLATILE)
|
||||
#if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN)
|
||||
#define RUN_TYPE_FLAG 1
|
||||
#else
|
||||
#if (TOTAL_DATA_SIZE == 1200)
|
||||
#define PROFILE_RUN 1
|
||||
#else
|
||||
#define PERFORMANCE_RUN 1
|
||||
#endif
|
||||
#endif
|
||||
#endif /* SEED_METHOD==SEED_VOLATILE */
|
||||
|
||||
#endif /* CORE_PORTME_H */
|
151
riscv-coremark/coremark/posix/core_portme.mak
Executable file
151
riscv-coremark/coremark/posix/core_portme.mak
Executable file
@ -0,0 +1,151 @@
|
||||
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Original Author: Shay Gal-on
|
||||
|
||||
#File: core_portme.mak
|
||||
|
||||
# Flag: OUTFLAG
|
||||
# Use this flag to define how to get an executable (e.g -o)
|
||||
OUTFLAG= -o
|
||||
# Flag: CC
|
||||
# Use this flag to define compiler to use
|
||||
CC?= cc
|
||||
# Flag: CFLAGS
|
||||
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
|
||||
PORT_CFLAGS = -O2
|
||||
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
|
||||
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -Iposix -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
|
||||
# Flag: NO_LIBRT
|
||||
# Define if the platform does not provide a librt
|
||||
ifndef NO_LIBRT
|
||||
#Flag: LFLAGS_END
|
||||
# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
|
||||
# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
|
||||
LFLAGS_END += -lrt
|
||||
endif
|
||||
# Flag: PORT_SRCS
|
||||
# Port specific source files can be added here
|
||||
PORT_SRCS = posix/core_portme.c
|
||||
vpath %.c posix
|
||||
vpath %.h posix
|
||||
vpath %.mak posix
|
||||
# Flag: EXTRA_DEPENDS
|
||||
# Port specific extra build dependencies.
|
||||
# Some ports inherit from us, so ensure this Makefile is always a dependency.
|
||||
EXTRA_DEPENDS += posix/core_portme.mak
|
||||
# Flag: LOAD
|
||||
# Define this flag if you need to load to a target, as in a cross compile environment.
|
||||
|
||||
# Flag: RUN
|
||||
# Define this flag if running does not consist of simple invocation of the binary.
|
||||
# In a cross compile environment, you need to define this.
|
||||
|
||||
#For flashing and using a tera term macro, you could use
|
||||
#LOAD = flash ADDR
|
||||
#RUN = ttpmacro coremark.ttl
|
||||
|
||||
#For copying to target and executing via SSH connection, you could use
|
||||
#LOAD = scp $(OUTFILE) user@target:~
|
||||
#RUN = ssh user@target -c
|
||||
|
||||
#For native compilation and execution
|
||||
LOAD = echo Loading done
|
||||
RUN =
|
||||
|
||||
OEXT = .o
|
||||
EXE = .exe
|
||||
|
||||
# Flag: SEPARATE_COMPILE
|
||||
# Define if you need to separate compilation from link stage.
|
||||
# In this case, you also need to define below how to create an object file, and how to link.
|
||||
ifdef SEPARATE_COMPILE
|
||||
|
||||
LD = gcc
|
||||
OBJOUT = -o
|
||||
LFLAGS =
|
||||
OFLAG = -o
|
||||
COUT = -c
|
||||
# Flag: PORT_OBJS
|
||||
# Port specific object files can be added here
|
||||
PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT)
|
||||
PORT_CLEAN = *$(OEXT)
|
||||
|
||||
$(OPATH)%$(OEXT) : %.c
|
||||
$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
|
||||
|
||||
endif
|
||||
|
||||
# Target: port_prebuild
|
||||
# Generate any files that are needed before actual build starts.
|
||||
# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1
|
||||
# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line.
|
||||
# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it.
|
||||
# Note - Using REBUILD=1
|
||||
#
|
||||
# Use make PGO=1 to invoke this sample processing.
|
||||
|
||||
ifdef PGO
|
||||
ifeq (,$(findstring $(PGO),gen))
|
||||
PGO_STAGE=build_pgo_gcc
|
||||
CFLAGS+=-fprofile-use
|
||||
endif
|
||||
PORT_CLEAN+=*.gcda *.gcno gmon.out
|
||||
endif
|
||||
|
||||
.PHONY: port_prebuild
|
||||
port_prebuild: $(PGO_STAGE)
|
||||
|
||||
.PHONY: build_pgo_gcc
|
||||
build_pgo_gcc:
|
||||
$(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1
|
||||
|
||||
# Target: port_postbuild
|
||||
# Generate any files that are needed after actual build end.
|
||||
# E.g. change format to srec, bin, zip in order to be able to load into flash
|
||||
.PHONY: port_postbuild
|
||||
port_postbuild:
|
||||
|
||||
# Target: port_postrun
|
||||
# Do platform specific after run stuff.
|
||||
# E.g. reset the board, backup the logfiles etc.
|
||||
.PHONY: port_postrun
|
||||
port_postrun:
|
||||
|
||||
# Target: port_prerun
|
||||
# Do platform specific before run stuff.
|
||||
# E.g. reset the board, backup the logfiles etc.
|
||||
.PHONY: port_prerun
|
||||
port_prerun:
|
||||
|
||||
# Target: port_postload
|
||||
# Do platform specific after load stuff.
|
||||
# E.g. reset the reset power to the flash eraser
|
||||
.PHONY: port_postload
|
||||
port_postload:
|
||||
|
||||
# Target: port_preload
|
||||
# Do platform specific before load stuff.
|
||||
# E.g. reset the reset power to the flash eraser
|
||||
.PHONY: port_preload
|
||||
port_preload:
|
||||
|
||||
# FLAG: OPATH
|
||||
# Path to the output folder. Default - current folder.
|
||||
OPATH = ./
|
||||
MKDIR = mkdir -p
|
||||
|
||||
# FLAG: PERL
|
||||
# Define perl executable to calculate the geomean if running separate.
|
||||
PERL=/usr/bin/perl
|
28
riscv-coremark/coremark/posix/core_portme_posix_overrides.h
Normal file
28
riscv-coremark/coremark/posix/core_portme_posix_overrides.h
Normal file
@ -0,0 +1,28 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
/* Topic: Description
|
||||
This file contains additional configuration constants required to execute on
|
||||
different platforms over and above the POSIX defaults
|
||||
*/
|
||||
#ifndef CORE_PORTME_POSIX_OVERRIDES_H
|
||||
#define CORE_PORTME_POSIX_OVERRIDES_H
|
||||
|
||||
/* None by default */
|
||||
|
||||
#endif
|
18
riscv-coremark/coremark/rtems/core_portme.mak
Normal file
18
riscv-coremark/coremark/rtems/core_portme.mak
Normal file
@ -0,0 +1,18 @@
|
||||
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Original Author: Shay Gal-on
|
||||
|
||||
NO_LIBRT = 1
|
||||
include posix/core_portme.mak
|
63
riscv-coremark/coremark/rtems/init.c
Normal file
63
riscv-coremark/coremark/rtems/init.c
Normal file
@ -0,0 +1,63 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2021 Hesham Almatary
|
||||
*
|
||||
* This software was developed by SRI International and the University of
|
||||
* Cambridge Computer Laboratory (Department of Computer Science and
|
||||
* Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the
|
||||
* DARPA SSITH research programme.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <bsp.h>
|
||||
|
||||
int main(
|
||||
int argc,
|
||||
void **args
|
||||
);
|
||||
|
||||
rtems_task Init(
|
||||
rtems_task_argument ignored
|
||||
);
|
||||
|
||||
/* Init: task entry point for the RTEMS port. Calls the benchmark's main()
   with argc = 0 and a NULL argument vector, then terminates through exit()
   using main's return value. The task argument is not used. */
rtems_task Init(
|
||||
rtems_task_argument ignored
|
||||
)
|
||||
{
|
||||
int ret = main(0, NULL);
|
||||
exit(ret);
|
||||
}
|
||||
|
||||
/* configuration information */
|
||||
#define CONFIGURE_APPLICATION_NEEDS_SIMPLE_CONSOLE_DRIVER
|
||||
#define CONFIGURE_APPLICATION_NEEDS_CLOCK_DRIVER
|
||||
|
||||
#define CONFIGURE_MAXIMUM_TASKS 20
|
||||
|
||||
#define CONFIGURE_RTEMS_INIT_TASKS_TABLE
|
||||
|
||||
#define CONFIGURE_INIT
|
||||
|
||||
#include <rtems/confdefs.h>
|
153
riscv-coremark/coremark/simple/core_portme.c
Executable file → Normal file
153
riscv-coremark/coremark/simple/core_portme.c
Executable file → Normal file
@ -21,108 +21,129 @@ Original Author: Shay Gal-on
|
||||
#include "coremark.h"
|
||||
|
||||
#if VALIDATION_RUN
|
||||
volatile ee_s32 seed1_volatile=0x3415;
|
||||
volatile ee_s32 seed2_volatile=0x3415;
|
||||
volatile ee_s32 seed3_volatile=0x66;
|
||||
volatile ee_s32 seed1_volatile = 0x3415;
|
||||
volatile ee_s32 seed2_volatile = 0x3415;
|
||||
volatile ee_s32 seed3_volatile = 0x66;
|
||||
#endif
|
||||
#if PERFORMANCE_RUN
|
||||
volatile ee_s32 seed1_volatile=0x0;
|
||||
volatile ee_s32 seed2_volatile=0x0;
|
||||
volatile ee_s32 seed3_volatile=0x66;
|
||||
volatile ee_s32 seed1_volatile = 0x0;
|
||||
volatile ee_s32 seed2_volatile = 0x0;
|
||||
volatile ee_s32 seed3_volatile = 0x66;
|
||||
#endif
|
||||
#if PROFILE_RUN
|
||||
volatile ee_s32 seed1_volatile=0x8;
|
||||
volatile ee_s32 seed2_volatile=0x8;
|
||||
volatile ee_s32 seed3_volatile=0x8;
|
||||
volatile ee_s32 seed1_volatile = 0x8;
|
||||
volatile ee_s32 seed2_volatile = 0x8;
|
||||
volatile ee_s32 seed3_volatile = 0x8;
|
||||
#endif
|
||||
volatile ee_s32 seed4_volatile=ITERATIONS;
|
||||
volatile ee_s32 seed5_volatile=0;
|
||||
volatile ee_s32 seed4_volatile = ITERATIONS;
|
||||
volatile ee_s32 seed5_volatile = 0;
|
||||
/* Porting : Timing functions
|
||||
How to capture time and convert to seconds must be ported to whatever is supported by the platform.
|
||||
e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc.
|
||||
Sample implementation for standard time.h and windows.h definitions included.
|
||||
How to capture time and convert to seconds must be ported to whatever is
|
||||
supported by the platform. e.g. Read value from on board RTC, read value from
|
||||
cpu clock cycles performance counter etc. Sample implementation for standard
|
||||
time.h and windows.h definitions included.
|
||||
*/
|
||||
/* Define : TIMER_RES_DIVIDER
|
||||
Divider to trade off timer resolution and total time that can be measured.
|
||||
Divider to trade off timer resolution and total time that can be
|
||||
measured.
|
||||
|
||||
Use lower values to increase resolution, but make sure that overflow does not occur.
|
||||
If there are issues with the return value overflowing, increase this value.
|
||||
*/
|
||||
#define NSECS_PER_SEC CLOCKS_PER_SEC
|
||||
#define CORETIMETYPE clock_t
|
||||
#define GETMYTIME(_t) (*_t=clock())
|
||||
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
|
||||
#define TIMER_RES_DIVIDER 1
|
||||
Use lower values to increase resolution, but make sure that overflow
|
||||
does not occur. If there are issues with the return value overflowing,
|
||||
increase this value.
|
||||
*/
|
||||
#define NSECS_PER_SEC CLOCKS_PER_SEC
|
||||
#define CORETIMETYPE clock_t
|
||||
#define GETMYTIME(_t) (*_t = clock())
|
||||
#define MYTIMEDIFF(fin, ini) ((fin) - (ini))
|
||||
#define TIMER_RES_DIVIDER 1
|
||||
#define SAMPLE_TIME_IMPLEMENTATION 1
|
||||
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
|
||||
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
|
||||
|
||||
/** Define Host specific (POSIX), or target specific global time variables. */
|
||||
static CORETIMETYPE start_time_val, stop_time_val;
|
||||
|
||||
/* Function : start_time
|
||||
This function will be called right before starting the timed portion of the benchmark.
|
||||
This function will be called right before starting the timed portion of
|
||||
the benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the example code)
|
||||
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
|
||||
Implementation may be capturing a system timer (as implemented in the
|
||||
example code) or zeroing some system parameters - e.g. setting the cpu clocks
|
||||
cycles to 0.
|
||||
*/
|
||||
void start_time(void) {
|
||||
GETMYTIME(&start_time_val );
|
||||
void
|
||||
start_time(void)
|
||||
{
|
||||
GETMYTIME(&start_time_val);
|
||||
}
|
||||
/* Function : stop_time
|
||||
This function will be called right after ending the timed portion of the benchmark.
|
||||
This function will be called right after ending the timed portion of the
|
||||
benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the example code)
|
||||
or other system parameters - e.g. reading the current value of cpu cycles counter.
|
||||
Implementation may be capturing a system timer (as implemented in the
|
||||
example code) or other system parameters - e.g. reading the current value of
|
||||
cpu cycles counter.
|
||||
*/
|
||||
void stop_time(void) {
|
||||
GETMYTIME(&stop_time_val );
|
||||
void
|
||||
stop_time(void)
|
||||
{
|
||||
GETMYTIME(&stop_time_val);
|
||||
}
|
||||
/* Function : get_time
|
||||
Return an abstract "ticks" number that signifies time on the system.
|
||||
|
||||
Actual value returned may be cpu cycles, milliseconds or any other value,
|
||||
as long as it can be converted to seconds by <time_in_secs>.
|
||||
This methodology is taken to accomodate any hardware or simulated platform.
|
||||
The sample implementation returns millisecs by default,
|
||||
and the resolution is controlled by <TIMER_RES_DIVIDER>
|
||||
Return an abstract "ticks" number that signifies time on the system.
|
||||
|
||||
Actual value returned may be cpu cycles, milliseconds or any other
|
||||
value, as long as it can be converted to seconds by <time_in_secs>. This
|
||||
methodology is taken to accommodate any hardware or simulated platform. The
|
||||
sample implementation returns millisecs by default, and the resolution is
|
||||
controlled by <TIMER_RES_DIVIDER>
|
||||
*/
|
||||
CORE_TICKS get_time(void) {
|
||||
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
|
||||
return elapsed;
|
||||
CORE_TICKS
|
||||
get_time(void)
|
||||
{
|
||||
CORE_TICKS elapsed
|
||||
= (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
|
||||
return elapsed;
|
||||
}
|
||||
/* Function : time_in_secs
|
||||
Convert the value returned by get_time to seconds.
|
||||
Convert the value returned by get_time to seconds.
|
||||
|
||||
The <secs_ret> type is used to accomodate systems with no support for floating point.
|
||||
Default implementation implemented by the EE_TICKS_PER_SEC macro above.
|
||||
The <secs_ret> type is used to accommodate systems with no support for
|
||||
floating point. Default implementation implemented by the EE_TICKS_PER_SEC
|
||||
macro above.
|
||||
*/
|
||||
secs_ret time_in_secs(CORE_TICKS ticks) {
|
||||
secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
|
||||
return retval;
|
||||
secs_ret
|
||||
time_in_secs(CORE_TICKS ticks)
|
||||
{
|
||||
secs_ret retval = ((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
|
||||
return retval;
|
||||
}
|
||||
|
||||
ee_u32 default_num_contexts=1;
|
||||
ee_u32 default_num_contexts = 1;
|
||||
|
||||
/* Function : portable_init
|
||||
Target specific initialization code
|
||||
Test for some common mistakes.
|
||||
Target specific initialization code
|
||||
Test for some common mistakes.
|
||||
*/
|
||||
void portable_init(core_portable *p, int *argc, char *argv[])
|
||||
void
|
||||
portable_init(core_portable *p, int *argc, char *argv[])
|
||||
{
|
||||
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) {
|
||||
ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n");
|
||||
}
|
||||
if (sizeof(ee_u32) != 4) {
|
||||
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
|
||||
}
|
||||
p->portable_id=1;
|
||||
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *))
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR! Please define ee_ptr_int to a type that holds a "
|
||||
"pointer!\n");
|
||||
}
|
||||
if (sizeof(ee_u32) != 4)
|
||||
{
|
||||
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
|
||||
}
|
||||
p->portable_id = 1;
|
||||
}
|
||||
/* Function : portable_fini
|
||||
Target specific final code
|
||||
Target specific final code
|
||||
*/
|
||||
void portable_fini(core_portable *p)
|
||||
void
|
||||
portable_fini(core_portable *p)
|
||||
{
|
||||
p->portable_id=0;
|
||||
p->portable_id = 0;
|
||||
}
|
||||
|
||||
|
||||
|
176
riscv-coremark/coremark/simple/core_portme.h
Executable file → Normal file
176
riscv-coremark/coremark/simple/core_portme.h
Executable file → Normal file
@ -17,176 +17,188 @@ Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
/* Topic : Description
|
||||
This file contains configuration constants required to execute on different platforms
|
||||
This file contains configuration constants required to execute on
|
||||
different platforms
|
||||
*/
|
||||
#ifndef CORE_PORTME_H
|
||||
#define CORE_PORTME_H
|
||||
/************************/
|
||||
/* Data types and settings */
|
||||
/************************/
|
||||
/* Configuration : HAS_FLOAT
|
||||
Define to 1 if the platform supports floating point.
|
||||
/* Configuration : HAS_FLOAT
|
||||
Define to 1 if the platform supports floating point.
|
||||
*/
|
||||
#ifndef HAS_FLOAT
|
||||
#ifndef HAS_FLOAT
|
||||
#define HAS_FLOAT 1
|
||||
#endif
|
||||
/* Configuration : HAS_TIME_H
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef HAS_TIME_H
|
||||
#define HAS_TIME_H 1
|
||||
#endif
|
||||
/* Configuration : USE_CLOCK
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef USE_CLOCK
|
||||
#define USE_CLOCK 1
|
||||
#endif
|
||||
/* Configuration : HAS_STDIO
|
||||
Define to 1 if the platform has stdio.h.
|
||||
Define to 1 if the platform has stdio.h.
|
||||
*/
|
||||
#ifndef HAS_STDIO
|
||||
#define HAS_STDIO 1
|
||||
#endif
|
||||
/* Configuration : HAS_PRINTF
|
||||
Define to 1 if the platform has stdio.h and implements the printf function.
|
||||
Define to 1 if the platform has stdio.h and implements the printf
|
||||
function.
|
||||
*/
|
||||
#ifndef HAS_PRINTF
|
||||
#define HAS_PRINTF 1
|
||||
#endif
|
||||
|
||||
/* Configuration : CORE_TICKS
|
||||
Define type of return from the timing functions.
|
||||
Define type of return from the timing functions.
|
||||
*/
|
||||
#include <time.h>
|
||||
typedef clock_t CORE_TICKS;
|
||||
|
||||
/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
|
||||
Initialize these strings per platform
|
||||
Initialize these strings per platform
|
||||
*/
|
||||
#ifndef COMPILER_VERSION
|
||||
#ifdef __GNUC__
|
||||
#define COMPILER_VERSION "GCC"__VERSION__
|
||||
#else
|
||||
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
|
||||
#endif
|
||||
#ifndef COMPILER_VERSION
|
||||
#ifdef __GNUC__
|
||||
#define COMPILER_VERSION "GCC"__VERSION__
|
||||
#else
|
||||
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
|
||||
#endif
|
||||
#ifndef COMPILER_FLAGS
|
||||
#define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
|
||||
#endif
|
||||
#ifndef MEM_LOCATION
|
||||
#define MEM_LOCATION "STACK"
|
||||
#ifndef COMPILER_FLAGS
|
||||
#define COMPILER_FLAGS \
|
||||
FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
|
||||
#endif
|
||||
#ifndef MEM_LOCATION
|
||||
#define MEM_LOCATION "STACK"
|
||||
#endif
|
||||
|
||||
/* Data Types :
|
||||
To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in <core_portme.h>.
|
||||
|
||||
*Imprtant* :
|
||||
ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!!
|
||||
To avoid compiler issues, define the data types that need to be used for
|
||||
8b, 16b and 32b in <core_portme.h>.
|
||||
|
||||
*Important* :
|
||||
ee_ptr_int needs to be the data type used to hold pointers, otherwise
|
||||
coremark may fail!!!
|
||||
*/
|
||||
typedef signed short ee_s16;
|
||||
typedef signed short ee_s16;
|
||||
typedef unsigned short ee_u16;
|
||||
typedef signed int ee_s32;
|
||||
typedef double ee_f32;
|
||||
typedef unsigned char ee_u8;
|
||||
typedef unsigned int ee_u32;
|
||||
typedef ee_u32 ee_ptr_int;
|
||||
typedef size_t ee_size_t;
|
||||
typedef signed int ee_s32;
|
||||
typedef double ee_f32;
|
||||
typedef unsigned char ee_u8;
|
||||
typedef unsigned int ee_u32;
|
||||
typedef ee_u32 ee_ptr_int;
|
||||
typedef size_t ee_size_t;
|
||||
/* align_mem :
|
||||
This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks.
|
||||
This macro is used to align an offset to point to a 32b value. It is
|
||||
used in the Matrix algorithm to initialize the input memory blocks.
|
||||
*/
|
||||
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3))
|
||||
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3))
|
||||
|
||||
/* Configuration : SEED_METHOD
|
||||
Defines method to get seed values that cannot be computed at compile time.
|
||||
|
||||
Valid values :
|
||||
SEED_ARG - from command line.
|
||||
SEED_FUNC - from a system function.
|
||||
SEED_VOLATILE - from volatile variables.
|
||||
Defines method to get seed values that cannot be computed at compile
|
||||
time.
|
||||
|
||||
Valid values :
|
||||
SEED_ARG - from command line.
|
||||
SEED_FUNC - from a system function.
|
||||
SEED_VOLATILE - from volatile variables.
|
||||
*/
|
||||
#ifndef SEED_METHOD
|
||||
#define SEED_METHOD SEED_VOLATILE
|
||||
#endif
|
||||
|
||||
/* Configuration : MEM_METHOD
|
||||
Defines method to get a block of memry.
|
||||
|
||||
Valid values :
|
||||
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
|
||||
MEM_STATIC - to use a static memory array.
|
||||
MEM_STACK - to allocate the data block on the stack (NYI).
|
||||
Defines method to get a block of memory.
|
||||
|
||||
Valid values :
|
||||
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
|
||||
MEM_STATIC - to use a static memory array.
|
||||
MEM_STACK - to allocate the data block on the stack (NYI).
|
||||
*/
|
||||
#ifndef MEM_METHOD
|
||||
#define MEM_METHOD MEM_STACK
|
||||
#endif
|
||||
|
||||
/* Configuration : MULTITHREAD
|
||||
Define for parallel execution
|
||||
|
||||
Valid values :
|
||||
1 - only one context (default).
|
||||
N>1 - will execute N copies in parallel.
|
||||
|
||||
Note :
|
||||
If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined.
|
||||
|
||||
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK> to enable them.
|
||||
|
||||
It is valid to have a different implementation of <core_start_parallel> and <core_end_parallel> in <core_portme.c>,
|
||||
to fit a particular architecture.
|
||||
Define for parallel execution
|
||||
|
||||
Valid values :
|
||||
1 - only one context (default).
|
||||
N>1 - will execute N copies in parallel.
|
||||
|
||||
Note :
|
||||
If this flag is defined to more than 1, an implementation for launching
|
||||
parallel contexts must be defined.
|
||||
|
||||
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK>
|
||||
to enable them.
|
||||
|
||||
It is valid to have a different implementation of <core_start_parallel>
|
||||
and <core_end_parallel> in <core_portme.c>, to fit a particular architecture.
|
||||
*/
|
||||
#ifndef MULTITHREAD
|
||||
#define MULTITHREAD 1
|
||||
#define USE_PTHREAD 0
|
||||
#define USE_FORK 0
|
||||
#define USE_SOCKET 0
|
||||
#define USE_FORK 0
|
||||
#define USE_SOCKET 0
|
||||
#endif
|
||||
|
||||
/* Configuration : MAIN_HAS_NOARGC
|
||||
Needed if platform does not support getting arguments to main.
|
||||
|
||||
Valid values :
|
||||
0 - argc/argv to main is supported
|
||||
1 - argc/argv to main is not supported
|
||||
|
||||
Note :
|
||||
This flag only matters if MULTITHREAD has been defined to a value greater then 1.
|
||||
Needed if platform does not support getting arguments to main.
|
||||
|
||||
Valid values :
|
||||
0 - argc/argv to main is supported
|
||||
1 - argc/argv to main is not supported
|
||||
|
||||
Note :
|
||||
This flag only matters if MULTITHREAD has been defined to a value
|
||||
greater than 1.
|
||||
*/
|
||||
#ifndef MAIN_HAS_NOARGC
|
||||
#ifndef MAIN_HAS_NOARGC
|
||||
#define MAIN_HAS_NOARGC 0
|
||||
#endif
|
||||
|
||||
/* Configuration : MAIN_HAS_NORETURN
|
||||
Needed if platform does not support returning a value from main.
|
||||
|
||||
Valid values :
|
||||
0 - main returns an int, and return value will be 0.
|
||||
1 - platform does not support returning a value from main
|
||||
Needed if platform does not support returning a value from main.
|
||||
|
||||
Valid values :
|
||||
0 - main returns an int, and return value will be 0.
|
||||
1 - platform does not support returning a value from main
|
||||
*/
|
||||
#ifndef MAIN_HAS_NORETURN
|
||||
#define MAIN_HAS_NORETURN 0
|
||||
#endif
|
||||
|
||||
/* Variable : default_num_contexts
|
||||
Not used for this simple port, must cintain the value 1.
|
||||
Not used for this simple port, must contain the value 1.
|
||||
*/
|
||||
extern ee_u32 default_num_contexts;
|
||||
|
||||
typedef struct CORE_PORTABLE_S {
|
||||
ee_u8 portable_id;
|
||||
typedef struct CORE_PORTABLE_S
|
||||
{
|
||||
ee_u8 portable_id;
|
||||
} core_portable;
|
||||
|
||||
/* target specific init/fini */
|
||||
void portable_init(core_portable *p, int *argc, char *argv[]);
|
||||
void portable_fini(core_portable *p);
|
||||
|
||||
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) && !defined(VALIDATION_RUN)
|
||||
#if (TOTAL_DATA_SIZE==1200)
|
||||
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) \
|
||||
&& !defined(VALIDATION_RUN)
|
||||
#if (TOTAL_DATA_SIZE == 1200)
|
||||
#define PROFILE_RUN 1
|
||||
#elif (TOTAL_DATA_SIZE==2000)
|
||||
#elif (TOTAL_DATA_SIZE == 2000)
|
||||
#define PERFORMANCE_RUN 1
|
||||
#else
|
||||
#define VALIDATION_RUN 1
|
||||
|
@ -125,6 +125,28 @@ void portable_free(void *p) {
|
||||
#if SAMPLE_TIME_IMPLEMENTATION
|
||||
/** Define Host specific (POSIX), or target specific global time variables. */
|
||||
static CORETIMETYPE start_time_val, stop_time_val;
|
||||
static unsigned long start_instr_val, stop_instr_val;
|
||||
|
||||
/* Function: minstretFunc
|
||||
This function reads the MINSTRET csr and returns its current value.
|
||||
*/
|
||||
/* Takes no arguments; returns the value obtained from read_csr(minstret).
   NOTE(review): read_csr is defined outside this hunk; the result is stored
   in an unsigned long, so its width follows the target's long - confirm this
   is wide enough for the counter on the intended configuration. */
unsigned long minstretFunc(void)
|
||||
{
|
||||
unsigned long minstretRead = read_csr(minstret);
|
||||
//ee_printf("Minstret is %lu\n", minstretRead);
|
||||
return minstretRead;
|
||||
}
|
||||
|
||||
/* Function: minstretDiff
|
||||
This function will take the difference between the first and second reads from the
|
||||
MINSTRET csr to determine the number of machine instructions retired between two points
|
||||
of time
|
||||
*/
|
||||
/* Returns MYTIMEDIFF(stop_instr_val, start_instr_val), i.e. the result of
   applying the timer-difference macro to the two file-scope instruction
   counts.
   NOTE(review): MYTIMEDIFF is defined outside this hunk; presumably it
   expands to (fin) - (ini) - confirm. */
unsigned long minstretDiff(void)
|
||||
{
|
||||
unsigned long minstretDifference = MYTIMEDIFF(stop_instr_val, start_instr_val);
|
||||
return minstretDifference;
|
||||
}
|
||||
|
||||
/* Function: start_time
|
||||
This function will be called right before starting the timed portion of the benchmark.
|
||||
@ -133,9 +155,10 @@ static CORETIMETYPE start_time_val, stop_time_val;
|
||||
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
|
||||
*/
|
||||
void start_time(void) {
|
||||
start_instr_val = minstretFunc();
|
||||
GETMYTIME(start_time_val);
|
||||
ee_printf("Timer started\n");
|
||||
ee_printf(" MTIME: %u\n", start_time_val);
|
||||
//ee_printf("Timer started\n");
|
||||
//ee_printf(" MTIME: %u\n", start_time_val);
|
||||
#if CALLGRIND_RUN
|
||||
CALLGRIND_START_INSTRUMENTATION
|
||||
#endif
|
||||
@ -157,8 +180,9 @@ void stop_time(void) {
|
||||
asm volatile("int3");/*1 */
|
||||
#endif
|
||||
GETMYTIME(stop_time_val);
|
||||
ee_printf("Timer stopped\n");
|
||||
ee_printf(" MTIME: %u\n", stop_time_val);
|
||||
stop_instr_val = minstretFunc();
|
||||
//ee_printf("Timer stopped\n");
|
||||
//ee_printf(" MTIME: %u\n", stop_time_val);
|
||||
}
|
||||
/* Function: get_time
|
||||
Return an abstract "ticks" number that signifies time on the system.
|
||||
@ -171,7 +195,8 @@ void stop_time(void) {
|
||||
*/
|
||||
CORE_TICKS get_time(void) {
|
||||
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
|
||||
ee_printf(" Elapsed MTIME: %u\n", elapsed);
|
||||
//ee_printf(" Elapsed MTIME: %u\n", elapsed);
|
||||
//ee_printf(" Elapsed MINSTRET: %lu\n", minstretDiff());
|
||||
return elapsed;
|
||||
}
|
||||
/* Function: time_in_secs
|
||||
@ -183,7 +208,7 @@ CORE_TICKS get_time(void) {
|
||||
secs_ret time_in_secs(CORE_TICKS ticks) {
|
||||
secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
|
||||
int retvalint = (int)retval;
|
||||
ee_printf(" RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retvalint);
|
||||
//ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retvalint);
|
||||
return retval;
|
||||
}
|
||||
#else
|
||||
|
@ -34,7 +34,8 @@
|
||||
`define XLEN 64
|
||||
|
||||
//`define MISA (32'h00000104)
|
||||
`define MISA (32'h00001104 | 1<<5 | 1<<18 | 1 << 20 | 1 << 12 | 1 << 0)
|
||||
//`define MISA (32'h00001104 | 1<<5 | 1<<18 | 1 << 20 | 1 << 12 | 1 << 0)
|
||||
`define MISA (32'h00000104 | 0 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0)
|
||||
`define ZCSR_SUPPORTED 1
|
||||
`define COUNTERS 32
|
||||
`define ZCOUNTERS_SUPPORTED 1
|
||||
@ -53,7 +54,7 @@
|
||||
`define DTLB_ENTRIES 32
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 16
|
||||
`define PMP_ENTRIES 64
|
||||
|
||||
// Address space
|
||||
`define RESET_VECTOR 64'h0000000080000000
|
||||
@ -66,23 +67,23 @@
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
`define BOOTTIM_SUPPORTED 1'b1
|
||||
`define BOOTTIM_BASE 34'h00001000
|
||||
`define BOOTTIM_RANGE 34'h00000FFF
|
||||
`define BOOTTIM_BASE 56'h00001000
|
||||
`define BOOTTIM_RANGE 56'h00000FFF
|
||||
`define TIM_SUPPORTED 1'b1
|
||||
`define TIM_BASE 34'h80000000
|
||||
`define TIM_RANGE 34'h07FFFFFF
|
||||
`define TIM_BASE 56'h80000000
|
||||
`define TIM_RANGE 56'h07FFFFFF
|
||||
`define CLINT_SUPPORTED 1'b1
|
||||
`define CLINT_BASE 34'h02000000
|
||||
`define CLINT_RANGE 34'h0000FFFF
|
||||
`define CLINT_BASE 56'h02000000
|
||||
`define CLINT_RANGE 56'h0000FFFF
|
||||
`define GPIO_SUPPORTED 1'b1
|
||||
`define GPIO_BASE 34'h10012000
|
||||
`define GPIO_RANGE 34'h000000FF
|
||||
`define GPIO_BASE 56'h10012000
|
||||
`define GPIO_RANGE 56'h000000FF
|
||||
`define UART_SUPPORTED 1'b1
|
||||
`define UART_BASE 34'h10000000
|
||||
`define UART_RANGE 34'h00000007
|
||||
`define UART_BASE 56'h10000000
|
||||
`define UART_RANGE 56'h00000007
|
||||
`define PLIC_SUPPORTED 1'b1
|
||||
`define PLIC_BASE 34'h0C000000
|
||||
`define PLIC_RANGE 34'h03FFFFFF
|
||||
`define PLIC_BASE 56'h0C000000
|
||||
`define PLIC_RANGE 56'h03FFFFFF
|
||||
|
||||
// Test modes
|
||||
|
||||
|
1024
wally-pipelined/config/rv32icfd/BTBPredictor.txt
Normal file
1024
wally-pipelined/config/rv32icfd/BTBPredictor.txt
Normal file
File diff suppressed because it is too large
Load Diff
1024
wally-pipelined/config/rv32icfd/twoBitPredictor.txt
Normal file
1024
wally-pipelined/config/rv32icfd/twoBitPredictor.txt
Normal file
File diff suppressed because it is too large
Load Diff
106
wally-pipelined/config/rv32icfd/wally-config.vh
Normal file
106
wally-pipelined/config/rv32icfd/wally-config.vh
Normal file
@ -0,0 +1,106 @@
|
||||
//////////////////////////////////////////
|
||||
// wally-config.vh
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 4 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Specify which features are configured
|
||||
// Macros to determine which modes are supported based on MISA
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
// include shared configuration
|
||||
`include "wally-shared.vh"
|
||||
|
||||
`define BUILDROOT 0
|
||||
`define BUSYBEAR 0
|
||||
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
`define XLEN 32
|
||||
|
||||
`define MISA (32'h00000104 | 1 << 5 | 1 << 20 | 1 << 18 | 1 << 12)
|
||||
`define ZCSR_SUPPORTED 1
|
||||
`define COUNTERS 32
|
||||
`define ZCOUNTERS_SUPPORTED 1
|
||||
|
||||
// Microarchitectural Features
|
||||
`define UARCH_PIPELINED 1
|
||||
`define UARCH_SUPERSCALR 0
|
||||
`define UARCH_SINGLECYCLE 0
|
||||
`define MEM_DCACHE 0
|
||||
`define MEM_DTIM 1
|
||||
`define MEM_ICACHE 0
|
||||
`define MEM_VIRTMEM 1
|
||||
`define VECTORED_INTERRUPTS_SUPPORTED 1
|
||||
|
||||
`define ITLB_ENTRIES 32
|
||||
`define DTLB_ENTRIES 32
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 16
|
||||
|
||||
// Address space
|
||||
`define RESET_VECTOR 32'h80000000
|
||||
|
||||
// Peripheral Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
// *** each of these is `PA_BITS wide. Is this parameterizable INSIDE the config file?
|
||||
`define BOOTTIM_SUPPORTED 1'b1
|
||||
`define BOOTTIM_BASE 34'h00001000
|
||||
`define BOOTTIM_RANGE 34'h00000FFF
|
||||
`define TIM_SUPPORTED 1'b1
|
||||
`define TIM_BASE 34'h80000000
|
||||
`define TIM_RANGE 34'h07FFFFFF
|
||||
`define CLINT_SUPPORTED 1'b1
|
||||
`define CLINT_BASE 34'h02000000
|
||||
`define CLINT_RANGE 34'h0000FFFF
|
||||
`define GPIO_SUPPORTED 1'b1
|
||||
`define GPIO_BASE 34'h10012000
|
||||
`define GPIO_RANGE 34'h000000FF
|
||||
`define UART_SUPPORTED 1'b1
|
||||
`define UART_BASE 34'h10000000
|
||||
`define UART_RANGE 34'h00000007
|
||||
`define PLIC_SUPPORTED 1'b1
|
||||
`define PLIC_BASE 34'h0C000000
|
||||
`define PLIC_RANGE 34'h03FFFFFF
|
||||
|
||||
// Bus Interface width
|
||||
`define AHBW 32
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
`define GPIO_LOOPBACK_TEST 1
|
||||
|
||||
// Hardware configuration
|
||||
`define UART_PRESCALE 1
|
||||
|
||||
// Interrupt configuration
|
||||
`define PLIC_NUM_SRC 4
|
||||
// comment out the following if >=32 sources
|
||||
`define PLIC_NUM_SRC_LT_32
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 4
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/rv32icfd/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/rv32icfd/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
@ -46,7 +46,7 @@
|
||||
`define MEM_DCACHE 0
|
||||
`define MEM_DTIM 1
|
||||
`define MEM_ICACHE 0
|
||||
`define MEM_VIRTMEM 0\1
|
||||
`define MEM_VIRTMEM 1
|
||||
`define VECTORED_INTERRUPTS_SUPPORTED 1
|
||||
|
||||
`define ITLB_ENTRIES 32
|
||||
@ -56,10 +56,7 @@
|
||||
`define PMP_ENTRIES 16
|
||||
|
||||
// Address space
|
||||
`define RESET_VECTOR 64'h0000000080000000
|
||||
|
||||
// Bus Interface width
|
||||
`define AHBW 64
|
||||
`define RESET_VECTOR 64'h80000000
|
||||
|
||||
// Peripheral Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
@ -84,6 +81,9 @@
|
||||
`define PLIC_BASE 56'h0C000000
|
||||
`define PLIC_RANGE 56'h03FFFFFF
|
||||
|
||||
// Bus Interface width
|
||||
`define AHBW 64
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
@ -101,6 +101,7 @@
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/rv64icfd/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/rv64icfd/BTBPredictor.txt"
|
||||
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
|
57
wally-pipelined/linux-testgen/WALLY-README.txt
Normal file
57
wally-pipelined/linux-testgen/WALLY-README.txt
Normal file
@ -0,0 +1,57 @@
|
||||
If you do not need to update the Linux image, then go to ./linux-testvectors and
|
||||
use tvCopier.py or tvLinker.sh to copy/link premade RAMs and testvectors from Tera.
|
||||
The RAMs are needed for Wally to run the Linux code, and the testvectors are needed
|
||||
to verify Wally is executing the code correctly.
|
||||
|
||||
If you instead wish to regenerate the RAMs and testvectors from a new Linux image,
|
||||
you'll need to build the new Linux image, simulate it, and parse its output,
|
||||
as described below.
|
||||
|
||||
*To build a new Linux image:
|
||||
1. Git clone the Buildroot repository to ./buildroot:
|
||||
git clone https://github.com/buildroot/buildroot.git
|
||||
For reference, Wally (*** will) be proven to work on an image built using
|
||||
Buildroot when the following was the most recent commit to the Buildroot repo:
|
||||
commit 4047e10ed6e20492bae572d4929eaa5d67eed746
|
||||
Author: Gwenhael Goavec-Merou <gwenhael.goavec-merou@trabucayre.com>
|
||||
Date: Wed Jun 30 06:27:10 2021 +0200
|
||||
|
||||
2. If you wish to modify the configs, then in ./buildroot:
|
||||
a. Run "make menuconfig" or "make linux-menuconfig" or "make busybox-menuconfig".
|
||||
b. Use the TUI (terminal UI) to load in the existing configs.
|
||||
|
||||
For menuconfig, you can load in the source file from
|
||||
"../buildroot-config-src/main.config"
|
||||
|
||||
For linux-menuconfig or busybox-menuconfig, load in from
|
||||
"../../../../buildroot-config-src/<type>.config"
|
||||
because for linux and busybox, make traverses down to
|
||||
./buildroot/output/build/<linux or busybox>.
|
||||
|
||||
One annoying thing about the TUI is that if it has a path already loaded,
|
||||
then before you can enter the new path to buildroot-config-src, you need to
|
||||
delete the existing one from the textbox. Doing so requires more than backspace.
|
||||
Once you've deleted as much of the existing path as you can see, arrow left to
|
||||
check if there is more text you need to delete.
|
||||
|
||||
c. Likewise, when you are done editing, tell the TUI to save to the same location.
|
||||
|
||||
3. Finally go to ./buildroot-config-src and run make-buildroot.sh.
|
||||
This script copies ./buildroot-config-src/main.config to ./buildroot/.config
|
||||
and then invokes make. This is clumsy but effective because buildroot
|
||||
sometimes does weird things to .config, like moving it to .config.old and
|
||||
making a new .config -- doing so can really mess up symbolic/hard links.
|
||||
|
||||
4. If you'd like debugging symbols, then reconfigure Buildroot to output "vmlinux"
|
||||
and run make-buildroot again.
|
||||
|
||||
*To generate new RAMs and testvectors from a Linux image:
|
||||
1. Symlink ./buildroot-image-output to either your new image directory in ./buildroot/output/images
|
||||
or the existing image at /courses/e190ax/buildroot-image-output on Tera.
|
||||
This might require first deleting the empty buildroot-image-output directory.
|
||||
2. Then run ./testvector-generation/logBuildrootMem.sh to generate RAMs.
|
||||
3. Then run ./testvector-generation/logAllBuildroot.sh to generate testvectors.
|
||||
|
||||
These latter two steps require QEMU.
|
||||
Note that you can only have one instance of QEMU open at a time!
|
||||
At least on Tera, it seems. Check "ps -ef" to see if anybody else is running QEMU.
|
1195
wally-pipelined/linux-testgen/buildroot-config-src/busybox.config
Normal file
1195
wally-pipelined/linux-testgen/buildroot-config-src/busybox.config
Normal file
File diff suppressed because it is too large
Load Diff
1233
wally-pipelined/linux-testgen/buildroot-config-src/linux.config
Normal file
1233
wally-pipelined/linux-testgen/buildroot-config-src/linux.config
Normal file
File diff suppressed because it is too large
Load Diff
3839
wally-pipelined/linux-testgen/buildroot-config-src/main.config
Normal file
3839
wally-pipelined/linux-testgen/buildroot-config-src/main.config
Normal file
File diff suppressed because it is too large
Load Diff
3
wally-pipelined/linux-testgen/buildroot-config-src/make-buildroot.sh
Executable file
3
wally-pipelined/linux-testgen/buildroot-config-src/make-buildroot.sh
Executable file
@ -0,0 +1,3 @@
|
||||
cp main.config ../buildroot/.config
|
||||
cd ../buildroot
|
||||
make
|
@ -1,9 +0,0 @@
|
||||
#! /usr/bin/python3
|
||||
test_dir = '/courses/e190ax/buildroot_boot/'
|
||||
infiles = ['bootmemGDB.txt', 'ramGDB.txt']
|
||||
outfiles = ['bootmem.txt', 'ram.txt']
|
||||
for i in range(len(infiles)):
|
||||
with open(f'{test_dir}{infiles[i]}', 'r') as f:
|
||||
with open(f'{test_dir}{outfiles[i]}', 'w') as w:
|
||||
for l in f:
|
||||
w.write(f'{"".join([x[2:] for x in l.split()[:0:-1]])}\n')
|
@ -1,10 +0,0 @@
|
||||
set pagination off
|
||||
target extended-remote :1234
|
||||
b *0xffffffe00020144e
|
||||
c
|
||||
c
|
||||
c
|
||||
c
|
||||
set confirm off
|
||||
kill
|
||||
q
|
@ -0,0 +1 @@
|
||||
This file only exists so that git will create ./.
|
10
wally-pipelined/linux-testgen/linux-testvectors/tvUnlinker.sh
Executable file
10
wally-pipelined/linux-testgen/linux-testvectors/tvUnlinker.sh
Executable file
@ -0,0 +1,10 @@
|
||||
# This could be nice to use if you want to mess with the testvectors
|
||||
# without corrupting the stable copies on Tera.
|
||||
unlink parsedCSRs.txt
|
||||
unlink parsedMemRead.txt
|
||||
unlink parsedMemWrite.txt
|
||||
unlink parsedPC.txt
|
||||
unlink parsedRegs.txt
|
||||
unlink bootmem.txt
|
||||
unlink ram.txt
|
||||
echo "Done!"
|
@ -1,40 +0,0 @@
|
||||
# Oftentimes this script runs so long you'll go to sleep.
|
||||
# But you don't want the script to die when your computer goes to sleep.
|
||||
# So consider invoking this with nohup (i.e. "nohup ./logAllBuildroot.sh")
|
||||
# You can run "tail -f nohup.out" to see what would've
|
||||
# outputted to the terminal if you didn't use nohup
|
||||
|
||||
# =========== Debug the Process ==========
|
||||
# Uncomment this version for GDB/QEMU debugging
|
||||
# - Opens up GDB interactively
|
||||
# - Logs raw QEMU output to qemu_output.txt
|
||||
#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2> qemu_output.txt) & riscv64-unknown-elf-gdb
|
||||
|
||||
# Uncomment this version to generate qemu_output.txt
|
||||
# - Uses GDB script
|
||||
# - Logs raw QEMU output to qemu_output.txt
|
||||
#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>qemu_output.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog_debug
|
||||
|
||||
# Uncomment this version for parse_qemu.py debugging
|
||||
# - Uses qemu_output.txt
|
||||
# - Makes qemu_in_gdb_format.txt
|
||||
# - Logs parse_qemu.py's simulated gdb output to qemu_in_gdb_format.txt
|
||||
#cat qemu_output.txt | ./parse_qemu.py >qemu_in_gdb_format.txt
|
||||
#cat qemu_output.txt | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/"
|
||||
|
||||
# Uncomment this version in case you just want to have qemu_in_gdb_format.txt around
|
||||
# It is often helpful for general debugging
|
||||
(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py >/courses/e190ax/buildroot_boot/qemu_in_gdb_format.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog
|
||||
# Split qemu_in_gdb_format.txt into chunks of 100,000 instructions for easier inspection
|
||||
#cd /courses/e190ax/buildroot_boot
|
||||
#split -d -l 5600000 qemu_in_gdb_format.txt --verbose
|
||||
|
||||
# Uncomment this version for parse_gdb_output.py debugging
|
||||
# - Uses qemu_in_gdb_format.txt
|
||||
# - Logs info needed by buildroot testbench
|
||||
#cat qemu_in_gdb_format.txt | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/"
|
||||
|
||||
# =========== Just Do the Thing ==========
|
||||
# Uncomment this version for the whole thing
|
||||
# - Logs info needed by buildroot testbench
|
||||
#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog
|
@ -1,4 +0,0 @@
|
||||
(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>/dev/null >/dev/null ) &
|
||||
riscv64-unknown-elf-gdb -x gdbinit_mem
|
||||
#sed -i '$d' $file
|
||||
echo "Done"
|
11
wally-pipelined/linux-testgen/testvector-generation/fix_mem.py
Executable file
11
wally-pipelined/linux-testgen/testvector-generation/fix_mem.py
Executable file
@ -0,0 +1,11 @@
|
||||
#! /usr/bin/python3
|
||||
test_dir = '../'
|
||||
gdbMemfileDir = '../linux-testvectors/intermediate-outputs/'
|
||||
fixedMemfileDir = '../linux-testvectors/'
|
||||
infiles = ['bootmemGDB.txt', 'ramGDB.txt']
|
||||
outfiles = ['bootmem.txt', 'ram.txt']
|
||||
for i in range(len(infiles)):
|
||||
with open(f'{gdbMemfileDir}{infiles[i]}', 'r') as f:
|
||||
with open(f'{fixedMemfileDir}{outfiles[i]}', 'w') as w:
|
||||
for l in f:
|
||||
w.write(f'{"".join([x[2:] for x in l.split()[:0:-1]])}\n')
|
@ -0,0 +1,3 @@
|
||||
file ../buildroot-image-output/vmlinux
|
||||
set pagination off
|
||||
target extended-remote :1236
|
@ -1,20 +1,20 @@
|
||||
set pagination off
|
||||
target extended-remote :1234
|
||||
target extended-remote :1235
|
||||
set logging overwrite on
|
||||
set logging redirect on
|
||||
printf "Creating bootmemGDB.txt\n"
|
||||
set logging file /courses/e190ax/buildroot_boot/bootmemGDB.txt
|
||||
set logging file ../linux-testvectors/intermediate-outputs/bootmemGDB.txt
|
||||
set logging on
|
||||
x/4096xb 0x1000
|
||||
set logging off
|
||||
printf "Creating bootmem_untrimmed_GDB.txt\n"
|
||||
printf "Warning - please verify that the second half of bootmem_untrimmed_GDB.txt is all 0s\n"
|
||||
set logging file /courses/e190ax/buildroot_boot/bootmem_untrimmed_GDB.txt
|
||||
set logging file ../linux-testvectors/intermediate-outputs/bootmem_untrimmed_GDB.txt
|
||||
set logging on
|
||||
x/8192xb 0x1000
|
||||
set logging off
|
||||
printf "Creating ramGDB.txt\n"
|
||||
set logging file /courses/e190ax/buildroot_boot/ramGDB.txt
|
||||
set logging file ../linux-testvectors/intermediate-outputs/ramGDB.txt
|
||||
set logging on
|
||||
x/134217728xb 0x80000000
|
||||
set logging off
|
11
wally-pipelined/linux-testgen/testvector-generation/gdbinit_qemulog
Executable file
11
wally-pipelined/linux-testgen/testvector-generation/gdbinit_qemulog
Executable file
@ -0,0 +1,11 @@
|
||||
set pagination off
|
||||
target extended-remote :1236
|
||||
file ../buildroot-image-output/vmlinux
|
||||
b arch_cpu_idle
|
||||
c
|
||||
c
|
||||
c
|
||||
c
|
||||
set confirm off
|
||||
kill
|
||||
q
|
@ -1,9 +1,10 @@
|
||||
set pagination off
|
||||
target extended-remote :1234
|
||||
maint print symbols symbols.txt
|
||||
b *0x000000008020103c
|
||||
c
|
||||
del 1
|
||||
stepi 100
|
||||
stepi 100000
|
||||
set confirm off
|
||||
kill
|
||||
q
|
44
wally-pipelined/linux-testgen/testvector-generation/logAllBuildroot.sh
Executable file
44
wally-pipelined/linux-testgen/testvector-generation/logAllBuildroot.sh
Executable file
@ -0,0 +1,44 @@
|
||||
# Oftentimes this script runs so long you'll go to sleep.
|
||||
# But you don't want the script to die when your computer goes to sleep.
|
||||
# So consider invoking this with nohup (i.e. "nohup ./logAllBuildroot.sh")
|
||||
# You can run "tail -f nohup.out" to see what would've
|
||||
# outputted to the terminal if you didn't use nohup
|
||||
|
||||
customQemu="/courses/e190ax/qemu_sim/rv64_initrd/qemu_experimental/qemu/build/qemu-system-riscv64"
|
||||
#customQemu="qemu-system-riscv64"
|
||||
imageDir="../buildroot-image-output"
|
||||
intermedDir="../linux-testvectors/intermediate-outputs"
|
||||
outDir="../linux-testvectors"
|
||||
|
||||
# =========== Debug the Process ==========
|
||||
# Uncomment this version for QEMU debugging of kernel
|
||||
# - good for poking around VM if it boots up
|
||||
# - good for running QEMU commands (press "Ctrl-A" then "c" to open QEMU command prompt)
|
||||
#$customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio
|
||||
# Uncomment this version for GDB debugging of kernel
|
||||
# - attempts to load in symbols from "vmlinux"
|
||||
# - good for looking at backtraces when Linux gets stuck for some reason
|
||||
#$customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -gdb tcp::1236 -S & riscv64-unknown-elf-gdb -x gdbinit_debug
|
||||
|
||||
# Uncomment this version to generate qemu_output.txt
|
||||
# - Uses GDB script
|
||||
# - Logs raw QEMU output to qemu_output.txt
|
||||
#($customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -gdb tcp::1236 -S 2> $intermedDir/qemu_output.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog_debug
|
||||
|
||||
# Uncomment this version for parse_qemu.py debugging
|
||||
# - Uses qemu_output.txt
|
||||
# - Makes qemu_in_gdb_format.txt
|
||||
# - Splits qemu_in_gdb_format.txt into chunks of 100,000 instrs
|
||||
#cat $intermedDir/qemu_output.txt | ./parse_qemu.py >$intermedDir/qemu_in_gdb_format.txt
|
||||
#cd $intermedDir
|
||||
#split -d -l 5600000 ./qemu_in_gdb_format.txt --verbose
|
||||
#cd ../../testvector-generation
|
||||
|
||||
# Uncomment this version for parse_gdb_output.py debugging
|
||||
# - Uses qemu_in_gdb_format.txt
|
||||
# - Makes testvectors
|
||||
#cat $intermedDir/qemu_in_gdb_format.txt | ./parse_gdb_output.py "$outDir"
|
||||
|
||||
# =========== Just Do the Thing ==========
|
||||
# Uncomment this version for the whole thing
|
||||
# - Logs info needed by buildroot testbench
|
||||
($customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -gdb tcp::1236 -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "$outDir") & riscv64-unknown-elf-gdb -x gdbinit_qemulog
|
7
wally-pipelined/linux-testgen/testvector-generation/logBuildrootMem.sh
Executable file
7
wally-pipelined/linux-testgen/testvector-generation/logBuildrootMem.sh
Executable file
@ -0,0 +1,7 @@
|
||||
customQemu="/courses/e190ax/qemu_sim/rv64_initrd/qemu_experimental/qemu/build/qemu-system-riscv64"
|
||||
imageDir="../buildroot-image-output"
|
||||
($customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -gdb tcp::1235 -S 2>/dev/null >/dev/null) &
|
||||
riscv64-unknown-elf-gdb -x gdbinit_mem
|
||||
echo "Translating Mem from GDB to Questa format"
|
||||
./fix_mem.py
|
||||
echo "Done"
|
@ -9,7 +9,7 @@ csrs = ['fcsr','mcause','mcounteren','medeleg','mepc','mhartid','mideleg','mie',
|
||||
list(map(csrs.remove, ['fcsr','mhartid','pmpcfg0','pmpaddr0','mip']))
|
||||
#output_path = '/courses/e190ax/busybear_boot_new/'
|
||||
#output_path = '/courses/e190ax/buildroot_boot/'
|
||||
output_path = sys.argv[1]
|
||||
output_path = sys.argv[1]+'/'
|
||||
print(f'output dir: {output_path}')
|
||||
instrs = -1
|
||||
try:
|
@ -3,13 +3,16 @@ import fileinput, sys
|
||||
|
||||
sys.stderr.write("reminder: this script takes input from stdin\n")
|
||||
parseState = "idle"
|
||||
beginPageFault = 0
|
||||
inPageFault = 0
|
||||
endPageFault = 0
|
||||
CSRs = {}
|
||||
pageFaultCSRs = {}
|
||||
regs = {}
|
||||
pageFaultRegs = {}
|
||||
instrs = {}
|
||||
instrCount = 0
|
||||
returnAdr = 0
|
||||
|
||||
def printPC(l):
|
||||
global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs, instrCount
|
||||
@ -33,8 +36,8 @@ def printCSRs():
|
||||
|
||||
def parseCSRs(l):
|
||||
global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs
|
||||
if l.strip() and (not l.startswith("Disassembler")) and (not l.startswith("Please")):
|
||||
if l.startswith(' x0/zero'):
|
||||
if l.strip() and (not l.startswith("Disassembler")) and (not l.startswith("Please")) and not inPageFault:
|
||||
if l.startswith(' x0/zero'):
|
||||
parseState = "regFile"
|
||||
instr = instrs[CSRs["pc"]]
|
||||
printPC(instr)
|
||||
@ -42,24 +45,31 @@ def parseCSRs(l):
|
||||
else:
|
||||
csr = l.split()[0]
|
||||
val = int(l.split()[1],16)
|
||||
if inPageFault:
|
||||
# Commented out this conditional because the pageFault instrs don't corrupt CSRs
|
||||
#if inPageFault:
|
||||
# Not sure if these CSRs should be updated or not during page fault.
|
||||
if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"):
|
||||
#if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"):
|
||||
# We do update some CSRs
|
||||
CSRs[csr] = val
|
||||
else:
|
||||
# CSRs[csr] = val
|
||||
#else:
|
||||
# Others we preserve until changed later
|
||||
pageFaultCSRs[csr] = val
|
||||
elif pageFaultCSRs and (csr in pageFaultCSRs):
|
||||
if (val != pageFaultCSRs[csr]):
|
||||
del pageFaultCSRs[csr]
|
||||
CSRs[csr] = val
|
||||
# pageFaultCSRs[csr] = val
|
||||
#elif pageFaultCSRs and (csr in pageFaultCSRs):
|
||||
# if (val != pageFaultCSRs[csr]):
|
||||
# del pageFaultCSRs[csr]
|
||||
# CSRs[csr] = val
|
||||
#else:
|
||||
# CSRs[csr] = val
|
||||
#
|
||||
# However SEPC and STVAL do get corrupted upon exiting
|
||||
if endPageFault and ((csr == 'sepc') or (csr == 'stval')):
|
||||
CSRs[csr] = returnAdr
|
||||
else:
|
||||
CSRs[csr] = val
|
||||
|
||||
def parseRegs(l):
|
||||
global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs
|
||||
if "mcounteren" in l:
|
||||
if "pc" in l:
|
||||
printCSRs()
|
||||
# New non-disassembled instruction
|
||||
parseState = "CSRs"
|
||||
@ -100,8 +110,12 @@ for l in fileinput.input():
|
||||
elif (parseState == "instr") and l.startswith('0x'):
|
||||
if "out of bounds" in l:
|
||||
sys.stderr.write("Detected QEMU page fault error\n")
|
||||
beginPageFault = ~(inPageFault)
|
||||
if beginPageFault:
|
||||
returnAdr = int(l.split()[0][2:-1], 16)
|
||||
inPageFault = 1
|
||||
else:
|
||||
endPageFault = inPageFault
|
||||
inPageFault = 0
|
||||
adr = int(l.split()[0][2:-1], 16)
|
||||
instrs[adr] = l
|
3
wally-pipelined/regression/sim-wally-batch-rv32icfd
Executable file
3
wally-pipelined/regression/sim-wally-batch-rv32icfd
Executable file
@ -0,0 +1,3 @@
|
||||
vsim -c <<!
|
||||
do wally-pipelined-batch-rv32icfd.do ../config/rv32icfd rv32icfd
|
||||
!
|
1
wally-pipelined/regression/sim-wally-rv32icfd
Executable file
1
wally-pipelined/regression/sim-wally-rv32icfd
Executable file
@ -0,0 +1 @@
|
||||
vsim -do wally-pipelined-rv32icfd.do
|
42
wally-pipelined/regression/wally-pipelined-batch-rv32icfd.do
Normal file
42
wally-pipelined/regression/wally-pipelined-batch-rv32icfd.do
Normal file
@ -0,0 +1,42 @@
|
||||
# wally-pipelined-batch.do
|
||||
#
|
||||
# Modification by Oklahoma State University & Harvey Mudd College
|
||||
# Use with Testbench
|
||||
# James Stine, 2008; David Harris 2021
|
||||
# Go Cowboys!!!!!!
|
||||
#
|
||||
# Takes 1:10 to run RV64IC tests using gui
|
||||
|
||||
# Use this wally-pipelined-batch.do file to run this example.
|
||||
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
|
||||
# do wally-pipelined-batch.do
|
||||
# or, to run from a shell, type the following at the shell prompt:
|
||||
# vsim -do wally-pipelined-batch.do -c
|
||||
# (omit the "-c" to see the GUI while running from the shell)
|
||||
|
||||
onbreak {resume}
|
||||
|
||||
# create library
|
||||
if [file exists work_$2] {
|
||||
vdel -lib work_$2 -all
|
||||
}
|
||||
vlib work_$2
|
||||
|
||||
# compile source files
|
||||
# suppress spurious warnings about
|
||||
# "Extra checking for conflicts with always_comb done at vopt time"
|
||||
# because vsim will run vopt
|
||||
|
||||
# default to config/rv32icfd, but allow this to be overridden at the command line.
|
||||
switch $argc {
|
||||
0 {vlog +incdir+../config/rv32icfd +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
|
||||
1 {vlog +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
|
||||
2 {vlog -work work_$2 +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
|
||||
}
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
vopt work_$2.testbench -work work_$2 -o workopt_$2
|
||||
vsim -lib work_$2 workopt_$2
|
||||
|
||||
run -all
|
||||
quit
|
50
wally-pipelined/regression/wally-pipelined-rv32icfd.do
Normal file
50
wally-pipelined/regression/wally-pipelined-rv32icfd.do
Normal file
@ -0,0 +1,50 @@
|
||||
# wally-pipelined.do
|
||||
#
|
||||
# Modification by Oklahoma State University & Harvey Mudd College
|
||||
# Use with Testbench
|
||||
# James Stine, 2008; David Harris 2021
|
||||
# Go Cowboys!!!!!!
|
||||
#
|
||||
# Takes 1:10 to run RV64IC tests using gui
|
||||
|
||||
# Use this wally-pipelined.do file to run this example.
|
||||
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
|
||||
# do wally-pipelined.do
|
||||
# or, to run from a shell, type the following at the shell prompt:
|
||||
# vsim -do wally-pipelined.do -c
|
||||
# (omit the "-c" to see the GUI while running from the shell)
|
||||
|
||||
onbreak {resume}
|
||||
|
||||
# create library
|
||||
if [file exists work] {
|
||||
vdel -all
|
||||
}
|
||||
vlib work
|
||||
|
||||
# compile source files
|
||||
# suppress spurious warnings about
|
||||
# "Extra checking for conflicts with always_comb done at vopt time"
|
||||
# because vsim will run vopt
|
||||
|
||||
# default to config/rv32icfd, but allow this to be overridden at the command line. For example:
|
||||
# do wally-pipelined.do ../config/rv32ic
|
||||
switch $argc {
|
||||
0 {vlog +incdir+../config/rv32icfd +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
|
||||
1 {vlog +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../testbench/function_radix.sv ../src/*/*.sv -suppress 2583}
|
||||
}
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
vopt +acc work.testbench -o workopt
|
||||
vsim workopt
|
||||
|
||||
view wave
|
||||
-- display input and output signals as hexadecimal values
|
||||
do ./wave-dos/default-waves.do
|
||||
|
||||
-- Run the Simulation
|
||||
#run 5000
|
||||
run -all
|
||||
#quit
|
||||
noview ../testbench/testbench-imperas.sv
|
||||
view wave
|
@ -152,7 +152,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/PCTargetE
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/CSRReadValW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/PrivilegedNextPCM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/MemRWM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/InstrValidW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/InstrValidM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/InstrMisalignedFaultM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/DataMisalignedM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/IllegalBaseInstrFaultD
|
||||
@ -168,7 +168,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/CSRWritePendingDEM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/LoadStallD
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/SetFflagsM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/FRM_REGW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/FloatRegWriteW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/FRegWriteM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/MemRWAlignedM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/Funct3M
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/MemAdrM
|
||||
@ -337,7 +337,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/Funct3M
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/ReadDataW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/CSRReadValW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/PCLinkW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/InstrValidW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/InstrValidM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/StallD
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/FlushD
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/FlushE
|
||||
@ -397,7 +397,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/RegWriteM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/FlushW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/RegWriteW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/ResultSrcW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/InstrValidW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/InstrValidM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/CSRWritePendingDEM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/RegWriteD
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/RegWriteE
|
||||
@ -740,8 +740,8 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/CSRReadValW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/PrivilegedNextPCM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/RetM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/TrapM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/InstrValidW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/FloatRegWriteW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/InstrValidM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/FRegWriteM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/LoadStallD
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/PrivilegedM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/InstrMisalignedFaultM
|
||||
@ -842,8 +842,8 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/uretM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/TimerIntM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/ExtIntM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/SwIntM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/InstrValidW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/FloatRegWriteW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/InstrValidM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/FRegWriteM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/LoadStallD
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/NextPrivilegeModeM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/PrivilegeModeW
|
||||
@ -937,7 +937,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/WriteSSTATUSM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/WriteUSTATUSM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/TrapM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/FloatRegWriteW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/FRegWriteM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/NextPrivilegeModeM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/PrivilegeModeW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/mretM
|
||||
@ -972,7 +972,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrsr/STATUS_UIE
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/clk
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/reset
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/InstrValidW
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/InstrValidM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/LoadStallD
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/CSRMWriteM
|
||||
add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/counters/CSRAdrM
|
||||
|
@ -8,7 +8,7 @@ add wave /testbench/clk
|
||||
add wave /testbench/reset
|
||||
add wave -divider
|
||||
#add wave /testbench/dut/hart/ebu/IReadF
|
||||
add wave /testbench/dut/hart/DataStall
|
||||
#add wave /testbench/dut/hart/DataStall
|
||||
add wave /testbench/dut/hart/ICacheStallF
|
||||
add wave /testbench/dut/hart/StallF
|
||||
add wave /testbench/dut/hart/StallD
|
||||
|
@ -1,65 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Block Name: add.v
|
||||
// Author: David Harris
|
||||
// Date: 11/12/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block performs the addition of the product and addend. It also
|
||||
// contains logic necessary to adjust the signs for effective subtracts
|
||||
// and negative results.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
module add(rM, sM, tM, sum,
           negsum, invz, selsum1, negsum0, negsum1, killprodM);
////////////////////////////////////////////////////////////////////////////////
// Adds the two multiplier partial products to the aligned addend.
//
// Two candidate sums are formed, one with an extra +1 and one without, and
// the caller picks between them with selsum1 and optionally negates the
// chosen sum with negsum. Bit 164 of each candidate is exported as its sign.
////////////////////////////////////////////////////////////////////////////////

  input  logic [105:0] rM;        // partial product 1
  input  logic [105:0] sM;        // partial product 2
  input  logic [163:0] tM;        // aligned addend
  input  logic         invz;      // invert addend
  input  logic         selsum1;   // select +1 mode of compound adder
  input  logic         killprodM; // z >> product: treat the product as zero
  input  logic         negsum;    // negate the selected sum
  output logic [163:0] sum;       // selected (and possibly negated) sum
  output logic         negsum0;   // bit 164 of the +0 sum
  output logic         negsum1;   // bit 164 of the +1 sum

  // Internal nodes

  wire [105:0] r2;      // partial product 1, zeroed when killprodM
  wire [105:0] s2;      // partial product 2, zeroed when killprodM
  wire [164:0] t2;      // addend zero-extended by one bit, inverted when invz
  wire [164:0] prod;    // r2+s2 placed at bits [107:2] of the adder
  wire [164:0] sum0;    // compound adder result, +0 mode
  wire [164:0] sum1;    // compound adder result, +1 mode

  // Invert the (zero-extended) addend when requested; the matching +1 that
  // completes a two's-complement negation is supplied by the +1 mode below.
  assign t2 = invz ? ~{1'b0, tM} : {1'b0, tM};

  // Zero out the product when the addend dominates.
  assign r2 = killprodM ? 106'b0 : rM;
  assign s2 = killprodM ? 106'b0 : sM;

  // r2+s2 is evaluated at 106 bits inside the concatenation, so a carry out
  // of bit 105 is discarded exactly as in the original expression.
  assign prod = {57'b0, r2 + s2, 2'b0};

  //***replace this with a more structural cpa that synthesizes better
  assign sum0 = t2 + prod;
  assign sum1 = t2 + prod + 165'b1;   // +1 from invert of z above

  // Sign bits of the two candidates
  assign negsum0 = sum0[164];
  assign negsum1 = sum1[164];

  // 4:1 result mux: choose +0/+1 candidate, then optionally negate it
  assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0])
                       : (negsum ? -sum0[163:0] : sum0[163:0]);

endmodule
|
||||
|
@ -1,88 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: align.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the alignment shifter. It is responsible for
|
||||
// adjusting the fraction portion of the addend relative to the fraction
|
||||
// produced in the multiplier array.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
             killprodE, sumshiftE, sumshiftzeroE);
/////////////////////////////////////////////////////////////////////////////
// Alignment shifter for the addend.
//
// The addend mantissa (with a leading bit prepended) is placed at the top of
// a 216-bit word and shifted right by sumshiftE. The upper 164 bits become
// the aligned addend tE; the OR of the 52 bits shifted below that window is
// the sticky bit bsE. The shift amount is chosen from the signed alignment
// count aligncntE, saturating at both ends.
/////////////////////////////////////////////////////////////////////////////

  input  logic [51:0]  zman;          // fraction of addend z
  input  logic [12:0]  aligncntE;     // signed alignment count (d)
  input  logic         xzeroE;        // input X = 0
  input  logic         yzeroE;        // input Y = 0
  input  logic         zzeroE;        // input Z = 0
  input  logic         zdenormE;      // input Z is denormalized
  output logic [163:0] tE;            // aligned addend
  output logic         bsE;           // sticky bit of addend
  output logic         killprodE;     // Z >> product
  output logic [8:0]   sumshiftE;     // right-shift amount actually applied
  output logic         sumshiftzeroE; // set only in the d > 55 saturated case

  // Internal nodes

  logic [215:0] shift;    // addend after the right shift
  logic [12:0]  tmp;      // 57 - d, before truncation to 9 bits
  logic         leadbit;  // bit prepended above zman

  always_comb begin
    // Defaults. tmp is now assigned on every path; previously it was written
    // in only two of the four branches, which infers a latch in always_comb.
    killprodE     = xzeroE | yzeroE;
    sumshiftzeroE = 1'b0;
    leadbit       = ~zdenormE;
    tmp           = 13'd57 - aligncntE;

    // d = aligncntE, p = 53
    if ($signed(aligncntE) <= $signed(-(13'd105))) begin        // d <= -2p+1
      // product anchored case with saturated shift
      sumshiftE = 9'd163;                                       // 3p+4
      // This branch always prepends a 1, regardless of zdenormE (kept as in
      // the original). NOTE(review): confirm this is intended for denormal Z.
      leadbit   = 1'b1;
    end else if ($signed(aligncntE) <= $signed(13'd55)) begin   // -2p+1 < d <= p+2
      // Covers both the "product anchored or cancellation" range (d <= 2)
      // and the "addend anchored" range (2 < d <= 55); the original handled
      // them in two branches with identical bodies.
      sumshiftE = tmp[8:0];                                     // p + 4 - d
    end else begin                                              // d >= p+3
      // addend anchored case with saturated shift
      sumshiftE     = 9'd0;
      sumshiftzeroE = 1'b1;
      killprodE     = 1'b1;
    end

    //***try reducing this hardware to use one shifter
    shift = {leadbit, zman, 163'b0} >> sumshiftE;
    tE    = zzeroE ? 0 : {shift[215:52]};
    bsE   = |(shift[51:0]);
  end

endmodule
|
||||
|
@ -1,53 +0,0 @@
|
||||
module booth(xExt, choose, add1, e, pp);
/////////////////////////////////////////////////////////////////////////////
// Booth encoder for one partial product.
//
// `choose` selects which multiple of xExt is produced: 0, +1, +2, -1, -2 or
// "-0". Negative multiples are emitted as the bitwise inverse of xExt; add1
// carries the correction value and e is set for every negative encoding.
/////////////////////////////////////////////////////////////////////////////

  input  logic [53:0] xExt;     // multiplicand xExt
  input  logic [2:0]  choose;   // bits needed to choose which encoding
  output logic [1:0]  add1;     // correction to add for negative encodings
  output logic        e;        // 1 for every negative encoding
  output logic [54:0] pp;       // the resultant encoding

  logic [53:0] negx;            // bitwise inverse of the multiplicand

  assign negx = ~xExt;

  // Single decode of `choose` driving all three outputs together.
  always_comb
    case (choose)
      3'b000 : begin pp = 55'b0;         e = 0; add1 = 2'b0;  end //  0
      3'b001 : begin pp = {1'b0, xExt};  e = 0; add1 = 2'b0;  end //  1
      3'b010 : begin pp = {1'b0, xExt};  e = 0; add1 = 2'b0;  end //  1
      3'b011 : begin pp = {xExt, 1'b0};  e = 0; add1 = 2'b0;  end //  2
      3'b100 : begin pp = {negx, 1'b0};  e = 1; add1 = 2'b10; end // -2
      3'b101 : begin pp = {1'b1, negx};  e = 1; add1 = 2'b1;  end // -1
      3'b110 : begin pp = {1'b1, negx};  e = 1; add1 = 2'b1;  end // -1
      3'b111 : begin pp = '1;            e = 1; add1 = 2'b1;  end // -0
    endcase

endmodule
|
@ -1,90 +0,0 @@
|
||||
module add3comp2 #(parameter BITS = 4) (
  input  logic [BITS-1:0] a,
  input  logic [BITS-1:0] b,
  input  logic [BITS-1:0] c,
  output logic [BITS-1:0] carry,
  output logic [BITS-1:0] sum
);
/////////////////////////////////////////////////////////////////////////////
// BITS-wide 3:2 compressor: one independent sng3comp2 cell per bit
// position, with no connection between neighbouring cells.
// TODO: look into different implementations of the compressors?
/////////////////////////////////////////////////////////////////////////////

  // The generate block is intentionally left unnamed so the cells keep
  // their default generated hierarchical names.
  for (genvar idx = 0; idx < BITS; idx = idx + 1) begin
    sng3comp2 add0(.a(a[idx]), .b(b[idx]), .c(c[idx]),
                   .carry(carry[idx]), .sum(sum[idx]));
  end

endmodule
|
||||
|
||||
module add4comp2 #(parameter BITS = 4) (
  input  logic [BITS-1:0] a,
  input  logic [BITS-1:0] b,
  input  logic [BITS-1:0] c,
  input  logic [BITS-1:0] d,
  output logic [BITS:0]   carry,
  output logic [BITS-1:0] sum
);
/////////////////////////////////////////////////////////////////////////////
// BITS-wide 4:2 compressor built from a chain of sng4comp2 cells. Each
// cell's cout feeds the cin of the next cell up; the lowest cell's cin is
// tied to 0. The top cell's carry and cout are combined into the two most
// significant bits of `carry`.
/////////////////////////////////////////////////////////////////////////////

  logic [BITS-1:0] cout;      // inter-cell chain: cout[i] is cin of cell i+1
  logic            carryTmp;  // carry output of the top cell

  // Bottom cell: no incoming chain bit.
  sng4comp2 add0(.a(a[0]), .b(b[0]), .c(c[0]), .d(d[0]), .cin(1'b0),
                 .cout(cout[0]), .carry(carry[0]), .sum(sum[0]));

  // Middle cells. The generate block is left unnamed so the cells keep
  // their default generated hierarchical names.
  for (genvar idx = 1; idx < BITS-1; idx = idx + 1) begin
    sng4comp2 add1(.a(a[idx]), .b(b[idx]), .c(c[idx]), .d(d[idx]),
                   .cin(cout[idx-1]),
                   .cout(cout[idx]), .carry(carry[idx]), .sum(sum[idx]));
  end

  // Top cell: its carry is not written to `carry` directly.
  sng4comp2 add2(.a(a[BITS-1]), .b(b[BITS-1]), .c(c[BITS-1]), .d(d[BITS-1]),
                 .cin(cout[BITS-2]),
                 .cout(cout[BITS-1]), .carry(carryTmp), .sum(sum[BITS-1]));

  // NOTE(review): if carry[i] carries weight 2^(i+1), adding carryTmp and
  // cout[BITS-1] would normally put the XOR in carry[BITS-1] and the AND in
  // carry[BITS]; the assignments below are the other way round. Kept exactly
  // as found -- confirm against how the caller consumes `carry`.
  assign carry[BITS-1] = carryTmp & cout[BITS-1];
  assign carry[BITS]   = carryTmp ^ cout[BITS-1];

endmodule
|
||||
|
||||
module sng3comp2 (
  input  logic a,
  input  logic b,
  input  logic c,
  output logic carry,
  output logic sum
);
/////////////////////////////////////////////////////////////////////////////
// Single-bit 3:2 compressor (full adder): a + b + c == {carry, sum}.
// TODO: look into different implementations of the compressors?
/////////////////////////////////////////////////////////////////////////////

  logic axorb;   // a and b differ

  assign axorb = a ^ b;

  // Sum is the parity of the three inputs.
  assign sum = axorb ^ c;

  // If a and b differ, the carry follows c; if they are equal, it follows a.
  assign carry = axorb ? c : a;

endmodule
|
||||
|
||||
module sng4comp2 (
  input  logic a,
  input  logic b,
  input  logic c,
  input  logic d,
  input  logic cin,
  output logic cout,
  output logic carry,
  output logic sum
);
/////////////////////////////////////////////////////////////////////////////
// Single-bit 4:2 compressor made of two cascaded sng3comp2 cells.
// The first cell compresses a, b, c; the second compresses the first cell's
// sum together with d and cin.
// TODO: look into pass gate 4:2 counters?
/////////////////////////////////////////////////////////////////////////////

  logic TmpSum;   // sum output of the first cell

  // First stage: a + b + c -> {cout, TmpSum}
  sng3comp2 add1(.a(a), .b(b), .c(c), .carry(cout), .sum(TmpSum));

  // Second stage: TmpSum + d + cin -> {carry, sum}
  sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .carry(carry), .sum(sum));

endmodule
|
@ -1,140 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: expgen.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the exponent path of the FMAC. It performs the
|
||||
// following operations:
|
||||
//
|
||||
// 1) Compute exponent of multiply.
|
||||
// 2) Compare multiply and add exponents to generate alignment shift count
|
||||
// 3) Adjust exponent based on normalization
|
||||
// 4) Increment exponent based on postrounding renormalization
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module expgen(xexp, yexp, zexp,
              killprod, sumzero, resultdenorm, normcnt, infinity,
              FmaFlagsM, inf, xzero, yzero,expplus1,
              nan, de0, xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, specialsel, zexpsel,
              aligncnt, wexp,
              prodof, sumof, sumuf, denorm0, ae);
/////////////////////////////////////////////////////////////////////////////
// Exponent path of the FMA.
//
//   ae        product exponent: xexp + yexp - 1023 (0 if X or Y is zero)
//   aligncnt  alignment count for the addend, derived from zexp - ae with
//             per-operand denormal adjustments
//   de        de0 forced to 0 for a zero sum or a denormal result
//   wexp      result exponent: special-case value or de (+ expplus1)
//
// killprod, normcnt, proddenorm and zexpsel are in the port list but are not
// read by any logic in this block.
/////////////////////////////////////////////////////////////////////////////

  input  [62:52] xexp;          // exponent of multiplicand x
  input  [62:52] yexp;          // exponent of multiplicand y
  input  [62:52] zexp;          // exponent of addend z
  input          killprod;      // Z >> product (unused here)
  input          sumzero;       // sum exactly equals zero
  input          resultdenorm;  // postnormalize rounded result
  input  [8:0]   normcnt;       // normalization shift count (unused here)
  input          infinity;      // generate infinity on overflow
  input  [4:0]   FmaFlagsM;     // flags; [4] invalid, [2] overflow, [1] underflow are read
  input          inf;           // some input is infinity
  input          nan;           // some input is NaN
  input  [12:0]  de0;           // exponent before the zero/denorm override (presumably the normalized exponent)
  input          xnan;          // X is NaN
  input          ynan;          // Y is NaN
  input          znan;          // Z is NaN
  input          xdenorm;       // X is denorm
  input          ydenorm;       // Y is denorm
  input          zdenorm;       // Z is denorm
  input          xzero;         // X is zero
  input          yzero;         // Y is zero
  input          expplus1;      // add one to the result exponent
  input          proddenorm;    // product is denorm (unused here)
  input          specialsel;    // select special result
  input          zexpsel;       // unused here
  output [12:0]  aligncnt;      // shift count for alignment shifter
  output [62:52] wexp;          // exponent of result
  output         prodof;        // X*Y exponent out of bounds
  output         sumof;         // X*Y+Z exponent out of bounds
  output         sumuf;         // X*Y+Z exponent underflows
  output         denorm0;       // exponent = 0 for denorm
  output [12:0]  ae;            // exponent of multiply

  // Internal nodes

  wire [12:0] de;           // de0 after the zero/denorm override
  wire [10:0] infinityres;  // infinity or max number
  wire [10:0] nanres;       // NaN propagated or generated
  wire [10:0] specialres;   // exceptional case result

  // Compute exponent of multiply and report if it is out of bounds.
  // The ~ae[12] term excludes values whose top bit is set.

  assign ae = xzero|yzero ? 0 : xexp + yexp -1023;

  assign prodof = (ae > 2046 && ~ae[12]);

  // Compute alignment shift count.
  // Each denorm flag is inverted at 1 bit and then zero-extended, so every
  // adjustment term is exactly 0 or 1.
  assign aligncnt = zexp -ae - 1 + {12'b0,~xdenorm} + {12'b0,~ydenorm} - {12'b0,~zdenorm};

  // If the exponent becomes exactly zero (denormalized)
  // signal such to adjust R bit before rounding

  assign denorm0 = (de0 == 0);

  // Check for exponent out of bounds after add

  assign de = resultdenorm | sumzero ? 0 : de0;
  assign sumof = ~de[12] && de > 2046;
  assign sumuf = de == 0 && ~sumzero && ~resultdenorm;

  // In a non-critical special mux, we combine the early result from other
  // FPU blocks with the results of exceptional conditions. Overflow
  // produces either infinity or the largest finite number, depending on the
  // rounding mode. NaNs are propagated or generated. When none of the
  // conditions holds the value is driven to X.

  assign specialres = FmaFlagsM[4] | nan ? nanres :     // invalid
                      FmaFlagsM[2] ? infinityres :      // overflow
                      inf ? 11'b11111111111 :
                      FmaFlagsM[1] ? 11'b0 : 11'bx;     // underflow

  assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;

  // IEEE 754-2008 section 6.2.3 states:
  // "If two or more inputs are NaN, then the payload of the resulting NaN should be
  // identical to the payload of one of the input NaNs if representable in the destination
  // format. This standard does not specify which of the input NaNs will provide the payload."
  assign nanres = xnan ? xexp : (ynan ? yexp : (znan? zexp : 11'b11111111111));

  // A mux selects the early result from other FPU blocks or the
  // normalized FMAC result. Special cases are also detected.

  assign wexp = specialsel ? specialres[10:0] : de[10:0] + expplus1;

endmodule
|
||||
|
@ -1,90 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: expgen.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the exponent path of the FMAC. It performs the
|
||||
// following operations:
|
||||
//
|
||||
// 1) Compute exponent of multiply.
|
||||
// 2) Compare multiply and add exponents to generate alignment shift count
|
||||
// 3) Adjust exponent based on normalization
|
||||
// 4) Increment exponent based on postrounding renormalization
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
               xdenormE, ydenormE, zdenormE,
               aligncntE, prodof, aeE);
/////////////////////////////////////////////////////////////////////////////
// First half of the FMA exponent path.
//
//   aeE        product exponent: xexp + yexp - 1023, forced to 0 when X or
//              Y is zero
//   prodof     aeE exceeds 2046 with its top bit clear
//   aligncntE  alignment count for the addend, derived from zexp - aeE with
//              a 0/1 adjustment per operand that is not denormal
/////////////////////////////////////////////////////////////////////////////

  input  logic [62:52] xexp;       // exponent of multiplicand x
  input  logic [62:52] yexp;       // exponent of multiplicand y
  input  logic [62:52] zexp;       // exponent of addend z
  input  logic         xdenormE;   // X is denorm
  input  logic         ydenormE;   // Y is denorm
  input  logic         zdenormE;   // Z is denorm
  input  logic         xzeroE;     // X is zero
  input  logic         yzeroE;     // Y is zero
  output logic [12:0]  aligncntE;  // shift count for alignment shifter
  output logic         prodof;     // X*Y exponent out of bounds
  output logic [12:0]  aeE;        // exponent of multiply

  // Compute exponent of multiply; operands are zero-extended to 13 bits so
  // the subtraction of the bias can go negative without losing the sign.

  assign aeE = xzeroE|yzeroE ? 0 : {2'b0,xexp} + {2'b0,yexp} - 13'd1023;

  // The ~aeE[12] term excludes values whose top bit is set.
  assign prodof = (aeE > 2046 && ~aeE[12]);

  // Compute alignment shift count.
  // Each denorm flag is inverted at 1 bit and then zero-extended, so every
  // adjustment term is exactly 0 or 1.
  assign aligncntE = {2'b0,zexp} -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};

endmodule
|
||||
|
||||
|
@ -1,108 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: expgen.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the exponent path of the FMAC. It performs the
|
||||
// following operations:
|
||||
//
|
||||
// 1) Compute exponent of multiply.
|
||||
// 2) Compare multiply and add exponents to generate alignment shift count
|
||||
// 3) Adjust exponent based on normalization
|
||||
// 4) Increment exponent based on postrounding renormalization
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module expgen2(xexp, yexp, zexp,
               sumzero, resultdenorm, infinity,
               FmaFlagsM, inf, expplus1,
               nanM, de0, xnanM, ynanM, znanM, specialsel,
               wexp,
               sumof, sumuf);
/////////////////////////////////////////////////////////////////////////////
// Second half of the FMA exponent path.
//
// Takes the incoming exponent de0, forces it to zero for a zero sum or a
// denormal result, flags out-of-range values, and muxes in the exponent
// field for special results (NaN, overflow, infinity, underflow).
// xexp/yexp/zexp are only used to pass through a NaN operand's exponent.
/////////////////////////////////////////////////////////////////////////////

  input  logic [62:52] xexp;          // exponent of multiplicand x
  input  logic [62:52] yexp;          // exponent of multiplicand y
  input  logic [62:52] zexp;          // exponent of addend z
  input  logic         sumzero;       // sum exactly equals zero
  input  logic         resultdenorm;  // postnormalize rounded result
  input  logic         infinity;      // generate infinity on overflow
  input  logic [4:0]   FmaFlagsM;     // flags; [4] invalid, [2] overflow, [1] underflow are read
  input  logic         inf;           // some input is infinity
  input  logic         nanM;          // some input is NaN
  input  logic [12:0]  de0;           // exponent before the zero/denorm override (presumably the normalized exponent)
  input  logic         xnanM;         // X is NaN
  input  logic         ynanM;         // Y is NaN
  input  logic         znanM;         // Z is NaN
  input  logic         expplus1;      // add one to the result exponent
  input  logic         specialsel;    // select special result
  output logic [62:52] wexp;          // exponent of result
  output logic         sumof;         // X*Y+Z exponent out of bounds
  output logic         sumuf;         // X*Y+Z exponent underflows

  // Internal nodes

  wire [12:0] de;           // de0 after the zero/denorm override
  wire [10:0] infinityres;  // infinity or max number
  wire [10:0] nanres;       // NaN propagated or generated
  wire [10:0] specialres;   // exceptional case result

  // Check for exponent out of bounds after add.
  // The ~de[12] term excludes values whose top bit is set.

  assign de = resultdenorm | sumzero ? 0 : de0;
  assign sumof = ~de[12] && de > 2046;
  assign sumuf = de == 0 && ~sumzero && ~resultdenorm;

  // In a non-critical special mux, we combine the early result from other
  // FPU blocks with the results of exceptional conditions. Overflow
  // produces either infinity or the largest finite number, depending on the
  // rounding mode. NaNs are propagated or generated. When none of the
  // conditions holds the value is driven to X.

  assign specialres = FmaFlagsM[4] | nanM ? nanres :    // invalid
                      FmaFlagsM[2] ? infinityres :      // overflow
                      inf ? 11'b11111111111 :
                      FmaFlagsM[1] ? 11'b0 : 11'bx;     // underflow

  assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;

  // IEEE 754-2008 section 6.2.3 states:
  // "If two or more inputs are NaN, then the payload of the resulting NaN should be
  // identical to the payload of one of the input NaNs if representable in the destination
  // format. This standard does not specify which of the input NaNs will provide the payload."
  assign nanres = xnanM ? xexp : (ynanM ? yexp : (znanM? zexp : 11'b11111111111));

  // A mux selects the early result from other FPU blocks or the
  // normalized FMAC result. Special cases are also detected.

  assign wexp = specialsel ? specialres[10:0] : de[10:0] + {10'b0,expplus1};

endmodule
|
||||
|
||||
|
@ -1,88 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: flag.v
|
||||
// Author: David Harris
|
||||
// Date: 12/6/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block generates the flags: invalid, overflow, underflow, inexact.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
            psign, zsign, xzero, yzero, zzero, vbits, killprod,
            inf, nan, FmaFlagsM, sticky);
/////////////////////////////////////////////////////////////////////////////
// Generates the FMA status flags plus the "some source is Inf / NaN"
// indications.
//
// FmaFlagsM bit assignment, as driven below:
//   [4] invalid   [3] divide by zero (tied to 0)   [2] overflow
//   [1] underflow [0] inexact
/////////////////////////////////////////////////////////////////////////////

  input        xnan;      // X is NaN
  input        ynan;      // Y is NaN
  input        znan;      // Z is NaN
  input        sticky;    // sticky bit, ORed with vbits for the inexact flag
  input        xinf;      // X is Inf
  input        yinf;      // Y is Inf
  input        zinf;      // Z is Inf
  input        prodof;    // X*Y overflows exponent
  input        sumof;     // X*Y + Z overflows exponent
  input        sumuf;     // X*Y + Z underflows exponent
  input        psign;     // Sign of product
  input        zsign;     // Sign of Z
  input        xzero;     // X = 0
  input        yzero;     // Y = 0
  input        zzero;     // Z = 0
  input        killprod;  // product-kill indication, used in the underflow term below
  input  [1:0] vbits;     // R and S bits of result
  output       inf;       // Some source is Inf, or the sum overflowed to Inf
  output       nan;       // Some source is NaN
  output [4:0] FmaFlagsM; // status flags, see bit assignment above

  // Internal nodes

  wire         prodinf;   // X*Y larger than max possible
  wire         suminf;    // X*Y+Z larger than max possible
  wire         srcinf;    // at least one source operand is Inf

  // If any input is NaN, propagate the NaN

  assign nan = xnan || ynan || znan;

  // Generate infinity checks. NaN inputs suppress them.

  assign prodinf = prodof && ~xnan && ~ynan;
  assign suminf  = sumof && ~xnan && ~ynan && ~znan;
  assign srcinf  = xinf || yinf || zinf;

  // Same with infinity (Inf - Inf and 0 * Inf don't propagate Inf,
  // but that is OK because the invalid operation takes higher precedence).
  // suminf is included so that an overflowed sum is also reported as Inf.

  assign inf = srcinf || suminf;

  // Set invalid flag for the following cases:
  //   1) Inf - Inf
  //   2) 0 * Inf

  assign FmaFlagsM[4] = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
                        xzero && yinf || yzero && xinf;

  assign FmaFlagsM[3] = 1'b0; // divide by zero flag

  // Set the overflow flag when the result is out of bounds and no source
  // operand was already Inf.
  // The mask must use srcinf rather than inf: inf contains suminf, so
  // "suminf && ~inf" is identically zero and the flag could never be raised.

  assign FmaFlagsM[2] = suminf && ~srcinf;

  // Set the underflow flag when the sum underflows (and the result is not
  // Inf/NaN), or when the product was killed while Z is zero and neither
  // X nor Y is zero.

  assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinf && ~nan) ||
                        (killprod & zzero & ~(yzero | xzero));

  // Set the inexact flag if the R or S bit or the sticky bit is set, or the
  // sum overflowed. Suppressed when a source is Inf or NaN. As with overflow,
  // the mask uses srcinf so that the suminf term is not cancelled by itself.

  assign FmaFlagsM[0] = (vbits[0] || vbits[1] || sticky || suminf) && ~(srcinf || nan);

endmodule
|
@ -1,34 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: flag.v
|
||||
// Author: David Harris
|
||||
// Date: 12/6/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block detects NaN inputs (nanE) and an infinite product (prodinfE).
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module flag1(
  input  logic xnanE,     // X is NaN
  input  logic ynanE,     // Y is NaN
  input  logic znanE,     // Z is NaN
  input  logic prodof,    // X*Y overflows exponent
  output logic prodinfE,  // X*Y larger than max possible
  output logic nanE       // Some source is NaN
);

  // A NaN on any of the three operands marks the operation as NaN.
  assign nanE = |{xnanE, ynanE, znanE};

  // The product counts as infinite when its exponent overflowed and neither
  // multiplicand is a NaN.
  assign prodinfE = prodof & ~(xnanE | ynanE);

endmodule
|
@ -1,80 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: flag.v
|
||||
// Author: David Harris
|
||||
// Date: 12/6/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block generates the flags: invalid, overflow, underflow, inexact.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module flag2(xsign,ysign,zsign, xnanM, ynanM, znanM, xinfM, yinfM, zinfM, sumof, sumuf,
             xzeroM, yzeroM, zzeroM, vbits, killprodM,
             inf, nanM, FmaFlagsM,sticky,prodinfM);
/////////////////////////////////////////////////////////////////////////////
// Generates the FMA status flags and the "result/source is Inf" indication.
//
// FmaFlagsM bit assignment, as driven below:
//   [4] invalid   [3] divide by zero (tied to 0)   [2] overflow
//   [1] underflow [0] inexact
/////////////////////////////////////////////////////////////////////////////

  input  logic       xnanM;     // X is NaN
  input  logic       ynanM;     // Y is NaN
  input  logic       znanM;     // Z is NaN
  input  logic       xsign;     // Sign of X
  input  logic       ysign;     // Sign of Y
  input  logic       zsign;     // Sign of Z
  input  logic       sticky;    // sticky bit, ORed with vbits for the inexact flag
  input  logic       prodinfM;  // product is infinite (used in the invalid and underflow terms)
  input  logic       xinfM;     // X is Inf
  input  logic       yinfM;     // Y is Inf
  input  logic       zinfM;     // Z is Inf
  input  logic       sumof;     // X*Y + Z overflows exponent
  input  logic       sumuf;     // X*Y + Z underflows exponent
  input  logic       xzeroM;    // X = 0
  input  logic       yzeroM;    // Y = 0
  input  logic       zzeroM;    // Z = 0
  input  logic       killprodM; // product-kill indication, used in the underflow term below
  input  logic [1:0] vbits;     // R and S bits of result
  output logic       inf;       // Some source is Inf, or the sum overflowed to Inf
  input  logic       nanM;      // Some source is NaN
  output logic [4:0] FmaFlagsM; // status flags, see bit assignment above

  // Internal nodes

  logic suminf;  // X*Y+Z larger than max possible
  logic srcinf;  // at least one source operand is Inf

  assign suminf = sumof && ~xnanM && ~ynanM && ~znanM;
  assign srcinf = xinfM || yinfM || zinfM;

  // Same with infinity (Inf - Inf and 0 * Inf don't propagate Inf,
  // but that is OK because the invalid operation takes higher precedence).
  // suminf is included so that an overflowed sum is also reported as Inf.

  assign inf = srcinf || suminf;

  // Set the overflow flag when the result is out of bounds and no source
  // operand was already Inf.
  // The mask must use srcinf rather than inf: inf contains suminf, so
  // "suminf && ~inf" is identically zero and the flag could never be raised.

  assign FmaFlagsM[2] = suminf && ~srcinf;

  // Set the underflow flag when the sum underflows (and the result is not
  // Inf/NaN), or when the product was killed while Z is zero and neither
  // X nor Y is zero.

  assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) ||
                        (killprodM & zzeroM & ~(yzeroM | xzeroM));

  // Set the inexact flag if the R or S bit or the sticky bit is set, or the
  // sum overflowed. Suppressed when a source is Inf or NaN. As with overflow,
  // the mask uses srcinf so that the suminf term is not cancelled by itself.

  assign FmaFlagsM[0] = (vbits[0] || vbits[1] || sticky || suminf) && ~(srcinf || nanM);

  // Set invalid flag for the following cases:
  //   1) Inf - Inf
  //   2) 0 * Inf

  assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) ||
                        xzeroM && yinfM || yzeroM && xinfM;

  assign FmaFlagsM[3] = 1'b0; // divide by zero flag

endmodule
|
@ -1,132 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: fmac.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This is the top level block of a floating-point multiply/accumulate
|
||||
// unit(FMAC). It instantiates the following sub-blocks:
|
||||
//
|
||||
// array Booth encoding, partial product generation, product summation
|
||||
// expgen Exponent summation, compare, and adjust
|
||||
// align Alignment shifter
|
||||
// add Carry-save adder for accumulate, carry propagate adder
|
||||
// lza Leading zero anticipator to control normalization shifter
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to ReadData1E or ReadData3E inputs
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes FmaResultM=ReadData1E*ReadData2E+ReadData3E, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the ReadData1E or ReadData3E inputs for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IEEE flags.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module fma(
  input  wire [63:0] ReadData1E,  // input 1
  input  wire [63:0] ReadData2E,  // input 2
  input  wire [63:0] ReadData3E,  // input 3
  input  wire [2:0]  FrmE,        // Rounding mode
  output wire [63:0] FmaResultM,  // FmaResultM = ReadData1E*ReadData2E + ReadData3E
  output wire [4:0]  FmaFlagsM,   // status flags
  output wire [12:0] aligncnt     // shift count for alignment
);
  // Top level of the multiply/accumulate unit: it only wires the sub-blocks
  // together. Every sub-block is instantiated with a trailing ".*", so the
  // signal names declared here must match the sub-block port names.
  // The ports are kept as nets because slices of FmaResultM are connected to
  // ports of several instances; bit 63 is connected to the wsign port of
  // both "round" and "sign".

  // Fraction datapath
  logic [105:0] r, s;             // the two results of the partial product sum
  logic [163:0] t;                // output of alignment shifter
  logic [163:0] sum;              // output of carry prop adder
  logic [53:0]  v;                // normalized sum, R, S bits
  logic [8:0]   normcnt;          // shift count for normalizer
  logic [8:0]   sumshift;
  logic         sumshiftzero;
  logic         bs;               // sticky bit of addend
  logic         ps;               // sticky bit of product
  logic         sticky;

  // Exponent datapath
  logic [12:0]  ae;               // multiplier exponent
  logic [12:0]  de0;
  logic         expplus1;
  logic         zexpsel;

  // Add / negate control
  logic         killprod;         // ReadData3E >> product
  logic         negsum;           // negate sum
  logic         invz;             // invert addend
  logic         selsum1;          // select +1 mode of sum
  logic         negsum0;          // sum +0 < 0
  logic         negsum1;          // sum +1 < 0
  logic         sumzero;          // sum = 0
  logic         psign;
  logic         isAdd;

  // Special-value and range indications
  logic         infinity;         // generate infinity on overflow
  logic         prodof;           // ReadData1E*ReadData2E out of range
  logic         sumof;            // result out of range
  logic         sumuf;
  logic         xzero, yzero, zzero;
  logic         xdenorm, ydenorm, zdenorm;
  logic         proddenorm, denorm0, resultdenorm;
  logic         xinf, yinf, zinf, inf;
  logic         xnan, ynan, znan, nan;
  logic         specialsel;

  assign isAdd = 1;

  // Instantiate fraction datapath

  multiply multiply(
    .xman(ReadData1E[51:0]),
    .yman(ReadData2E[51:0]),
    .*);

  align align(
    .zman(ReadData3E[51:0]),
    .*);

  add add(.*);

  lza lza(.*);

  normalize normalize(
    .zexp(ReadData3E[62:52]),
    .*);

  round round(
    .xman(ReadData1E[51:0]),
    .yman(ReadData2E[51:0]),
    .zman(ReadData3E[51:0]),
    .wman(FmaResultM[51:0]),
    .wsign(FmaResultM[63]),
    .*);

  // Instantiate exponent datapath

  expgen expgen(
    .xexp(ReadData1E[62:52]),
    .yexp(ReadData2E[62:52]),
    .zexp(ReadData3E[62:52]),
    .wexp(FmaResultM[62:52]),
    .*);

  // Instantiate special case detection across datapath & exponent path

  special special(.*);

  // Instantiate control logic

  sign sign(
    .xsign(ReadData1E[63]),
    .ysign(ReadData2E[63]),
    .zsign(ReadData3E[63]),
    .wsign(FmaResultM[63]),
    .*);

  flag flag(
    .zsign(ReadData3E[63]),
    .vbits(v[1:0]),
    .*);

endmodule
|
||||
|
@ -1,165 +0,0 @@
|
||||
// fma1: unpacks the three operands, classifies them (zero / denormal /
// infinity / NaN), computes the product mantissa and exponent, and aligns
// the addend Z to the product. All logic in this module is combinational.
module fma1(

    input logic     [63:0]      FInput1E,       // X
    input logic     [63:0]      FInput2E,       // Y
    input logic     [63:0]      FInput3E,       // Z
    input logic     [2:0]       FOpCtrlE,       // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
    input logic                 FmtE,           // precision 1 = double 0 = single
    output logic    [105:0]     ProdManE,       // 1.X frac * 1.Y frac
    output logic    [161:0]     AlignedAddendE, // Z aligned for addition
    output logic    [12:0]      ProdExpE,       // X exponent + Y exponent - bias
    output logic                AddendStickyE,  // sticky bit that is calculated during alignment
    output logic                KillProdE,      // set the product to zero before addition if the product is too small to matter
    output logic                XZeroE, YZeroE, ZZeroE, // inputs are zero
    output logic                XInfE, YInfE, ZInfE,    // inputs are infinity
    output logic                XNaNE, YNaNE, ZNaNE);   // inputs are NaN

    logic [51:0]    XFrac,YFrac,ZFrac;              // input fraction
    logic [52:0]    XMan,YMan,ZMan;                 // input mantissa (with leading one)
    logic [12:0]    XExp,YExp,ZExp;                 // input exponents
    logic           XSgn,YSgn,ZSgn;                 // input signs
    logic [12:0]    AlignCnt;                       // how far to shift the addend to align with the product
    logic [211:0]   Shift;                          // output of the alignment shifter including sticky bit
    logic           XDenormE, YDenormE, ZDenormE;   // inputs are denormal
    logic [63:0]    FInput3E2;                      // value to add (Z or zero)
    logic [12:0]    Bias;                           // 1023 for double, 127 for single
    logic           XExpZero, YExpZero, ZExpZero;   // input exponent zero
    logic           XFracZero, YFracZero, ZFracZero; // input fraction zero
    logic           XExpMax, YExpMax, ZExpMax;      // input exponent all 1s

    // Set addend to zero if FMUL instruction
    assign FInput3E2 = FOpCtrlE[2] ? 64'b0 : FInput3E;

    // split inputs into the sign bit, fraction, and exponent and handle single or double precision
    //      - single precision is in the top half of the inputs
    assign XSgn = FInput1E[63];
    assign YSgn = FInput2E[63];
    assign ZSgn = FInput3E2[63];

    assign XExp = FmtE ? {2'b0, FInput1E[62:52]} : {5'b0, FInput1E[62:55]};
    assign YExp = FmtE ? {2'b0, FInput2E[62:52]} : {5'b0, FInput2E[62:55]};
    assign ZExp = FmtE ? {2'b0, FInput3E2[62:52]} : {5'b0, FInput3E2[62:55]};

    assign XFrac = FmtE ? FInput1E[51:0] : {FInput1E[54:32], 29'b0};
    assign YFrac = FmtE ? FInput2E[51:0] : {FInput2E[54:32], 29'b0};
    assign ZFrac = FmtE ? FInput3E2[51:0] : {FInput3E2[54:32], 29'b0};

    // The leading mantissa bit is 1 unless the exponent field is zero
    assign XMan = {~XExpZero, XFrac};
    assign YMan = {~YExpZero, YFrac};
    assign ZMan = {~ZExpZero, ZFrac};

    assign Bias = FmtE ? 13'h3ff : 13'h7f;

    // determine if an input is a special value
    assign XExpZero = ~|XExp;
    assign YExpZero = ~|YExp;
    assign ZExpZero = ~|ZExp;

    assign XFracZero = ~|XFrac;
    assign YFracZero = ~|YFrac;
    assign ZFracZero = ~|ZFrac;

    assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
    assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
    assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];

    assign XNaNE = XExpMax & ~XFracZero;
    assign YNaNE = YExpMax & ~YFracZero;
    assign ZNaNE = ZExpMax & ~ZFracZero;

    assign XDenormE = XExpZero & ~XFracZero;
    assign YDenormE = YExpZero & ~YFracZero;
    assign ZDenormE = ZExpZero & ~ZFracZero;

    assign XInfE = XExpMax & XFracZero;
    assign YInfE = YExpMax & YFracZero;
    assign ZInfE = ZExpMax & ZFracZero;

    assign XZeroE = XExpZero & XFracZero;
    assign YZeroE = YExpZero & YFracZero;
    assign ZZeroE = ZExpZero & ZFracZero;

    // Calculate the product's exponent
    //      - When multiplying two fp numbers, add the exponents
    //      - Subtract the bias (XExp + YExp has two biases, one from each exponent)
    //      - Denormal numbers have an exponent value of 1, however they are
    //        represented with an exponent of 0. Add one if there is a denormal number
    assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
                      XExp + YExp - Bias + XDenormE + YDenormE;

    // Calculate the product's mantissa
    //      - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
    assign ProdManE = XMan * YMan;

    // determine the shift count for alignment
    //      - negative means Z is larger, so shift Z left
    //      - positive means the product is larger, so shift Z right
    //      - Denormal numbers have an exponent value of 1, however they are
    //        represented with an exponent of 0. Add one to the exponent if it is a denormal number
    assign AlignCnt = ProdExpE - ZExp - ZDenormE;

    // Alignment shifter

    // Default addition without shifting
    //          | 55'b0 | 106'b(product) | 2'b0 |
    //                  |1'b0| addend |

    // the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)

    always_comb
        begin

        // Set default values
        AddendStickyE = 0;
        KillProdE = 0;
        // Shift is only written in the two shifting branches below; give it a
        // default so that this always_comb block does not infer a latch.
        Shift = 212'b0;

        // If the product is too small to affect the sum, kill the product

        //          | 55'b0 | 106'b(product) | 2'b0 |
        //  | addend |
        if ($signed(AlignCnt) <= $signed(-13'd56)) begin
            KillProdE = 1;
            AlignedAddendE = {107'b0, ZMan,2'b0};
            AddendStickyE = ~(XZeroE|YZeroE);

        // If the addend is shifted left (negative AlignCnt)

        //          | 55'b0 | 106'b(product) | 2'b0 |
        //                  | addend |
        end else if($signed(AlignCnt) <= $signed(13'd0))  begin
            Shift = {55'b0, ZMan, 104'b0} << -AlignCnt;
            AlignedAddendE = Shift[211:50];
            AddendStickyE = |(Shift[49:0]);

        // If the addend is shifted right (positive AlignCnt)

        //          | 55'b0 | 106'b(product) | 2'b0 |
        //                                  | addend |
        end else if ($signed(AlignCnt)<=$signed(13'd105))  begin
            Shift = {55'b0, ZMan, 104'b0} >> AlignCnt;
            AlignedAddendE = Shift[211:50];
            AddendStickyE = |(Shift[49:0]);

        // If the addend is too small to affect the addition
        //      - The addend has to shift two past the end of the product to be considered too small
        //      - The 2 extra bits are needed for rounding

        //          | 55'b0 | 106'b(product) | 2'b0 |
        //                                              | addend |
        end else begin
            AlignedAddendE = 162'b0;
            AddendStickyE = ~ZZeroE;

        end
    end

endmodule
|
||||
|
@ -1,282 +0,0 @@
|
||||
// fma2: adds the aligned addend to the product, normalizes and rounds the
// sum, selects the final result (including NaN / infinity / overflow /
// underflow cases) and generates the status flags.
// Inputs with an M suffix are the operands, control bits and fma1 results
// for this stage; all logic in this module is combinational.
module fma2(

    input logic     [63:0]      FInput1M,       // X
    input logic     [63:0]      FInput2M,       // Y
    input logic     [63:0]      FInput3M,       // Z
    input logic     [2:0]       FrmM,           // rounding mode, decoded in the case statements below
    input logic     [105:0]     ProdManM,       // product mantissa
    input logic     [161:0]     AlignedAddendM, // Z aligned for addition
    input logic     [12:0]      ProdExpM,       // product exponent
    input logic                 FmtM,           // precision 1 = double 0 = single
    input logic                 AddendStickyM,  // sticky bit from the alignment shift
    input logic                 KillProdM,      // zero the product before the addition
    input logic     [2:0]       FOpCtrlM,       // [2] = fmul, [1] = negate product, [0] = negate Z
    input logic                 XZeroM, YZeroM, ZZeroM, // inputs are zero
    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
    output logic    [63:0]      FmaResultM,     // result
    output logic    [4:0]       FmaFlagsM);     // {Invalid, 1'b0, Overflow, Underflow & Inexact, Inexact}

    logic [51:0]    XMan, YMan, ZMan, WMan;
    logic [10:0]    XExp, YExp, ZExp, WExp;
    logic           XSgn, YSgn, ZSgn, WSgn, PSgn;
    logic [105:0]   ProdMan2;
    logic [162:0]   AlignedAddend2;
    logic [161:0]   Sum;
    logic [162:0]   SumTmp;
    logic [12:0]    SumExp;
    logic [12:0]    SumExpMinus1;
    logic [12:0]    SumExpTmp, SumExpTmpMinus1, WExpTmp;
    logic [53:0]    NormSum;
    logic [161:0]   NormSumTmp;
    logic [8:0]     NormCnt;
    logic           NormSumSticky;
    logic           SumZero;
    logic           NegSum;
    logic           InvZ;
    logic           ResultDenorm;
    logic           Sticky;
    logic           Plus1, Minus1, Plus1Tmp, Minus1Tmp;
    logic           Invalid,Underflow,Overflow,Inexact;
    logic [8:0]     DenormShift;
    logic           ProdInf, ProdOf, ProdUf;
    logic           SigNaN;     // some NaN input has its top fraction bit clear
    logic [63:0]    FmaResultTmp;
    logic           SubBySmallNum;
    logic [63:0]    FInput3M2;
    logic           ZeroSgn, ResultSgn;

    // Set addend to zero if FMUL instruction
    assign FInput3M2 = FOpCtrlM[2] ? 64'b0 : FInput3M;

    // split inputs into the sign bit, mantissa, and exponent for readability

    assign XSgn = FInput1M[63];
    assign YSgn = FInput2M[63];
    assign ZSgn = FInput3M2[63]^FOpCtrlM[0]; //Negate Z if subtraction

    assign XExp = FmtM ? FInput1M[62:52] : {3'b0, FInput1M[62:55]};
    assign YExp = FmtM ? FInput2M[62:52] : {3'b0, FInput2M[62:55]};
    assign ZExp = FmtM ? FInput3M2[62:52] : {3'b0, FInput3M2[62:55]};

    assign XMan = FmtM ? FInput1M[51:0] : {FInput1M[54:32], 29'b0};
    assign YMan = FmtM ? FInput2M[51:0] : {FInput2M[54:32], 29'b0};
    assign ZMan = FmtM ? FInput3M2[51:0] : {FInput3M2[54:32], 29'b0};

    // Calculate the product's sign
    //      Negate product's sign if FNMADD or FNMSUB
    assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];

    // Addition

    // Negate Z when doing one of the following operations:
    //      -prod +  Z
    //       prod -  Z
    assign InvZ = ZSgn ^ PSgn;

    // Choose an inverted or non-inverted addend - the one is added later
    assign AlignedAddend2 = InvZ ? ~{1'b0,AlignedAddendM} : {1'b0,AlignedAddendM};
    // Kill the product if the product is too small to affect the addition (determined in fma1.sv)
    assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;

    // Do the addition
    //      - add one to negate if the addend was inverted
    //      - the 2 extra bits at the beginning and end are needed for rounding
    assign SumTmp = AlignedAddend2 + {55'b0, ProdMan2,2'b0} + {162'b0, InvZ};

    // Is the sum negative
    assign NegSum = SumTmp[162];
    // If the sum is negative, negate the sum.
    assign Sum = NegSum ? -SumTmp[161:0] : SumTmp[161:0];

    // Leading one detector (behavioral search from the most significant bit)
    logic [8:0] i;
    always_comb begin
        i = 0;
        while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1;  // search for leading one
        NormCnt = i+1;    // compute shift count
    end

    // Normalization

    // Determine if the sum is zero
    assign SumZero = ~(|Sum);

    logic [12:0] ManLen;
    assign ManLen = FmtM ? 13'd52 : 13'd23;
    // Determine if the result is denormal
    assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-ManLen));

    // Determine the shift needed for denormal results
    assign SumExpTmpMinus1 = SumExpTmp-1;
    assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0;

    // Normalize the sum
    assign NormSumTmp = SumZero ? 162'b0 : Sum << NormCnt+DenormShift;
    assign NormSum = NormSumTmp[161:108];
    // Calculate the sticky bit
    assign NormSumSticky = FmtM ? (|NormSumTmp[107:0]) : (|NormSumTmp[136:0]);
    assign Sticky = AddendStickyM | NormSumSticky;

    // Determine sum's exponent
    assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
    assign SumExp = SumZero ? 13'b0 :
                    ResultDenorm ? 13'b0 :
                    SumExpTmp;

    // Rounding

    // round to nearest even
    //      {Gaurd, Round, Sticky}
    //      0xx - do nothing
    //      100 - tie - Plus1 if NormSum[2] = 1
    //          - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
    //      101/110/111 - Plus1

    // round to zero - do nothing
    //          - subtract 1 if a small number was supposed to be subtracted from the positive result

    // round to -infinity - Plus1 if negative
    //          - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
    //          - subtract 1 if a small number was supposed to be subtracted from the positive result

    // round to infinity - Plus1 if positive
    //          - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
    //          - subtract 1 if a small number was supposed to be subtracted from the negative result

    // round to nearest max magnitude
    //      {Gaurd, Round, Sticky}
    //      0xx - do nothing
    //      100 - tie - Plus1
    //          - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
    //      101/110/111 - Plus1

    // Determine if the result was supposed to be subtracted by a small number
    logic Gaurd, Round;
    assign Gaurd = FmtM ? NormSum[1] : NormSum[30];
    assign Round = FmtM ? NormSum[0] : NormSum[29];
    assign SubBySmallNum = AddendStickyM&InvZ&~NormSumSticky;

    always_comb begin
        // Determine if you add 1
        case (FrmM)
            3'b000: Plus1Tmp = Gaurd & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&NormSum[2]));//round to nearest even
            3'b001: Plus1Tmp = 0;//round to zero
            3'b010: Plus1Tmp = WSgn & ~(SubBySmallNum);//round down
            3'b011: Plus1Tmp = ~WSgn & ~(SubBySmallNum);//round up
            3'b100: Plus1Tmp = (Gaurd & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky)));//round to nearest max magnitude
            default: Plus1Tmp = 1'bx;
        endcase
        // Determine if you subtract 1
        case (FrmM)
            3'b000: Minus1Tmp = 0;//round to nearest even
            3'b001: Minus1Tmp = SubBySmallNum;//round to zero
            3'b010: Minus1Tmp = ~WSgn & SubBySmallNum;//round down
            3'b011: Minus1Tmp = WSgn & SubBySmallNum;//round up
            3'b100: Minus1Tmp = 0;//round to nearest max magnitude
            default: Minus1Tmp = 1'bx;
        endcase

    end

    // If an answer is exact don't round
    assign Plus1 = Sticky | (Gaurd|Round) ? Plus1Tmp : 1'b0;
    assign Minus1 = Sticky | (Gaurd|Round) ? Minus1Tmp : 1'b0;
    // Compute rounded result
    assign {WExpTmp, WMan} = FmtM ? {SumExp, NormSum[53:2]} - {64'b0, Minus1} + {64'b0, Plus1} : {{SumExp, NormSum[53:31]} - {35'b0, Minus1} + {35'b0, Plus1}, 28'b0};
    assign WExp = WExpTmp[10:0];

    // Sign calculation

    // Determine the sign if the sum is zero
    //      if product underflows then use psign
    //      otherwise
    //          if cancellation then 0 unless round to -inf
    //          otherwise psign
    assign ZeroSgn = Underflow & ~ResultDenorm ? PSgn :
                     (PSgn^ZSgn ? FrmM == 3'b010 : PSgn);

    // is the result negative
    //  if p - z is the Sum negative
    //  if -p + z is the Sum positive
    //  if -p - z then the Sum is negative
    assign ResultSgn = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
    assign WSgn = SumZero ? ZeroSgn : ResultSgn;

    // Select the result
    //      - single precision results live in the top half of the 64-bit word:
    //        sign [63], exponent [62:55], fraction [54:32]
    //      - the exponent of a generated single precision NaN or infinity must be
    //        8'hff; the former literal 8'h7f8 does not fit in 8 bits and was
    //        truncated to 8'hf8, which encodes a finite number
    assign FmaResultM = XNaNM ? (FmtM ? {XSgn, FInput1M[62:52], 1'b1,FInput1M[50:0]} : {XSgn, FInput1M[62:55], 1'b1,FInput1M[53:0]}) :
                        YNaNM ? (FmtM ? {YSgn, FInput2M[62:52], 1'b1,FInput2M[50:0]} : {YSgn, FInput2M[62:55], 1'b1,FInput2M[53:0]}) :
                        ZNaNM ? (FmtM ? {ZSgn, FInput3M2[62:52], 1'b1,FInput3M2[50:0]} : {ZSgn, FInput3M2[62:55], 1'b1,FInput3M2[53:0]}) :
                        Invalid ? (FmtM ? {WSgn, 11'h7ff, 1'b1, 51'b0} : {WSgn, 8'hff, 1'b1, 54'b0}) : // has to be before inf
                        XInfM ? {PSgn, FInput1M[62:0]} :
                        YInfM ? {PSgn, FInput2M[62:0]} :
                        ZInfM ? {ZSgn, FInput3M2[62:0]} :
                        Overflow ? (FmtM ? {WSgn, 11'h7ff, 52'b0} : {WSgn, 8'hff, 55'b0}) :
                        Underflow & ~ResultDenorm ? (FmtM ? {WSgn, 63'b0} - {63'b0, (Minus1&AddendStickyM)} + {63'b0, (Plus1&AddendStickyM)} : {{WSgn, 31'b0} - {31'b0, (Minus1&AddendStickyM)} + {31'b0, (Plus1&AddendStickyM)}, 32'b0}) : //***do you need minus1?
                        KillProdM ? (FmtM ? FInput3M2 - {63'b0, (Minus1&AddendStickyM)} + {63'b0, (Plus1&AddendStickyM)} : {FInput3M2[63:32] - {31'b0, (Minus1&AddendStickyM)} + {31'b0, (Plus1&AddendStickyM)}, 32'b0}) : // has to be after Underflow
                        FmtM ? {WSgn,WExp,WMan} : {WSgn,WExp[6:0],WMan,4'b0};

    // Copy of the default single precision packing; not read anywhere in this module
    logic [63:0] tmp;
    assign tmp = {WSgn,WExp[6:0],WMan,4'b0};

    // Set Invalid flag for following cases:
    //   1) Inf - Inf
    //   2) 0 * Inf
    //   3) any input is a signaling NaN
    logic [12:0] MaxExp;
    assign MaxExp = FmtM ? 13'd2047 : 13'd255;
    assign ProdOf = (ProdExpM >= MaxExp && ~ProdExpM[12]);
    assign ProdInf = ProdOf && ~XNaNM && ~YNaNM;
    assign SigNaN = FmtM ? (XNaNM&~FInput1M[51]) | (YNaNM&~FInput2M[51]) | (ZNaNM&~FInput3M2[51]) : (XNaNM&~FInput1M[54]) | (YNaNM&~FInput2M[54]) | (ZNaNM&~FInput3M2[54]);
    assign Invalid = SigNaN | ((XInfM || YInfM || ProdInf) & ZInfM & (XSgn ^ YSgn ^ ZSgn)) | (XZeroM & YInfM) | (YZeroM & XInfM);

    // Set Overflow flag if the number is too big to be represented
    // NOTE(review): in single precision the packing above zero-extends a 64-bit
    // value into {WExpTmp, WMan}, so WExpTmp looks shifted by one bit relative
    // to MaxExp - confirm this comparison for FmtM = 0.
    assign Overflow = WExpTmp >= MaxExp & ~WExpTmp[12];

    // Set Underflow flag if the number is too small to be represented in normal numbers
    assign ProdUf = KillProdM & ZZeroM;
    assign Underflow = SumExp[12] | ProdUf;

    // Set Inexact flag if the result is different from what would be output given infinite precision
    assign Inexact = (Sticky|Overflow| (Gaurd|Round))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);

    // Combine flags
    //      - FMA can't set the Divide by zero flag
    //      - Don't set the underflow flag if the result is exact
    assign FmaFlagsM = {Invalid, 1'b0, Overflow, Underflow & Inexact, Inexact};

endmodule
|
||||
|
@ -1,40 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: lop.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements a Leading One Predictor used to determine
|
||||
// the normalization shift count.
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module lza(
  input  logic [163:0] sum,      // sum
  output logic [8:0]   normcnt,  // normalization shift count
  output logic         sumzero   // sum = 0
);

  // Number of bit positions skipped so far while scanning down from sum[163]
  logic [8:0] lead;

  // Behavioral model only: the result of the adder is scanned from the most
  // significant bit until a bit that is not zero is found. A real leading
  // one predictor would use a parallel prefix structure and would operate on
  // the sources of the adder rather than on its result.
  always_comb begin
    lead = 0;
    // The bounds check comes first; the scan stops after bit 0 has been examined
    while (lead <= 163 && ~sum[163-lead]) begin
      lead = lead + 1;
    end
    normcnt = lead;
  end

  // Also report an all-zero sum
  assign sumzero = ~|sum;

endmodule
|
||||
|
@ -1,136 +0,0 @@
|
||||
|
||||
module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
/////////////////////////////////////////////////////////////////////////////
// Significand multiplier.  Builds 27 partial products with the `booth`
// cells, lines them up in 107-bit rows (acc) and reduces the rows with
// add3comp2 / add4comp2 stages (defined elsewhere; presumably 3:2 and 4:2
// carry-save compressors - confirm in compressors.sv).  The product is
// returned as two vectors, rE and sE, to be summed by the caller.

  input  logic [51:0]  xman;       // fraction of multiplicand x
  input  logic [51:0]  yman;       // fraction of multiplier y
  input  logic         xdenormE;   // x is denormalized
  input  logic         ydenormE;   // y is denormalized
  input  logic         xzeroE;     // x is zero
  input  logic         yzeroE;     // y is zero
  output logic [105:0] rE;         // partial product 1 (shifted carry vector)
  output logic [105:0] sE;         // partial product 2 (sum vector)

  // Operands with the implied leading bit made explicit
  wire [54:0]         yExt;        // {0, leading bit, yman, 0}
  wire [53:0]         xExt;        // {0, leading bit, xman}

  // Per-row outputs of the booth cells
  wire [26:0][1:0]    add1;
  wire [26:0][54:0]   pp;
  wire [26:0]         e;

  // Aligned rows and the intermediate reduction levels
  wire  [26:0][106:0] acc;
  logic [17:0][106:0] lv1add;
  logic [11:0][106:0] lv2add;
  logic [7:0][106:0]  lv3add;
  logic [3:0][106:0]  lv4add;
  logic [21:0][107:0] carryTmp;
  logic [106:0]       tmpsE;

  genvar k;

  // The leading bit is 1 only for a nonzero, normalized operand.
  assign xExt = {1'b0, ~xdenormE & ~xzeroE, xman};
  assign yExt = {1'b0, ~ydenormE & ~yzeroE, yman, 1'b0};

  // One booth cell per overlapping 3-bit window of yExt.
  generate
    for (k = 0; k < 27; k = k + 1) begin
      booth booth(.xExt(xExt), .choose(yExt[2*k +: 3]), .add1(add1[k]), .e(e[k]), .pp(pp[k]));
    end
  endgenerate

  // Row k sits 2*(k-1) bits to the left of row 1; the add1 bits of the
  // previous row are tucked in just below each partial product.
  assign acc[0]  = {49'b0, ~e[0], e[0], e[0], pp[0]};
  assign acc[1]  = {49'd1, ~e[1],  pp[1],  add1[0]};
  assign acc[2]  = {47'd1, ~e[2],  pp[2],  add1[1],  2'b0};
  assign acc[3]  = {45'd1, ~e[3],  pp[3],  add1[2],  4'b0};
  assign acc[4]  = {43'd1, ~e[4],  pp[4],  add1[3],  6'b0};
  assign acc[5]  = {41'd1, ~e[5],  pp[5],  add1[4],  8'b0};
  assign acc[6]  = {39'd1, ~e[6],  pp[6],  add1[5],  10'b0};
  assign acc[7]  = {37'd1, ~e[7],  pp[7],  add1[6],  12'b0};
  assign acc[8]  = {35'd1, ~e[8],  pp[8],  add1[7],  14'b0};
  assign acc[9]  = {33'd1, ~e[9],  pp[9],  add1[8],  16'b0};
  assign acc[10] = {31'd1, ~e[10], pp[10], add1[9],  18'b0};
  assign acc[11] = {29'd1, ~e[11], pp[11], add1[10], 20'b0};
  assign acc[12] = {27'd1, ~e[12], pp[12], add1[11], 22'b0};
  assign acc[13] = {25'd1, ~e[13], pp[13], add1[12], 24'b0};
  assign acc[14] = {23'd1, ~e[14], pp[14], add1[13], 26'b0};
  assign acc[15] = {21'd1, ~e[15], pp[15], add1[14], 28'b0};
  assign acc[16] = {19'd1, ~e[16], pp[16], add1[15], 30'b0};
  assign acc[17] = {17'd1, ~e[17], pp[17], add1[16], 32'b0};
  assign acc[18] = {15'd1, ~e[18], pp[18], add1[17], 34'b0};
  assign acc[19] = {13'd1, ~e[19], pp[19], add1[18], 36'b0};
  assign acc[20] = {11'd1, ~e[20], pp[20], add1[19], 38'b0};
  assign acc[21] = {9'd1,  ~e[21], pp[21], add1[20], 40'b0};
  assign acc[22] = {7'd1,  ~e[22], pp[22], add1[21], 42'b0};
  assign acc[23] = {5'd1,  ~e[23], pp[23], add1[22], 44'b0};
  assign acc[24] = {3'd1,  ~e[24], pp[24], add1[23], 46'b0};
  assign acc[25] = {1'b0,  ~e[25], pp[25], add1[24], 48'b0};
  assign acc[26] = {pp[26], add1[25], 50'b0};

  //*** resize adders
  // Level 1: 27 rows -> 18 rows.  Each carry vector is shifted left one bit
  // before it is handed to the next level.
  generate
    for (k = 0; k < 9; k = k + 1) begin
      add3comp2 #(.BITS(107)) add1(.a(acc[k*3]), .b(acc[k*3+1]), .c(acc[k*3+2]),
                                   .carry(carryTmp[k][106:0]), .sum(lv1add[k*2+1]));
      assign lv1add[k*2] = {carryTmp[k][105:0], 1'b0};
    end
  endgenerate

  // Level 2: 18 rows -> 12 rows
  generate
    for (k = 0; k < 6; k = k + 1) begin
      add3comp2 #(.BITS(107)) add2(.a(lv1add[k*3]), .b(lv1add[k*3+1]), .c(lv1add[k*3+2]),
                                   .carry(carryTmp[k+9][106:0]), .sum(lv2add[k*2+1]));
      assign lv2add[k*2] = {carryTmp[k+9][105:0], 1'b0};
    end
  endgenerate

  // Level 3: 12 rows -> 8 rows
  generate
    for (k = 0; k < 4; k = k + 1) begin
      add3comp2 #(.BITS(107)) add3(.a(lv2add[k*3]), .b(lv2add[k*3+1]), .c(lv2add[k*3+2]),
                                   .carry(carryTmp[k+15][106:0]), .sum(lv3add[k*2+1]));
      assign lv3add[k*2] = {carryTmp[k+15][105:0], 1'b0};
    end
  endgenerate

  // Level 4: 8 rows -> 4 rows
  generate
    for (k = 0; k < 2; k = k + 1) begin
      add4comp2 #(.BITS(107)) add4(.a(lv3add[k*4]), .b(lv3add[k*4+1]), .c(lv3add[k*4+2]), .d(lv3add[k*4+3]),
                                   .carry(carryTmp[k+19]), .sum(lv4add[k*2+1]));
      assign lv4add[k*2] = {carryTmp[k+19][105:0], 1'b0};
    end
  endgenerate

  // Level 5: 4 rows -> final sum/carry pair
  add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]),
                               .carry(carryTmp[21]), .sum(tmpsE));

  assign sE = tmpsE[105:0];
  assign rE = {carryTmp[21][104:0], 1'b0};

endmodule
|
||||
|
@ -1,147 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: normalize.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block performs the normalization shift. It also
|
||||
// generates the Rands bits for rounding. Finally, it
|
||||
// handles the special case of a zero sum.
|
||||
//
|
||||
// v[53:2] is the fraction component of the prerounded result.
|
||||
// It can be bypassed back to the X or Z inputs of the FMAC
|
||||
// for back-to-back operations.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module normalize(sum, zexp, normcnt, aeM, aligncntM, sumshiftM, sumshiftzeroM, sumzero,
                 xzeroM, zzeroM, yzeroM, bsM, xdenormM, ydenormM, zdenormM, sticky, de0, resultdenorm, v);
/////////////////////////////////////////////////////////////////////////////
// Normalization shift of the FMA adder result.
//
// Shifts sum left, slices out the 54-bit window v (52 fraction bits plus
// the two rounding bits v[1] and v[0]), ORs everything below the window
// together with bsM to form sticky, and produces the exponent de0 together
// with a flag telling whether the result is denormalized.
//
// The only state-like signal, sumshifttmp, is given a value on every path
// so the always_comb block is purely combinational (no inferred latches).

  input  logic [163:0] sum;            // sum from the adder
  input  logic [62:52] zexp;           // exponent field of Z
  input  logic [8:0]   normcnt;        // leading-zero count of sum
  input  logic [12:0]  aeM;            // product exponent (ea + eb)
  input  logic [12:0]  aligncntM;      // alignment shift count (d)
  input  logic [8:0]   sumshiftM;      // shift count used when the addend anchors the result
  input  logic         sumshiftzeroM;  // selects the unshifted sum in the addend-anchored case
  input  logic         sumzero;        // sum is zero
  input  logic         bsM;            // sticky bit for addend
  input  logic         xdenormM;       // Input X is denormalized
  input  logic         ydenormM;       // Input Y is denormalized
  input  logic         zdenormM;       // Input Z is denormalized
  input  logic         xzeroM;         // Input X is zero
  input  logic         yzeroM;         // Input Y is zero
  input  logic         zzeroM;         // Input Z is zero
  output logic         sticky;         // sticky bit
  output logic [12:0]  de0;            // exponent of the normalized result
  output logic         resultdenorm;   // result is denormalized
  output logic [53:0]  v;              // normalized sum, R, S bits

  // Internal nodes

  logic [163:0] sumshifted;    // shifted sum
  logic [9:0]   sumshifttmp;   // sumshiftM - 2; bit 9 set means the subtraction went negative
  logic         isShiftLeft1;  // one extra left shift when |aligncntM| <= 1 and zexp == 2

  // When the sum is zero, normalization does not apply and only the
  // sticky bit must be computed.  Otherwise, the sum is shifted
  // and the R and S bits (v[1] and v[0], respectively) are assigned.

  // The sticky bit calculation is actually built into the shifter and
  // does not require a true subtraction shown in the model.

  assign isShiftLeft1 = (aligncntM == 13'b1 || aligncntM == 13'b0 || $signed(aligncntM) == $signed(-(13'b1))) && zexp == 11'h2;

  always_comb
    begin
      // d       = aligncntM
      // l       = normcnt
      // p       = 53
      // ea + eb = aeM

      // Assigned unconditionally so no branch leaves it unassigned.
      sumshifttmp = {1'b0,sumshiftM} - 2;

      if ($signed(aligncntM)<=$signed(13'd2)) begin //d<=2
        // product anchored or cancellation
        if ($signed(aeM-{{4{normcnt[8]}},normcnt}+13'd2) >= $signed(-(13'd1022))) begin //ea+eb-l+2 >= emin
          // normal result
          de0 = xzeroM|yzeroM ? {2'b0,zexp} : aeM-{{4{normcnt[8]}},normcnt}+{12'b0,xdenormM}+{12'b0,ydenormM}+13'd57;
          resultdenorm = |sum & ~|de0 | de0[12];
          // if z is zero then there was a 56 bit shift of the product
          sumshifted = resultdenorm ? sum << (sumshiftM-{8'b0,zzeroM}+{8'b0,isShiftLeft1}) : sum << normcnt; // p+2+l
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
        end else begin
          // exponent below emin: shift by an amount derived from aeM only
          sumshifted = sum << (13'd1080+aeM);
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
          resultdenorm = 1;
          de0 = 0;
        end

      end else begin // extract normalized bits
        // A negative sumshifttmp (bit 9 set) means no shift is applied.
        sumshifted = sumshifttmp[9] ? sum : sum << sumshifttmp;

        // for some reason use exp = zexp + {0,1,2}
        // the book says exp = zexp + {-1,0,1}
        if(sumshiftzeroM) begin
          v = sum[162:109];
          sticky = (|sum[108:0]) | bsM;
          de0 = {2'b0,zexp};
        end else if(sumshifted[163] & ~sumshifttmp[9]) begin
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
          de0 = {2'b0,zexp} +13'd2;
        end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin
          v = sumshifted[161:108];
          sticky = (|sumshifted[107:0]) | bsM;
          de0 = {2'b0,zexp}+13'd1;
        end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin
          v = sumshifted[160:107];
          sticky = (|sumshifted[106:0]) | bsM;
          de0 = {2'b0,zexp}+{12'b0,zdenormM};
        end else if(sumshifted[160]& ~zdenormM) begin
          de0 = {2'b0,zexp}-13'b1;
          v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
          sticky = (|sumshifted[105:0]) | bsM;
        end else if(sumshifted[159]& ~zdenormM) begin
          de0 = {2'b0,zexp}-13'd2;
          v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
          sticky = (|sumshifted[104:0]) | bsM;
        end else if(zdenormM) begin
          v = sumshifted[160:107];
          sticky = (|sumshifted[106:0]) | bsM;
          de0 = {{2{zexp[62]}},zexp};
        end else begin
          de0 = 0;
          sumshifted = sum << sumshiftM-1; // p+2+l
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
        end

        // zero or negative exponent => denormalized result
        resultdenorm = (~|de0 | de0[12]);
      end
    end

endmodule
|
||||
|
||||
|
@ -1,124 +0,0 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: round.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block is responsible for rounding the normalized result of
// the FMAC. Because prenormalized results may be bypassed back to
// the FMAC X and Z inputs, rounding does not appear in the critical
// path of most floating point code. This is good because rounding
// requires an entire 52 bit carry-propagate half-adder delay.
|
||||
//
|
||||
// The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also
|
||||
// muxed in to form the actual result for register file writeback. This
|
||||
// saves a mux from the writeback path.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module round(v, sticky, FrmM, wsign,
             FmaFlagsM, inf, nanM, xnanM, ynanM, znanM,
             xman, yman, zman,
             wman, infinity, specialsel,expplus1);
/////////////////////////////////////////////////////////////////////////////
// Rounds the normalized FMA result and muxes in the special results
// (NaN, infinity / largest finite number, zero).
//
// v[53:2] is the pre-rounded fraction; {v[1], v[0], sticky} are the bits
// discarded by rounding.  FrmM selects the rounding mode:
//   000 nearest, ties to even        001 toward zero
//   010 toward -infinity             011 toward +infinity
//   100 nearest, ties to max magnitude

  input  logic [53:0] v;          // normalized sum, R, S bits
  input  logic        sticky;     // sticky bit
  input  logic [2:0]  FrmM;       // rounding mode
  input  logic        wsign;      // Sign of result
  input  logic [4:0]  FmaFlagsM;  // {invalid, div-by-zero, overflow, underflow, inexact}
  input  logic        inf;        // Some input is infinity
  input  logic        nanM;       // Some input is NaN
  input  logic        xnanM;      // X is NaN
  input  logic        ynanM;      // Y is NaN
  input  logic        znanM;      // Z is NaN
  input  logic [51:0] xman;       // fraction of X
  input  logic [51:0] yman;       // fraction of Y
  input  logic [51:0] zman;       // fraction of Z
  output logic [51:0] wman;       // rounded result of FMAC
  output logic        infinity;   // Generate infinity on overflow
  output logic        specialsel; // Select special result
  output logic        expplus1;   // rounding carried out of the fraction

  // Internal nodes

  logic        plus1;        // Round by adding one
  logic        inexact;      // at least one discarded bit is set
  wire  [52:0] v1;           // Result + 1 (for rounding)
  wire  [51:0] specialres;   // Result of exceptional case
  wire  [51:0] infinityres;  // Infinity or largest real number
  wire  [51:0] nanres;       // Propagated or generated NaN

  assign inexact = v[1] | v[0] | sticky;

  // Decide whether to add one ulp to the magnitude.
  //   {v[1], v[0], sticky}
  //   0xx          - below half an ulp
  //   100          - exact tie
  //   101/110/111  - above half an ulp
  // The directed modes may only move an inexact result; an exact result
  // must be returned unchanged in every mode.
  always_comb begin
    case (FrmM)
      3'b000:  plus1 = v[1] & (v[0] | sticky | v[2]); // nearest even: tie rounds up only if LSB is 1
      3'b001:  plus1 = 1'b0;                          // toward zero: truncate
      3'b010:  plus1 = wsign & inexact;               // toward -inf: grow magnitude of negatives
      3'b011:  plus1 = ~wsign & inexact;              // toward +inf: grow magnitude of positives
      3'b100:  plus1 = v[1];                          // nearest max magnitude: ties away from zero for both signs
      default: plus1 = 1'bx;
    endcase
  end

  // Compute rounded result
  assign v1 = v[53:2] + 1;

  // Determine special result in event of selection of a result from
  // another FPU functional unit, infinity, NAN, or underflow.
  // The special result mux is a 4:1 mux that should not appear in the
  // critical path of the machine.  It is not priority encoded, despite
  // the code below suggesting otherwise.

  assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || // overflow underflow invalid
                      nanM || inf;
  assign specialres = FmaFlagsM[4] | nanM ? nanres :      // invalid
                      FmaFlagsM[2] ? infinityres :        // overflow
                      inf ? 52'b0 :
                      FmaFlagsM[1] ? 52'b0 : 52'bx;       // underflow

  // Overflow is handled differently for different rounding modes:
  // the result is either infinity or the maximum finite number
  // (IEEE 754-2008 section 7.4).
  always_comb begin
    case (FrmM)
      3'b000:  infinity = 1'b1;    // nearest even: always infinity
      3'b001:  infinity = 1'b0;    // toward zero: always largest finite
      3'b010:  infinity = wsign;   // toward -inf: infinity only when negative
      3'b011:  infinity = ~wsign;  // toward +inf: infinity only when positive
      default: infinity = 1'b1;    // nearest max magnitude (and reserved codes)
    endcase
  end
  assign infinityres = infinity ? 52'b0 : {52{1'b1}};

  // Invalid operations produce a quiet NaN.  A NaN input is propagated
  // with its quiet bit forced to 1.
  // IEEE 754-2008 section 6.2.3 states:
  // "If two or more inputs are NaN, then the payload of the resulting NaN should be
  // identical to the payload of one of the input NaNs if representable in the destination
  // format. This standard does not specify which of the input NaNs will provide the payload."
  assign nanres = xnanM ? {1'b1, xman[50:0]} :
                  ynanM ? {1'b1, yman[50:0]} :
                  znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0};

  // Select result
  assign expplus1 = v1[52] & ~specialsel & plus1;
  assign wman = specialsel ? specialres : (plus1 ? v1[51:0] : v[53:2]);

endmodule
|
||||
|
@ -1,111 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: sign.v
|
||||
// Author: David Harris
|
||||
// Date: 12/1/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block manages the signs of the numbers.
|
||||
// 1 = negative
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
            sumzero, zinfM, inf, wsign, invz, negsum, selsum1, isAdd);
/////////////////////////////////////////////////////////////////////////////
// Sign logic of the FMA (1 = negative).  Derives the product sign, decides
// whether the addend is inverted going into the adder, and selects the sign
// of the final result, including the zero, infinity and invalid cases.

  input  logic       xsign;      // Sign of X
  input  logic       ysign;      // Sign of Y
  input  logic       zsign;      // Sign of Z
  input  logic       isAdd;      // selects the addition branch of the zero-sign rule
  input  logic       negsum0;    // Sum in +0 mode is negative
  input  logic       negsum1;    // Sum in +1 mode is negative
  input  logic       bsM;        // sticky bit from addend (not used in this block)
  input  logic [2:0] FrmM;       // rounding mode (3'b010 = round toward minus infinity)
  input  logic [4:0] FmaFlagsM;  // FMA flags; [4] = invalid, [1] = underflow
  input  logic       sumzero;    // Sum = 0
  input  logic       zinfM;      // Z = Inf
  input  logic       inf;        // Some input = Inf
  output logic       wsign;      // Sign of W
  output logic       invz;       // Invert addend into adder
  output logic       negsum;     // Negate result of adder
  output logic       selsum1;    // Select +1 mode from compound adder

  // Internal nodes

  logic psign;     // sign of the product X*Y
  wire  zerosign;  // sign if result = 0
  wire  sumneg;    // sign if result is a nonzero finite sum
  wire  infsign;   // sign if result = Inf

  // Compute sign of product
  assign psign = xsign ^ ysign;

  // Invert addend if sign of Z is different from sign of product
  assign invz = (zsign ^ psign);

  // The +1 result of the compound adder is used exactly when Z is inverted
  assign selsum1 = invz;

  // Negate the sum if the selected adder output is negative
  assign negsum = (selsum1&negsum1) | (~selsum1&negsum0);

  // Is the sum negative?
  //    p - z : negative when the adder result is negative
  //   -p + z : negative when the adder result is not negative
  //   -p - z : always negative
  assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign);

  // Compute sign of result
  // This involves a special case when the sum is zero:
  //   x+x retains the same sign as x even when x = +/- 0.
  //   otherwise, x-x = +0 unless in the RM mode when x-x = -0
  // There is also a special case for invalid results;
  //   the sign of the NaN produced is forced to be 0.
  // IEEE 754-2008 section 6.3 states
  //   "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
  // also pertaining to negZero it states:
  //   "When the sum/difference of two operands with opposite signs is exactly zero, the sign of that sum/difference
  //   shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero
  //   sum/difference shall be -0. However, x+x = x-(-x) retains the same sign as x even when x is zero."

  // zero sign
  //   if the underflow flag is set then use psign
  //   otherwise
  //     isAdd set
  //       product and Z signs differ: 0 unless round to -inf
  //       same signs: psign
  //     isAdd clear
  //       product and Z signs differ: psign
  //       same signs: 0 unless round to -inf
  assign zerosign = FmaFlagsM[1] ? psign :
                    (isAdd ? (psign^zsign ? FrmM == 3'b010 : psign) :
                             (psign^zsign ? psign : FrmM == 3'b010));

  // KEP 210112 keep the correct sign when result is infinity
  assign infsign = zinfM ? zsign : psign;

  // Priority: invalid (sign 0), then infinity, then exact zero, then the sum
  assign wsign = FmaFlagsM[4] ? 1'b0 : (inf ? infsign : (sumzero ? zerosign : sumneg));

endmodule
|
@ -1,67 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: special.v
|
||||
// Author: David Harris
|
||||
// Date: 12/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements special case handling for unusual operands (e.g.
|
||||
// 0, NaN, denormalize, infinity). The block consists of zero/one detectors.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
               xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE);
/////////////////////////////////////////////////////////////////////////////
// Classifies the three 64-bit operands as zero, NaN, denormalized or
// infinity from the exponent field [62:52] and the fraction field [51:0].
// The sign bit [63] is ignored.  Denormalized inputs are reported only as
// denormalized, never as zero.

  input  logic [63:0] ReadData1E;  // operand X
  input  logic [63:0] ReadData2E;  // operand Y
  input  logic [63:0] ReadData3E;  // operand Z
  output logic        xzeroE;      // X = 0
  output logic        yzeroE;      // Y = 0
  output logic        zzeroE;      // Z = 0
  output logic        xnanE;       // X is NaN
  output logic        ynanE;       // Y is NaN
  output logic        znanE;       // Z is NaN
  output logic        xdenormE;    // X is denormalized
  output logic        ydenormE;    // Y is denormalized
  output logic        zdenormE;    // Z is denormalized
  output logic        xinfE;       // X is infinity
  output logic        yinfE;       // Y is infinity
  output logic        zinfE;       // Z is infinity

  // Field detectors, computed once per operand and shared by every
  // classification below.
  logic xExpOnes, xExpZero, xFracZero;
  logic yExpOnes, yExpZero, yFracZero;
  logic zExpOnes, zExpZero, zFracZero;

  assign xExpOnes  = &ReadData1E[62:52];
  assign xExpZero  = ~(|ReadData1E[62:52]);
  assign xFracZero = ~(|ReadData1E[51:0]);

  assign yExpOnes  = &ReadData2E[62:52];
  assign yExpZero  = ~(|ReadData2E[62:52]);
  assign yFracZero = ~(|ReadData2E[51:0]);

  assign zExpOnes  = &ReadData3E[62:52];
  assign zExpZero  = ~(|ReadData3E[62:52]);
  assign zFracZero = ~(|ReadData3E[51:0]);

  // NaN: exponent all ones, fraction nonzero
  assign xnanE = xExpOnes && ~xFracZero;
  assign ynanE = yExpOnes && ~yFracZero;
  assign znanE = zExpOnes && ~zFracZero;

  // Denormalized: exponent zero, fraction nonzero
  assign xdenormE = xExpZero && ~xFracZero;
  assign ydenormE = yExpZero && ~yFracZero;
  assign zdenormE = zExpZero && ~zFracZero;

  // Infinity: exponent all ones, fraction zero
  assign xinfE = xExpOnes && xFracZero;
  assign yinfE = yExpOnes && yFracZero;
  assign zinfE = zExpOnes && zFracZero;

  // Zero: exponent and fraction both zero
  assign xzeroE = xExpZero && xFracZero;
  assign yzeroE = yExpZero && yFracZero;
  assign zzeroE = zExpZero && zFracZero;

endmodule
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -29,23 +29,27 @@ module convert_inputs(Float1, Float2, op1, op2, op_type, P);
|
||||
|
||||
// Test if the input exponent is zero, because if it is then the
|
||||
// exponent of the converted number should be zero.
|
||||
assign Zexp1 = ~(op1[62] | op1[61] | op1[60] | op1[59] |
|
||||
op1[58] | op1[57] | op1[56] | op1[55]);
|
||||
assign Zexp2 = ~(op2[62] | op2[61] | op2[60] | op2[59] |
|
||||
op2[58] | op2[57] | op2[56] | op2[55]);
|
||||
assign Oexp1 = (op1[62] & op1[61] & op1[60] & op1[59] &
|
||||
op1[58] & op1[57] & op1[56] & op1[55]);
|
||||
assign Oexp2 = (op2[62] & op2[61] & op2[60] & op2[59] &
|
||||
op2[58] & op2[57] & op2[56] &op2[55]);
|
||||
assign Zexp1 = ~(|op1[30:23]);
|
||||
assign Zexp2 = ~(|op2[30:23]);
|
||||
assign Oexp1 = (&op1[30:23]);
|
||||
assign Oexp2 = (&op2[30:23]);
|
||||
// assign Zexp1 = ~(op1[62] | op1[61] | op1[60] | op1[59] |
|
||||
// op1[58] | op1[57] | op1[56] | op1[55]);
|
||||
// assign Zexp2 = ~(op2[62] | op2[61] | op2[60] | op2[59] |
|
||||
// op2[58] | op2[57] | op2[56] | op2[55]);
|
||||
// assign Oexp1 = (op1[62] & op1[61] & op1[60] & op1[59] &
|
||||
// op1[58] & op1[57] & op1[56] & op1[55]);
|
||||
// assign Oexp2 = (op2[62] & op2[61] & op2[60] & op2[59] &
|
||||
// op2[58] & op2[57] & op2[56] &op2[55]);
|
||||
|
||||
// Conditionally convert op1. Lower 29 bits are zero for single precision.
|
||||
assign Float1[62:29] = conv_SP ? {op1[62], {3{(~op1[62]&~Zexp1)|Oexp1}}, op1[61:32]}
|
||||
assign Float1[62:29] = conv_SP ? {op1[30], {3{(~op1[30]&~Zexp1)|Oexp1}}, op1[29:0]}
|
||||
: op1[62:29];
|
||||
assign Float1[28:0] = op1[28:0] & {29{~conv_SP}};
|
||||
|
||||
// Conditionally convert op2. Lower 29 bits are zero for single precision.
|
||||
assign Float2[62:29] = conv_SP ? {op2[62],
|
||||
{3{(~op2[62]&~Zexp2)|Oexp2}}, op2[61:32]}
|
||||
assign Float2[62:29] = conv_SP ? {op2[30],
|
||||
{3{(~op2[30]&~Zexp2)|Oexp2}}, op2[29:0]}
|
||||
: op2[62:29];
|
||||
assign Float2[28:0] = op2[28:0] & {29{~conv_SP}};
|
||||
|
||||
@ -54,8 +58,8 @@ module convert_inputs(Float1, Float2, op1, op2, op_type, P);
|
||||
|
||||
assign negate = op_type[2] & ~op_type[1] & op_type[0];
|
||||
assign abs_val = op_type[2] & ~op_type[1] & ~op_type[0];
|
||||
assign Float1[63] = (op1[63] ^ negate) & ~abs_val;
|
||||
assign Float2[63] = op2[63];
|
||||
assign Float1[63] = conv_SP ? (op1[31] ^ negate) & ~abs_val : (op1[63] ^ negate) & ~abs_val;
|
||||
assign Float2[63] = conv_SP ? op2[31] : op2[63];
|
||||
|
||||
endmodule // convert_inputs
|
||||
|
||||
|
@ -3,22 +3,21 @@
|
||||
// it conditionally converts single precision values to double
|
||||
// precision values and modifies the sign of op1.
|
||||
// The converted operands are Float1 and Float2.
|
||||
|
||||
module convert_inputs_div (Float1, Float2b, op1, op2, op_type, P);
|
||||
|
||||
input [63:0] op1; // 1st input operand (A)
|
||||
input [63:0] op2; // 2nd input operand (B)
|
||||
input P; // Result Precision (0 for double, 1 for single)
|
||||
input op_type; // Operation
|
||||
input logic [63:0] op1; // 1st input operand (A)
|
||||
input logic [63:0] op2; // 2nd input operand (B)
|
||||
input logic P; // Result Precision (0 for double, 1 for single)
|
||||
input logic op_type; // Operation
|
||||
|
||||
output [63:0] Float1; // Converted 1st input operand
|
||||
output [63:0] Float2b; // Converted 2nd input operand
|
||||
output logic [63:0] Float1; // Converted 1st input operand
|
||||
output logic [63:0] Float2b; // Converted 2nd input operand
|
||||
|
||||
wire [63:0] Float2;
|
||||
wire Zexp1; // One if the exponent of op1 is zero
|
||||
wire Zexp2; // One if the exponent of op2 is zero
|
||||
wire Oexp1; // One if the exponent of op1 is all ones
|
||||
wire Oexp2; // One if the exponent of op2 is all ones
|
||||
logic [63:0] Float2;
|
||||
logic Zexp1; // One if the exponent of op1 is zero
|
||||
logic Zexp2; // One if the exponent of op2 is zero
|
||||
logic Oexp1; // One if the exponent of op1 is all ones
|
||||
logic Oexp2; // One if the exponent of op2 is all ones
|
||||
|
||||
// Test if the input exponent is zero, because if it is then the
|
||||
// exponent of the converted number should be zero.
|
||||
|
76
wally-pipelined/src/fpu/divconv.sv
Normal file → Executable file
76
wally-pipelined/src/fpu/divconv.sv
Normal file → Executable file
@ -1,11 +1,6 @@
|
||||
// `timescale 1ps/1ps
|
||||
module divconv (q1, qm1, qp1, q0, qm0, qp0,
|
||||
rega_out, regb_out, regc_out, regd_out,
|
||||
regr_out, d, n,
|
||||
sel_muxa, sel_muxb, sel_muxr,
|
||||
reset, clk,
|
||||
load_rega, load_regb, load_regc, load_regd,
|
||||
load_regr, load_regs, P, op_type, exp_odd);
|
||||
module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
|
||||
regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk, load_rega, load_regb,
|
||||
load_regc, load_regd, load_regr, load_regs, P, op_type, exp_odd);
|
||||
|
||||
input logic [52:0] d, n;
|
||||
input logic [2:0] sel_muxa, sel_muxb;
|
||||
@ -40,9 +35,7 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
|
||||
logic [127:0] constant, constant2;
|
||||
logic [63:0] q_const, qp_const, qm_const;
|
||||
logic [63:0] d2, n2;
|
||||
logic [11:0] d3;
|
||||
|
||||
logic cout1, cout2, cout3, cout4, cout5, cout6, cout7, muxr_out;
|
||||
logic [11:0] d3;
|
||||
|
||||
// Check if exponent is odd for sqrt
|
||||
// If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA
|
||||
@ -68,9 +61,9 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
|
||||
mux2 #(64) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier);
|
||||
mux2 #(64) mx6 (muxa_out, mcand_q, sel_muxr, mcand);
|
||||
// TDM multiplier (carry/save)
|
||||
multiplier mult1 (mcand, mplier, Sum, Carry); // ***multiply
|
||||
multiplier mult1 (mcand, mplier, Sum, Carry);
|
||||
// Q*D - N (reversed but changed in rounder.v to account for sign reversal)
|
||||
csa #(128) csa1 (Sum, Carry, constant, Sum2, Carry2); //***adder
|
||||
csa #(128) csa1 (Sum, Carry, constant, Sum2, Carry2);
|
||||
// Add ulp for subtraction in remainder
|
||||
mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out);
|
||||
|
||||
@ -80,15 +73,17 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
|
||||
mux2 #(64) mxA ({64'hFFFF_FFFF_FFFF_F9FF}, {64'hFFFF_FF3F_FFFF_FFFF}, P, qm_const);
|
||||
|
||||
// CPA (from CSA)/Remainder addition/subtraction
|
||||
ldf128 cpa1 (cout1, mul_out, Sum2, Carry2, muxr_out); //***adder
|
||||
adder #(128) cpa1 (Sum2, Carry2, muxr_out, mul_out, cout1);
|
||||
|
||||
// Assuming [1,2) - q1
|
||||
ldf64 cpa2 (cout2, q_out1, regb_out, q_const, 1'b0); //***adder
|
||||
ldf64 cpa3 (cout3, qp_out1, regb_out, qp_const, 1'b0); //***adder
|
||||
ldf64 cpa4 (cout4, qm_out1, regb_out, qm_const, 1'b1); //***adder
|
||||
// Assuming [0.5,1) - q0
|
||||
ldf64 cpa5 (cout5, q_out0, {regb_out[62:0], vss}, q_const, 1'b0); //***adder
|
||||
ldf64 cpa6 (cout6, qp_out0, {regb_out[62:0], vss}, qp_const, 1'b0); //***adder
|
||||
ldf64 cpa7 (cout7, qm_out0, {regb_out[62:0], vss}, qm_const, 1'b1); //***adder
|
||||
adder #(64) cpa2 (regb_out, q_const, 1'b0, q_out1, cout2);
|
||||
adder #(64) cpa3 (regb_out, qp_const, 1'b0, qp_out1, cout3);
|
||||
adder #(64) cpa4 (regb_out, qm_const, 1'b1, qm_out1, cout4);
|
||||
// Assuming [0.5,1) - q0
|
||||
adder #(64) cpa5 ({regb_out[62:0], vss}, q_const, 1'b0, q_out0, cout5);
|
||||
adder #(64) cpa6 ({regb_out[62:0], vss}, qp_const, 1'b0, qp_out0, cout6);
|
||||
adder #(64) cpa7 ({regb_out[62:0], vss}, qm_const, 1'b1, qm_out0, cout7);
|
||||
|
||||
// One's complement instead of two's complement (for hw efficiency)
|
||||
assign three = {~mul_out[126], mul_out[126], ~mul_out[125:63]};
|
||||
mux2 #(64) mxTC (~mul_out[126:63], three[64:1], op_type, twocmp_out);
|
||||
@ -112,9 +107,11 @@ endmodule // divconv
|
||||
|
||||
// module adder #(parameter WIDTH=8)
|
||||
// (input logic [WIDTH-1:0] a, b,
|
||||
// output logic [WIDTH-1:0] y);
|
||||
// input logic cin,
|
||||
// output logic [WIDTH-1:0] y,
|
||||
// output logic cout);
|
||||
|
||||
// assign y = a + b;
|
||||
// assign {cout, y} = a + b + cin;
|
||||
|
||||
// endmodule // adder
|
||||
|
||||
@ -226,10 +223,33 @@ endmodule // divconv
|
||||
|
||||
// endmodule // mux6
|
||||
|
||||
// module eqcmp #(parameter WIDTH = 8)
|
||||
// (input logic [WIDTH-1:0] a, b,
|
||||
// output logic y);
|
||||
module eqcmp #(parameter WIDTH = 8)
|
||||
(input logic [WIDTH-1:0] a, b,
|
||||
output logic y);
|
||||
|
||||
// assign y = (a == b);
|
||||
assign y = (a == b);
|
||||
|
||||
// endmodule // eqcmp
|
||||
endmodule // eqcmp
|
||||
|
||||
// module fa (input logic a, b, c, output logic sum, carry);
|
||||
|
||||
// assign sum = a^b^c;
|
||||
// assign carry = a&b|a&c|b&c;
|
||||
|
||||
// endmodule // fa
|
||||
|
||||
// module csa #(parameter WIDTH=8)
|
||||
// (input logic [WIDTH-1:0] a, b, c,
|
||||
// output logic [WIDTH-1:0] sum, carry);
|
||||
|
||||
// logic [WIDTH:0] carry_temp;
|
||||
// genvar i;
|
||||
// generate
|
||||
// for (i=0;i<WIDTH;i=i+1)
|
||||
// begin : genbit
|
||||
// fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
|
||||
// end
|
||||
// endgenerate
|
||||
// assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
|
||||
|
||||
// endmodule // csa
|
||||
|
@ -1,38 +1,36 @@
|
||||
// Exception logic for the floating point adder. Note: We may
|
||||
// actually want to move to where the result is computed.
|
||||
|
||||
module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
|
||||
|
||||
input [63:0] A; // 1st input operand (op1)
|
||||
input [63:0] B; // 2nd input operand (op2)
|
||||
input op_type; // Determine operation
|
||||
input logic [63:0] A; // 1st input operand (op1)
|
||||
input logic [63:0] B; // 2nd input operand (op2)
|
||||
input logic op_type; // Determine operation
|
||||
|
||||
output [2:0] Ztype; // Indicates type of result (Z)
|
||||
output Invalid; // Invalid operation exception
|
||||
output Denorm; // Denormalized input
|
||||
output ANorm; // A is not zero or Denorm
|
||||
output BNorm; // B is not zero or Denorm
|
||||
output logic [2:0] Ztype; // Indicates type of result (Z)
|
||||
output logic Invalid; // Invalid operation exception
|
||||
output logic Denorm; // Denormalized input
|
||||
output logic ANorm; // A is not zero or Denorm
|
||||
output logic BNorm; // B is not zero or Denorm
|
||||
|
||||
wire AzeroM; // '1' if the mantissa of A is zero
|
||||
wire BzeroM; // '1' if the mantissa of B is zero
|
||||
wire AzeroE; // '1' if the exponent of A is zero
|
||||
wire BzeroE; // '1' if the exponent of B is zero
|
||||
wire AonesE; // '1' if the exponent of A is all ones
|
||||
wire BonesE; // '1' if the exponent of B is all ones
|
||||
wire ADenorm; // '1' if A is a denormalized number
|
||||
wire BDenorm; // '1' if B is a denormalized number
|
||||
wire AInf; // '1' if A is infinite
|
||||
wire BInf; // '1' if B is infinite
|
||||
wire AZero; // '1' if A is 0
|
||||
wire BZero; // '1' if B is 0
|
||||
wire ANaN; // '1' if A is a not-a-number
|
||||
wire BNaN; // '1' if B is a not-a-number
|
||||
wire ASNaN; // '1' if A is a signalling not-a-number
|
||||
wire BSNaN; // '1' if B is a signalling not-a-number
|
||||
wire ZQNaN; // '1' if result Z is a quiet NaN
|
||||
wire ZInf; // '1' if result Z is an infinity
|
||||
wire square_root; // '1' if square root operation
|
||||
wire Zero; // '1' if result is zero
|
||||
logic AzeroM; // '1' if the mantissa of A is zero
|
||||
logic BzeroM; // '1' if the mantissa of B is zero
|
||||
logic AzeroE; // '1' if the exponent of A is zero
|
||||
logic BzeroE; // '1' if the exponent of B is zero
|
||||
logic AonesE; // '1' if the exponent of A is all ones
|
||||
logic BonesE; // '1' if the exponent of B is all ones
|
||||
logic ADenorm; // '1' if A is a denormalized number
|
||||
logic BDenorm; // '1' if B is a denormalized number
|
||||
logic AInf; // '1' if A is infinite
|
||||
logic BInf; // '1' if B is infinite
|
||||
logic AZero; // '1' if A is 0
|
||||
logic BZero; // '1' if B is 0
|
||||
logic ANaN; // '1' if A is a not-a-number
|
||||
logic BNaN; // '1' if B is a not-a-number
|
||||
logic ASNaN; // '1' if A is a signalling not-a-number
|
||||
logic BSNaN; // '1' if B is a signalling not-a-number
|
||||
logic ZQNaN; // '1' if result Z is a quiet NaN
|
||||
logic ZInf; // '1' if result Z is an infinity
|
||||
logic Zero; // '1' if result is zero
|
||||
|
||||
parameter [51:0] fifty_two_zeros = 52'h0; // Use parameter?
|
||||
|
||||
@ -93,4 +91,3 @@ module exception_div (Ztype, Invalid, Denorm, ANorm, BNorm, A, B, op_type);
|
||||
assign Ztype[2] = BZero&~op_type;
|
||||
|
||||
endmodule // exception
|
||||
|
||||
|
417
wally-pipelined/src/fpu/faddcvt.sv
Executable file
417
wally-pipelined/src/fpu/faddcvt.sv
Executable file
@ -0,0 +1,417 @@
|
||||
//
|
||||
// File name : fpadd
|
||||
// Title : Floating-Point Adder/Subtractor
|
||||
// project : FPU
|
||||
// Library : fpadd
|
||||
// Author(s) : James E. Stine, Jr., Brett Mathis
|
||||
// Purpose : definition of main unit to floating-point add/sub
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
// Copyright AFRL
|
||||
//
|
||||
// Basic and Denormalized Operations
|
||||
//
|
||||
// Step 1: Load operands, set flags, and convert SP to DP
|
||||
// Step 2: Check for special inputs ( +/- Infinity, NaN)
|
||||
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
|
||||
// or if (exp1 = exp2 AND mnt1 < mnt2)
|
||||
// Step 4: Shift the mantissa corresponding to the smaller exponent,
|
||||
// and extend precision by three bits to the right.
|
||||
// Step 5: Add or subtract the mantissas.
|
||||
// Step 6: Normalize the result.//
|
||||
// Shift left until normalized. Normalized when the value to the
|
||||
// left of the binary point is 1.
|
||||
// Step 7: Round the result.//
|
||||
// Step 8: Put sum onto output.
|
||||
//
|
||||
|
||||
module faddcvt(
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic FlushM,
|
||||
input logic StallM,
|
||||
input logic [63:0] FSrcXE, // 1st input operand (A)
|
||||
input logic [63:0] FSrcYE, // 2nd input operand (B)
|
||||
input logic [3:0] FOpCtrlE, FOpCtrlM, // Function opcode
|
||||
input logic FmtE, FmtM, // Result Precision (1 for double, 0 for single)
|
||||
input logic [2:0] FrmM, // Rounding mode - specify values
|
||||
output logic [63:0] FAddResM, // Result of operation
|
||||
output logic [4:0] FAddFlgM); // IEEE exception flags
|
||||
|
||||
logic [63:0] AddSumE, AddSumM;
|
||||
logic [63:0] AddSumTcE, AddSumTcM;
|
||||
logic [3:0] AddSelInvE, AddSelInvM;
|
||||
logic [10:0] AddExpPostSumE,AddExpPostSumM;
|
||||
logic AddCorrSignE, AddCorrSignM;
|
||||
logic AddOp1NormE, AddOp1NormM;
|
||||
logic AddOp2NormE, AddOp2NormM;
|
||||
logic AddOpANormE, AddOpANormM;
|
||||
logic AddOpBNormE, AddOpBNormM;
|
||||
logic AddInvalidE, AddInvalidM;
|
||||
logic AddDenormInE, AddDenormInM;
|
||||
logic AddSwapE, AddSwapM;
|
||||
logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2
|
||||
logic AddSignAE, AddSignAM;
|
||||
logic AddConvertE, AddConvertM;
|
||||
logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M;
|
||||
logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM;
|
||||
logic [10:0] AddExponentE, AddExponentM;
|
||||
|
||||
|
||||
fpuaddcvt1 fpadd1 (.FSrcXE, .FSrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE,
|
||||
.AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE,
|
||||
.AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE,
|
||||
.AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE);
|
||||
|
||||
flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
|
||||
flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
|
||||
flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
|
||||
flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
|
||||
flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
|
||||
flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
|
||||
flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
|
||||
flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
|
||||
flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM,
|
||||
{AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE},
|
||||
{AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM});
|
||||
|
||||
|
||||
fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M,
|
||||
.AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM,
|
||||
.AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM,
|
||||
.AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM);
|
||||
endmodule
|
||||
|
||||
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FSrcXE, FSrcYE, FOpCtrlE, FmtE);
|
||||
|
||||
input logic [63:0] FSrcXE; // 1st input operand (A)
|
||||
input logic [63:0] FSrcYE; // 2nd input operand (B)
|
||||
input logic [3:0] FOpCtrlE; // Function opcode
|
||||
input logic FmtE; // Result Precision (1 for double, 0 for single)
|
||||
|
||||
wire P;
|
||||
assign P = ~FmtE;
|
||||
|
||||
wire [63:0] IntValue;
|
||||
wire [11:0] exp1, exp2;
|
||||
wire [11:0] exp_diff1, exp_diff2;
|
||||
wire [11:0] exp_shift;
|
||||
wire [51:0] mantissaA;
|
||||
wire [56:0] mantissaA1;
|
||||
wire [63:0] mantissaA3;
|
||||
wire [51:0] mantissaB;
|
||||
wire [56:0] mantissaB1, mantissaB2;
|
||||
wire [63:0] mantissaB3;
|
||||
wire exp_gt63;
|
||||
wire Sticky_out;
|
||||
wire sub;
|
||||
wire zeroB;
|
||||
wire [5:0] align_shift;
|
||||
|
||||
output logic [63:0] AddFloat1E;
|
||||
output logic [63:0] AddFloat2E;
|
||||
output logic [10:0] AddExponentE;
|
||||
output logic [10:0] AddExpPostSumE;
|
||||
output logic [11:0] AddExp1DenormE, AddExp2DenormE;//KEP used to be [10:0]
|
||||
output logic [63:0] AddSumE, AddSumTcE;
|
||||
output logic [3:0] AddSelInvE;
|
||||
output logic AddCorrSignE;
|
||||
output logic AddSignAE;
|
||||
output logic AddOp1NormE, AddOp2NormE;
|
||||
output logic AddOpANormE, AddOpBNormE;
|
||||
output logic AddInvalidE;
|
||||
output logic AddDenormInE;
|
||||
// output logic exp_valid;
|
||||
output logic AddConvertE;
|
||||
output logic AddSwapE;
|
||||
output logic AddNormOvflowE;
|
||||
wire [5:0] ZP_mantissaA;
|
||||
wire [5:0] ZP_mantissaB;
|
||||
wire ZV_mantissaA;
|
||||
wire ZV_mantissaB;
|
||||
|
||||
// Convert the input operands to their appropriate forms based on
|
||||
// the original operands, the FOpCtrlE, and their precision P.
|
||||
// Single precision inputs are converted to double precision
|
||||
// and the sign of the first operand is set appropriately based on
|
||||
// if the operation is absolute value or negation.
|
||||
|
||||
convert_inputs conv1 (AddFloat1E, AddFloat2E, FSrcXE, FSrcYE, FOpCtrlE, P);
|
||||
|
||||
// Test for exceptions and return the "Invalid Operation" and
|
||||
// "Denormalized" Input Flags. The "AddSelInvE" is used in
|
||||
// the third pipeline stage to select the result. Also, AddOp1NormE
|
||||
// and AddOp2NormE are one if FSrcXE and FSrcYE are not zero or denormalized.
|
||||
// sub is one if the effective operation is subtraction.
|
||||
|
||||
exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
|
||||
AddFloat1E, AddFloat2E, FOpCtrlE);
|
||||
|
||||
// Perform Exponent Subtraction (used for alignment). For performance
|
||||
// both exponent subtractions are performed in parallel. This was
|
||||
// changed to a behavior level to allow the tools to try to optimize
|
||||
// the two parallel additions. The input values are zero-extended to 12
|
||||
// bits prior to performing the addition.
|
||||
|
||||
assign exp1 = {1'b0, AddFloat1E[62:52]};
|
||||
assign exp2 = {1'b0, AddFloat2E[62:52]};
|
||||
assign exp_diff1 = exp1 - exp2;
|
||||
assign exp_diff2 = AddDenormInE ? ({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]}): exp2 - exp1;
|
||||
|
||||
// The second operand (B) should be set to zero, if FOpCtrlE does not
|
||||
// specify addition or subtraction
|
||||
assign zeroB = FOpCtrlE[2] | FOpCtrlE[1];
|
||||
|
||||
// Swapped operands if zeroB is not one and exp1 < exp2.
|
||||
// Swapping causes exp2 to be used for the result exponent.
|
||||
// Only the exponent of the larger operand is used to determine
|
||||
// the final result.
|
||||
assign AddSwapE = exp_diff1[11] & ~zeroB;
|
||||
assign AddExponentE = AddSwapE ? exp2[10:0] : exp1[10:0];
|
||||
assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0];
|
||||
assign mantissaA = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0];
|
||||
assign mantissaB = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0];
|
||||
assign AddSignAE = AddSwapE ? AddFloat2E[63] : AddFloat1E[63];
|
||||
|
||||
// Leading-Zero Detector. Determine the size of the shift needed for
|
||||
// normalization. If sum_corrected is all zeros, the exp_valid is
|
||||
// zero; otherwise, it is one.
|
||||
// modified to 52 bits to detect leading zeroes on denormalized mantissas
|
||||
lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
|
||||
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
|
||||
|
||||
// Denormalized exponents created by subtracting the leading zeroes from the original exponents
|
||||
assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
|
||||
assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
|
||||
|
||||
// Determine the alignment shift and limit it to 63. If any bit from
|
||||
// exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
|
||||
assign exp_shift = AddSwapE ? exp_diff2 : exp_diff1;
|
||||
assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
|
||||
| exp_shift[8] | exp_shift[7] | exp_shift[6];
|
||||
assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift
|
||||
|
||||
// Unpack the 52-bit mantissas to 57-bit numbers of the form.
|
||||
// 001.M[51]M[50] ... M[1]M[0]00
|
||||
// Unless the number has an exponent of zero, in which case it
|
||||
// is unpacked as
|
||||
// 000.00 ... 00
|
||||
// This effectively flushes denormalized values to zero.
|
||||
// The three bits to the left of the binary point prevent overflow
|
||||
// and loss of sign information. The two bits to the right of the
|
||||
// original mantissa form the "guard" and "round" bits that are used
|
||||
// to round the result.
|
||||
assign AddOpANormE = AddSwapE ? AddOp2NormE : AddOp1NormE;
|
||||
assign AddOpBNormE = AddSwapE ? AddOp1NormE : AddOp2NormE;
|
||||
assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0};
|
||||
assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0};
|
||||
|
||||
// Perform mantissa alignment using a 57-bit barrel shifter
|
||||
// If any of the bits shifted out are one, Sticky_out is set.
|
||||
// The size of the barrel shifter could be reduced by two bits
|
||||
// by not adding the leading two zeros until after the shift.
|
||||
barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);
|
||||
|
||||
// Place either the sign-extended 32-bit value or the original 64-bit value
|
||||
// into IntValue (to be used for integer to floating point conversion)
|
||||
// assign IntValue [31:0] = FSrcXE[31:0];
|
||||
// assign IntValue [63:32] = FOpCtrlE[0] ? {32{FSrcXE[31]}} : FSrcXE[63:32];
|
||||
|
||||
// If doing an integer to floating point conversion, mantissaA3 is set to
|
||||
// IntValue and the prenormalized exponent is set to 1084. Otherwise,
|
||||
// mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero,
|
||||
// and the exponent value is left unchanged.
|
||||
// Under denormalized cases, the exponent before the rounder is set to 1
|
||||
// if the normal shift value is 11.
|
||||
assign AddConvertE = ~FOpCtrlE[2] & FOpCtrlE[1];
|
||||
assign mantissaA3 = (FOpCtrlE[3]) ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E) : (AddDenormInE ? ({12'h0, mantissaA}) : (AddConvertE ? IntValue : {mantissaA1, 7'h0}));
|
||||
|
||||
// Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
|
||||
// 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
|
||||
// zeros.
|
||||
assign mantissaB3[63:7] = (FOpCtrlE[3]) ? (57'h0) : (AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
|
||||
assign mantissaB3[6] = (FOpCtrlE[3]) ? (1'b0) : (AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB);
|
||||
assign mantissaB3[5:0] = (FOpCtrlE[3]) ? (6'h01) : (AddDenormInE ? mantissaB[5:0] : 6'h0);
|
||||
|
||||
// The sign of the result needs to be corrected if the true
|
||||
// operation is subtraction and the input operands were swapped.
|
||||
assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
|
||||
|
||||
// 64-bit Mantissa Adder/Subtractor
|
||||
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder
|
||||
|
||||
// 64-bit Mantissa Subtractor - to get the two's complement of the
|
||||
// result when the sign from the adder/subtractor is negative.
|
||||
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder
|
||||
|
||||
// Finds normal underflow result to determine whether to round final exponent down
|
||||
//***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
|
||||
assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);
|
||||
|
||||
endmodule // fpadd
|
||||
|
||||
|
||||
//
|
||||
// File name : fpadd
|
||||
// Title : Floating-Point Adder/Subtractor
|
||||
// project : FPU
|
||||
// Library : fpadd
|
||||
// Author(s) : James E. Stine, Jr., Brett Mathis
|
||||
// Purpose : definition of main unit to floating-point add/sub
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
// Copyright AFRL
|
||||
//
|
||||
// Basic and Denormalized Operations
|
||||
//
|
||||
// Step 1: Load operands, set flags, and convert SP to DP
|
||||
// Step 2: Check for special inputs ( +/- Infinity, NaN)
|
||||
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
|
||||
// or if (exp1 = exp2 AND mnt1 < mnt2)
|
||||
// Step 4: Shift the mantissa corresponding to the smaller exponent,
|
||||
// and extend precision by three bits to the right.
|
||||
// Step 5: Add or subtract the mantissas.
|
||||
// Step 6: Normalize the result.//
|
||||
// Shift left until normalized. Normalized when the value to the
|
||||
// left of the binary point is 1.
|
||||
// Step 7: Round the result.//
|
||||
// Step 8: Put sum onto output.
|
||||
//
|
||||
|
||||
|
||||
module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
|
||||
|
||||
input [2:0] FrmM; // Rounding mode - specify values
|
||||
input [3:0] FOpCtrlM; // Function opcode
|
||||
input FmtM; // Result Precision (1 for double, 0 for single)
|
||||
// input AddOvEnM; // Overflow trap enabled
|
||||
// input AddUnEnM; // Underflow trap enabled
|
||||
input [63:0] AddSumM, AddSumTcM;
|
||||
input [63:0] AddFloat1M;
|
||||
input [63:0] AddFloat2M;
|
||||
input [11:0] AddExp1DenormM, AddExp2DenormM;
|
||||
input [10:0] AddExponentM, AddExpPostSumM; //exp_pre;
|
||||
//input exp_valid;
|
||||
input [3:0] AddSelInvM;
|
||||
input AddOp1NormM, AddOp2NormM;
|
||||
input AddOpANormM, AddOpBNormM;
|
||||
input AddInvalidM;
|
||||
input AddDenormInM;
|
||||
input AddSignAM;
|
||||
input AddCorrSignM;
|
||||
input AddConvertM;
|
||||
input AddSwapM;
|
||||
// input AddNormOvflowM;
|
||||
|
||||
output [63:0] FAddResM; // Result of operation
|
||||
output [4:0] FAddFlgM; // IEEE exception flags
|
||||
wire AddDenormM; // AddDenormM on input or output
|
||||
|
||||
wire P;
|
||||
assign P = ~FmtM;
|
||||
|
||||
wire [10:0] exp_pre;
|
||||
wire [63:0] Result;
|
||||
wire [63:0] sum_norm, sum_norm_w_bypass;
|
||||
wire [5:0] norm_shift, norm_shift_denorm;
|
||||
wire exp_valid;
|
||||
wire DenormIO;
|
||||
wire [4:0] FlagsIn;
|
||||
wire Sticky_out;
|
||||
wire sign_corr;
|
||||
wire zeroB;
|
||||
wire [10:0] AddExpPostSumM;
|
||||
wire mantissa_comp;
|
||||
wire mantissa_comp_sum;
|
||||
wire mantissa_comp_sum_tc;
|
||||
wire Float1_sum_comp;
|
||||
wire Float2_sum_comp;
|
||||
wire Float1_sum_tc_comp;
|
||||
wire Float2_sum_tc_comp;
|
||||
wire normal_underflow;
|
||||
wire [63:0] sum_corr;
|
||||
logic AddNormOvflowM;
|
||||
|
||||
|
||||
logic AddOvEnM; // Overflow trap enabled
|
||||
logic AddUnEnM; // Underflow trap enabled
|
||||
|
||||
assign AddOvEnM = 1'b1;
|
||||
assign AddUnEnM = 1'b1;
|
||||
// Exponent value pre-rounding with considerations for denormalized
|
||||
//cases/conversion cases
|
||||
assign exp_pre = AddDenormInM ?
|
||||
((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
|
||||
: (AddConvertM ? 11'b10000111100 : AddExponentM);
|
||||
|
||||
|
||||
// Finds normal underflow result to determine whether to round final exponent down
|
||||
// Comparison between each float and the resulting AddSumM of the primary cla adder/subtractor and cla subtractor
|
||||
assign Float1_sum_comp = (AddFloat1M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
|
||||
assign Float2_sum_comp = (AddFloat2M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
|
||||
assign Float1_sum_tc_comp = (AddFloat1M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
|
||||
assign Float2_sum_tc_comp = (AddFloat2M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
|
||||
|
||||
// Determines the correct Float value to compare based on AddSwapM result
|
||||
assign mantissa_comp_sum = AddSwapM ? Float2_sum_comp : Float1_sum_comp;
|
||||
assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp;
|
||||
|
||||
// Determines the correct comparison result based on operation and sign of resulting AddSumM
|
||||
assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
|
||||
|
||||
// If the signs are different and both operands aren't denormalized
|
||||
// the normal underflow bit is needed and therefore updated.
|
||||
assign normal_underflow = ((AddFloat1M[63] ~^ AddFloat2M[63]) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0;
|
||||
|
||||
// Determine the correct sign of the result
|
||||
assign sign_corr = ((AddCorrSignM ^ AddSignAM) & ~AddConvertM) ^ AddSumM[63];
|
||||
|
||||
// If AddSumM is negative, use its two's complement instead.
|
||||
// This value has to be 64-bits to correctly handle the
|
||||
// case 10...00
|
||||
assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & FOpCtrlM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~FOpCtrlM[0]) ))
|
||||
? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (FOpCtrlM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
|
||||
|
||||
// Finds normal underflow result to determine whether to round final exponent down
|
||||
//KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
|
||||
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
|
||||
|
||||
// Leading-Zero Detector. Determine the size of the shift needed for
|
||||
// normalization. If sum_corrected is all zeros, the exp_valid is
|
||||
// zero; otherwise, it is one.
|
||||
lz64 lzd1 (norm_shift, exp_valid, sum_corr);
|
||||
|
||||
assign norm_shift_denorm = (AddDenormInM & ( (~AddOpANormM & ~AddOpBNormM) | normal_underflow)) ? (6'h00) : (norm_shift);
|
||||
|
||||
// Barrel shifter used for normalization. It takes as inputs the
|
||||
// corrected sum and the amount by which the sum should
|
||||
// be left shifted. It outputs the normalized sum.
|
||||
barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
|
||||
|
||||
assign sum_norm_w_bypass = (FOpCtrlM[3]) ? (FOpCtrlM[0] ? ~sum_corr : sum_corr) : (sum_norm);
|
||||
|
||||
// Round the mantissa to a 52-bit value, with the leading one
|
||||
// removed. If the result is a single precision number, the actual
|
||||
// mantissa is in the upper 23 bits and the lower 29 bits are zero.
|
||||
// At this point, normalization has already been performed, so we know
|
||||
// exactly where the rounding point is. The rounding units also
|
||||
// handles special cases and set the exception flags.
|
||||
|
||||
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to
|
||||
// help in processor reservation station detection of load/stores. In
|
||||
// other words, the processor would like to know ahead of time that
|
||||
// if the result is an exception then don't load or store.
|
||||
rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid,
|
||||
AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
|
||||
AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52],
|
||||
AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);
|
||||
|
||||
// Store the final result and the exception flags in registers.
|
||||
assign FAddResM = Result;
|
||||
assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn};
|
||||
|
||||
endmodule // fpadd
|
||||
|
||||
|
@ -2,49 +2,52 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fclassify (
|
||||
input logic [63:0] SrcXE,
|
||||
input logic FmtE, // 0-Single 1-Double
|
||||
input logic XSgnE,
|
||||
input logic [51:0] XFracE,
|
||||
input logic XNaNE,
|
||||
input logic XSNaNE,
|
||||
input logic XNormE,
|
||||
input logic XDenormE,
|
||||
input logic XZeroE,
|
||||
input logic XInfE,
|
||||
// input logic FmtE, // 0-Single 1-Double
|
||||
output logic [63:0] ClassResE
|
||||
);
|
||||
|
||||
logic [31:0] Single;
|
||||
logic [63:0] Double;
|
||||
logic Sgn;
|
||||
logic Inf, NaN, Zero, Norm, Denorm;
|
||||
logic PInf, QNaN, PZero, PNorm, PDenorm;
|
||||
logic NInf, SNaN, NZero, NNorm, NDenorm;
|
||||
logic MaxExp, ExpZero, ManZero, FirstBitFrac;
|
||||
// logic XSgnE;
|
||||
// logic Inf, NaN, Zero, Norm, Denorm;
|
||||
logic PInf, PZero, PNorm, PDenorm;
|
||||
logic NInf, NZero, NNorm, NDenorm;
|
||||
// logic MaxExp, ExpZero, ManZero, FirstBitFrac;
|
||||
|
||||
// Single and Double precision layouts
|
||||
assign Single = SrcXE[63:32];
|
||||
assign Double = SrcXE;
|
||||
assign Sgn = SrcXE[63];
|
||||
// assign XSgnE = FmtE ? FSrcXE[63] : FSrcXE[31];
|
||||
|
||||
// basic calculations for readability
|
||||
|
||||
assign ExpZero = FmtE ? ~|Double[62:52] : ~|Single[30:23];
|
||||
assign MaxExp = FmtE ? &Double[62:52] : &Single[30:23];
|
||||
assign ManZero = FmtE ? ~|Double[51:0] : ~|Single[22:0];
|
||||
assign FirstBitFrac = FmtE ? Double[51] : Single[22];
|
||||
// assign ExpZero = FmtE ? ~|FSrcXE[62:52] : ~|FSrcXE[30:23];
|
||||
// assign MaxExp = FmtE ? &FSrcXE[62:52] : &FSrcXE[30:23];
|
||||
// assign ManZero = FmtE ? ~|FSrcXE[51:0] : ~|FSrcXE[22:0];
|
||||
// assign FirstBitFrac = FmtE ? FSrcXE[51] : FSrcXE[22];
|
||||
|
||||
// determine the type of number
|
||||
assign NaN = MaxExp & ~ManZero;
|
||||
assign Inf = MaxExp & ManZero;
|
||||
assign Zero = ExpZero & ManZero;
|
||||
assign Denorm= ExpZero & ~ManZero;
|
||||
assign Norm = ~ExpZero;
|
||||
// assign NaN = MaxExp & ~ManZero;
|
||||
// assign Inf = MaxExp & ManZero;
|
||||
// assign Zero = ExpZero & ManZero;
|
||||
// assign Denorm= ExpZero & ~ManZero;
|
||||
// assign Norm = ~ExpZero;
|
||||
|
||||
// determine the sub categories
|
||||
assign QNaN = FirstBitFrac&NaN;
|
||||
assign SNaN = ~FirstBitFrac&NaN;
|
||||
assign PInf = ~Sgn&Inf;
|
||||
assign NInf = Sgn&Inf;
|
||||
assign PNorm = ~Sgn&Norm;
|
||||
assign NNorm = Sgn&Norm;
|
||||
assign PDenorm = ~Sgn&Denorm;
|
||||
assign NDenorm = Sgn&Denorm;
|
||||
assign PZero = ~Sgn&Zero;
|
||||
assign NZero = Sgn&Zero;
|
||||
// assign QNaN = FirstBitFrac&NaN;
|
||||
// assign SNaN = ~FirstBitFrac&NaN;
|
||||
assign PInf = ~XSgnE&XInfE;
|
||||
assign NInf = XSgnE&XInfE;
|
||||
assign PNorm = ~XSgnE&XNormE;
|
||||
assign NNorm = XSgnE&XNormE;
|
||||
assign PDenorm = ~XSgnE&XDenormE;
|
||||
assign NDenorm = XSgnE&XDenormE;
|
||||
assign PZero = ~XSgnE&XZeroE;
|
||||
assign NZero = XSgnE&XZeroE;
|
||||
|
||||
// determine sub category and combine into the result
|
||||
// bit 0 - -Inf
|
||||
@ -57,6 +60,6 @@ module fclassify (
|
||||
// bit 7 - +Inf
|
||||
// bit 8 - signaling NaN
|
||||
// bit 9 - quiet NaN
|
||||
assign ClassResE = {{54{1'b0}}, QNaN, SNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
|
||||
assign ClassResE = {{54{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
|
||||
|
||||
endmodule
|
||||
|
@ -42,28 +42,32 @@
|
||||
module fcmp (
|
||||
input logic [63:0] op1,
|
||||
input logic [63:0] op2,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic [63:0] FSrcXE,
|
||||
input logic [63:0] FSrcYE,
|
||||
input logic [2:0] FOpCtrlE,
|
||||
input logic FmtE,
|
||||
|
||||
|
||||
output logic Invalid, // Invalid Operation
|
||||
// output logic [1:0] FCC, // Condition Codes
|
||||
output logic [63:0] CmpResE);
|
||||
|
||||
// Perform magnitude comparison between the 63 least significant bits
|
||||
// of the input operands. Only LT and EQ are returned, since GT can
|
||||
// be determined from these values.
|
||||
logic [1:0] FCC; // Condition Codes
|
||||
logic [7:0] w, x;
|
||||
logic ANaN, BNaN;
|
||||
logic Azero, Bzero;
|
||||
// logic ANaN, BNaN;
|
||||
// logic Azero, Bzero;
|
||||
logic LT; // magnitude op1 < magnitude op2
|
||||
logic EQ; // magnitude op1 = magnitude op2
|
||||
|
||||
|
||||
|
||||
magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]});
|
||||
|
||||
// Determine final values based on output of magnitude comparison,
|
||||
// sign bits, and special case testing.
|
||||
exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE);
|
||||
|
||||
// Perform magnitude comparison between the 63 least significant bits
|
||||
// of the input operands. Only LT and EQ are returned, since GT can
|
||||
@ -72,24 +76,10 @@ module fcmp (
|
||||
|
||||
// Determine final values based on output of magnitude comparison,
|
||||
// sign bits, and special case testing.
|
||||
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*);
|
||||
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(XNaNE), .BNaN(YNaNE), .Azero(XZeroE), .Bzero(YZeroE), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .FSrcXE, .FSrcYE, .*);
|
||||
|
||||
endmodule // fpcomp
|
||||
|
||||
// module magcompare2b (LT, GT, A, B);
|
||||
|
||||
// input logic [1:0] A;
|
||||
// input logic [1:0] B;
|
||||
|
||||
// output logic LT;
|
||||
// output logic GT;
|
||||
|
||||
// // Determine if A < B using a minimized sum-of-products expression
|
||||
// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
|
||||
// // Determine if A > B using a minimized sum-of-products expression
|
||||
// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
|
||||
|
||||
// endmodule // magcompare2b
|
||||
|
||||
// 2-bit magnitude comparator
|
||||
// This module compares two 2-bit values A and B. LT is '1' if A < B
|
||||
@ -195,135 +185,6 @@ module magcompare64b_1 (w, x, A, B);
|
||||
|
||||
endmodule // magcompare64b
|
||||
|
||||
// This module takes 64-bits inputs A and B, two magnitude comparison
|
||||
// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
|
||||
// operands being compared as indicated below.
|
||||
// FOpCtrlE Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
// 11 bfloat precision numbers
|
||||
//
|
||||
// The comparator produces a 2-bit signal fcc, which
|
||||
// indicates the result of the comparison as follows:
|
||||
// fcc description
|
||||
// 00 A = B
|
||||
// 01 A < B
|
||||
// 10 A > B
|
||||
// 11 A and B are unordered (i.e., A or B is NaN)
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN.
|
||||
|
||||
// exception_cmp_1: special-value pre-decode for the floating-point comparator.
//
// Reports, for each of the two raw 64-bit operands, whether it is a NaN in the
// precision selected by FOpCtrlE[1:0] and whether it is +0/-0.
//
//   FOpCtrlE[1:0]   precision   exponent field   fraction field
//        00         double      [62:52]          [51:0]
//        01         single      [62:55]          [54:32]
//        10         half        [62:58]          [57:48]
//        11         (no NaN is ever reported for this encoding)
//
// The field positions are the ones implied by the exponent bits this module
// has always tested (narrow formats sit in the upper bits of the word).
//
// A value is a NaN when its exponent field is all ones and its fraction field
// is non-zero.  The fraction test now covers the whole fraction field; the
// previous version looked only at the two most significant fraction bits, so
// a NaN whose payload lived in the lower fraction bits was reported as
// not-a-NaN.
//
// Ports (positional order is part of the interface and is unchanged):
//   ANaN, BNaN   - operand is a NaN in the selected precision
//   Azero, Bzero - bits [62:0] of the operand are all zero (sign ignored)
//   A, B         - raw operand bits
//   FOpCtrlE     - operation control; only bits [1:0] are read here
module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE);

   input logic [63:0]  A;
   input logic [63:0]  B;
   input logic [2:0]   FOpCtrlE;

   output logic        ANaN;
   output logic        BNaN;
   output logic        Azero;
   output logic        Bzero;

   // one-hot precision select decoded from FOpCtrlE[1:0]
   logic               dp, sp, hp;

   assign dp = !FOpCtrlE[1]&!FOpCtrlE[0];
   assign sp = !FOpCtrlE[1]&FOpCtrlE[0];
   assign hp = FOpCtrlE[1]&!FOpCtrlE[0];

   // NaN = exponent all ones AND any fraction bit set.
   assign ANaN = (dp & (&A[62:52]) & (|A[51:0]))  |
                 (sp & (&A[62:55]) & (|A[54:32])) |
                 (hp & (&A[62:58]) & (|A[57:48]));

   assign BNaN = (dp & (&B[62:52]) & (|B[51:0]))  |
                 (sp & (&B[62:55]) & (|B[54:32])) |
                 (hp & (&B[62:58]) & (|B[57:48]));

   // +0 or -0: everything except the sign bit is zero.  The test is over all
   // 63 low bits regardless of the selected precision, as before.
   assign Azero = (A[62:0] == 63'h0);
   assign Bzero = (B[62:0] == 63'h0);

endmodule // exception_cmp_1
|
||||
//
|
||||
// File name : fpcomp.v
|
||||
// Title : Floating-Point Comparator
|
||||
// project : FPU
|
||||
// Library : fpcomp
|
||||
// Author(s) : James E. Stine
|
||||
// Purpose : definition of main unit to floating-point comparator
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
//
|
||||
// Floating Point Comparator (Algorithm)
|
||||
//
|
||||
// 1.) Performs sign-extension if the inputs are 32-bit integers.
|
||||
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
|
||||
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
|
||||
// and correct for sign bits
|
||||
//
|
||||
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
|
||||
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
|
||||
// operands being compared as indicated below.
|
||||
// FOpCtrlE Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
// 11 (unused)
|
||||
//
|
||||
// The comparator produces a 2-bit signal FCC, which
|
||||
// indicates the result of the comparison:
|
||||
//
|
||||
// fcc description
|
||||
// 00 A = B
|
||||
// 01 A < B
|
||||
// 10 A > B
|
||||
// 11 A and B are unordered (i.e., A or B is NaN)
|
||||
//
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN per 754
|
||||
|
||||
|
||||
/*module magcompare2b (LT, GT, A, B);
|
||||
|
||||
input logic [1:0] A;
|
||||
input logic [1:0] B;
|
||||
|
||||
output logic LT;
|
||||
output logic GT;
|
||||
|
||||
// Determine if A < B using a minimized sum-of-products expression
|
||||
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
|
||||
// Determine if A > B using a minimized sum-of-products expression
|
||||
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
|
||||
|
||||
endmodule*/ // magcompare2b
|
||||
|
||||
// 2-bit magnitude comparator
|
||||
// This module compares two 2-bit values A and B. LT is '1' if A < B
|
||||
// and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
|
||||
// this version actually incorporates don't cares into the equation to
|
||||
// simplify the optimization
|
||||
|
||||
// module magcompare2c (LT, GT, A, B);
|
||||
|
||||
// input logic [1:0] A;
|
||||
// input logic [1:0] B;
|
||||
|
||||
// output logic LT;
|
||||
// output logic GT;
|
||||
|
||||
// assign LT = B[1] | (!A[1]&B[0]);
|
||||
// assign GT = A[1] | (!B[1]&A[0]);
|
||||
|
||||
// endmodule // magcompare2b
|
||||
|
||||
// This module compares two 64-bit values A and B. LT is '1' if A < B
|
||||
// and EQ is '1'if A = B. LT and GT are both '0' if A > B.
|
||||
// This structure was modified so
|
||||
@ -385,6 +246,8 @@ endmodule // magcompare64b
|
||||
module exception_cmp_2 (
|
||||
input logic [63:0] A,
|
||||
input logic [63:0] B,
|
||||
input logic [63:0] FSrcXE,
|
||||
input logic [63:0] FSrcYE,
|
||||
input logic FmtE,
|
||||
input logic LT_mag,
|
||||
input logic EQ_mag,
|
||||
@ -453,8 +316,8 @@ module exception_cmp_2 (
|
||||
|
||||
always_comb begin
|
||||
case (FOpCtrlE[2:0])
|
||||
3'b111: CmpResE = LT ? A : B;//min
|
||||
3'b101: CmpResE = GT ? A : B;//max
|
||||
3'b111: CmpResE = LT ? FSrcXE : FSrcYE;//min
|
||||
3'b101: CmpResE = GT ? FSrcXE : FSrcYE;//max
|
||||
3'b010: CmpResE = {63'b0, EQ};//equal
|
||||
3'b001: CmpResE = {63'b0, LT};//less than
|
||||
3'b011: CmpResE = {63'b0, LT|EQ};//less than or equal
|
||||
|
@ -6,7 +6,7 @@ module fctrl (
|
||||
input logic [2:0] Funct3D,
|
||||
input logic [2:0] FRM_REGW,
|
||||
output logic IllegalFPUInstrD,
|
||||
output logic FWriteEnD,
|
||||
output logic FRegWriteD,
|
||||
output logic FDivStartD,
|
||||
output logic [2:0] FResultSelD,
|
||||
output logic [3:0] FOpCtrlD,
|
||||
@ -21,7 +21,7 @@ module fctrl (
|
||||
// FPU Instruction Decoder
|
||||
always_comb
|
||||
case(OpD)
|
||||
// FWriteEn_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
|
||||
// FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
|
||||
7'b0000111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_000_0000_00_00_0_0; // flw
|
||||
3'b011: ControlsD = `FCTRLW'b1_0_000_0001_00_00_0_0; // fld
|
||||
@ -64,44 +64,44 @@ module fctrl (
|
||||
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w
|
||||
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d
|
||||
else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
7'b1100000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.s.w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.s.wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.s.l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.s.lu
|
||||
7'b1101000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.s.w
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.s.wu
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.s.l
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.s.lu
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1101000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_1_100_0010_00_00_0_0; // fcvt.w.s
|
||||
2'b01: ControlsD = `FCTRLW'b1_1_100_0110_00_00_0_0; // fcvt.wu.s
|
||||
2'b10: ControlsD = `FCTRLW'b1_1_100_1010_00_00_0_0; // fcvt.l.s
|
||||
2'b11: ControlsD = `FCTRLW'b1_1_100_1110_00_00_0_0; // fcvt.lu.s
|
||||
7'b1100000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.s
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.s
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.s
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.s
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fcvt.s.d
|
||||
7'b1100001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.d.w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.d.wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.d.l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.d.lu
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.s.d
|
||||
7'b1101001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.d.w
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.d.wu
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.d.l
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.d.lu
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1101001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_100_0010_00_00_0_0; // fcvt.w.d
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_100_0110_00_00_0_0; // fcvt.wu.d
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_100_1010_00_00_0_0; // fcvt.l.d
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_100_1110_00_00_0_0; // fcvt.lu.d
|
||||
7'b1100001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.d
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.d
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.d
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.d
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fcvt.d.s
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.d.s
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
// unswizzle control bits
|
||||
assign {FWriteEnD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
|
||||
assign {FRegWriteD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
|
||||
|
||||
// if dynamic rounding, choose FRM_REGW
|
||||
assign FrmD = &Funct3D ? FRM_REGW : Funct3D;
|
||||
@ -109,7 +109,7 @@ module fctrl (
|
||||
// Precision
|
||||
// 0-single
|
||||
// 1-double
|
||||
assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : Funct7D[0];
|
||||
assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
|
||||
// div/sqrt
|
||||
// fdiv = ???0
|
||||
// fsqrt = ???1
|
||||
|
190
wally-pipelined/src/fpu/fcvt.sv
Normal file
190
wally-pipelined/src/fpu/fcvt.sv
Normal file
@ -0,0 +1,190 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
// fcvt: integer <-> floating-point conversion datapath (combinational).
//
// FOpCtrlE = {long, unsigned, to int, from int}
//    fcvt.w.s  / fcvt.w.d  = 0010      fcvt.s.w  / fcvt.d.w  = 0001
//    fcvt.wu.s / fcvt.wu.d = 0110      fcvt.s.wu / fcvt.d.wu = 0101
//    fcvt.l.s  / fcvt.l.d  = 1010      fcvt.s.l  / fcvt.d.l  = 1001
//    fcvt.lu.s / fcvt.lu.d = 1110      fcvt.s.lu / fcvt.d.lu = 1101
//
// FmtE selects the floating-point precision (1 = double, 0 = single).
//
// Outputs:
//    CvtResE - FOpCtrlE[0] ? floating-point result : integer result
//    CvtFlgE - {invalid, divide by zero, overflow, underflow, inexact};
//              only invalid (to-int) and inexact (from-int) are ever set here
//
// Fixes relative to the previous revision:
//    * LZResP is 7 bits wide.  The leading-one search can produce a shift
//      count of 64 (a 64-bit integer whose only set bit is bit 0); with a
//      6-bit LZResP that count wrapped to 0, giving the wrong exponent and
//      mantissa (e.g. fcvt.d.l of 1 produced 2^64).
//    * The leading-one loop tests its bound before indexing PosInt, so it no
//      longer reads PosInt[-1] when no one is found.
module fcvt (
    input  logic             XSgnE,       // sign of the floating-point input
    input  logic [10:0]      XExpE,       // biased exponent of the floating-point input
    input  logic [51:0]      XFracE,      // fraction of the floating-point input
    input  logic             XAssumed1E,  // leading mantissa bit, prepended to XFracE
    input  logic             XZeroE,      // floating-point input is zero
    input  logic             XNaNE,       // floating-point input is NaN
    input  logic             XInfE,       // floating-point input is infinity
    input  logic             XDenormE,    // floating-point input is denormalized
    input  logic [10:0]      BiasE,       // exponent bias for the selected precision
    input  logic [`XLEN-1:0] SrcAE,       // integer input
    input  logic [3:0]       FOpCtrlE,    // chooses which instruction is done (list above)
    input  logic [2:0]       FrmE,        // rounding mode: 000 = round to nearest, ties to even
                                          //                001 = round towards zero
                                          //                010 = round down
                                          //                011 = round up
                                          //                100 = round to nearest, ties to max magnitude
    input  logic             FmtE,        // precision 1 = double 0 = single
    output logic [63:0]      CvtResE,     // convert final result
    output logic [4:0]       CvtFlgE);    // convert flags {invalid, divide by zero, overflow, underflow, inexact}

    logic               ResSgn;                     // FP result's sign
    logic [10:0]        ResExp, TmpExp;             // FP result's exponent (after / before rounding)
    logic [51:0]        ResFrac;                    // FP result's fraction
    logic [6:0]         LZResP;                     // leading-one shift count, 1..65
    logic [7:0]         Bits;                       // how many bits are in the integer result
    logic [7:0]         SubBits;                    // input width used in the FP-result exponent
    logic [64+51:0]     ShiftedManTmp;              // shifted mantissa
    logic [64+51:0]     ShiftVal;                   // value being shifted (to int - X mantissa, to FP - |integer input|)
    logic [64+1:0]      ShiftedMan;                 // shifted mantissa truncated
    logic [64:0]        RoundedTmp;                 // full size rounded result - in case of overflow
    logic [63:0]        Rounded;                    // rounded result
    logic [12:0]        ExpVal;                     // unbiased X exponent
    logic [12:0]        ShiftCnt;                   // how much the mantissa is shifted
    logic [64-1:0]      IntIn;                      // integer input positioned in the most significant bits
    logic [64-1:0]      PosInt;                     // absolute value of the integer input
    logic [63:0]        CvtIntRes;                  // integer result from the fp -> int instructions
    logic [63:0]        CvtFPRes;                   // floating-point result from the int -> fp instructions
    logic               Of, Uf;                     // did the integer result overflow or underflow
    logic               Guard, Round, LSB, Sticky;  // bits used to determine rounding
    logic               Plus1, CalcPlus1;           // add one for rounding
    logic               SgnRes;                     // to-int and not long: selects the 32-bit saturation values
    logic               Res64, In64;                // is the result or input 64 bits
    logic               RoundMSB;                   // carry out of the rounded integer
    logic               RoundSgn;                   // sign bit of the rounded integer
    logic [8:0]         i;                          // leading-zero count of PosInt

    // calculate signals based off the input and output's size
    assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101)));
    assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE)));
    assign SubBits = In64 ? 8'd64 : 8'd32;
    assign Bits = Res64 ? 8'd64 : 8'd32;

    // calculate the unbiased exponent
    assign ExpVal = XExpE - BiasE + XDenormE;

    ////////////////////////////////////////////////////////
    // integer input preparation (int -> fp)
    ////////////////////////////////////////////////////////

    // position the input in the most significant bits
    assign IntIn = FOpCtrlE[3] ? {SrcAE, {64-`XLEN{1'b0}}} : {SrcAE[31:0], 32'b0};
    // make the integer positive
    assign PosInt = IntIn[64-1]&~FOpCtrlE[2] ? -IntIn : IntIn;
    // determine the integer's sign
    assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;

    // Leading one detector.
    // The bound is tested first so PosInt is never indexed out of range.
    always_comb begin
        i = 0;
        while (i < `XLEN && ~PosInt[64-1-i]) i = i+1;  // search for leading one
        LZResP = i[6:0] + 7'd1;                        // shift count that also drops the leading one
    end

    // if no one was found set to zero otherwise calculate the exponent
    assign TmpExp = i==`XLEN ? 0 : BiasE + SubBits - LZResP;

    ////////////////////////////////////////////
    // shared shifter
    ////////////////////////////////////////////

    // select the shift value and amount based on operation (to fp or int)
    assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP;
    assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, XAssumed1E, XFracE} : {PosInt, 52'b0};

    // if shift = -1 then shift one bit right for the guard bit (right shifting twice never rounds)
    // if the shift is negative add a bit for sticky bit calculation
    // otherwise shift left
    assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, XAssumed1E, XFracE[51:1]} : ShiftCnt[12] ? {{64+51{1'b0}}, ~XZeroE} : ShiftVal << ShiftCnt;

    // truncate the shifted mantissa
    assign ShiftedMan = ShiftedManTmp[64+51:50];

    // calculate sticky bit
    //  - take into account the possible right shift from before
    //  - the sticky bit calculation covers three different sizes depending on the operation
    assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFracE[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);

    // determine guard, round, and least significant bit of the result
    assign Guard = FOpCtrlE[1] ? ShiftedMan[1] : FmtE ? ShiftedMan[13] : ShiftedMan[42];
    assign Round = FOpCtrlE[1] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41];
    assign LSB   = FOpCtrlE[1] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43];

    always_comb begin
        // Determine if you add 1
        case (FrmE)
            3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even
            3'b001: CalcPlus1 = 0;//round to zero
            3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down
            3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up
            3'b100: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky));//round to nearest max magnitude
            default: CalcPlus1 = 1'bx;
        endcase
    end

    // don't round if the result is exact
    assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[1]);

    // round the shifted mantissa
    assign RoundedTmp = ShiftedMan[64+1:2] + Plus1;
    assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 : {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ;

    // fit the rounded result into the appropriate size and take the 2's complement if needed
    assign Rounded = Res64 ? XSgnE&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] :
                             XSgnE ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};

    // extract the MSB and sign for later use (used to determine underflow and overflow)
    assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32];
    assign RoundSgn = Res64 ? Rounded[63] : Rounded[31];

    // check if the result overflows
    assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgnE&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgnE&XInfE) | XNaNE;

    // check if the result underflows (this calculation changes if the result is signed or unsigned)
    assign Uf = FOpCtrlE[2] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (XSgnE&~RoundSgn&~ShiftCnt[12]);

    // to-int and not long: the saturated results below use 32-bit limits
    assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1];

    // select the integer result (saturate on overflow / underflow)
    assign CvtIntRes = Of ? FOpCtrlE[2] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} :
                       Uf ? FOpCtrlE[2] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
                       Rounded[64-1:0];

    // select the floating point result (single results get all ones in the upper 32 bits)
    assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {{32{1'b1}}, ResSgn, ResExp[7:0], ResFrac[51:29]};

    // select the result
    assign CvtResE = FOpCtrlE[0] ? CvtFPRes : CvtIntRes;

    // calculate the flags
    //  - to int only sets the invalid flag
    //  - from int only sets the inexact flag
    assign CvtFlgE = {(Of | Uf)&FOpCtrlE[1], 3'b0, (Guard|Round|Sticky)&FOpCtrlE[0]};

endmodule // fcvt
|
||||
|
||||
|
@ -27,40 +27,40 @@
|
||||
|
||||
module fhazard(
|
||||
input logic [4:0] Adr1E, Adr2E, Adr3E,
|
||||
input logic FWriteEnM, FWriteEnW,
|
||||
input logic FRegWriteM, FRegWriteW,
|
||||
input logic [4:0] RdM, RdW,
|
||||
input logic [2:0] FResultSelM,
|
||||
output logic FStallD,
|
||||
output logic [1:0] ForwardXE, ForwardYE, ForwardZE
|
||||
output logic [1:0] FForwardXE, FForwardYE, FForwardZE
|
||||
);
|
||||
|
||||
|
||||
always_comb begin
|
||||
// set ReadData as default
|
||||
ForwardXE = 2'b00; // choose FRD1E
|
||||
ForwardYE = 2'b00; // choose FRD2E
|
||||
ForwardZE = 2'b00; // choose FRD3E
|
||||
FForwardXE = 2'b00; // choose FRD1E
|
||||
FForwardYE = 2'b00; // choose FRD2E
|
||||
FForwardZE = 2'b00; // choose FRD3E
|
||||
FStallD = 0;
|
||||
|
||||
if ((Adr1E == RdM) & FWriteEnM)
|
||||
if ((Adr1E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM
|
||||
if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM
|
||||
if(FResultSelM == 3'b100) FForwardXE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // if the result won't be ready stall
|
||||
else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W
|
||||
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
|
||||
|
||||
|
||||
if ((Adr2E == RdM) & FWriteEnM)
|
||||
if ((Adr2E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM
|
||||
if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM
|
||||
if(FResultSelM == 3'b100) FForwardYE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // if the result won't be ready stall
|
||||
else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W
|
||||
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
|
||||
|
||||
|
||||
if ((Adr3E == RdM) & FWriteEnM)
|
||||
if ((Adr3E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM
|
||||
if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM
|
||||
if(FResultSelM == 3'b100) FForwardZE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // if the result won't be ready stall
|
||||
else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W
|
||||
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
|
||||
|
||||
end
|
||||
|
||||
|
@ -1,10 +1,177 @@
|
||||
// fma: two-stage fused multiply-add wrapper.
//
//   fma1 (stage E) forms the product mantissa/exponent and aligns the addend.
//   Four flopenrc registers carry those intermediate values from E to M.
//   fma2 (stage M) consumes them, together with the M-stage operand fields
//   and special-value flags, and drives FMAResM / FMAFlgM.
//
// XSgnE/YSgnE/ZSgnE are accepted on the port list but are not used here.
module fma(
    input  logic        clk,
    input  logic        reset,
    input  logic        FlushM,
    input  logic        StallM,
    input  logic        FmtE, FmtM,                     // precision 1 = double 0 = single
    input  logic [2:0]  FOpCtrlM, FOpCtrlE,             // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
    input  logic [2:0]  FrmM,                           // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
    input  logic        XSgnE, YSgnE, ZSgnE,
    input  logic [10:0] XExpE, YExpE, ZExpE,
    input  logic [51:0] XFracE, YFracE, ZFracE,
    input  logic        XSgnM, YSgnM, ZSgnM,
    input  logic [10:0] XExpM, YExpM, ZExpM,
    input  logic [51:0] XFracM, YFracM, ZFracM,
    input  logic        XAssumed1E, YAssumed1E, ZAssumed1E,
    input  logic        XDenormE, YDenormE, ZDenormE,
    input  logic        XZeroE, YZeroE, ZZeroE,
    input  logic        XNaNM, YNaNM, ZNaNM,
    input  logic        XSNaNM, YSNaNM, ZSNaNM,
    input  logic        XZeroM, YZeroM, ZZeroM,
    input  logic        XInfM, YInfM, ZInfM,
    input  logic [10:0] BiasE,
    output logic [63:0] FMAResM,
    output logic [4:0]  FMAFlgM);

    // E-stage results of fma1
    logic [105:0] ProdManE;
    logic [161:0] AlignedAddendE;
    logic [12:0]  ProdExpE;
    logic         AddendStickyE;
    logic         KillProdE;

    // the same values after the E/M pipeline registers
    logic [105:0] ProdManM;
    logic [161:0] AlignedAddendM;
    logic [12:0]  ProdExpM;
    logic         AddendStickyM;
    logic         KillProdM;

    // Stage E: multiply and align
    fma1 fma1 (
        .XExpE          (XExpE),
        .YExpE          (YExpE),
        .ZExpE          (ZExpE),
        .XFracE         (XFracE),
        .YFracE         (YFracE),
        .ZFracE         (ZFracE),
        .BiasE          (BiasE),
        .XAssumed1E     (XAssumed1E),
        .YAssumed1E     (YAssumed1E),
        .ZAssumed1E     (ZAssumed1E),
        .XDenormE       (XDenormE),
        .YDenormE       (YDenormE),
        .ZDenormE       (ZDenormE),
        .XZeroE         (XZeroE),
        .YZeroE         (YZeroE),
        .ZZeroE         (ZZeroE),
        .FOpCtrlE       (FOpCtrlE),
        .FmtE           (FmtE),
        .ProdManE       (ProdManE),
        .AlignedAddendE (AlignedAddendE),
        .ProdExpE       (ProdExpE),
        .AddendStickyE  (AddendStickyE),
        .KillProdE      (KillProdE));

    // E -> M pipeline registers.
    // NOTE(review): positional arguments are (clk, reset, FlushM, ~StallM, d, q);
    // presumably FlushM clears and ~StallM enables - confirm against flopenrc.
    flopenrc #(106) EMRegFma1 (clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
    flopenrc #(162) EMRegFma2 (clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
    flopenrc #(13)  EMRegFma3 (clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
    flopenrc #(2)   EMRegFma4 (clk, reset, FlushM, ~StallM,
                               {AddendStickyE, KillProdE},
                               {AddendStickyM, KillProdM});

    // Stage M: consumes the registered values and produces result and flags
    fma2 fma2 (
        .XSgnM          (XSgnM),
        .YSgnM          (YSgnM),
        .ZSgnM          (ZSgnM),
        .XExpM          (XExpM),
        .YExpM          (YExpM),
        .ZExpM          (ZExpM),
        .XFracM         (XFracM),
        .YFracM         (YFracM),
        .ZFracM         (ZFracM),
        .FOpCtrlM       (FOpCtrlM),
        .FrmM           (FrmM),
        .FmtM           (FmtM),
        .ProdManM       (ProdManM),
        .AlignedAddendM (AlignedAddendM),
        .ProdExpM       (ProdExpM),
        .AddendStickyM  (AddendStickyM),
        .KillProdM      (KillProdM),
        .XZeroM         (XZeroM),
        .YZeroM         (YZeroM),
        .ZZeroM         (ZZeroM),
        .XInfM          (XInfM),
        .YInfM          (YInfM),
        .ZInfM          (ZInfM),
        .XNaNM          (XNaNM),
        .YNaNM          (YNaNM),
        .ZNaNM          (ZNaNM),
        .XSNaNM         (XSNaNM),
        .YSNaNM         (YSNaNM),
        .ZSNaNM         (ZSNaNM),
        .FMAResM        (FMAResM),
        .FMAFlgM        (FMAFlgM));

endmodule
|
||||
|
||||
|
||||
|
||||
module fma1(
|
||||
// input logic XSgnE, YSgnE, ZSgnE,
|
||||
input logic [10:0] XExpE, YExpE, ZExpE,
|
||||
input logic [51:0] XFracE, YFracE, ZFracE,
|
||||
input logic XAssumed1E, YAssumed1E, ZAssumed1E,
|
||||
input logic XDenormE, YDenormE, ZDenormE,
|
||||
input logic XZeroE, YZeroE, ZZeroE,
|
||||
input logic [10:0] BiasE,
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
|
||||
output logic [161:0] AlignedAddendE, // Z aligned for addition
|
||||
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
|
||||
output logic AddendStickyE, // sticky bit that is calculated during alignment
|
||||
output logic KillProdE // set the product to zero before addition if the product is too small to matter
|
||||
);
|
||||
|
||||
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
|
||||
logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit
|
||||
logic [213:0] ZManPreShifted; // input to the alignment shifter
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Calculate the product
|
||||
// - When multipliying two fp numbers, add the exponents
|
||||
// - Subtract the bias (XExp + YExp has two biases, one from each exponent)
|
||||
// - Denormal numbers have an an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one if there is a denormal number
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// verilator lint_off WIDTH
|
||||
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
|
||||
XExpE + YExpE - BiasE + XDenormE + YDenormE;
|
||||
|
||||
// Calculate the product's mantissa
|
||||
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
|
||||
assign ProdManE = {XAssumed1E, XFracE} * {YAssumed1E, YFracE};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Alignment shifter
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// determine the shift count for alignment
|
||||
// - negitive means Z is larger, so shift Z left
|
||||
// - positive means the product is larger, so shift Z right
|
||||
// - Denormal numbers have an an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one to the exponent if it is a denormal number
|
||||
assign AlignCnt = ProdExpE - ZExpE - ZDenormE;
|
||||
// verilator lint_on WIDTH
|
||||
|
||||
|
||||
// Defualt Addition without shifting
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// |1'b0| addnend |
|
||||
|
||||
// the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
|
||||
assign ZManPreShifted = {55'b0, {ZAssumed1E, ZFracE}, 106'b0};
|
||||
always_comb
|
||||
begin
|
||||
|
||||
// If the product is too small to effect the sum, kill the product
|
||||
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
if ($signed(AlignCnt) <= $signed(-13'd56)) begin
|
||||
KillProdE = 1;
|
||||
ZManShifted = ZManPreShifted;//{107'b0, {~ZAssumed1E, ZFrac}, 54'b0};
|
||||
AddendStickyE = ~(XZeroE|YZeroE);
|
||||
|
||||
// If the Addend is shifted left (negitive AlignCnt)
|
||||
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
end else if($signed(AlignCnt) <= $signed(13'd0)) begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = ZManPreShifted << -AlignCnt;
|
||||
AddendStickyE = |(ZManShifted[51:0]);
|
||||
|
||||
// If the Addend is shifted right (positive AlignCnt)
|
||||
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
end else if ($signed(AlignCnt)<=$signed(13'd106)) begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = ZManPreShifted >> AlignCnt;
|
||||
AddendStickyE = |(ZManShifted[51:0]);
|
||||
|
||||
// If the addend is too small to effect the addition
|
||||
// - The addend has to shift two past the end of the addend to be considered too small
|
||||
// - The 2 extra bits are needed for rounding
|
||||
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addend |
|
||||
end else begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = 0;
|
||||
AddendStickyE = ~ZZeroE;
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
assign AlignedAddendE = ZManShifted[213:52];
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
module fma2(
|
||||
|
||||
input logic [63:0] X, // X
|
||||
input logic [63:0] Y, // Y
|
||||
input logic [63:0] Z, // Z
|
||||
|
||||
input logic XSgnM, YSgnM, ZSgnM,
|
||||
input logic [10:0] XExpM, YExpM, ZExpM,
|
||||
input logic [51:0] XFracM, YFracM, ZFracM,
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic FmtM, // precision 1 = double 0 = single
|
||||
@ -16,6 +183,7 @@ module fma2(
|
||||
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
|
||||
output logic [63:0] FMAResM, // FMA final result
|
||||
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
|
||||
@ -24,8 +192,6 @@ module fma2(
|
||||
logic [51:0] ResultFrac; // Result fraction
|
||||
logic [10:0] ResultExp; // Result exponent
|
||||
logic ResultSgn; // Result sign
|
||||
logic [10:0] ZExp; // input exponent
|
||||
logic XSgn, YSgn, ZSgn; // input sign
|
||||
logic PSgn; // product sign
|
||||
logic [105:0] ProdMan2; // product being added
|
||||
logic [162:0] AlignedAddend2; // possibly inverted aligned Z
|
||||
@ -61,28 +227,10 @@ module fma2(
|
||||
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select input fields
|
||||
// The following logic duplicates fma1 because it's cheaper to recompute than provide registers
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set addend to zero if FMUL instruction
|
||||
assign Addend = FOpCtrlM[2] ? 64'b0 : Z;
|
||||
|
||||
// split inputs into the sign bit, and exponent to handle single or double precision
|
||||
// - single precision is in the top half of the inputs
|
||||
assign XSgn = X[63];
|
||||
assign YSgn = Y[63];
|
||||
assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction
|
||||
|
||||
assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Calculate the product's sign
|
||||
// Negate product's sign if FNMADD or FNMSUB
|
||||
assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];
|
||||
assign PSgn = XSgnM ^ YSgnM ^ FOpCtrlM[1];
|
||||
|
||||
|
||||
|
||||
@ -93,7 +241,7 @@ module fma2(
|
||||
// Negate Z when doing one of the following operations:
|
||||
// -prod + Z
|
||||
// prod - Z
|
||||
assign InvZ = ZSgn ^ PSgn;
|
||||
assign InvZ = ZSgnM ^ PSgn;
|
||||
|
||||
// Choose an inverted or non-inverted addend - the one is added later
|
||||
assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
|
||||
@ -148,7 +296,7 @@ module fma2(
|
||||
assign FracLen = FmtM ? 13'd52 : 13'd23;
|
||||
|
||||
// Determine if the result is denormal
|
||||
assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
|
||||
assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
|
||||
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
|
||||
|
||||
// Determine the shift needed for denormal results
|
||||
@ -273,13 +421,13 @@ module fma2(
|
||||
// Determine the sign if the sum is zero
|
||||
// if cancelation then 0 unless round to -infinity
|
||||
// otherwise psign
|
||||
assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn;
|
||||
assign ZeroSgn = (PSgn^ZSgnM)&~Underflow ? FrmM == 3'b010 : PSgn;
|
||||
|
||||
// is the result negative
|
||||
// if p - z is the Sum negative
|
||||
// if -p + z is the Sum positive
|
||||
// if -p - z then the Sum is negative
|
||||
assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
|
||||
assign ResultSgnTmp = InvZ&(ZSgnM)&NegSum | InvZ&PSgn&~NegSum | ((ZSgnM)&PSgn);
|
||||
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
|
||||
|
||||
|
||||
@ -297,9 +445,8 @@ module fma2(
|
||||
// 2) Inf - Inf (unless x or y is NaN)
|
||||
// 3) 0 * Inf
|
||||
assign MaxExp = FmtM ? 13'd2047 : 13'd255;
|
||||
assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) :
|
||||
(XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]);
|
||||
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
|
||||
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
|
||||
// Set Overflow flag if the number is too big to be represented
|
||||
// - Don't set the overflow flag if an overflowed result isn't outputed
|
||||
@ -327,28 +474,28 @@ module fma2(
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]};
|
||||
assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]};
|
||||
assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]};
|
||||
assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XFracM[50:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XFracM[50:29]};
|
||||
assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YFracM[50:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YFracM[50:29]};
|
||||
assign ZNaNResult = FmtM ? {ZSgnM, ZExpM, 1'b1, ZFracM[50:0]} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], 1'b1, ZFracM[50:29]};
|
||||
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} :
|
||||
{ResultSgn, 11'h7ff, 52'b0} :
|
||||
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} :
|
||||
{ResultSgn, 8'hff, 55'b0};
|
||||
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0};
|
||||
assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0};
|
||||
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0};
|
||||
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
|
||||
{{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
|
||||
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
|
||||
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZFracM} - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[7:0], ZFracM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}};
|
||||
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
assign FMAResM = XNaNM ? XNaNResult :
|
||||
YNaNM ? YNaNResult :
|
||||
ZNaNM ? ZNaNResult :
|
||||
Invalid ? InvalidResult : // has to be before inf
|
||||
XInfM ? {PSgn, X[62:0]} :
|
||||
YInfM ? {PSgn, Y[62:0]} :
|
||||
ZInfM ? {ZSgn, Addend[62:0]} :
|
||||
// Infinite operand: return that operand's exponent/fraction (product sign for X and Y,
// Z's own sign for Z); for single precision the upper 32 bits are filled with ones.
// Each arm must be guarded by its own operand's Inf flag - guarding all three with
// XInfM makes the Y and Z arms unreachable.
XInfM ? FmtM ? {PSgn, XExpM, XFracM} : {{32{1'b1}}, PSgn, XExpM[7:0], XFracM[51:29]} :
|
||||
YInfM ? FmtM ? {PSgn, YExpM, YFracM} : {{32{1'b1}}, PSgn, YExpM[7:0], YFracM[51:29]} :
|
||||
ZInfM ? FmtM ? {ZSgnM, ZExpM, ZFracM} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], ZFracM[51:29]} :
|
||||
Overflow ? OverflowResult :
|
||||
KillProdM ? KillProdResult : // has to be after Underflow
|
||||
Underflow & ~ResultDenorm ? UnderflowResult :
|
||||
FmtM ? {ResultSgn, ResultExp, ResultFrac} :
|
||||
{ResultSgn, ResultExp[7:0], ResultFrac, 3'b0};
|
||||
{{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
|
||||
|
||||
|
||||
|
@ -1,184 +0,0 @@
|
||||
// fma1: first stage of the fused multiply-add (X*Y)+Z.
//   - splits X, Y and Z into sign, exponent and fraction for single or double precision
//   - flags zero, infinity and NaN inputs
//   - computes the product mantissa and the product exponent
//   - aligns the addend (Z) to the product and computes the alignment sticky bit
module fma1(
|
||||
|
||||
input logic [63:0] X, // X
|
||||
input logic [63:0] Y, // Y
|
||||
input logic [63:0] Z, // Z
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
|
||||
output logic [161:0] AlignedAddendE, // Z aligned for addition
|
||||
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
|
||||
output logic AddendStickyE, // sticky bit that is calculated during alignment
|
||||
output logic KillProdE, // set the product to zero before addition if the product is too small to matter
|
||||
output logic XZeroE, YZeroE, ZZeroE, // inputs are zero
|
||||
output logic XInfE, YInfE, ZInfE, // inputs are infinity
|
||||
output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN
|
||||
|
||||
logic [51:0] XFrac,YFrac,ZFrac; // input fraction
|
||||
logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one)
|
||||
logic [12:0] XExp,YExp,ZExp; // input exponents
|
||||
logic XSgn,YSgn,ZSgn; // input signs
|
||||
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
|
||||
logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit
|
||||
logic [213:0] ZManPreShifted; // input to the alignment shifter
|
||||
logic XDenorm, YDenorm, ZDenorm; // inputs are denormal
|
||||
logic [63:0] Addend; // value to add (Z or zero)
|
||||
logic [12:0] Bias; // 1023 for double, 127 for single
|
||||
logic XExpZero, YExpZero, ZExpZero; // input exponent zero
|
||||
logic XFracZero, YFracZero, ZFracZero; // input fraction zero
|
||||
logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// split inputs into the sign bit, fraction, and exponent to handle single or double precision
|
||||
// - single precision is in the top half of the inputs
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set addend to zero if FMUL instruction
|
||||
assign Addend = FOpCtrlE[2] ? 64'b0 : Z;
|
||||
|
||||
// The sign bits are extracted here but are not referenced again inside this module
assign XSgn = X[63];
|
||||
assign YSgn = Y[63];
|
||||
assign ZSgn = Addend[63];
|
||||
|
||||
// Exponents are zero-extended to 13 bits (11-bit field for double, 8-bit field for single)
assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]};
|
||||
assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]};
|
||||
assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};
|
||||
|
||||
// Single-precision fractions (23 bits) are padded on the right with 29 zeros to fill 52 bits
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
|
||||
assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0};
|
||||
assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};
|
||||
|
||||
// The leading (assumed) one is present only when the exponent field is non-zero
assign XMan = {~XExpZero, XFrac};
|
||||
assign YMan = {~YExpZero, YFrac};
|
||||
assign ZMan = {~ZExpZero, ZFrac};
|
||||
|
||||
assign Bias = FmtE ? 13'h3ff : 13'h7f;
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// determine if an input is a special value
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign XExpZero = ~|XExp;
|
||||
assign YExpZero = ~|YExp;
|
||||
assign ZExpZero = ~|ZExp;
|
||||
|
||||
assign XFracZero = ~|XFrac;
|
||||
assign YFracZero = ~|YFrac;
|
||||
assign ZFracZero = ~|ZFrac;
|
||||
|
||||
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
|
||||
assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
|
||||
assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];
|
||||
|
||||
assign XNaNE = XExpMax & ~XFracZero;
|
||||
assign YNaNE = YExpMax & ~YFracZero;
|
||||
assign ZNaNE = ZExpMax & ~ZFracZero;
|
||||
|
||||
assign XDenorm = XExpZero & ~XFracZero;
|
||||
assign YDenorm = YExpZero & ~YFracZero;
|
||||
assign ZDenorm = ZExpZero & ~ZFracZero;
|
||||
|
||||
assign XInfE = XExpMax & XFracZero;
|
||||
assign YInfE = YExpMax & YFracZero;
|
||||
assign ZInfE = ZExpMax & ZFracZero;
|
||||
|
||||
assign XZeroE = XExpZero & XFracZero;
|
||||
assign YZeroE = YExpZero & YFracZero;
|
||||
assign ZZeroE = ZExpZero & ZFracZero;
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Calculate the product
|
||||
// - When multiplying two fp numbers, add the exponents
|
||||
// - Subtract the bias (XExp + YExp has two biases, one from each exponent)
|
||||
// - Denormal numbers have an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one if there is a denormal number
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// verilator lint_off WIDTH
|
||||
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
|
||||
XExp + YExp - Bias + XDenorm + YDenorm;
|
||||
|
||||
// Calculate the product's mantissa
|
||||
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
|
||||
assign ProdManE = XMan * YMan;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Alignment shifter
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// determine the shift count for alignment
|
||||
// - negative means Z is larger, so shift Z left
|
||||
// - positive means the product is larger, so shift Z right
|
||||
// - Denormal numbers have an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one to the exponent if it is a denormal number
|
||||
assign AlignCnt = ProdExpE - ZExp - ZDenorm;
|
||||
// verilator lint_on WIDTH
|
||||
|
||||
|
||||
// Default Addition without shifting
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// |1'b0| addend |
|
||||
|
||||
// the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
|
||||
assign ZManPreShifted = {55'b0, ZMan, 106'b0};
|
||||
always_comb
|
||||
begin
|
||||
|
||||
// If the product is too small to affect the sum, kill the product
|
||||
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addend |
|
||||
if ($signed(AlignCnt) <= $signed(-13'd56)) begin
|
||||
KillProdE = 1;
|
||||
ZManShifted = ZManPreShifted;//{107'b0, ZMan, 54'b0};
|
||||
AddendStickyE = ~(XZeroE|YZeroE);
|
||||
|
||||
// If the Addend is shifted left (negative AlignCnt)
|
||||
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addend |
|
||||
end else if($signed(AlignCnt) <= $signed(13'd0)) begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = ZManPreShifted << -AlignCnt;
|
||||
AddendStickyE = |(ZManShifted[51:0]);
|
||||
|
||||
// If the Addend is shifted right (positive AlignCnt)
|
||||
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addend |
|
||||
end else if ($signed(AlignCnt)<=$signed(13'd106)) begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = ZManPreShifted >> AlignCnt;
|
||||
AddendStickyE = |(ZManShifted[51:0]);
|
||||
|
||||
// If the addend is too small to affect the addition
|
||||
// - The addend has to shift two past the end of the addend to be considered too small
|
||||
// - The 2 extra bits are needed for rounding
|
||||
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addend |
|
||||
end else begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = 0;
|
||||
AddendStickyE = ~ZZeroE;
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
// The low 52 bits of the shifter output only feed the sticky bit; the upper 162 bits are the aligned addend
assign AlignedAddendE = ZManShifted[213:52];
|
||||
|
||||
endmodule
|
152
wally-pipelined/src/fpu/fpdiv.sv
Executable file
152
wally-pipelined/src/fpu/fpdiv.sv
Executable file
@ -0,0 +1,152 @@
|
||||
//
|
||||
// File name : fpdiv
|
||||
// Title : Floating-Point Divider/Square-Root
|
||||
// project : FPU
|
||||
// Library : fpdiv
|
||||
// Author(s) : James E. Stine, Jr.
|
||||
// Purpose : definition of main unit to floating-point div/sqrt
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
//
|
||||
// Basic Operations
|
||||
//
|
||||
// Step 1: Load operands, set flags, and convert SP to DP
|
||||
// Step 2: Check for special inputs ( +/- Infinity, NaN)
|
||||
// Step 3: Exponent Logic
|
||||
// Step 4: Divide/Sqrt using Goldschmidt
|
||||
// Step 5: Normalize the result.//
|
||||
// Shift left until normalized. Normalized when the value to the
|
||||
// left of the binary point is 1.
|
||||
// Step 6: Round the result.//
|
||||
// Step 7: Put quotient/remainder onto output.
|
||||
//
|
||||
|
||||
// `timescale 1ps/1ps
|
||||
// fpdiv: top level of the floating-point divide/square-root unit.
// Wires together input conversion (convert_inputs_div), exception detection
// (exception_div), exponent arithmetic, the iterative datapath (divconv), its
// controller (fsm_div) and the rounder (rounder_div), then registers the
// result, the denormal indicator and the flags when `done` is asserted.
module fpdiv (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn,
|
||||
start, reset, clk);
|
||||
|
||||
input [63:0] op1; // 1st input operand (A)
|
||||
input [63:0] op2; // 2nd input operand (B)
|
||||
input [1:0] rm; // Rounding mode - specify values
|
||||
input op_type; // Function opcode
|
||||
input P; // Result Precision (0 for double, 1 for single)
|
||||
input OvEn; // Overflow trap enabled
|
||||
input UnEn; // Underflow trap enabled
|
||||
input start;
|
||||
input reset;
|
||||
input clk;
|
||||
|
||||
output [63:0] AS_Result; // Result of operation
|
||||
output [4:0] Flags; // IEEE exception flags
|
||||
output Denorm; // Denorm on input or output
|
||||
logic done;
|
||||
// output done;
|
||||
|
||||
supply1 vdd;
|
||||
supply0 vss;
|
||||
|
||||
wire [63:0] Float1;
|
||||
wire [63:0] Float2;
|
||||
wire [63:0] IntValue;
|
||||
|
||||
wire [12:0] exp1, exp2, expF;
|
||||
wire [12:0] exp_diff, bias;
|
||||
wire [13:0] exp_sqrt;
|
||||
wire [12:0] exp_s;
|
||||
wire [12:0] exp_c;
|
||||
|
||||
wire [10:0] exponent, exp_pre;
|
||||
wire [63:0] Result;
|
||||
wire [52:0] mantissaA;
|
||||
wire [52:0] mantissaB;
|
||||
wire [63:0] sum, sum_tc, sum_corr, sum_norm;
|
||||
|
||||
wire [5:0] align_shift;
|
||||
wire [5:0] norm_shift;
|
||||
wire [2:0] sel_inv;
|
||||
wire op1_Norm, op2_Norm;
|
||||
wire opA_Norm, opB_Norm;
|
||||
wire Invalid;
|
||||
wire DenormIn, DenormIO;
|
||||
wire [4:0] FlagsIn;
|
||||
wire exp_gt63;
|
||||
wire Sticky_out;
|
||||
wire signResult, sign_corr;
|
||||
wire corr_sign;
|
||||
wire zeroB;
|
||||
wire convert;
|
||||
wire swap;
|
||||
wire sub;
|
||||
|
||||
wire [63:0] q1, qm1, qp1, q0, qm0, qp0;
|
||||
wire [63:0] rega_out, regb_out, regc_out, regd_out;
|
||||
wire [127:0] regr_out;
|
||||
wire [2:0] sel_muxa, sel_muxb;
|
||||
wire sel_muxr;
|
||||
wire load_rega, load_regb, load_regc, load_regd, load_regr;
|
||||
|
||||
wire donev, sel_muxrv, sel_muxsv;
|
||||
wire [1:0] sel_muxav, sel_muxbv;
|
||||
wire load_regav, load_regbv, load_regcv;
|
||||
wire load_regrv, load_regsv;
|
||||
|
||||
// NOTE(review): exp_odd, exp_cout1, exp_cout2, open, load_regs and error are used
// below but are not declared in this module, so they depend on implicit 1-bit net
// declaration - confirm this is intended (it fails under `default_nettype none).

// Convert the input operands to their appropriate forms based on
|
||||
// the original operands, the op_type , and their precision P.
|
||||
// Single precision inputs are converted to double precision
|
||||
// and the sign of the first operand is set appropriately based on
|
||||
// if the operation is absolute value or negation.
|
||||
convert_inputs_div conv1 (Float1, Float2, op1, op2, op_type, P);
|
||||
|
||||
// Test for exceptions and return the "Invalid Operation" and
|
||||
// "Denormalized" Input Flags. The "sel_inv" is used in
|
||||
// the third pipeline stage to select the result. Also, op1_Norm
|
||||
// and op2_Norm are one if op1 and op2 are not zero or denormalized.
|
||||
// sub is one if the effective operation is subtraction.
|
||||
// NOTE(review): `sub` is not among the signals connected to exc1 below - this sentence may be stale.
exception_div exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
|
||||
Float1, Float2, op_type);
|
||||
|
||||
// Determine Sign/Mantissa
|
||||
// op_type = 0: sign is the XOR of the operand signs; op_type = 1: sign of the first operand
assign signResult = ((Float1[63]^Float2[63])&~op_type) | Float1[63]&op_type;
|
||||
assign mantissaA = {vdd, Float1[51:0]};
|
||||
assign mantissaB = {vdd, Float2[51:0]};
|
||||
// Perform Exponent Subtraction - expA - expB + Bias
|
||||
assign exp1 = {2'b0, Float1[62:52]};
|
||||
assign exp2 = {2'b0, Float2[62:52]};
|
||||
// bias : DP = 2^{11-1}-1 = 1023
|
||||
assign bias = {3'h0, 10'h3FF};
|
||||
// Divide exponent
|
||||
// ~exp2 plus the carry-in of 1 on explogic1 presumably forms -exp2 (two's complement) - confirm against csa/adder
csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c);
|
||||
adder #(14) explogic1 ({vss, exp_s}, {vss, exp_c}, 1'b1, {open, exp_diff}, exp_cout1);
|
||||
|
||||
// Sqrt exponent (check if exponent is odd)
|
||||
assign exp_odd = Float1[52] ? vss : vdd;
|
||||
adder #(14) explogic2 ({vss, exp1}, {4'h0, 10'h3ff}, exp_odd, exp_sqrt, exp_cout2);
|
||||
// Choose correct exponent
|
||||
// For op_type = 1 the 14-bit sum is halved by dropping bit 0
assign expF = op_type ? exp_sqrt[13:1] : exp_diff;
|
||||
|
||||
// Main Goldschmidt/Division Routine
|
||||
divconv goldy (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
|
||||
regr_out, mantissaB, mantissaA, sel_muxa, sel_muxb, sel_muxr,
|
||||
reset, clk, load_rega, load_regb, load_regc, load_regd,
|
||||
load_regr, load_regs, P, op_type, exp_odd);
|
||||
|
||||
// FSM : control divider
|
||||
fsm_div control (done, load_rega, load_regb, load_regc, load_regd,
|
||||
load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
|
||||
clk, reset, start, error, op_type);
|
||||
|
||||
// Round the mantissa to a 52-bit value, with the leading one
|
||||
// removed. The rounding units also handles special cases and
|
||||
// set the exception flags.
|
||||
rounder_div round1 (Result, DenormIO, FlagsIn,
|
||||
rm, P, OvEn, UnEn, expF,
|
||||
sel_inv, Invalid, DenormIn, signResult,
|
||||
q1, qm1, qp1, q0, qm0, qp0, regr_out);
|
||||
|
||||
// Store the final result and the exception flags in registers.
|
||||
flopenr #(64) rega (clk, reset, done, Result, AS_Result);
|
||||
flopenr #(1) regb (clk, reset, done, DenormIO, Denorm);
|
||||
flopenr #(5) regc (clk, reset, done, FlagsIn, Flags);
|
||||
|
||||
endmodule // fpdiv
|
@ -1,9 +0,0 @@
|
||||
// adder_ip: WIDTH-bit adder with carry-in and carry-out.
//   a, b : WIDTH-bit operands
//   cin  : carry-in, added into the sum
//   y    : low WIDTH bits of a + b + cin
//   cout : carry out of the top bit
module adder_ip #(parameter WIDTH=8)
|
||||
(input logic [WIDTH-1:0] a, b,
|
||||
input logic cin,
|
||||
output logic [WIDTH-1:0] y,
|
||||
output logic cout);
|
||||
|
||||
// The WIDTH+1-bit concatenation on the left captures the carry in cout
assign {cout, y} = a + b + cin;
|
||||
|
||||
endmodule // adder_ip
|
@ -3,8 +3,7 @@
|
||||
// it conditionally converts single precision values to double
|
||||
// precision values and modifies the sign of op1.
|
||||
// The converted operands are Float1 and Float2.
|
||||
|
||||
module convert_inputs(Float1, Float2b, op1, op2, op_type, P);
|
||||
module convert_inputs_div (Float1, Float2b, op1, op2, op_type, P);
|
||||
|
||||
input logic [63:0] op1; // 1st input operand (A)
|
||||
input logic [63:0] op2; // 2nd input operand (B)
|
@ -1,19 +1,13 @@
|
||||
`timescale 1ps/1ps
|
||||
module divconv (q1, qm1, qp1, q0, qm0, qp0,
|
||||
rega_out, regb_out, regc_out, regd_out,
|
||||
regr_out, d, n,
|
||||
sel_muxa, sel_muxb, sel_muxr,
|
||||
reset, clk,
|
||||
load_rega, load_regb, load_regc, load_regd,
|
||||
load_regr, load_regs, load_regp,
|
||||
P, op_type, exp_odd);
|
||||
module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
|
||||
regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk, load_rega, load_regb,
|
||||
load_regc, load_regd, load_regr, load_regs, P, op_type, exp_odd);
|
||||
|
||||
input logic [52:0] d, n;
|
||||
input logic [2:0] sel_muxa, sel_muxb;
|
||||
input logic sel_muxr;
|
||||
input logic load_rega, load_regb, load_regc, load_regd;
|
||||
input logic load_regr, load_regs;
|
||||
input logic load_regp;
|
||||
input logic P;
|
||||
input logic op_type;
|
||||
input logic exp_odd;
|
||||
@ -78,86 +72,47 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
|
||||
mux2 #(64) mx8 ({64'h0000_0000_0000_0200}, {64'h0000_0040_0000_0000}, P, q_const);
|
||||
mux2 #(64) mx9 ({64'h0000_0000_0000_0A00}, {64'h0000_0140_0000_0000}, P, qp_const);
|
||||
mux2 #(64) mxA ({64'hFFFF_FFFF_FFFF_F9FF}, {64'hFFFF_FF3F_FFFF_FFFF}, P, qm_const);
|
||||
|
||||
logic [127:0] Sum_pipe;
|
||||
logic [127:0] Carry_pipe;
|
||||
logic muxr_pipe;
|
||||
logic rega_pipe;
|
||||
logic regb_pipe;
|
||||
logic regc_pipe;
|
||||
logic regd_pipe;
|
||||
logic regs_pipe;
|
||||
logic regr_pipe;
|
||||
logic P_pipe;
|
||||
logic op_type_pipe;
|
||||
logic [63:0] q_const_pipe;
|
||||
logic [63:0] qm_const_pipe;
|
||||
logic [63:0] qp_const_pipe;
|
||||
|
||||
// Pipeline Stage 2 of iteration for Goldschmidt's algorithm
|
||||
flopenr #(128) regp1 (clk, reset, load_regp, Sum2, Sum_pipe);
|
||||
flopenr #(128) regp2 (clk, reset, load_regp, Carry2, Carry_pipe);
|
||||
flopenr #(1) regp3 (clk, reset, load_regp, muxr_out, muxr_pipe);
|
||||
|
||||
flopenr #(1) regp4 (clk, reset, load_regp, load_rega, rega_pipe);
|
||||
flopenr #(1) regp5 (clk, reset, load_regp, load_regb, regb_pipe);
|
||||
flopenr #(1) regp6 (clk, reset, load_regp, load_regc, regc_pipe);
|
||||
flopenr #(1) regp7 (clk, reset, load_regp, load_regd, regd_pipe);
|
||||
flopenr #(1) regp8 (clk, reset, load_regp, load_regs, regs_pipe);
|
||||
flopenr #(1) regp9 (clk, reset, load_regp, load_regr, regr_pipe);
|
||||
flopenr #(1) regpA (clk, reset, load_regp, P, P_pipe);
|
||||
flopenr #(1) regpB (clk, reset, load_regp, op_type, op_type_pipe);
|
||||
flopenr #(64) regpC (clk, reset, load_regp, q_const, q_const_pipe);
|
||||
flopenr #(64) regpD (clk, reset, load_regp, qp_const, qp_const_pipe);
|
||||
flopenr #(64) regpE (clk, reset, load_regp, qm_const, qm_const_pipe);
|
||||
|
||||
// CPA (from CSA)/Remainder addition/subtraction
|
||||
adder_ip #(128) cpa1 (Sum_pipe, Carry_pipe, muxr_pipe, mul_out, cout1);
|
||||
// ldf128 cpa1 (cout1, mul_out, Sum_pipe, Carry_pipe, muxr_pipe);
|
||||
// One's complement instead of two's complement (for hw efficiency)
|
||||
assign three = {~mul_out[126] , mul_out[126], ~mul_out[125:63]};
|
||||
mux2 #(64) mxTC (~mul_out[126:63], three[64:1], op_type_pipe, twocmp_out);
|
||||
adder #(128) cpa1 (Sum2, Carry2, muxr_out, mul_out, cout1);
|
||||
|
||||
// Assuming [1,2) - q1
|
||||
adder_ip #(64) cpa2 (regb_out, q_const_pipe, 1'b0, q_out1, cout2);
|
||||
adder_ip #(64) cpa3 (regb_out, qp_const_pipe, 1'b0, qp_out1, cout3);
|
||||
adder_ip #(64) cpa4 (regb_out, qm_const_pipe, 1'b1, qm_out1, cout4);
|
||||
adder_ip #(64) cpa5 ({regb_out[62:0], vss}, q_const_pipe, 1'b0, q_out0, cout5);
|
||||
adder_ip #(64) cpa6 ({regb_out[62:0], vss}, qp_const_pipe, 1'b0, qp_out0, cout6);
|
||||
adder_ip #(64) cpa7 ({regb_out[62:0], vss}, qm_const_pipe, 1'b1, qm_out0, cout7);
|
||||
|
||||
//ldf64 cpa2 (cout2, q_out1, regb_out, q_const_pipe, 1'b0);
|
||||
//ldf64 cpa3 (cout3, qp_out1, regb_out, qp_const_pipe, 1'b0);
|
||||
//ldf64 cpa4 (cout4, qm_out1, regb_out, qm_const_pipe, 1'b1);
|
||||
// Assuming [0.5,1) - q0
|
||||
//ldf64 cpa5 (cout5, q_out0, {regb_out[62:0], vss}, q_const_pipe, 1'b0);
|
||||
//ldf64 cpa6 (cout6, qp_out0, {regb_out[62:0], vss}, qp_const_pipe, 1'b0);
|
||||
//ldf64 cpa7 (cout7, qm_out0, {regb_out[62:0], vss}, qm_const_pipe, 1'b1);
|
||||
adder #(64) cpa2 (regb_out, q_const, 1'b0, q_out1, cout2);
|
||||
adder #(64) cpa3 (regb_out, qp_const, 1'b0, qp_out1, cout3);
|
||||
adder #(64) cpa4 (regb_out, qm_const, 1'b1, qm_out1, cout4);
|
||||
// Assuming [0.5,1) - q0
|
||||
adder #(64) cpa5 ({regb_out[62:0], vss}, q_const, 1'b0, q_out0, cout5);
|
||||
adder #(64) cpa6 ({regb_out[62:0], vss}, qp_const, 1'b0, qp_out0, cout6);
|
||||
adder #(64) cpa7 ({regb_out[62:0], vss}, qm_const, 1'b1, qm_out0, cout7);
|
||||
|
||||
// One's complement instead of two's complement (for hw efficiency)
|
||||
assign three = {~mul_out[126], mul_out[126], ~mul_out[125:63]};
|
||||
mux2 #(64) mxTC (~mul_out[126:63], three[64:1], op_type, twocmp_out);
|
||||
|
||||
// regs
|
||||
flopenr #(64) regc (clk, reset, regc_pipe, twocmp_out, regc_out);
|
||||
flopenr #(64) regb (clk, reset, regb_pipe, mul_out[126:63], regb_out);
|
||||
flopenr #(64) rega (clk, reset, rega_pipe, mul_out[126:63], rega_out);
|
||||
flopenr #(64) regd (clk, reset, regd_pipe, mul_out[126:63], regd_out);
|
||||
|
||||
// remainder
|
||||
flopenr #(128) regr (clk, reset, regr_pipe, mul_out, regr_out);
|
||||
flopenr #(64) regc (clk, reset, load_regc, twocmp_out, regc_out);
|
||||
flopenr #(64) regb (clk, reset, load_regb, mul_out[126:63], regb_out);
|
||||
flopenr #(64) rega (clk, reset, load_rega, mul_out[126:63], rega_out);
|
||||
flopenr #(64) regd (clk, reset, load_regd, mul_out[126:63], regd_out);
|
||||
flopenr #(128) regr (clk, reset, load_regr, mul_out, regr_out);
|
||||
// Assuming [1,2)
|
||||
flopenr #(64) rege (clk, reset, regs_pipe, {q_out1[63:39], (q_out1[38:10] & {29{~P_pipe}}), 10'h0}, q1);
|
||||
flopenr #(64) regf (clk, reset, regs_pipe, {qm_out1[63:39], (qm_out1[38:10] & {29{~P_pipe}}), 10'h0}, qm1);
|
||||
flopenr #(64) regg (clk, reset, regs_pipe, {qp_out1[63:39], (qp_out1[38:10] & {29{~P_pipe}}), 10'h0}, qp1);
|
||||
flopenr #(64) rege (clk, reset, load_regs, {q_out1[63:39], (q_out1[38:10] & {29{~P}}), 10'h0}, q1);
|
||||
flopenr #(64) regf (clk, reset, load_regs, {qm_out1[63:39], (qm_out1[38:10] & {29{~P}}), 10'h0}, qm1);
|
||||
flopenr #(64) regg (clk, reset, load_regs, {qp_out1[63:39], (qp_out1[38:10] & {29{~P}}), 10'h0}, qp1);
|
||||
// Assuming [0,1)
|
||||
flopenr #(64) regh (clk, reset, regs_pipe, {q_out0[63:39], (q_out0[38:10] & {29{~P_pipe}}), 10'h0}, q0);
|
||||
flopenr #(64) regj (clk, reset, regs_pipe, {qm_out0[63:39], (qm_out0[38:10] & {29{~P_pipe}}), 10'h0}, qm0);
|
||||
flopenr #(64) regk (clk, reset, regs_pipe, {qp_out0[63:39], (qp_out0[38:10] & {29{~P_pipe}}), 10'h0}, qp0);
|
||||
flopenr #(64) regh (clk, reset, load_regs, {q_out0[63:39], (q_out0[38:10] & {29{~P}}), 10'h0}, q0);
|
||||
flopenr #(64) regj (clk, reset, load_regs, {qm_out0[63:39], (qm_out0[38:10] & {29{~P}}), 10'h0}, qm0);
|
||||
flopenr #(64) regk (clk, reset, load_regs, {qp_out0[63:39], (qp_out0[38:10] & {29{~P}}), 10'h0}, qp0);
|
||||
|
||||
endmodule // divconv
|
||||
|
||||
module adder #(parameter WIDTH=8)
|
||||
(input logic [WIDTH-1:0] a, b,
|
||||
output logic [WIDTH-1:0] y);
|
||||
input logic cin,
|
||||
output logic [WIDTH-1:0] y,
|
||||
output logic cout);
|
||||
|
||||
assign y = a + b;
|
||||
assign {cout, y} = a + b + cin;
|
||||
|
||||
endmodule // adder
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user