mirror of
https://github.com/openhwgroup/cvw
synced 2025-01-30 00:14:28 +00:00
5061 lines
145 KiB
C
5061 lines
145 KiB
C
|
|
/*============================================================================
|
|
|
|
This C source file is part of TestFloat, Release 3e, a package of programs for
|
|
testing the correctness of floating-point arithmetic complying with the IEEE
|
|
Standard for Floating-Point, by John R. Hauser.
|
|
|
|
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
|
|
University of California. All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
1. Redistributions of source code must retain the above copyright notice,
|
|
this list of conditions, and the following disclaimer.
|
|
|
|
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
this list of conditions, and the following disclaimer in the documentation
|
|
and/or other materials provided with the distribution.
|
|
|
|
3. Neither the name of the University nor the names of its contributors may
|
|
be used to endorse or promote products derived from this software without
|
|
specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
|
|
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
|
|
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
=============================================================================*/
|
|
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <time.h>
|
|
#include "platform.h"
|
|
#include "uint128.h"
|
|
#include "fail.h"
|
|
#include "softfloat.h"
|
|
#include "functions.h"
|
|
|
|
/* Number of calls made per batch by the calibration loops of the timing
   routines in this file. */
enum {
    minIterations = 1000
};
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
/* Settings read by reportTime() when it prints a result line. */

/* Name printed after "Mop/s: ". */
static const char *functionNamePtr;

/* Index into reportTime()'s rounding-mode name table; zero suppresses the
   rounding, precision and tininess suffixes. */
static int roundingCode;

/* When nonzero (and EXTFLOAT80 is defined), printed as ", precision N". */
static uint_fast8_t roundingPrecision;

/* When nonzero, selects the "tininess before/after rounding" suffix. */
static int tininessCode;

/* `usesExact' enables the ", exact" / ", not exact" suffix; `exact' picks
   which of the two is printed. */
static bool usesExact;
static bool exact;
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static void reportTime( int_fast64_t count, clock_t clockTicks )
|
|
{
|
|
static const char *roundingModeNames[NUM_ROUNDINGMODES] = {
|
|
0,
|
|
", rounding near_even",
|
|
", rounding minMag",
|
|
", rounding min",
|
|
", rounding max",
|
|
", rounding near_maxMag",
|
|
#ifdef FLOAT_ROUND_ODD
|
|
", rounding odd"
|
|
#endif
|
|
};
|
|
|
|
printf(
|
|
"%9.4f Mop/s: %s",
|
|
count / ((float) clockTicks / CLOCKS_PER_SEC) / 1000000,
|
|
functionNamePtr
|
|
);
|
|
if ( roundingCode ) {
|
|
#ifdef EXTFLOAT80
|
|
if ( roundingPrecision ) {
|
|
printf( ", precision %d", (int) roundingPrecision );
|
|
}
|
|
#endif
|
|
fputs( roundingModeNames[roundingCode], stdout );
|
|
if ( tininessCode ) {
|
|
fputs(
|
|
(tininessCode == TININESS_BEFORE_ROUNDING)
|
|
? ", tininess before rounding"
|
|
: ", tininess after rounding",
|
|
stdout
|
|
);
|
|
}
|
|
}
|
|
if ( usesExact ) fputs( exact ? ", exact" : ", not exact", stdout );
|
|
fputc( '\n', stdout );
|
|
fflush( stdout );
|
|
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
#ifdef FLOAT16
|
|
union ui16_f16 { uint16_t ui; float16_t f; };
|
|
#endif
|
|
union ui32_f32 { uint32_t ui; float32_t f; };
|
|
#ifdef FLOAT64
|
|
union ui64_f64 { uint64_t ui; float64_t f; };
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
/*
 * Operands for the routines that time conversions from uint32_t.  The table
 * length must remain a power of two: the timing loops wrap their index with
 * `& (numInputs_ui32 - 1)'.
 */
enum {
    numInputs_ui32 = 32
};

static const uint32_t inputs_ui32[numInputs_ui32] = {
    /*  0 */ 0x00004487, 0x405CF80F, 0x00000000, 0x000002FC,
    /*  4 */ 0x000DFFFE, 0x0C8EF795, 0x0FFFEE01, 0x000006CA,
    /*  8 */ 0x00009BFE, 0x00B79D1D, 0x60001002, 0x00000049,
    /* 12 */ 0x0BFF7FFF, 0x0000F37A, 0x0011DFFE, 0x00000006,
    /* 16 */ 0x000FDFFA, 0x0000082F, 0x10200003, 0x2172089B,
    /* 20 */ 0x00003E02, 0x000019E8, 0x0008FFFE, 0x000004A4,
    /* 24 */ 0x00208002, 0x07C42FBF, 0x0FFFE3FF, 0x040B9F13,
    /* 28 */ 0x40000008, 0x0001BF56, 0x000017F6, 0x000A908A
};
|
|
|
|
#ifdef FLOAT16
|
|
|
|
/*
 * Measures how fast `function' (uint32_t -> float16_t) runs.  Operands are
 * taken cyclically from inputs_ui32.  The function is first called in
 * batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_ui32_z_f16( float16_t function( uint32_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_ui32[index] );
            index = (index + 1) & (numInputs_ui32 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_ui32[index] );
        index = (index + 1) & (numInputs_ui32 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
/*
 * Measures how fast `function' (uint32_t -> float32_t) runs.  Operands are
 * taken cyclically from inputs_ui32.  The function is first called in
 * batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_ui32_z_f32( float32_t function( uint32_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_ui32[index] );
            index = (index + 1) & (numInputs_ui32 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_ui32[index] );
        index = (index + 1) & (numInputs_ui32 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#ifdef FLOAT64
|
|
|
|
/*
 * Measures how fast `function' (uint32_t -> float64_t) runs.  Operands are
 * taken cyclically from inputs_ui32.  The function is first called in
 * batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_ui32_z_f64( float64_t function( uint32_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_ui32[index] );
            index = (index + 1) & (numInputs_ui32 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_ui32[index] );
        index = (index + 1) & (numInputs_ui32 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
#ifdef EXTFLOAT80
|
|
|
|
/*
 * Measures how fast `function' runs when it converts a uint32_t and stores
 * an extFloat80_t through its second argument.  Operands are taken
 * cyclically from inputs_ui32.  The function is first called in batches of
 * minIterations until clock() has advanced by at least CLOCKS_PER_SEC ticks;
 * that many calls are then replayed in one timed pass, and the call count and
 * elapsed ticks are passed to reportTime().
 */
static void time_a_ui32_z_extF80( void function( uint32_t, extFloat80_t * ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    extFloat80_t result;    /* destination only; its value is never read */
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_ui32[index], &result );
            index = (index + 1) & (numInputs_ui32 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_ui32[index], &result );
        index = (index + 1) & (numInputs_ui32 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
#ifdef FLOAT128
|
|
|
|
/*
 * Measures how fast `function' runs when it converts a uint32_t and stores
 * a float128_t through its second argument.  Operands are taken cyclically
 * from inputs_ui32.  The function is first called in batches of
 * minIterations until clock() has advanced by at least CLOCKS_PER_SEC ticks;
 * that many calls are then replayed in one timed pass, and the call count and
 * elapsed ticks are passed to reportTime().
 */
static void time_a_ui32_z_f128( void function( uint32_t, float128_t * ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    float128_t result;      /* destination only; its value is never read */
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_ui32[index], &result );
            index = (index + 1) & (numInputs_ui32 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_ui32[index], &result );
        index = (index + 1) & (numInputs_ui32 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
/*
 * Operands for the routines that time conversions from uint64_t.  The
 * element type is uint64_t to match both the UINT64_C initialisers and the
 * uint64_t parameter the timed functions take.  The table length must remain
 * a power of two: the timing loops wrap their index with
 * `& (numInputs_ui64 - 1)'.
 */
enum { numInputs_ui64 = 32 };

static const uint64_t inputs_ui64[numInputs_ui64] = {
    UINT64_C( 0x04003C0000000001 ), UINT64_C( 0x0000000003C589BC ),
    UINT64_C( 0x00000000400013FE ), UINT64_C( 0x0000000000186171 ),
    UINT64_C( 0x0000000000010406 ), UINT64_C( 0x000002861920038D ),
    UINT64_C( 0x0000000010001DFF ), UINT64_C( 0x22E5F0F387AEC8F0 ),
    UINT64_C( 0x00007C0000010002 ), UINT64_C( 0x00756EBD1AD0C1C7 ),
    UINT64_C( 0x0003FDFFFFFFFFBE ), UINT64_C( 0x0007D0FB2C2CA951 ),
    UINT64_C( 0x0007FC0007FFFFFE ), UINT64_C( 0x0000001F942B18BB ),
    UINT64_C( 0x0000080101FFFFFE ), UINT64_C( 0x000000000000F688 ),
    UINT64_C( 0x000000000008BFFF ), UINT64_C( 0x0000000006F5AF08 ),
    UINT64_C( 0x0021008000000002 ), UINT64_C( 0x0000000000000003 ),
    UINT64_C( 0x3FFFFFFFFF80007D ), UINT64_C( 0x0000000000000078 ),
    UINT64_C( 0x0007FFFFFF802003 ), UINT64_C( 0x1BBC775B78016AB0 ),
    UINT64_C( 0x0006FFE000000002 ), UINT64_C( 0x0002B89854671BC1 ),
    UINT64_C( 0x0000010001FFFFE2 ), UINT64_C( 0x00000000000FB103 ),
    UINT64_C( 0x07FFFFFFFFFFF7FF ), UINT64_C( 0x00036155C7076FB0 ),
    UINT64_C( 0x00000020FBFFFFFE ), UINT64_C( 0x0000099AE6455357 )
};
|
|
|
|
#ifdef FLOAT16
|
|
|
|
/*
 * Measures how fast `function' (uint64_t -> float16_t) runs.  Operands are
 * taken cyclically from inputs_ui64.  The function is first called in
 * batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_ui64_z_f16( float16_t function( uint64_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_ui64[index] );
            index = (index + 1) & (numInputs_ui64 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_ui64[index] );
        index = (index + 1) & (numInputs_ui64 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
/*
 * Measures how fast `function' (uint64_t -> float32_t) runs.  Operands are
 * taken cyclically from inputs_ui64.  The function is first called in
 * batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_ui64_z_f32( float32_t function( uint64_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_ui64[index] );
            index = (index + 1) & (numInputs_ui64 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_ui64[index] );
        index = (index + 1) & (numInputs_ui64 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#ifdef FLOAT64
|
|
|
|
/*
 * Measures how fast `function' (uint64_t -> float64_t) runs.  Operands are
 * taken cyclically from inputs_ui64.  The function is first called in
 * batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_ui64_z_f64( float64_t function( uint64_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_ui64[index] );
            index = (index + 1) & (numInputs_ui64 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_ui64[index] );
        index = (index + 1) & (numInputs_ui64 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
#ifdef EXTFLOAT80
|
|
|
|
/*
 * Measures how fast `function' runs when it converts a uint64_t and stores
 * an extFloat80_t through its second argument.  Operands are taken
 * cyclically from inputs_ui64.  The function is first called in batches of
 * minIterations until clock() has advanced by at least CLOCKS_PER_SEC ticks;
 * that many calls are then replayed in one timed pass, and the call count and
 * elapsed ticks are passed to reportTime().
 */
static void time_a_ui64_z_extF80( void function( uint64_t, extFloat80_t * ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    extFloat80_t result;    /* destination only; its value is never read */
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_ui64[index], &result );
            index = (index + 1) & (numInputs_ui64 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_ui64[index], &result );
        index = (index + 1) & (numInputs_ui64 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
#ifdef FLOAT128
|
|
|
|
/*
 * Measures how fast `function' runs when it converts a uint64_t and stores
 * a float128_t through its second argument.  Operands are taken cyclically
 * from inputs_ui64.  The function is first called in batches of
 * minIterations until clock() has advanced by at least CLOCKS_PER_SEC ticks;
 * that many calls are then replayed in one timed pass, and the call count and
 * elapsed ticks are passed to reportTime().
 */
static void time_a_ui64_z_f128( void function( uint64_t, float128_t * ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    float128_t result;      /* destination only; its value is never read */
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_ui64[index], &result );
            index = (index + 1) & (numInputs_ui64 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_ui64[index], &result );
        index = (index + 1) & (numInputs_ui64 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
/*
 * Operands for the routines that time conversions from int32_t.  The table
 * length must remain a power of two: the timing loops wrap their index with
 * `& (numInputs_i32 - 1)'.
 */
enum {
    numInputs_i32 = 32
};

static const int32_t inputs_i32[numInputs_i32] = {
    /*  0 */ -0x00004487,  0x405CF80F,  0x00000000, -0x000002FC,
    /*  4 */ -0x000DFFFE,  0x0C8EF795, -0x0FFFEE01,  0x000006CA,
    /*  8 */  0x00009BFE, -0x00B79D1D, -0x60001002, -0x00000049,
    /* 12 */  0x0BFF7FFF,  0x0000F37A,  0x0011DFFE,  0x00000006,
    /* 16 */ -0x000FDFFA, -0x0000082F,  0x10200003, -0x2172089B,
    /* 20 */  0x00003E02,  0x000019E8,  0x0008FFFE, -0x000004A4,
    /* 24 */ -0x00208002,  0x07C42FBF,  0x0FFFE3FF,  0x040B9F13,
    /* 28 */ -0x40000008,  0x0001BF56,  0x000017F6,  0x000A908A
};
|
|
|
|
#ifdef FLOAT16
|
|
|
|
/*
 * Measures how fast `function' (int32_t -> float16_t) runs.  Operands are
 * taken cyclically from inputs_i32.  The function is first called in
 * batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_i32_z_f16( float16_t function( int32_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_i32[index] );
            index = (index + 1) & (numInputs_i32 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_i32[index] );
        index = (index + 1) & (numInputs_i32 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
/*
 * Measures how fast `function' (int32_t -> float32_t) runs.  Operands are
 * taken cyclically from inputs_i32.  The function is first called in
 * batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_i32_z_f32( float32_t function( int32_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_i32[index] );
            index = (index + 1) & (numInputs_i32 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_i32[index] );
        index = (index + 1) & (numInputs_i32 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#ifdef FLOAT64
|
|
|
|
/*
 * Measures how fast `function' (int32_t -> float64_t) runs.  Operands are
 * taken cyclically from inputs_i32.  The function is first called in
 * batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_i32_z_f64( float64_t function( int32_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_i32[index] );
            index = (index + 1) & (numInputs_i32 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_i32[index] );
        index = (index + 1) & (numInputs_i32 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
#ifdef EXTFLOAT80
|
|
|
|
/*
 * Measures how fast `function' runs when it converts an int32_t and stores
 * an extFloat80_t through its second argument.  Operands are taken
 * cyclically from inputs_i32.  The function is first called in batches of
 * minIterations until clock() has advanced by at least CLOCKS_PER_SEC ticks;
 * that many calls are then replayed in one timed pass, and the call count and
 * elapsed ticks are passed to reportTime().
 */
static void time_a_i32_z_extF80( void function( int32_t, extFloat80_t * ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    extFloat80_t result;    /* destination only; its value is never read */
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_i32[index], &result );
            index = (index + 1) & (numInputs_i32 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_i32[index], &result );
        index = (index + 1) & (numInputs_i32 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
#ifdef FLOAT128
|
|
|
|
/*
 * Measures how fast `function' runs when it converts an int32_t and stores
 * a float128_t through its second argument.  Operands are taken cyclically
 * from inputs_i32.  The function is first called in batches of
 * minIterations until clock() has advanced by at least CLOCKS_PER_SEC ticks;
 * that many calls are then replayed in one timed pass, and the call count and
 * elapsed ticks are passed to reportTime().
 */
static void time_a_i32_z_f128( void function( int32_t, float128_t * ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    float128_t result;      /* destination only; its value is never read */
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_i32[index], &result );
            index = (index + 1) & (numInputs_i32 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_i32[index], &result );
        index = (index + 1) & (numInputs_i32 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
/*
 * Operands for the routines that time conversions from int64_t.  The table
 * length must remain a power of two: the timing loops wrap their index with
 * `& (numInputs_i64 - 1)'.
 */
enum {
    numInputs_i64 = 32
};

static const int64_t inputs_i64[numInputs_i64] = {
    /*  0 */ -INT64_C( 0x04003C0000000001 ),  INT64_C( 0x0000000003C589BC ),
    /*  2 */  INT64_C( 0x00000000400013FE ),  INT64_C( 0x0000000000186171 ),
    /*  4 */ -INT64_C( 0x0000000000010406 ), -INT64_C( 0x000002861920038D ),
    /*  6 */  INT64_C( 0x0000000010001DFF ), -INT64_C( 0x22E5F0F387AEC8F0 ),
    /*  8 */ -INT64_C( 0x00007C0000010002 ),  INT64_C( 0x00756EBD1AD0C1C7 ),
    /* 10 */  INT64_C( 0x0003FDFFFFFFFFBE ),  INT64_C( 0x0007D0FB2C2CA951 ),
    /* 12 */  INT64_C( 0x0007FC0007FFFFFE ),  INT64_C( 0x0000001F942B18BB ),
    /* 14 */  INT64_C( 0x0000080101FFFFFE ), -INT64_C( 0x000000000000F688 ),
    /* 16 */  INT64_C( 0x000000000008BFFF ),  INT64_C( 0x0000000006F5AF08 ),
    /* 18 */ -INT64_C( 0x0021008000000002 ),  INT64_C( 0x0000000000000003 ),
    /* 20 */  INT64_C( 0x3FFFFFFFFF80007D ),  INT64_C( 0x0000000000000078 ),
    /* 22 */ -INT64_C( 0x0007FFFFFF802003 ),  INT64_C( 0x1BBC775B78016AB0 ),
    /* 24 */ -INT64_C( 0x0006FFE000000002 ), -INT64_C( 0x0002B89854671BC1 ),
    /* 26 */ -INT64_C( 0x0000010001FFFFE2 ), -INT64_C( 0x00000000000FB103 ),
    /* 28 */  INT64_C( 0x07FFFFFFFFFFF7FF ), -INT64_C( 0x00036155C7076FB0 ),
    /* 30 */  INT64_C( 0x00000020FBFFFFFE ),  INT64_C( 0x0000099AE6455357 )
};
|
|
|
|
#ifdef FLOAT16
|
|
|
|
/*
 * Measures how fast `function' (int64_t -> float16_t) runs.  Operands are
 * taken cyclically from inputs_i64.  The function is first called in
 * batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_i64_z_f16( float16_t function( int64_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_i64[index] );
            index = (index + 1) & (numInputs_i64 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_i64[index] );
        index = (index + 1) & (numInputs_i64 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
/*
 * Measures how fast `function' (int64_t -> float32_t) runs.  Operands are
 * taken cyclically from inputs_i64.  The function is first called in
 * batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_i64_z_f32( float32_t function( int64_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_i64[index] );
            index = (index + 1) & (numInputs_i64 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_i64[index] );
        index = (index + 1) & (numInputs_i64 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#ifdef FLOAT64
|
|
|
|
/*
 * Measures how fast `function' (int64_t -> float64_t) runs.  Operands are
 * taken cyclically from inputs_i64.  The function is first called in
 * batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_i64_z_f64( float64_t function( int64_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_i64[index] );
            index = (index + 1) & (numInputs_i64 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_i64[index] );
        index = (index + 1) & (numInputs_i64 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
#ifdef EXTFLOAT80
|
|
|
|
/*
 * Measures how fast `function' runs when it converts an int64_t and stores
 * an extFloat80_t through its second argument.  Operands are taken
 * cyclically from inputs_i64.  The function is first called in batches of
 * minIterations until clock() has advanced by at least CLOCKS_PER_SEC ticks;
 * that many calls are then replayed in one timed pass, and the call count and
 * elapsed ticks are passed to reportTime().
 */
static void time_a_i64_z_extF80( void function( int64_t, extFloat80_t * ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    extFloat80_t result;    /* destination only; its value is never read */
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_i64[index], &result );
            index = (index + 1) & (numInputs_i64 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_i64[index], &result );
        index = (index + 1) & (numInputs_i64 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
#ifdef FLOAT128
|
|
|
|
/*
 * Measures how fast `function' runs when it converts an int64_t and stores
 * a float128_t through its second argument.  Operands are taken cyclically
 * from inputs_i64.  The function is first called in batches of
 * minIterations until clock() has advanced by at least CLOCKS_PER_SEC ticks;
 * that many calls are then replayed in one timed pass, and the call count and
 * elapsed ticks are passed to reportTime().
 */
static void time_a_i64_z_f128( void function( int64_t, float128_t * ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    float128_t result;      /* destination only; its value is never read */
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            function( inputs_i64[index], &result );
            index = (index + 1) & (numInputs_i64 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        function( inputs_i64[index], &result );
        index = (index + 1) & (numInputs_i64 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
#ifdef FLOAT16
|
|
|
|
/*
 * Raw 16-bit patterns used as float16_t operands: the timing routines store
 * an entry into the `ui' member of a union ui16_f16 and pass its `f' member.
 * The table length must remain a power of two because the timing loops wrap
 * their index with `& (numInputs_f16 - 1)'.
 */
enum {
    numInputs_f16 = 32
};

static const uint16_t inputs_F16UI[numInputs_f16] = {
    /*  0 */ 0x0BBA, 0x77FE, 0x084F, 0x9C0F,
    /*  4 */ 0x7800, 0x4436, 0xCE67, 0x80F3,
    /*  8 */ 0x87EF, 0xC2FA, 0x7BFF, 0x13FE,
    /* 12 */ 0x7BFE, 0x1C00, 0xAC46, 0xEAFA,
    /* 16 */ 0x3813, 0x4804, 0x385E, 0x8000,
    /* 20 */ 0xB86C, 0x4B7D, 0xC7FD, 0xC97F,
    /* 24 */ 0x260C, 0x78EE, 0xB84F, 0x249E,
    /* 28 */ 0x0D27, 0x37DC, 0x8400, 0xE8EF
};
|
|
|
|
static
|
|
void
|
|
time_a_f16_z_ui32_rx(
|
|
uint_fast32_t function( float16_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui16_f16 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f16_z_ui64_rx(
|
|
uint_fast64_t function( float16_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui16_f16 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f16_z_i32_rx(
|
|
int_fast32_t function( float16_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui16_f16 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f16_z_i64_rx(
|
|
int_fast64_t function( float16_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui16_f16 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f16_z_ui32_x( uint_fast32_t function( float16_t, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui16_f16 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f16_z_ui64_x( uint_fast64_t function( float16_t, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui16_f16 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void time_a_f16_z_i32_x( int_fast32_t function( float16_t, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui16_f16 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void time_a_f16_z_i64_x( int_fast64_t function( float16_t, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui16_f16 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
/*
 * Measures how fast `function' (float16_t -> float32_t) runs.  Operands are
 * bit patterns taken cyclically from inputs_F16UI.  The function is first
 * called in batches of minIterations until clock() has advanced by at least
 * CLOCKS_PER_SEC ticks; that many calls are then replayed in one timed pass,
 * and the call count and elapsed ticks are passed to reportTime().
 */
static void time_a_f16_z_f32( float32_t function( float16_t ) )
{
    int_fast64_t totalCalls = 0;
    int_fast64_t remaining;
    int index = 0;
    union ui16_f16 operand;
    clock_t start, stop;

    /* Calibration pass: add batches until CLOCKS_PER_SEC ticks have elapsed. */
    start = clock();
    do {
        remaining = minIterations;
        while ( remaining ) {
            operand.ui = inputs_F16UI[index];
            function( operand.f );
            index = (index + 1) & (numInputs_f16 - 1);
            --remaining;
        }
        totalCalls += minIterations;
    } while ( clock() - start < CLOCKS_PER_SEC );

    /* Timed pass: restart at the first operand and replay every call. */
    index = 0;
    start = clock();
    remaining = totalCalls;
    while ( remaining ) {
        operand.ui = inputs_F16UI[index];
        function( operand.f );
        index = (index + 1) & (numInputs_f16 - 1);
        --remaining;
    }
    stop = clock();
    reportTime( totalCalls, stop - start );
}
|
|
|
|
#ifdef FLOAT64
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to the one-operand `function' using the operands in
| `inputs_F16UI'.  Batches of `minIterations' calls are run until at least
| CLOCKS_PER_SEC ticks of `clock' have elapsed; the accumulated number of
| calls is then repeated from the first operand as the measured run, and the
| call count and elapsed ticks are passed to `reportTime'.
*----------------------------------------------------------------------------*/
static void time_a_f16_z_f64( float64_t function( float16_t ) )
{
    union ui16_f16 arg;
    int_fast64_t callCount = 0, left;
    int slot = 0;
    clock_t began, ended;

    /* Sizing phase: add whole batches until enough clock ticks have passed. */
    began = clock();
    do {
        left = minIterations;
        while ( left ) {
            arg.ui = inputs_F16UI[slot];
            function( arg.f );
            slot = (slot + 1) & (numInputs_f16 - 1);
            --left;
        }
        callCount += minIterations;
    } while ( clock() - began < CLOCKS_PER_SEC );
    /* Measured phase: the same number of calls, restarted at operand 0. */
    slot = 0;
    began = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F16UI[slot];
        function( arg.f );
        slot = (slot + 1) & (numInputs_f16 - 1);
    }
    ended = clock();
    reportTime( callCount, ended - began );
}
|
|
|
|
#endif
|
|
|
|
#ifdef EXTFLOAT80
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to `function' using the operands in `inputs_F16UI'.  Each call
| receives the operand plus the address of a local `extFloat80_t' object as
| its second argument; that object is not read here.  Batches of
| `minIterations' calls are run until at least CLOCKS_PER_SEC ticks of `clock'
| have elapsed; the accumulated number of calls is then repeated from the
| first operand as the measured run, and the call count and elapsed ticks are
| passed to `reportTime'.
*----------------------------------------------------------------------------*/
static void time_a_f16_z_extF80( void function( float16_t, extFloat80_t * ) )
{
    union ui16_f16 arg;
    extFloat80_t result;
    int_fast64_t callCount = 0, left;
    int slot = 0;
    clock_t began, ended;

    /* Sizing phase: add whole batches until enough clock ticks have passed. */
    began = clock();
    do {
        left = minIterations;
        while ( left ) {
            arg.ui = inputs_F16UI[slot];
            function( arg.f, &result );
            slot = (slot + 1) & (numInputs_f16 - 1);
            --left;
        }
        callCount += minIterations;
    } while ( clock() - began < CLOCKS_PER_SEC );
    /* Measured phase: the same number of calls, restarted at operand 0. */
    slot = 0;
    began = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F16UI[slot];
        function( arg.f, &result );
        slot = (slot + 1) & (numInputs_f16 - 1);
    }
    ended = clock();
    reportTime( callCount, ended - began );
}
|
|
|
|
#endif
|
|
|
|
#ifdef FLOAT128
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to `function' using the operands in `inputs_F16UI'.  Each call
| receives the operand plus the address of a local `float128_t' object as its
| second argument; that object is not read here.  Batches of `minIterations'
| calls are run until at least CLOCKS_PER_SEC ticks of `clock' have elapsed;
| the accumulated number of calls is then repeated from the first operand as
| the measured run, and the call count and elapsed ticks are passed to
| `reportTime'.
*----------------------------------------------------------------------------*/
static void time_a_f16_z_f128( void function( float16_t, float128_t * ) )
{
    union ui16_f16 arg;
    float128_t result;
    int_fast64_t callCount = 0, left;
    int slot = 0;
    clock_t began, ended;

    /* Sizing phase: add whole batches until enough clock ticks have passed. */
    began = clock();
    do {
        left = minIterations;
        while ( left ) {
            arg.ui = inputs_F16UI[slot];
            function( arg.f, &result );
            slot = (slot + 1) & (numInputs_f16 - 1);
            --left;
        }
        callCount += minIterations;
    } while ( clock() - began < CLOCKS_PER_SEC );
    /* Measured phase: the same number of calls, restarted at operand 0. */
    slot = 0;
    began = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F16UI[slot];
        function( arg.f, &result );
        slot = (slot + 1) & (numInputs_f16 - 1);
    }
    ended = clock();
    reportTime( callCount, ended - began );
}
|
|
|
|
#endif
|
|
|
|
static
|
|
void
|
|
time_az_f16_rx(
|
|
float16_t function( float16_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui16_f16 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f16 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to the two-operand `function', drawing both operands from
| `inputs_F16UI'.  After each call the index for A steps by one (wrapping at
| `numInputs_f16'); the index for B steps by one, or by two on a call where
| A's index has just wrapped to zero, so the A/B pairing shifts over time.
| Batches of `minIterations' calls are run until at least CLOCKS_PER_SEC ticks
| of `clock' have elapsed; that number of calls is then repeated from index 0
| as the measured run and reported through `reportTime'.
*----------------------------------------------------------------------------*/
static void time_abz_f16( float16_t function( float16_t, float16_t ) )
{
    union ui16_f16 argA, argB;
    int_fast64_t callCount = 0, left;
    int slotA = 0, slotB = 0;
    clock_t began, ended;

    /* Sizing phase: add whole batches until enough clock ticks have passed. */
    began = clock();
    do {
        left = minIterations;
        while ( left ) {
            argA.ui = inputs_F16UI[slotA];
            argB.ui = inputs_F16UI[slotB];
            function( argA.f, argB.f );
            slotA = (slotA + 1) & (numInputs_f16 - 1);
            /* B gets an extra step whenever A has wrapped around. */
            slotB = (slotB + (slotA ? 1 : 2)) & (numInputs_f16 - 1);
            --left;
        }
        callCount += minIterations;
    } while ( clock() - began < CLOCKS_PER_SEC );
    /* Measured phase: the same number of calls, restarted at index 0. */
    slotA = 0;
    slotB = 0;
    began = clock();
    for ( left = callCount; left != 0; --left ) {
        argA.ui = inputs_F16UI[slotA];
        argB.ui = inputs_F16UI[slotB];
        function( argA.f, argB.f );
        slotA = (slotA + 1) & (numInputs_f16 - 1);
        slotB = (slotB + (slotA ? 1 : 2)) & (numInputs_f16 - 1);
    }
    ended = clock();
    reportTime( callCount, ended - began );
}
|
|
|
|
static
|
|
void time_abcz_f16( float16_t function( float16_t, float16_t, float16_t ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNumA, inputNumB, inputNumC;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui16_f16 uA, uB, uC;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
inputNumC = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNumA];
|
|
uB.ui = inputs_F16UI[inputNumB];
|
|
uC.ui = inputs_F16UI[inputNumC];
|
|
function( uA.f, uB.f, uC.f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f16 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f16 - 1);
|
|
if ( ! inputNumB ) ++inputNumC;
|
|
inputNumC = (inputNumC + 1) & (numInputs_f16 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
inputNumC = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNumA];
|
|
uB.ui = inputs_F16UI[inputNumB];
|
|
uC.ui = inputs_F16UI[inputNumC];
|
|
function( uA.f, uB.f, uC.f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f16 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f16 - 1);
|
|
if ( ! inputNumB ) ++inputNumC;
|
|
inputNumC = (inputNumC + 1) & (numInputs_f16 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static void time_ab_f16_z_bool( bool function( float16_t, float16_t ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNumA, inputNumB;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui16_f16 uA, uB;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNumA];
|
|
uB.ui = inputs_F16UI[inputNumB];
|
|
function( uA.f, uB.f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f16 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f16 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F16UI[inputNumA];
|
|
uB.ui = inputs_F16UI[inputNumB];
|
|
function( uA.f, uB.f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f16 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f16 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------
| Operand bit patterns read by `time_az_f16_pos'.  Every entry has its top
| bit (0x8000) clear.
*----------------------------------------------------------------------------*/
static const uint16_t inputs_F16UI_pos[numInputs_f16] = {
    0x0BBA, 0x77FE, 0x084F, 0x1C0F,
    0x7800, 0x4436, 0x4E67, 0x00F3,
    0x07EF, 0x42FA, 0x7BFF, 0x13FE,
    0x7BFE, 0x1C00, 0x2C46, 0x6AFA,
    0x3813, 0x4804, 0x385E, 0x0000,
    0x386C, 0x4B7D, 0x47FD, 0x497F,
    0x260C, 0x78EE, 0x384F, 0x249E,
    0x0D27, 0x37DC, 0x0400, 0x68EF
};
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to the one-operand `function' using the operands in
| `inputs_F16UI_pos'.  Batches of `minIterations' calls are run until at least
| CLOCKS_PER_SEC ticks of `clock' have elapsed; the accumulated number of
| calls is then repeated from the first operand as the measured run, and the
| call count and elapsed ticks are passed to `reportTime'.
*----------------------------------------------------------------------------*/
static void time_az_f16_pos( float16_t function( float16_t ) )
{
    union ui16_f16 arg;
    int_fast64_t callCount = 0, left;
    int slot = 0;
    clock_t began, ended;

    /* Sizing phase: add whole batches until enough clock ticks have passed. */
    began = clock();
    do {
        left = minIterations;
        while ( left ) {
            arg.ui = inputs_F16UI_pos[slot];
            function( arg.f );
            slot = (slot + 1) & (numInputs_f16 - 1);
            --left;
        }
        callCount += minIterations;
    } while ( clock() - began < CLOCKS_PER_SEC );
    /* Measured phase: the same number of calls, restarted at operand 0. */
    slot = 0;
    began = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F16UI_pos[slot];
        function( arg.f );
        slot = (slot + 1) & (numInputs_f16 - 1);
    }
    ended = clock();
    reportTime( callCount, ended - began );
}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
/* Length of the f32 operand tables.  The timing loops wrap their indices
   with `& (numInputs_f32 - 1)', which relies on this being a power of two. */
enum {
    numInputs_f32 = 32
};
|
|
|
|
/*----------------------------------------------------------------------------
| Operand bit patterns read by the f32 timing loops; each loop copies an entry
| into the `ui' member of a `union ui32_f32' and passes the `f' member on.
*----------------------------------------------------------------------------*/
static const uint32_t inputs_F32UI[numInputs_f32] = {
    0x4EFA0000, 0xC1D0B328,
    0x80000000, 0x3E69A31E,
    0xAF803EFF, 0x3F800000,
    0x17BF8000, 0xE74A301A,
    0x4E010003, 0x7EE3C75D,
    0xBD803FE0, 0xBFFEFF00,
    0x7981F800, 0x431FFFFC,
    0xC100C000, 0x3D87EFFF,
    0x4103FEFE, 0xBC000007,
    0xBF01F7FF, 0x4E6C6B5C,
    0xC187FFFE, 0xC58B9F13,
    0x4F88007F, 0xDF004007,
    0xB7FFD7FE, 0x7E8001FB,
    0x46EFFBFF, 0x31C10000,
    0xDB428661, 0x33F89B1F,
    0xA3BFEFFF, 0x537BFFBE
};
|
|
|
|
static
|
|
void
|
|
time_a_f32_z_ui32_rx(
|
|
uint_fast32_t function( float32_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui32_f32 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f32_z_ui64_rx(
|
|
uint_fast64_t function( float32_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui32_f32 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f32_z_i32_rx(
|
|
int_fast32_t function( float32_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui32_f32 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f32_z_i64_rx(
|
|
int_fast64_t function( float32_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui32_f32 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f32_z_ui32_x( uint_fast32_t function( float32_t, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui32_f32 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f32_z_ui64_x( uint_fast64_t function( float32_t, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui32_f32 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void time_a_f32_z_i32_x( int_fast32_t function( float32_t, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui32_f32 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void time_a_f32_z_i64_x( int_fast64_t function( float32_t, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui32_f32 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
#ifdef FLOAT16
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to the one-operand `function' using the operands in
| `inputs_F32UI'.  Batches of `minIterations' calls are run until at least
| CLOCKS_PER_SEC ticks of `clock' have elapsed; the accumulated number of
| calls is then repeated from the first operand as the measured run, and the
| call count and elapsed ticks are passed to `reportTime'.
*----------------------------------------------------------------------------*/
static void time_a_f32_z_f16( float16_t function( float32_t ) )
{
    union ui32_f32 arg;
    int_fast64_t callCount = 0, left;
    int slot = 0;
    clock_t began, ended;

    /* Sizing phase: add whole batches until enough clock ticks have passed. */
    began = clock();
    do {
        left = minIterations;
        while ( left ) {
            arg.ui = inputs_F32UI[slot];
            function( arg.f );
            slot = (slot + 1) & (numInputs_f32 - 1);
            --left;
        }
        callCount += minIterations;
    } while ( clock() - began < CLOCKS_PER_SEC );
    /* Measured phase: the same number of calls, restarted at operand 0. */
    slot = 0;
    began = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F32UI[slot];
        function( arg.f );
        slot = (slot + 1) & (numInputs_f32 - 1);
    }
    ended = clock();
    reportTime( callCount, ended - began );
}
|
|
|
|
#endif
|
|
|
|
#ifdef FLOAT64
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to the one-operand `function' using the operands in
| `inputs_F32UI'.  Batches of `minIterations' calls are run until at least
| CLOCKS_PER_SEC ticks of `clock' have elapsed; the accumulated number of
| calls is then repeated from the first operand as the measured run, and the
| call count and elapsed ticks are passed to `reportTime'.
*----------------------------------------------------------------------------*/
static void time_a_f32_z_f64( float64_t function( float32_t ) )
{
    union ui32_f32 arg;
    int_fast64_t callCount = 0, left;
    int slot = 0;
    clock_t began, ended;

    /* Sizing phase: add whole batches until enough clock ticks have passed. */
    began = clock();
    do {
        left = minIterations;
        while ( left ) {
            arg.ui = inputs_F32UI[slot];
            function( arg.f );
            slot = (slot + 1) & (numInputs_f32 - 1);
            --left;
        }
        callCount += minIterations;
    } while ( clock() - began < CLOCKS_PER_SEC );
    /* Measured phase: the same number of calls, restarted at operand 0. */
    slot = 0;
    began = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F32UI[slot];
        function( arg.f );
        slot = (slot + 1) & (numInputs_f32 - 1);
    }
    ended = clock();
    reportTime( callCount, ended - began );
}
|
|
|
|
#endif
|
|
|
|
#ifdef EXTFLOAT80
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to `function' using the operands in `inputs_F32UI'.  Each call
| receives the operand plus the address of a local `extFloat80_t' object as
| its second argument; that object is not read here.  Batches of
| `minIterations' calls are run until at least CLOCKS_PER_SEC ticks of `clock'
| have elapsed; the accumulated number of calls is then repeated from the
| first operand as the measured run, and the call count and elapsed ticks are
| passed to `reportTime'.
*----------------------------------------------------------------------------*/
static void time_a_f32_z_extF80( void function( float32_t, extFloat80_t * ) )
{
    union ui32_f32 arg;
    extFloat80_t result;
    int_fast64_t callCount = 0, left;
    int slot = 0;
    clock_t began, ended;

    /* Sizing phase: add whole batches until enough clock ticks have passed. */
    began = clock();
    do {
        left = minIterations;
        while ( left ) {
            arg.ui = inputs_F32UI[slot];
            function( arg.f, &result );
            slot = (slot + 1) & (numInputs_f32 - 1);
            --left;
        }
        callCount += minIterations;
    } while ( clock() - began < CLOCKS_PER_SEC );
    /* Measured phase: the same number of calls, restarted at operand 0. */
    slot = 0;
    began = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F32UI[slot];
        function( arg.f, &result );
        slot = (slot + 1) & (numInputs_f32 - 1);
    }
    ended = clock();
    reportTime( callCount, ended - began );
}
|
|
|
|
#endif
|
|
|
|
#ifdef FLOAT128
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to `function' using the operands in `inputs_F32UI'.  Each call
| receives the operand plus the address of a local `float128_t' object as its
| second argument; that object is not read here.  Batches of `minIterations'
| calls are run until at least CLOCKS_PER_SEC ticks of `clock' have elapsed;
| the accumulated number of calls is then repeated from the first operand as
| the measured run, and the call count and elapsed ticks are passed to
| `reportTime'.
*----------------------------------------------------------------------------*/
static void time_a_f32_z_f128( void function( float32_t, float128_t * ) )
{
    union ui32_f32 arg;
    float128_t result;
    int_fast64_t callCount = 0, left;
    int slot = 0;
    clock_t began, ended;

    /* Sizing phase: add whole batches until enough clock ticks have passed. */
    began = clock();
    do {
        left = minIterations;
        while ( left ) {
            arg.ui = inputs_F32UI[slot];
            function( arg.f, &result );
            slot = (slot + 1) & (numInputs_f32 - 1);
            --left;
        }
        callCount += minIterations;
    } while ( clock() - began < CLOCKS_PER_SEC );
    /* Measured phase: the same number of calls, restarted at operand 0. */
    slot = 0;
    began = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F32UI[slot];
        function( arg.f, &result );
        slot = (slot + 1) & (numInputs_f32 - 1);
    }
    ended = clock();
    reportTime( callCount, ended - began );
}
|
|
|
|
#endif
|
|
|
|
static
|
|
void
|
|
time_az_f32_rx(
|
|
float32_t function( float32_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui32_f32 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f32 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to the two-operand `function', drawing both operands from
| `inputs_F32UI'.  After each call the index for A steps by one (wrapping at
| `numInputs_f32'); the index for B steps by one, or by two on a call where
| A's index has just wrapped to zero, so the A/B pairing shifts over time.
| Batches of `minIterations' calls are run until at least CLOCKS_PER_SEC ticks
| of `clock' have elapsed; that number of calls is then repeated from index 0
| as the measured run and reported through `reportTime'.
*----------------------------------------------------------------------------*/
static void time_abz_f32( float32_t function( float32_t, float32_t ) )
{
    union ui32_f32 argA, argB;
    int_fast64_t callCount = 0, left;
    int slotA = 0, slotB = 0;
    clock_t began, ended;

    /* Sizing phase: add whole batches until enough clock ticks have passed. */
    began = clock();
    do {
        left = minIterations;
        while ( left ) {
            argA.ui = inputs_F32UI[slotA];
            argB.ui = inputs_F32UI[slotB];
            function( argA.f, argB.f );
            slotA = (slotA + 1) & (numInputs_f32 - 1);
            /* B gets an extra step whenever A has wrapped around. */
            slotB = (slotB + (slotA ? 1 : 2)) & (numInputs_f32 - 1);
            --left;
        }
        callCount += minIterations;
    } while ( clock() - began < CLOCKS_PER_SEC );
    /* Measured phase: the same number of calls, restarted at index 0. */
    slotA = 0;
    slotB = 0;
    began = clock();
    for ( left = callCount; left != 0; --left ) {
        argA.ui = inputs_F32UI[slotA];
        argB.ui = inputs_F32UI[slotB];
        function( argA.f, argB.f );
        slotA = (slotA + 1) & (numInputs_f32 - 1);
        slotB = (slotB + (slotA ? 1 : 2)) & (numInputs_f32 - 1);
    }
    ended = clock();
    reportTime( callCount, ended - began );
}
|
|
|
|
static
|
|
void time_abcz_f32( float32_t function( float32_t, float32_t, float32_t ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNumA, inputNumB, inputNumC;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui32_f32 uA, uB, uC;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
inputNumC = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNumA];
|
|
uB.ui = inputs_F32UI[inputNumB];
|
|
uC.ui = inputs_F32UI[inputNumC];
|
|
function( uA.f, uB.f, uC.f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f32 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f32 - 1);
|
|
if ( ! inputNumB ) ++inputNumC;
|
|
inputNumC = (inputNumC + 1) & (numInputs_f32 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
inputNumC = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNumA];
|
|
uB.ui = inputs_F32UI[inputNumB];
|
|
uC.ui = inputs_F32UI[inputNumC];
|
|
function( uA.f, uB.f, uC.f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f32 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f32 - 1);
|
|
if ( ! inputNumB ) ++inputNumC;
|
|
inputNumC = (inputNumC + 1) & (numInputs_f32 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static void time_ab_f32_z_bool( bool function( float32_t, float32_t ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNumA, inputNumB;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui32_f32 uA, uB;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNumA];
|
|
uB.ui = inputs_F32UI[inputNumB];
|
|
function( uA.f, uB.f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f32 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f32 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F32UI[inputNumA];
|
|
uB.ui = inputs_F32UI[inputNumB];
|
|
function( uA.f, uB.f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f32 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f32 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------
| Operand bit patterns read by `time_az_f32_pos'.  Entry for entry these are
| the values of `inputs_F32UI' with the top bit (0x80000000) cleared.
*----------------------------------------------------------------------------*/
static const uint32_t inputs_F32UI_pos[numInputs_f32] = {
    0x4EFA0000, 0x41D0B328,
    0x00000000, 0x3E69A31E,
    0x2F803EFF, 0x3F800000,
    0x17BF8000, 0x674A301A,
    0x4E010003, 0x7EE3C75D,
    0x3D803FE0, 0x3FFEFF00,
    0x7981F800, 0x431FFFFC,
    0x4100C000, 0x3D87EFFF,
    0x4103FEFE, 0x3C000007,
    0x3F01F7FF, 0x4E6C6B5C,
    0x4187FFFE, 0x458B9F13,
    0x4F88007F, 0x5F004007,
    0x37FFD7FE, 0x7E8001FB,
    0x46EFFBFF, 0x31C10000,
    0x5B428661, 0x33F89B1F,
    0x23BFEFFF, 0x537BFFBE
};
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to the one-operand `function' using the operands in
| `inputs_F32UI_pos'.  Batches of `minIterations' calls are run until at least
| CLOCKS_PER_SEC ticks of `clock' have elapsed; the accumulated number of
| calls is then repeated from the first operand as the measured run, and the
| call count and elapsed ticks are passed to `reportTime'.
*----------------------------------------------------------------------------*/
static void time_az_f32_pos( float32_t function( float32_t ) )
{
    union ui32_f32 arg;
    int_fast64_t callCount = 0, left;
    int slot = 0;
    clock_t began, ended;

    /* Sizing phase: add whole batches until enough clock ticks have passed. */
    began = clock();
    do {
        left = minIterations;
        while ( left ) {
            arg.ui = inputs_F32UI_pos[slot];
            function( arg.f );
            slot = (slot + 1) & (numInputs_f32 - 1);
            --left;
        }
        callCount += minIterations;
    } while ( clock() - began < CLOCKS_PER_SEC );
    /* Measured phase: the same number of calls, restarted at operand 0. */
    slot = 0;
    began = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F32UI_pos[slot];
        function( arg.f );
        slot = (slot + 1) & (numInputs_f32 - 1);
    }
    ended = clock();
    reportTime( callCount, ended - began );
}
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
#ifdef FLOAT64
|
|
|
|
/* Length of the f64 operand table.  The timing loops wrap their indices
   with `& (numInputs_f64 - 1)', which relies on this being a power of two. */
enum {
    numInputs_f64 = 32
};
|
|
|
|
/*----------------------------------------------------------------------------
| Operand bit patterns read by the f64 timing loops; each loop copies an entry
| into the `ui' member of a `union ui64_f64' and passes the `f' member on.
*----------------------------------------------------------------------------*/
static const uint64_t inputs_F64UI[numInputs_f64] = {
    UINT64_C( 0x422FFFC008000000 ), UINT64_C( 0xB7E0000480000000 ),
    UINT64_C( 0xF3FD2546120B7935 ), UINT64_C( 0x3FF0000000000000 ),
    UINT64_C( 0xCE07F766F09588D6 ), UINT64_C( 0x8000000000000000 ),
    UINT64_C( 0x3FCE000400000000 ), UINT64_C( 0x8313B60F0032BED8 ),
    UINT64_C( 0xC1EFFFFFC0002000 ), UINT64_C( 0x3FB3C75D224F2B0F ),
    UINT64_C( 0x7FD00000004000FF ), UINT64_C( 0xA12FFF8000001FFF ),
    UINT64_C( 0x3EE0000000FE0000 ), UINT64_C( 0x0010000080000004 ),
    UINT64_C( 0x41CFFFFE00000020 ), UINT64_C( 0x40303FFFFFFFFFFD ),
    UINT64_C( 0x3FD000003FEFFFFF ), UINT64_C( 0xBFD0000010000000 ),
    UINT64_C( 0xB7FC6B5C16CA55CF ), UINT64_C( 0x413EEB940B9D1301 ),
    UINT64_C( 0xC7E00200001FFFFF ), UINT64_C( 0x47F00021FFFFFFFE ),
    UINT64_C( 0xBFFFFFFFF80000FF ), UINT64_C( 0xC07FFFFFE00FFFFF ),
    UINT64_C( 0x001497A63740C5E8 ), UINT64_C( 0xC4BFFFE0001FFFFF ),
    UINT64_C( 0x96FFDFFEFFFFFFFF ), UINT64_C( 0x403FC000000001FE ),
    UINT64_C( 0xFFD00000000001F6 ), UINT64_C( 0x0640400002000000 ),
    UINT64_C( 0x479CEE1E4F789FE0 ), UINT64_C( 0xC237FFFFFFFFFDFE )
};
|
|
|
|
static
|
|
void
|
|
time_a_f64_z_ui32_rx(
|
|
uint_fast32_t function( float64_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui64_f64 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f64_z_ui64_rx(
|
|
uint_fast64_t function( float64_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui64_f64 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f64_z_i32_rx(
|
|
int_fast32_t function( float64_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui64_f64 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f64_z_i64_rx(
|
|
int_fast64_t function( float64_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui64_f64 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f64_z_ui32_x( uint_fast32_t function( float64_t, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui64_f64 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f64_z_ui64_x( uint_fast64_t function( float64_t, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui64_f64 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void time_a_f64_z_i32_x( int_fast32_t function( float64_t, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui64_f64 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void time_a_f64_z_i64_x( int_fast64_t function( float64_t, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui64_f64 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
#ifdef FLOAT16
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to `function' (float64_t -> float16_t; results are discarded)
| using operands from inputs_F64UI.  Batches of minIterations calls are run
| until at least CLOCKS_PER_SEC clock() ticks have elapsed; that many calls
| are then repeated from the first input under a fresh timer, and the count
| and elapsed ticks are passed to reportTime.
*----------------------------------------------------------------------------*/
static void time_a_f64_z_f16( float16_t function( float64_t ) )
{
    const int wrapMask = numInputs_f64 - 1;
    union ui64_f64 arg;
    int_fast64_t callCount = 0;
    int_fast64_t left;
    int slot = 0;
    clock_t t0, t1;

    /* Sizing pass. */
    t0 = clock();
    do {
        for ( left = minIterations; left != 0; --left ) {
            arg.ui = inputs_F64UI[slot];
            function( arg.f );
            slot = (slot + 1) & wrapMask;
        }
        callCount += minIterations;
    } while ( clock() - t0 < CLOCKS_PER_SEC );

    /* Measured pass. */
    slot = 0;
    t0 = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F64UI[slot];
        function( arg.f );
        slot = (slot + 1) & wrapMask;
    }
    t1 = clock();
    reportTime( callCount, t1 - t0 );
}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to `function' (float64_t -> float32_t; results are discarded)
| using operands from inputs_F64UI.  Batches of minIterations calls are run
| until at least CLOCKS_PER_SEC clock() ticks have elapsed; that many calls
| are then repeated from the first input under a fresh timer, and the count
| and elapsed ticks are passed to reportTime.
*----------------------------------------------------------------------------*/
static void time_a_f64_z_f32( float32_t function( float64_t ) )
{
    const int wrapMask = numInputs_f64 - 1;
    union ui64_f64 arg;
    int_fast64_t callCount = 0;
    int_fast64_t left;
    int slot = 0;
    clock_t t0, t1;

    /* Sizing pass. */
    t0 = clock();
    do {
        for ( left = minIterations; left != 0; --left ) {
            arg.ui = inputs_F64UI[slot];
            function( arg.f );
            slot = (slot + 1) & wrapMask;
        }
        callCount += minIterations;
    } while ( clock() - t0 < CLOCKS_PER_SEC );

    /* Measured pass. */
    slot = 0;
    t0 = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F64UI[slot];
        function( arg.f );
        slot = (slot + 1) & wrapMask;
    }
    t1 = clock();
    reportTime( callCount, t1 - t0 );
}
|
|
|
|
#ifdef EXTFLOAT80
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to `function', which takes a float64_t and writes through an
| extFloat80_t pointer, using operands from inputs_F64UI.  Every call writes
| to the same local result object, which is never read.  Batches of
| minIterations calls are run until at least CLOCKS_PER_SEC clock() ticks have
| elapsed; that many calls are then repeated from the first input under a
| fresh timer, and the count and elapsed ticks are passed to reportTime.
*----------------------------------------------------------------------------*/
static void time_a_f64_z_extF80( void function( float64_t, extFloat80_t * ) )
{
    const int wrapMask = numInputs_f64 - 1;
    union ui64_f64 arg;
    extFloat80_t result;
    int_fast64_t callCount = 0;
    int_fast64_t left;
    int slot = 0;
    clock_t t0, t1;

    /* Sizing pass. */
    t0 = clock();
    do {
        for ( left = minIterations; left != 0; --left ) {
            arg.ui = inputs_F64UI[slot];
            function( arg.f, &result );
            slot = (slot + 1) & wrapMask;
        }
        callCount += minIterations;
    } while ( clock() - t0 < CLOCKS_PER_SEC );

    /* Measured pass. */
    slot = 0;
    t0 = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F64UI[slot];
        function( arg.f, &result );
        slot = (slot + 1) & wrapMask;
    }
    t1 = clock();
    reportTime( callCount, t1 - t0 );
}
|
|
|
|
#endif
|
|
|
|
#ifdef FLOAT128
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to `function', which takes a float64_t and writes through a
| float128_t pointer, using operands from inputs_F64UI.  Every call writes to
| the same local result object, which is never read.  Batches of minIterations
| calls are run until at least CLOCKS_PER_SEC clock() ticks have elapsed; that
| many calls are then repeated from the first input under a fresh timer, and
| the count and elapsed ticks are passed to reportTime.
*----------------------------------------------------------------------------*/
static void time_a_f64_z_f128( void function( float64_t, float128_t * ) )
{
    const int wrapMask = numInputs_f64 - 1;
    union ui64_f64 arg;
    float128_t result;
    int_fast64_t callCount = 0;
    int_fast64_t left;
    int slot = 0;
    clock_t t0, t1;

    /* Sizing pass. */
    t0 = clock();
    do {
        for ( left = minIterations; left != 0; --left ) {
            arg.ui = inputs_F64UI[slot];
            function( arg.f, &result );
            slot = (slot + 1) & wrapMask;
        }
        callCount += minIterations;
    } while ( clock() - t0 < CLOCKS_PER_SEC );

    /* Measured pass. */
    slot = 0;
    t0 = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F64UI[slot];
        function( arg.f, &result );
        slot = (slot + 1) & wrapMask;
    }
    t1 = clock();
    reportTime( callCount, t1 - t0 );
}
|
|
|
|
#endif
|
|
|
|
static
|
|
void
|
|
time_az_f64_rx(
|
|
float64_t function( float64_t, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui64_f64 uA;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNum];
|
|
function( uA.f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f64 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to the two-operand `function' (float64_t, float64_t ->
| float64_t; results are discarded).  Both operands come from inputs_F64UI
| through separate indices: A advances by one per call, and B advances by one
| per call plus one extra step whenever A wraps to zero.  Batches of
| minIterations calls are run until at least CLOCKS_PER_SEC clock() ticks have
| elapsed; that many calls are then repeated from index zero under a fresh
| timer, and the count and elapsed ticks are passed to reportTime.
*----------------------------------------------------------------------------*/
static void time_abz_f64( float64_t function( float64_t, float64_t ) )
{
    const int wrapMask = numInputs_f64 - 1;
    union ui64_f64 lhs, rhs;
    int_fast64_t callCount = 0;
    int_fast64_t left;
    int indexA = 0, indexB = 0;
    clock_t t0, t1;

    /* Sizing pass. */
    t0 = clock();
    do {
        for ( left = minIterations; left != 0; --left ) {
            lhs.ui = inputs_F64UI[indexA];
            rhs.ui = inputs_F64UI[indexB];
            function( lhs.f, rhs.f );
            indexA = (indexA + 1) & wrapMask;
            indexB = (indexB + (indexA ? 1 : 2)) & wrapMask;
        }
        callCount += minIterations;
    } while ( clock() - t0 < CLOCKS_PER_SEC );

    /* Measured pass. */
    indexA = 0;
    indexB = 0;
    t0 = clock();
    for ( left = callCount; left != 0; --left ) {
        lhs.ui = inputs_F64UI[indexA];
        rhs.ui = inputs_F64UI[indexB];
        function( lhs.f, rhs.f );
        indexA = (indexA + 1) & wrapMask;
        indexB = (indexB + (indexA ? 1 : 2)) & wrapMask;
    }
    t1 = clock();
    reportTime( callCount, t1 - t0 );
}
|
|
|
|
static
|
|
void time_abcz_f64( float64_t function( float64_t, float64_t, float64_t ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNumA, inputNumB, inputNumC;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui64_f64 uA, uB, uC;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
inputNumC = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNumA];
|
|
uB.ui = inputs_F64UI[inputNumB];
|
|
uC.ui = inputs_F64UI[inputNumC];
|
|
function( uA.f, uB.f, uC.f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f64 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f64 - 1);
|
|
if ( ! inputNumB ) ++inputNumC;
|
|
inputNumC = (inputNumC + 1) & (numInputs_f64 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
inputNumC = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNumA];
|
|
uB.ui = inputs_F64UI[inputNumB];
|
|
uC.ui = inputs_F64UI[inputNumC];
|
|
function( uA.f, uB.f, uC.f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f64 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f64 - 1);
|
|
if ( ! inputNumB ) ++inputNumC;
|
|
inputNumC = (inputNumC + 1) & (numInputs_f64 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static void time_ab_f64_z_bool( bool function( float64_t, float64_t ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNumA, inputNumB;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
union ui64_f64 uA, uB;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNumA];
|
|
uB.ui = inputs_F64UI[inputNumB];
|
|
function( uA.f, uB.f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f64 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f64 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
uA.ui = inputs_F64UI[inputNumA];
|
|
uB.ui = inputs_F64UI[inputNumB];
|
|
function( uA.f, uB.f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f64 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f64 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------
| 64-bit patterns read by time_az_f64_pos and reinterpreted as float64_t
| through union ui64_f64.  Every entry has bit 63 clear.
*----------------------------------------------------------------------------*/
static const uint64_t inputs_F64UI_pos[numInputs_f64] = {
    /* entries 0-7 */
    UINT64_C( 0x422FFFC008000000 ),
    UINT64_C( 0x37E0000480000000 ),
    UINT64_C( 0x73FD2546120B7935 ),
    UINT64_C( 0x3FF0000000000000 ),
    UINT64_C( 0x4E07F766F09588D6 ),
    UINT64_C( 0x0000000000000000 ),
    UINT64_C( 0x3FCE000400000000 ),
    UINT64_C( 0x0313B60F0032BED8 ),
    /* entries 8-15 */
    UINT64_C( 0x41EFFFFFC0002000 ),
    UINT64_C( 0x3FB3C75D224F2B0F ),
    UINT64_C( 0x7FD00000004000FF ),
    UINT64_C( 0x212FFF8000001FFF ),
    UINT64_C( 0x3EE0000000FE0000 ),
    UINT64_C( 0x0010000080000004 ),
    UINT64_C( 0x41CFFFFE00000020 ),
    UINT64_C( 0x40303FFFFFFFFFFD ),
    /* entries 16-23 */
    UINT64_C( 0x3FD000003FEFFFFF ),
    UINT64_C( 0x3FD0000010000000 ),
    UINT64_C( 0x37FC6B5C16CA55CF ),
    UINT64_C( 0x413EEB940B9D1301 ),
    UINT64_C( 0x47E00200001FFFFF ),
    UINT64_C( 0x47F00021FFFFFFFE ),
    UINT64_C( 0x3FFFFFFFF80000FF ),
    UINT64_C( 0x407FFFFFE00FFFFF ),
    /* entries 24-31 */
    UINT64_C( 0x001497A63740C5E8 ),
    UINT64_C( 0x44BFFFE0001FFFFF ),
    UINT64_C( 0x16FFDFFEFFFFFFFF ),
    UINT64_C( 0x403FC000000001FE ),
    UINT64_C( 0x7FD00000000001F6 ),
    UINT64_C( 0x0640400002000000 ),
    UINT64_C( 0x479CEE1E4F789FE0 ),
    UINT64_C( 0x4237FFFFFFFFFDFE )
};
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to `function' (float64_t -> float64_t; results are discarded)
| using operands from inputs_F64UI_pos, whose entries all have the top bit
| clear.  Batches of minIterations calls are run until at least
| CLOCKS_PER_SEC clock() ticks have elapsed; that many calls are then repeated
| from the first input under a fresh timer, and the count and elapsed ticks
| are passed to reportTime.
*----------------------------------------------------------------------------*/
static void time_az_f64_pos( float64_t function( float64_t ) )
{
    const int wrapMask = numInputs_f64 - 1;
    union ui64_f64 arg;
    int_fast64_t callCount = 0;
    int_fast64_t left;
    int slot = 0;
    clock_t t0, t1;

    /* Sizing pass. */
    t0 = clock();
    do {
        for ( left = minIterations; left != 0; --left ) {
            arg.ui = inputs_F64UI_pos[slot];
            function( arg.f );
            slot = (slot + 1) & wrapMask;
        }
        callCount += minIterations;
    } while ( clock() - t0 < CLOCKS_PER_SEC );

    /* Measured pass. */
    slot = 0;
    t0 = clock();
    for ( left = callCount; left != 0; --left ) {
        arg.ui = inputs_F64UI_pos[slot];
        function( arg.f );
        slot = (slot + 1) & wrapMask;
    }
    t1 = clock();
    reportTime( callCount, t1 - t0 );
}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
|
|
|
|
#ifdef EXTFLOAT80
|
|
|
|
/*----------------------------------------------------------------------------
| extF80Const( hi, lo ) expands to a two-element brace initializer holding
| `hi' as written and `lo' wrapped in UINT64_C.  When LITTLEENDIAN is defined
| the `lo' element comes first; otherwise `hi' comes first.
| numInputs_extF80 is the number of entries in the extFloat80_t input tables.
*----------------------------------------------------------------------------*/
#ifndef LITTLEENDIAN
#define extF80Const( hi, lo ) { hi, UINT64_C( lo ) }
#else
#define extF80Const( hi, lo ) { UINT64_C( lo ), hi }
#endif

enum { numInputs_extF80 = 32 };
|
|
|
|
static
|
|
const union { struct extFloat80M s; extFloat80_t f; }
|
|
inputs_extF80[numInputs_extF80] = {
|
|
extF80Const( 0xC03F, 0xA9BE15A19C1E8B62 ),
|
|
extF80Const( 0x8000, 0x0000000000000000 ),
|
|
extF80Const( 0x75A8, 0xE59591E4788957A5 ),
|
|
extF80Const( 0xBFFF, 0xFFF0000000000040 ),
|
|
extF80Const( 0x0CD8, 0xFC000000000007FE ),
|
|
extF80Const( 0x43BA, 0x99A4000000000000 ),
|
|
extF80Const( 0x3FFF, 0x8000000000000000 ),
|
|
extF80Const( 0x4081, 0x94FBF1BCEB5545F0 ),
|
|
extF80Const( 0x403E, 0xFFF0000000002000 ),
|
|
extF80Const( 0x3FFE, 0xC860E3C75D224F28 ),
|
|
extF80Const( 0x407E, 0xFC00000FFFFFFFFE ),
|
|
extF80Const( 0x737A, 0x800000007FFDFFFE ),
|
|
extF80Const( 0x4044, 0xFFFFFF80000FFFFF ),
|
|
extF80Const( 0xBBFE, 0x8000040000001FFE ),
|
|
extF80Const( 0xC002, 0xFF80000000000020 ),
|
|
extF80Const( 0xDE8D, 0xFFFFFFFFFFE00004 ),
|
|
extF80Const( 0xC004, 0x8000000000003FFB ),
|
|
extF80Const( 0x407F, 0x800000000003FFFE ),
|
|
extF80Const( 0xC000, 0xA459EE6A5C16CA55 ),
|
|
extF80Const( 0x8003, 0xC42CBF7399AEEB94 ),
|
|
extF80Const( 0xBF7F, 0xF800000000000006 ),
|
|
extF80Const( 0xC07F, 0xBF56BE8871F28FEA ),
|
|
extF80Const( 0xC07E, 0xFFFF77FFFFFFFFFE ),
|
|
extF80Const( 0xADC9, 0x8000000FFFFFFFDE ),
|
|
extF80Const( 0xC001, 0xEFF7FFFFFFFFFFFF ),
|
|
extF80Const( 0x4001, 0xBE84F30125C497A6 ),
|
|
extF80Const( 0xC06B, 0xEFFFFFFFFFFFFFFF ),
|
|
extF80Const( 0x4080, 0xFFFFFFFFBFFFFFFF ),
|
|
extF80Const( 0x87E9, 0x81FFFFFFFFFFFBFF ),
|
|
extF80Const( 0xA63F, 0x801FFFFFFEFFFFFE ),
|
|
extF80Const( 0x403C, 0x801FFFFFFFF7FFFF ),
|
|
extF80Const( 0x4018, 0x8000000000080003 )
|
|
};
|
|
|
|
static
|
|
void
|
|
time_a_extF80_z_ui32_rx(
|
|
uint_fast32_t function( const extFloat80_t *, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_extF80_z_ui64_rx(
|
|
uint_fast64_t function( const extFloat80_t *, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_extF80_z_i32_rx(
|
|
int_fast32_t function( const extFloat80_t *, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_extF80_z_i64_rx(
|
|
int_fast64_t function( const extFloat80_t *, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_extF80_z_ui32_x(
|
|
uint_fast32_t function( const extFloat80_t *, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_extF80_z_ui64_x(
|
|
uint_fast64_t function( const extFloat80_t *, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_extF80_z_i32_x(
|
|
int_fast32_t function( const extFloat80_t *, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_extF80_z_i64_x(
|
|
int_fast64_t function( const extFloat80_t *, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
#ifdef FLOAT16
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to `function' (const extFloat80_t * -> float16_t; results are
| discarded) using operands from inputs_extF80.  Batches of minIterations
| calls are run until at least CLOCKS_PER_SEC clock() ticks have elapsed; that
| many calls are then repeated from the first input under a fresh timer, and
| the count and elapsed ticks are passed to reportTime.
*----------------------------------------------------------------------------*/
static void time_a_extF80_z_f16( float16_t function( const extFloat80_t * ) )
{
    const int wrapMask = numInputs_extF80 - 1;
    int_fast64_t callCount = 0;
    int_fast64_t left;
    int slot = 0;
    clock_t t0, t1;

    /* Sizing pass. */
    t0 = clock();
    do {
        for ( left = minIterations; left != 0; --left ) {
            function( &inputs_extF80[slot].f );
            slot = (slot + 1) & wrapMask;
        }
        callCount += minIterations;
    } while ( clock() - t0 < CLOCKS_PER_SEC );

    /* Measured pass. */
    slot = 0;
    t0 = clock();
    for ( left = callCount; left != 0; --left ) {
        function( &inputs_extF80[slot].f );
        slot = (slot + 1) & wrapMask;
    }
    t1 = clock();
    reportTime( callCount, t1 - t0 );
}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to `function' (const extFloat80_t * -> float32_t; results are
| discarded) using operands from inputs_extF80.  Batches of minIterations
| calls are run until at least CLOCKS_PER_SEC clock() ticks have elapsed; that
| many calls are then repeated from the first input under a fresh timer, and
| the count and elapsed ticks are passed to reportTime.
*----------------------------------------------------------------------------*/
static void time_a_extF80_z_f32( float32_t function( const extFloat80_t * ) )
{
    const int wrapMask = numInputs_extF80 - 1;
    int_fast64_t callCount = 0;
    int_fast64_t left;
    int slot = 0;
    clock_t t0, t1;

    /* Sizing pass. */
    t0 = clock();
    do {
        for ( left = minIterations; left != 0; --left ) {
            function( &inputs_extF80[slot].f );
            slot = (slot + 1) & wrapMask;
        }
        callCount += minIterations;
    } while ( clock() - t0 < CLOCKS_PER_SEC );

    /* Measured pass. */
    slot = 0;
    t0 = clock();
    for ( left = callCount; left != 0; --left ) {
        function( &inputs_extF80[slot].f );
        slot = (slot + 1) & wrapMask;
    }
    t1 = clock();
    reportTime( callCount, t1 - t0 );
}
|
|
|
|
#ifdef FLOAT64
|
|
|
|
/*----------------------------------------------------------------------------
| Times calls to `function' (const extFloat80_t * -> float64_t; results are
| discarded) using operands from inputs_extF80.  Batches of minIterations
| calls are run until at least CLOCKS_PER_SEC clock() ticks have elapsed; that
| many calls are then repeated from the first input under a fresh timer, and
| the count and elapsed ticks are passed to reportTime.
*----------------------------------------------------------------------------*/
static void time_a_extF80_z_f64( float64_t function( const extFloat80_t * ) )
{
    const int wrapMask = numInputs_extF80 - 1;
    int_fast64_t callCount = 0;
    int_fast64_t left;
    int slot = 0;
    clock_t t0, t1;

    /* Sizing pass. */
    t0 = clock();
    do {
        for ( left = minIterations; left != 0; --left ) {
            function( &inputs_extF80[slot].f );
            slot = (slot + 1) & wrapMask;
        }
        callCount += minIterations;
    } while ( clock() - t0 < CLOCKS_PER_SEC );

    /* Measured pass. */
    slot = 0;
    t0 = clock();
    for ( left = callCount; left != 0; --left ) {
        function( &inputs_extF80[slot].f );
        slot = (slot + 1) & wrapMask;
    }
    t1 = clock();
    reportTime( callCount, t1 - t0 );
}
|
|
|
|
#endif
|
|
|
|
#ifdef FLOAT128
|
|
|
|
static
|
|
void
|
|
time_a_extF80_z_f128( void function( const extFloat80_t *, float128_t * ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
float128_t z;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, &z );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, &z );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
static
|
|
void
|
|
time_az_extF80_rx(
|
|
void function( const extFloat80_t *, uint_fast8_t, bool, extFloat80_t * ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
extFloat80_t z;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, roundingMode, exact, &z );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, roundingMode, exact, &z );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_abz_extF80(
|
|
void
|
|
function( const extFloat80_t *, const extFloat80_t *, extFloat80_t * )
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNumA, inputNumB;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
extFloat80_t z;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function(
|
|
&inputs_extF80[inputNumA].f, &inputs_extF80[inputNumB].f, &z );
|
|
inputNumA = (inputNumA + 1) & (numInputs_extF80 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function(
|
|
&inputs_extF80[inputNumA].f, &inputs_extF80[inputNumB].f, &z );
|
|
inputNumA = (inputNumA + 1) & (numInputs_extF80 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_ab_extF80_z_bool(
|
|
bool function( const extFloat80_t *, const extFloat80_t * ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNumA, inputNumB;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function(
|
|
&inputs_extF80[inputNumA].f, &inputs_extF80[inputNumB].f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_extF80 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_extF80[inputNumA].f, &inputs_extF80[inputNumB].f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_extF80 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
const union { struct extFloat80M s; extFloat80_t f; }
|
|
inputs_extF80_pos[numInputs_extF80] = {
|
|
extF80Const( 0x403F, 0xA9BE15A19C1E8B62 ),
|
|
extF80Const( 0x0000, 0x0000000000000000 ),
|
|
extF80Const( 0x75A8, 0xE59591E4788957A5 ),
|
|
extF80Const( 0x3FFF, 0xFFF0000000000040 ),
|
|
extF80Const( 0x0CD8, 0xFC000000000007FE ),
|
|
extF80Const( 0x43BA, 0x99A4000000000000 ),
|
|
extF80Const( 0x3FFF, 0x8000000000000000 ),
|
|
extF80Const( 0x4081, 0x94FBF1BCEB5545F0 ),
|
|
extF80Const( 0x403E, 0xFFF0000000002000 ),
|
|
extF80Const( 0x3FFE, 0xC860E3C75D224F28 ),
|
|
extF80Const( 0x407E, 0xFC00000FFFFFFFFE ),
|
|
extF80Const( 0x737A, 0x800000007FFDFFFE ),
|
|
extF80Const( 0x4044, 0xFFFFFF80000FFFFF ),
|
|
extF80Const( 0x3BFE, 0x8000040000001FFE ),
|
|
extF80Const( 0x4002, 0xFF80000000000020 ),
|
|
extF80Const( 0x5E8D, 0xFFFFFFFFFFE00004 ),
|
|
extF80Const( 0x4004, 0x8000000000003FFB ),
|
|
extF80Const( 0x407F, 0x800000000003FFFE ),
|
|
extF80Const( 0x4000, 0xA459EE6A5C16CA55 ),
|
|
extF80Const( 0x0003, 0xC42CBF7399AEEB94 ),
|
|
extF80Const( 0x3F7F, 0xF800000000000006 ),
|
|
extF80Const( 0x407F, 0xBF56BE8871F28FEA ),
|
|
extF80Const( 0x407E, 0xFFFF77FFFFFFFFFE ),
|
|
extF80Const( 0x2DC9, 0x8000000FFFFFFFDE ),
|
|
extF80Const( 0x4001, 0xEFF7FFFFFFFFFFFF ),
|
|
extF80Const( 0x4001, 0xBE84F30125C497A6 ),
|
|
extF80Const( 0x406B, 0xEFFFFFFFFFFFFFFF ),
|
|
extF80Const( 0x4080, 0xFFFFFFFFBFFFFFFF ),
|
|
extF80Const( 0x07E9, 0x81FFFFFFFFFFFBFF ),
|
|
extF80Const( 0x263F, 0x801FFFFFFEFFFFFE ),
|
|
extF80Const( 0x403C, 0x801FFFFFFFF7FFFF ),
|
|
extF80Const( 0x4018, 0x8000000000080003 )
|
|
};
|
|
|
|
static
|
|
void
|
|
time_az_extF80_pos( void function( const extFloat80_t *, extFloat80_t * ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
extFloat80_t z;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, &z );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_extF80[inputNum].f, &z );
|
|
inputNum = (inputNum + 1) & (numInputs_extF80 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
|
|
|
|
#ifdef FLOAT128
|
|
|
|
/*----------------------------------------------------------------------------
| f128Const( hi, lo ) expands to a two-element brace initializer with both
| arguments wrapped in UINT64_C.  When LITTLEENDIAN is defined the `lo'
| element comes first; otherwise `hi' comes first.  numInputs_f128 is the
| number of entries in the float128_t input table.
*----------------------------------------------------------------------------*/
#ifndef LITTLEENDIAN
#define f128Const( hi, lo ) { UINT64_C( hi ), UINT64_C( lo ) }
#else
#define f128Const( hi, lo ) { UINT64_C( lo ), UINT64_C( hi ) }
#endif

enum { numInputs_f128 = 32 };
|
|
|
|
static
|
|
const union { struct uint128 ui; float128_t f; }
|
|
inputs_f128[numInputs_f128] = {
|
|
f128Const( 0x3FDA200000100000, 0x0000000000000000 ),
|
|
f128Const( 0x3FFF000000000000, 0x0000000000000000 ),
|
|
f128Const( 0x85F14776190C8306, 0xD8715F4E3D54BB92 ),
|
|
f128Const( 0xF2B00000007FFFFF, 0xFFFFFFFFFFF7FFFF ),
|
|
f128Const( 0x8000000000000000, 0x0000000000000000 ),
|
|
f128Const( 0xBFFFFFFFFFE00000, 0x0000008000000000 ),
|
|
f128Const( 0x407F1719CE722F3E, 0xDA6B3FE5FF29425B ),
|
|
f128Const( 0x43FFFF8000000000, 0x0000000000400000 ),
|
|
f128Const( 0x401E000000000100, 0x0000000000002000 ),
|
|
f128Const( 0x3FFED71DACDA8E47, 0x4860E3C75D224F28 ),
|
|
f128Const( 0xBF7ECFC1E90647D1, 0x7A124FE55623EE44 ),
|
|
f128Const( 0x0DF7007FFFFFFFFF, 0xFFFFFFFFEFFFFFFF ),
|
|
f128Const( 0x3FE5FFEFFFFFFFFF, 0xFFFFFFFFFFFFEFFF ),
|
|
f128Const( 0x403FFFFFFFFFFFFF, 0xFFFFFFFFFFFFFBFE ),
|
|
f128Const( 0xBFFB2FBF7399AFEB, 0xA459EE6A5C16CA55 ),
|
|
f128Const( 0xBDB8FFFFFFFFFFFC, 0x0000000000000400 ),
|
|
f128Const( 0x3FC8FFDFFFFFFFFF, 0xFFFFFFFFF0000000 ),
|
|
f128Const( 0x3FFBFFFFFFDFFFFF, 0xFFF8000000000000 ),
|
|
f128Const( 0x407043C11737BE84, 0xDDD58212ADC937F4 ),
|
|
f128Const( 0x8001000000000000, 0x0000001000000001 ),
|
|
f128Const( 0xC036FFFFFFFFFFFF, 0xFE40000000000000 ),
|
|
f128Const( 0x4002FFFFFE000002, 0x0000000000000000 ),
|
|
f128Const( 0x4000C3FEDE897773, 0x326AC4FD8EFBE6DC ),
|
|
f128Const( 0xBFFF0000000FFFFF, 0xFFFFFE0000000000 ),
|
|
f128Const( 0x62C3E502146E426D, 0x43F3CAA0DC7DF1A0 ),
|
|
f128Const( 0xB5CBD32E52BB570E, 0xBCC477CB11C6236C ),
|
|
f128Const( 0xE228FFFFFFC00000, 0x0000000000000000 ),
|
|
f128Const( 0x3F80000000000000, 0x0000000080000008 ),
|
|
f128Const( 0xC1AFFFDFFFFFFFFF, 0xFFFC000000000000 ),
|
|
f128Const( 0xC96F000000000000, 0x00000001FFFBFFFF ),
|
|
f128Const( 0x3DE09BFE7923A338, 0xBCC8FBBD7CEC1F4F ),
|
|
f128Const( 0x401CFFFFFFFFFFFF, 0xFFFFFFFEFFFFFF80 )
|
|
};
|
|
|
|
static
|
|
void
|
|
time_a_f128_z_ui32_rx(
|
|
uint_fast32_t function( const float128_t *, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f128_z_ui64_rx(
|
|
uint_fast64_t function( const float128_t *, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f128_z_i32_rx(
|
|
int_fast32_t function( const float128_t *, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f128_z_i64_rx(
|
|
int_fast64_t function( const float128_t *, uint_fast8_t, bool ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, roundingMode, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f128_z_ui32_x(
|
|
uint_fast32_t function( const float128_t *, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f128_z_ui64_x(
|
|
uint_fast64_t function( const float128_t *, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f128_z_i32_x(
|
|
int_fast32_t function( const float128_t *, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_a_f128_z_i64_x(
|
|
int_fast64_t function( const float128_t *, bool ), bool exact )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, exact );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
#ifdef FLOAT16
|
|
|
|
/*----------------------------------------------------------------------------
| Times `function', which takes a pointer to a 128-bit float and returns a
| `float16_t'.  The result of each call is discarded.
|   Batches of `minIterations' calls are made until `clock' shows that at
| least CLOCKS_PER_SEC ticks have passed; the resulting number of calls is
| then repeated as the measured run and reported through `reportTime'.
| Operands are taken cyclically from `inputs_f128'.
*----------------------------------------------------------------------------*/
static void time_a_f128_z_f16( float16_t function( const float128_t * ) )
{
    int_fast64_t numCalls = 0;
    int_fast64_t left;
    int idx = 0;
    clock_t tStart, tStop;

    /* Calibration: keep adding batches until enough clock time has passed. */
    tStart = clock();
    do {
        left = minIterations;
        while ( left ) {
            function( &inputs_f128[idx].f );
            idx = (idx + 1) & (numInputs_f128 - 1);
            --left;
        }
        numCalls += minIterations;
    } while ( clock() - tStart < CLOCKS_PER_SEC );

    /* Measured run: restart from the first operand. */
    idx = 0;
    left = numCalls;
    tStart = clock();
    while ( left ) {
        function( &inputs_f128[idx].f );
        idx = (idx + 1) & (numInputs_f128 - 1);
        --left;
    }
    tStop = clock();
    reportTime( numCalls, tStop - tStart );

}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------
| Times `function', which takes a pointer to a 128-bit float and returns a
| `float32_t'.  The result of each call is discarded.
|   Batches of `minIterations' calls are made until `clock' shows that at
| least CLOCKS_PER_SEC ticks have passed; the resulting number of calls is
| then repeated as the measured run and reported through `reportTime'.
| Operands are taken cyclically from `inputs_f128'.
*----------------------------------------------------------------------------*/
static void time_a_f128_z_f32( float32_t function( const float128_t * ) )
{
    int_fast64_t numCalls = 0;
    int_fast64_t left;
    int idx = 0;
    clock_t tStart, tStop;

    /* Calibration: keep adding batches until enough clock time has passed. */
    tStart = clock();
    do {
        left = minIterations;
        while ( left ) {
            function( &inputs_f128[idx].f );
            idx = (idx + 1) & (numInputs_f128 - 1);
            --left;
        }
        numCalls += minIterations;
    } while ( clock() - tStart < CLOCKS_PER_SEC );

    /* Measured run: restart from the first operand. */
    idx = 0;
    left = numCalls;
    tStart = clock();
    while ( left ) {
        function( &inputs_f128[idx].f );
        idx = (idx + 1) & (numInputs_f128 - 1);
        --left;
    }
    tStop = clock();
    reportTime( numCalls, tStop - tStart );

}
|
|
|
|
#ifdef FLOAT64
|
|
|
|
/*----------------------------------------------------------------------------
| Times `function', which takes a pointer to a 128-bit float and returns a
| `float64_t'.  The result of each call is discarded.
|   Batches of `minIterations' calls are made until `clock' shows that at
| least CLOCKS_PER_SEC ticks have passed; the resulting number of calls is
| then repeated as the measured run and reported through `reportTime'.
| Operands are taken cyclically from `inputs_f128'.
*----------------------------------------------------------------------------*/
static void time_a_f128_z_f64( float64_t function( const float128_t * ) )
{
    int_fast64_t numCalls = 0;
    int_fast64_t left;
    int idx = 0;
    clock_t tStart, tStop;

    /* Calibration: keep adding batches until enough clock time has passed. */
    tStart = clock();
    do {
        left = minIterations;
        while ( left ) {
            function( &inputs_f128[idx].f );
            idx = (idx + 1) & (numInputs_f128 - 1);
            --left;
        }
        numCalls += minIterations;
    } while ( clock() - tStart < CLOCKS_PER_SEC );

    /* Measured run: restart from the first operand. */
    idx = 0;
    left = numCalls;
    tStart = clock();
    while ( left ) {
        function( &inputs_f128[idx].f );
        idx = (idx + 1) & (numInputs_f128 - 1);
        --left;
    }
    tStop = clock();
    reportTime( numCalls, tStop - tStart );

}
|
|
|
|
#endif
|
|
|
|
#ifdef EXTFLOAT80
|
|
|
|
static
|
|
void
|
|
time_a_f128_z_extF80( void function( const float128_t *, extFloat80_t * ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
extFloat80_t z;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, &z );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, &z );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
static
|
|
void
|
|
time_az_f128_rx(
|
|
void function( const float128_t *, uint_fast8_t, bool, float128_t * ),
|
|
uint_fast8_t roundingMode,
|
|
bool exact
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
float128_t z;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, roundingMode, exact, &z );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, roundingMode, exact, &z );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_abz_f128(
|
|
void function( const float128_t *, const float128_t *, float128_t * ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNumA, inputNumB;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
float128_t z;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function(
|
|
&inputs_f128[inputNumA].f, &inputs_f128[inputNumB].f, &z );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f128 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNumA].f, &inputs_f128[inputNumB].f, &z );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f128 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_abcz_f128(
|
|
void
|
|
function(
|
|
const float128_t *,
|
|
const float128_t *,
|
|
const float128_t *,
|
|
float128_t *
|
|
)
|
|
)
|
|
{
|
|
int_fast64_t count;
|
|
int inputNumA, inputNumB, inputNumC;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
float128_t z;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
inputNumC = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function(
|
|
&inputs_f128[inputNumA].f,
|
|
&inputs_f128[inputNumB].f,
|
|
&inputs_f128[inputNumC].f,
|
|
&z
|
|
);
|
|
inputNumA = (inputNumA + 1) & (numInputs_f128 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f128 - 1);
|
|
if ( ! inputNumB ) ++inputNumC;
|
|
inputNumC = (inputNumC + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
inputNumC = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function(
|
|
&inputs_f128[inputNumA].f,
|
|
&inputs_f128[inputNumB].f,
|
|
&inputs_f128[inputNumC].f,
|
|
&z
|
|
);
|
|
inputNumA = (inputNumA + 1) & (numInputs_f128 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f128 - 1);
|
|
if ( ! inputNumB ) ++inputNumC;
|
|
inputNumC = (inputNumC + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
void
|
|
time_ab_f128_z_bool( bool function( const float128_t *, const float128_t * ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNumA, inputNumB;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_f128[inputNumA].f, &inputs_f128[inputNumB].f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f128 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNumA = 0;
|
|
inputNumB = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNumA].f, &inputs_f128[inputNumB].f );
|
|
inputNumA = (inputNumA + 1) & (numInputs_f128 - 1);
|
|
if ( ! inputNumA ) ++inputNumB;
|
|
inputNumB = (inputNumB + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
static
|
|
const union { struct uint128 ui; float128_t f; }
|
|
inputs_f128_pos[numInputs_f128] = {
|
|
f128Const( 0x3FDA200000100000, 0x0000000000000000 ),
|
|
f128Const( 0x3FFF000000000000, 0x0000000000000000 ),
|
|
f128Const( 0x05F14776190C8306, 0xD8715F4E3D54BB92 ),
|
|
f128Const( 0x72B00000007FFFFF, 0xFFFFFFFFFFF7FFFF ),
|
|
f128Const( 0x0000000000000000, 0x0000000000000000 ),
|
|
f128Const( 0x3FFFFFFFFFE00000, 0x0000008000000000 ),
|
|
f128Const( 0x407F1719CE722F3E, 0xDA6B3FE5FF29425B ),
|
|
f128Const( 0x43FFFF8000000000, 0x0000000000400000 ),
|
|
f128Const( 0x401E000000000100, 0x0000000000002000 ),
|
|
f128Const( 0x3FFED71DACDA8E47, 0x4860E3C75D224F28 ),
|
|
f128Const( 0x3F7ECFC1E90647D1, 0x7A124FE55623EE44 ),
|
|
f128Const( 0x0DF7007FFFFFFFFF, 0xFFFFFFFFEFFFFFFF ),
|
|
f128Const( 0x3FE5FFEFFFFFFFFF, 0xFFFFFFFFFFFFEFFF ),
|
|
f128Const( 0x403FFFFFFFFFFFFF, 0xFFFFFFFFFFFFFBFE ),
|
|
f128Const( 0x3FFB2FBF7399AFEB, 0xA459EE6A5C16CA55 ),
|
|
f128Const( 0x3DB8FFFFFFFFFFFC, 0x0000000000000400 ),
|
|
f128Const( 0x3FC8FFDFFFFFFFFF, 0xFFFFFFFFF0000000 ),
|
|
f128Const( 0x3FFBFFFFFFDFFFFF, 0xFFF8000000000000 ),
|
|
f128Const( 0x407043C11737BE84, 0xDDD58212ADC937F4 ),
|
|
f128Const( 0x0001000000000000, 0x0000001000000001 ),
|
|
f128Const( 0x4036FFFFFFFFFFFF, 0xFE40000000000000 ),
|
|
f128Const( 0x4002FFFFFE000002, 0x0000000000000000 ),
|
|
f128Const( 0x4000C3FEDE897773, 0x326AC4FD8EFBE6DC ),
|
|
f128Const( 0x3FFF0000000FFFFF, 0xFFFFFE0000000000 ),
|
|
f128Const( 0x62C3E502146E426D, 0x43F3CAA0DC7DF1A0 ),
|
|
f128Const( 0x35CBD32E52BB570E, 0xBCC477CB11C6236C ),
|
|
f128Const( 0x6228FFFFFFC00000, 0x0000000000000000 ),
|
|
f128Const( 0x3F80000000000000, 0x0000000080000008 ),
|
|
f128Const( 0x41AFFFDFFFFFFFFF, 0xFFFC000000000000 ),
|
|
f128Const( 0x496F000000000000, 0x00000001FFFBFFFF ),
|
|
f128Const( 0x3DE09BFE7923A338, 0xBCC8FBBD7CEC1F4F ),
|
|
f128Const( 0x401CFFFFFFFFFFFF, 0xFFFFFFFEFFFFFF80 )
|
|
};
|
|
|
|
static
|
|
void time_az_f128_pos( void function( const float128_t *, float128_t * ) )
|
|
{
|
|
int_fast64_t count;
|
|
int inputNum;
|
|
clock_t startClock;
|
|
int_fast64_t i;
|
|
float128_t z;
|
|
clock_t endClock;
|
|
|
|
count = 0;
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
do {
|
|
for ( i = minIterations; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, &z );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
count += minIterations;
|
|
} while ( clock() - startClock < CLOCKS_PER_SEC );
|
|
inputNum = 0;
|
|
startClock = clock();
|
|
for ( i = count; i; --i ) {
|
|
function( &inputs_f128[inputNum].f, &z );
|
|
inputNum = (inputNum + 1) & (numInputs_f128 - 1);
|
|
}
|
|
endClock = clock();
|
|
reportTime( count, endClock - startClock );
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------------
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
static
|
|
void
|
|
timeFunctionInstance(
|
|
int functionCode, uint_fast8_t roundingMode, bool exact )
|
|
{
|
|
#ifdef FLOAT16
|
|
float16_t (*function_abz_f16)( float16_t, float16_t );
|
|
bool (*function_ab_f16_z_bool)( float16_t, float16_t );
|
|
#endif
|
|
float32_t (*function_abz_f32)( float32_t, float32_t );
|
|
bool (*function_ab_f32_z_bool)( float32_t, float32_t );
|
|
#ifdef FLOAT64
|
|
float64_t (*function_abz_f64)( float64_t, float64_t );
|
|
bool (*function_ab_f64_z_bool)( float64_t, float64_t );
|
|
#endif
|
|
#ifdef EXTFLOAT80
|
|
void
|
|
(*function_abz_extF80)(
|
|
const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
|
|
bool
|
|
(*function_ab_extF80_z_bool)(
|
|
const extFloat80_t *, const extFloat80_t * );
|
|
#endif
|
|
#ifdef FLOAT128
|
|
void
|
|
(*function_abz_f128)(
|
|
const float128_t *, const float128_t *, float128_t * );
|
|
bool (*function_ab_f128_z_bool)( const float128_t *, const float128_t * );
|
|
#endif
|
|
|
|
switch ( functionCode ) {
|
|
/*--------------------------------------------------------------------
|
|
*--------------------------------------------------------------------*/
|
|
#ifdef FLOAT16
|
|
case UI32_TO_F16:
|
|
time_a_ui32_z_f16( ui32_to_f16 );
|
|
break;
|
|
#endif
|
|
case UI32_TO_F32:
|
|
time_a_ui32_z_f32( ui32_to_f32 );
|
|
break;
|
|
#ifdef FLOAT64
|
|
case UI32_TO_F64:
|
|
time_a_ui32_z_f64( ui32_to_f64 );
|
|
break;
|
|
#endif
|
|
#ifdef EXTFLOAT80
|
|
case UI32_TO_EXTF80:
|
|
time_a_ui32_z_extF80( ui32_to_extF80M );
|
|
break;
|
|
#endif
|
|
#ifdef FLOAT128
|
|
case UI32_TO_F128:
|
|
time_a_ui32_z_f128( ui32_to_f128M );
|
|
break;
|
|
#endif
|
|
#ifdef FLOAT16
|
|
case UI64_TO_F16:
|
|
time_a_ui64_z_f16( ui64_to_f16 );
|
|
break;
|
|
#endif
|
|
case UI64_TO_F32:
|
|
time_a_ui64_z_f32( ui64_to_f32 );
|
|
break;
|
|
#ifdef FLOAT64
|
|
case UI64_TO_F64:
|
|
time_a_ui64_z_f64( ui64_to_f64 );
|
|
break;
|
|
#endif
|
|
#ifdef EXTFLOAT80
|
|
case UI64_TO_EXTF80:
|
|
time_a_ui64_z_extF80( ui64_to_extF80M );
|
|
break;
|
|
#endif
|
|
#ifdef FLOAT128
|
|
case UI64_TO_F128:
|
|
time_a_ui64_z_f128( ui64_to_f128M );
|
|
break;
|
|
#endif
|
|
#ifdef FLOAT16
|
|
case I32_TO_F16:
|
|
time_a_i32_z_f16( i32_to_f16 );
|
|
break;
|
|
#endif
|
|
case I32_TO_F32:
|
|
time_a_i32_z_f32( i32_to_f32 );
|
|
break;
|
|
#ifdef FLOAT64
|
|
case I32_TO_F64:
|
|
time_a_i32_z_f64( i32_to_f64 );
|
|
break;
|
|
#endif
|
|
#ifdef EXTFLOAT80
|
|
case I32_TO_EXTF80:
|
|
time_a_i32_z_extF80( i32_to_extF80M );
|
|
break;
|
|
#endif
|
|
#ifdef FLOAT128
|
|
case I32_TO_F128:
|
|
time_a_i32_z_f128( i32_to_f128M );
|
|
break;
|
|
#endif
|
|
#ifdef FLOAT16
|
|
case I64_TO_F16:
|
|
time_a_i64_z_f16( i64_to_f16 );
|
|
break;
|
|
#endif
|
|
case I64_TO_F32:
|
|
time_a_i64_z_f32( i64_to_f32 );
|
|
break;
|
|
#ifdef FLOAT64
|
|
case I64_TO_F64:
|
|
time_a_i64_z_f64( i64_to_f64 );
|
|
break;
|
|
#endif
|
|
#ifdef EXTFLOAT80
|
|
case I64_TO_EXTF80:
|
|
time_a_i64_z_extF80( i64_to_extF80M );
|
|
break;
|
|
#endif
|
|
#ifdef FLOAT128
|
|
case I64_TO_F128:
|
|
time_a_i64_z_f128( i64_to_f128M );
|
|
break;
|
|
#endif
|
|
/*--------------------------------------------------------------------
|
|
*--------------------------------------------------------------------*/
|
|
#ifdef FLOAT16
|
|
case F16_TO_UI32:
|
|
time_a_f16_z_ui32_rx( f16_to_ui32, roundingMode, exact );
|
|
break;
|
|
case F16_TO_UI64:
|
|
time_a_f16_z_ui64_rx( f16_to_ui64, roundingMode, exact );
|
|
break;
|
|
case F16_TO_I32:
|
|
time_a_f16_z_i32_rx( f16_to_i32, roundingMode, exact );
|
|
break;
|
|
case F16_TO_I64:
|
|
time_a_f16_z_i64_rx( f16_to_i64, roundingMode, exact );
|
|
break;
|
|
case F16_TO_UI32_R_MINMAG:
|
|
time_a_f16_z_ui32_x( f16_to_ui32_r_minMag, exact );
|
|
break;
|
|
case F16_TO_UI64_R_MINMAG:
|
|
time_a_f16_z_ui64_x( f16_to_ui64_r_minMag, exact );
|
|
break;
|
|
case F16_TO_I32_R_MINMAG:
|
|
time_a_f16_z_i32_x( f16_to_i32_r_minMag, exact );
|
|
break;
|
|
case F16_TO_I64_R_MINMAG:
|
|
time_a_f16_z_i64_x( f16_to_i64_r_minMag, exact );
|
|
break;
|
|
case F16_TO_F32:
|
|
time_a_f16_z_f32( f16_to_f32 );
|
|
break;
|
|
#ifdef FLOAT64
|
|
case F16_TO_F64:
|
|
time_a_f16_z_f64( f16_to_f64 );
|
|
break;
|
|
#endif
|
|
#ifdef EXTFLOAT80
|
|
case F16_TO_EXTF80:
|
|
time_a_f16_z_extF80( f16_to_extF80M );
|
|
break;
|
|
#endif
|
|
#ifdef FLOAT128
|
|
case F16_TO_F128:
|
|
time_a_f16_z_f128( f16_to_f128M );
|
|
break;
|
|
#endif
|
|
case F16_ROUNDTOINT:
|
|
time_az_f16_rx( f16_roundToInt, roundingMode, exact );
|
|
break;
|
|
case F16_ADD:
|
|
function_abz_f16 = f16_add;
|
|
goto time_abz_f16;
|
|
case F16_SUB:
|
|
function_abz_f16 = f16_sub;
|
|
goto time_abz_f16;
|
|
case F16_MUL:
|
|
function_abz_f16 = f16_mul;
|
|
goto time_abz_f16;
|
|
case F16_DIV:
|
|
function_abz_f16 = f16_div;
|
|
goto time_abz_f16;
|
|
case F16_REM:
|
|
function_abz_f16 = f16_rem;
|
|
time_abz_f16:
|
|
time_abz_f16( function_abz_f16 );
|
|
break;
|
|
case F16_MULADD:
|
|
time_abcz_f16( f16_mulAdd );
|
|
break;
|
|
case F16_SQRT:
|
|
time_az_f16_pos( f16_sqrt );
|
|
break;
|
|
case F16_EQ:
|
|
function_ab_f16_z_bool = f16_eq;
|
|
goto time_ab_f16_z_bool;
|
|
case F16_LE:
|
|
function_ab_f16_z_bool = f16_le;
|
|
goto time_ab_f16_z_bool;
|
|
case F16_LT:
|
|
function_ab_f16_z_bool = f16_lt;
|
|
goto time_ab_f16_z_bool;
|
|
case F16_EQ_SIGNALING:
|
|
function_ab_f16_z_bool = f16_eq_signaling;
|
|
goto time_ab_f16_z_bool;
|
|
case F16_LE_QUIET:
|
|
function_ab_f16_z_bool = f16_le_quiet;
|
|
goto time_ab_f16_z_bool;
|
|
case F16_LT_QUIET:
|
|
function_ab_f16_z_bool = f16_lt_quiet;
|
|
time_ab_f16_z_bool:
|
|
time_ab_f16_z_bool( function_ab_f16_z_bool );
|
|
break;
|
|
#endif
|
|
/*--------------------------------------------------------------------
|
|
*--------------------------------------------------------------------*/
|
|
case F32_TO_UI32:
|
|
time_a_f32_z_ui32_rx( f32_to_ui32, roundingMode, exact );
|
|
break;
|
|
case F32_TO_UI64:
|
|
time_a_f32_z_ui64_rx( f32_to_ui64, roundingMode, exact );
|
|
break;
|
|
case F32_TO_I32:
|
|
time_a_f32_z_i32_rx( f32_to_i32, roundingMode, exact );
|
|
break;
|
|
case F32_TO_I64:
|
|
time_a_f32_z_i64_rx( f32_to_i64, roundingMode, exact );
|
|
break;
|
|
case F32_TO_UI32_R_MINMAG:
|
|
time_a_f32_z_ui32_x( f32_to_ui32_r_minMag, exact );
|
|
break;
|
|
case F32_TO_UI64_R_MINMAG:
|
|
time_a_f32_z_ui64_x( f32_to_ui64_r_minMag, exact );
|
|
break;
|
|
case F32_TO_I32_R_MINMAG:
|
|
time_a_f32_z_i32_x( f32_to_i32_r_minMag, exact );
|
|
break;
|
|
case F32_TO_I64_R_MINMAG:
|
|
time_a_f32_z_i64_x( f32_to_i64_r_minMag, exact );
|
|
break;
|
|
#ifdef FLOAT16
|
|
case F32_TO_F16:
|
|
time_a_f32_z_f16( f32_to_f16 );
|
|
break;
|
|
#endif
|
|
#ifdef FLOAT64
|
|
case F32_TO_F64:
|
|
time_a_f32_z_f64( f32_to_f64 );
|
|
break;
|
|
#endif
|
|
#ifdef EXTFLOAT80
|
|
case F32_TO_EXTF80:
|
|
time_a_f32_z_extF80( f32_to_extF80M );
|
|
break;
|
|
#endif
|
|
#ifdef FLOAT128
|
|
case F32_TO_F128:
|
|
time_a_f32_z_f128( f32_to_f128M );
|
|
break;
|
|
#endif
|
|
case F32_ROUNDTOINT:
|
|
time_az_f32_rx( f32_roundToInt, roundingMode, exact );
|
|
break;
|
|
case F32_ADD:
|
|
function_abz_f32 = f32_add;
|
|
goto time_abz_f32;
|
|
case F32_SUB:
|
|
function_abz_f32 = f32_sub;
|
|
goto time_abz_f32;
|
|
case F32_MUL:
|
|
function_abz_f32 = f32_mul;
|
|
goto time_abz_f32;
|
|
case F32_DIV:
|
|
function_abz_f32 = f32_div;
|
|
goto time_abz_f32;
|
|
case F32_REM:
|
|
function_abz_f32 = f32_rem;
|
|
time_abz_f32:
|
|
time_abz_f32( function_abz_f32 );
|
|
break;
|
|
case F32_MULADD:
|
|
time_abcz_f32( f32_mulAdd );
|
|
break;
|
|
case F32_SQRT:
|
|
time_az_f32_pos( f32_sqrt );
|
|
break;
|
|
case F32_EQ:
|
|
function_ab_f32_z_bool = f32_eq;
|
|
goto time_ab_f32_z_bool;
|
|
case F32_LE:
|
|
function_ab_f32_z_bool = f32_le;
|
|
goto time_ab_f32_z_bool;
|
|
case F32_LT:
|
|
function_ab_f32_z_bool = f32_lt;
|
|
goto time_ab_f32_z_bool;
|
|
case F32_EQ_SIGNALING:
|
|
function_ab_f32_z_bool = f32_eq_signaling;
|
|
goto time_ab_f32_z_bool;
|
|
case F32_LE_QUIET:
|
|
function_ab_f32_z_bool = f32_le_quiet;
|
|
goto time_ab_f32_z_bool;
|
|
case F32_LT_QUIET:
|
|
function_ab_f32_z_bool = f32_lt_quiet;
|
|
time_ab_f32_z_bool:
|
|
time_ab_f32_z_bool( function_ab_f32_z_bool );
|
|
break;
|
|
/*--------------------------------------------------------------------
|
|
*--------------------------------------------------------------------*/
|
|
#ifdef FLOAT64
|
|
case F64_TO_UI32:
|
|
time_a_f64_z_ui32_rx( f64_to_ui32, roundingMode, exact );
|
|
break;
|
|
case F64_TO_UI64:
|
|
time_a_f64_z_ui64_rx( f64_to_ui64, roundingMode, exact );
|
|
break;
|
|
case F64_TO_I32:
|
|
time_a_f64_z_i32_rx( f64_to_i32, roundingMode, exact );
|
|
break;
|
|
case F64_TO_I64:
|
|
time_a_f64_z_i64_rx( f64_to_i64, roundingMode, exact );
|
|
break;
|
|
case F64_TO_UI32_R_MINMAG:
|
|
time_a_f64_z_ui32_x( f64_to_ui32_r_minMag, exact );
|
|
break;
|
|
case F64_TO_UI64_R_MINMAG:
|
|
time_a_f64_z_ui64_x( f64_to_ui64_r_minMag, exact );
|
|
break;
|
|
case F64_TO_I32_R_MINMAG:
|
|
time_a_f64_z_i32_x( f64_to_i32_r_minMag, exact );
|
|
break;
|
|
case F64_TO_I64_R_MINMAG:
|
|
time_a_f64_z_i64_x( f64_to_i64_r_minMag, exact );
|
|
break;
|
|
#ifdef FLOAT16
|
|
case F64_TO_F16:
|
|
time_a_f64_z_f16( f64_to_f16 );
|
|
break;
|
|
#endif
|
|
case F64_TO_F32:
|
|
time_a_f64_z_f32( f64_to_f32 );
|
|
break;
|
|
#ifdef EXTFLOAT80
|
|
case F64_TO_EXTF80:
|
|
time_a_f64_z_extF80( f64_to_extF80M );
|
|
break;
|
|
#endif
|
|
#ifdef FLOAT128
|
|
case F64_TO_F128:
|
|
time_a_f64_z_f128( f64_to_f128M );
|
|
break;
|
|
#endif
|
|
case F64_ROUNDTOINT:
|
|
time_az_f64_rx( f64_roundToInt, roundingMode, exact );
|
|
break;
|
|
case F64_ADD:
|
|
function_abz_f64 = f64_add;
|
|
goto time_abz_f64;
|
|
case F64_SUB:
|
|
function_abz_f64 = f64_sub;
|
|
goto time_abz_f64;
|
|
case F64_MUL:
|
|
function_abz_f64 = f64_mul;
|
|
goto time_abz_f64;
|
|
case F64_DIV:
|
|
function_abz_f64 = f64_div;
|
|
goto time_abz_f64;
|
|
case F64_REM:
|
|
function_abz_f64 = f64_rem;
|
|
time_abz_f64:
|
|
time_abz_f64( function_abz_f64 );
|
|
break;
|
|
case F64_MULADD:
|
|
time_abcz_f64( f64_mulAdd );
|
|
break;
|
|
case F64_SQRT:
|
|
time_az_f64_pos( f64_sqrt );
|
|
break;
|
|
case F64_EQ:
|
|
function_ab_f64_z_bool = f64_eq;
|
|
goto time_ab_f64_z_bool;
|
|
case F64_LE:
|
|
function_ab_f64_z_bool = f64_le;
|
|
goto time_ab_f64_z_bool;
|
|
case F64_LT:
|
|
function_ab_f64_z_bool = f64_lt;
|
|
goto time_ab_f64_z_bool;
|
|
case F64_EQ_SIGNALING:
|
|
function_ab_f64_z_bool = f64_eq_signaling;
|
|
goto time_ab_f64_z_bool;
|
|
case F64_LE_QUIET:
|
|
function_ab_f64_z_bool = f64_le_quiet;
|
|
goto time_ab_f64_z_bool;
|
|
case F64_LT_QUIET:
|
|
function_ab_f64_z_bool = f64_lt_quiet;
|
|
time_ab_f64_z_bool:
|
|
time_ab_f64_z_bool( function_ab_f64_z_bool );
|
|
break;
|
|
#endif
|
|
/*--------------------------------------------------------------------
|
|
*--------------------------------------------------------------------*/
|
|
#ifdef EXTFLOAT80
|
|
case EXTF80_TO_UI32:
|
|
time_a_extF80_z_ui32_rx( extF80M_to_ui32, roundingMode, exact );
|
|
break;
|
|
case EXTF80_TO_UI64:
|
|
time_a_extF80_z_ui64_rx( extF80M_to_ui64, roundingMode, exact );
|
|
break;
|
|
case EXTF80_TO_I32:
|
|
time_a_extF80_z_i32_rx( extF80M_to_i32, roundingMode, exact );
|
|
break;
|
|
case EXTF80_TO_I64:
|
|
time_a_extF80_z_i64_rx( extF80M_to_i64, roundingMode, exact );
|
|
break;
|
|
case EXTF80_TO_UI32_R_MINMAG:
|
|
time_a_extF80_z_ui32_x( extF80M_to_ui32_r_minMag, exact );
|
|
break;
|
|
case EXTF80_TO_UI64_R_MINMAG:
|
|
time_a_extF80_z_ui64_x( extF80M_to_ui64_r_minMag, exact );
|
|
break;
|
|
case EXTF80_TO_I32_R_MINMAG:
|
|
time_a_extF80_z_i32_x( extF80M_to_i32_r_minMag, exact );
|
|
break;
|
|
case EXTF80_TO_I64_R_MINMAG:
|
|
time_a_extF80_z_i64_x( extF80M_to_i64_r_minMag, exact );
|
|
break;
|
|
#ifdef FLOAT16
|
|
case EXTF80_TO_F16:
|
|
time_a_extF80_z_f16( extF80M_to_f16 );
|
|
break;
|
|
#endif
|
|
case EXTF80_TO_F32:
|
|
time_a_extF80_z_f32( extF80M_to_f32 );
|
|
break;
|
|
#ifdef FLOAT64
|
|
case EXTF80_TO_F64:
|
|
time_a_extF80_z_f64( extF80M_to_f64 );
|
|
break;
|
|
#endif
|
|
#ifdef FLOAT128
|
|
case EXTF80_TO_F128:
|
|
time_a_extF80_z_f128( extF80M_to_f128M );
|
|
break;
|
|
#endif
|
|
case EXTF80_ROUNDTOINT:
|
|
time_az_extF80_rx( extF80M_roundToInt, roundingMode, exact );
|
|
break;
|
|
case EXTF80_ADD:
|
|
function_abz_extF80 = extF80M_add;
|
|
goto time_abz_extF80;
|
|
case EXTF80_SUB:
|
|
function_abz_extF80 = extF80M_sub;
|
|
goto time_abz_extF80;
|
|
case EXTF80_MUL:
|
|
function_abz_extF80 = extF80M_mul;
|
|
goto time_abz_extF80;
|
|
case EXTF80_DIV:
|
|
function_abz_extF80 = extF80M_div;
|
|
goto time_abz_extF80;
|
|
case EXTF80_REM:
|
|
function_abz_extF80 = extF80M_rem;
|
|
time_abz_extF80:
|
|
time_abz_extF80( function_abz_extF80 );
|
|
break;
|
|
case EXTF80_SQRT:
|
|
time_az_extF80_pos( extF80M_sqrt );
|
|
break;
|
|
case EXTF80_EQ:
|
|
function_ab_extF80_z_bool = extF80M_eq;
|
|
goto time_ab_extF80_z_bool;
|
|
case EXTF80_LE:
|
|
function_ab_extF80_z_bool = extF80M_le;
|
|
goto time_ab_extF80_z_bool;
|
|
case EXTF80_LT:
|
|
function_ab_extF80_z_bool = extF80M_lt;
|
|
goto time_ab_extF80_z_bool;
|
|
case EXTF80_EQ_SIGNALING:
|
|
function_ab_extF80_z_bool = extF80M_eq_signaling;
|
|
goto time_ab_extF80_z_bool;
|
|
case EXTF80_LE_QUIET:
|
|
function_ab_extF80_z_bool = extF80M_le_quiet;
|
|
goto time_ab_extF80_z_bool;
|
|
case EXTF80_LT_QUIET:
|
|
function_ab_extF80_z_bool = extF80M_lt_quiet;
|
|
time_ab_extF80_z_bool:
|
|
time_ab_extF80_z_bool( function_ab_extF80_z_bool );
|
|
break;
|
|
#endif
|
|
/*--------------------------------------------------------------------
|
|
*--------------------------------------------------------------------*/
|
|
#ifdef FLOAT128
|
|
case F128_TO_UI32:
|
|
time_a_f128_z_ui32_rx( f128M_to_ui32, roundingMode, exact );
|
|
break;
|
|
case F128_TO_UI64:
|
|
time_a_f128_z_ui64_rx( f128M_to_ui64, roundingMode, exact );
|
|
break;
|
|
case F128_TO_I32:
|
|
time_a_f128_z_i32_rx( f128M_to_i32, roundingMode, exact );
|
|
break;
|
|
case F128_TO_I64:
|
|
time_a_f128_z_i64_rx( f128M_to_i64, roundingMode, exact );
|
|
break;
|
|
case F128_TO_UI32_R_MINMAG:
|
|
time_a_f128_z_ui32_x( f128M_to_ui32_r_minMag, exact );
|
|
break;
|
|
case F128_TO_UI64_R_MINMAG:
|
|
time_a_f128_z_ui64_x( f128M_to_ui64_r_minMag, exact );
|
|
break;
|
|
case F128_TO_I32_R_MINMAG:
|
|
time_a_f128_z_i32_x( f128M_to_i32_r_minMag, exact );
|
|
break;
|
|
case F128_TO_I64_R_MINMAG:
|
|
time_a_f128_z_i64_x( f128M_to_i64_r_minMag, exact );
|
|
break;
|
|
#ifdef FLOAT16
|
|
case F128_TO_F16:
|
|
time_a_f128_z_f16( f128M_to_f16 );
|
|
break;
|
|
#endif
|
|
case F128_TO_F32:
|
|
time_a_f128_z_f32( f128M_to_f32 );
|
|
break;
|
|
#ifdef FLOAT64
|
|
case F128_TO_F64:
|
|
time_a_f128_z_f64( f128M_to_f64 );
|
|
break;
|
|
#endif
|
|
#ifdef EXTFLOAT80
|
|
case F128_TO_EXTF80:
|
|
time_a_f128_z_extF80( f128M_to_extF80M );
|
|
break;
|
|
#endif
|
|
case F128_ROUNDTOINT:
|
|
time_az_f128_rx( f128M_roundToInt, roundingMode, exact );
|
|
break;
|
|
case F128_ADD:
|
|
function_abz_f128 = f128M_add;
|
|
goto time_abz_f128;
|
|
case F128_SUB:
|
|
function_abz_f128 = f128M_sub;
|
|
goto time_abz_f128;
|
|
case F128_MUL:
|
|
function_abz_f128 = f128M_mul;
|
|
goto time_abz_f128;
|
|
case F128_DIV:
|
|
function_abz_f128 = f128M_div;
|
|
goto time_abz_f128;
|
|
case F128_REM:
|
|
function_abz_f128 = f128M_rem;
|
|
time_abz_f128:
|
|
time_abz_f128( function_abz_f128 );
|
|
break;
|
|
case F128_MULADD:
|
|
time_abcz_f128( f128M_mulAdd );
|
|
break;
|
|
case F128_SQRT:
|
|
time_az_f128_pos( f128M_sqrt );
|
|
break;
|
|
case F128_EQ:
|
|
function_ab_f128_z_bool = f128M_eq;
|
|
goto time_ab_f128_z_bool;
|
|
case F128_LE:
|
|
function_ab_f128_z_bool = f128M_le;
|
|
goto time_ab_f128_z_bool;
|
|
case F128_LT:
|
|
function_ab_f128_z_bool = f128M_lt;
|
|
goto time_ab_f128_z_bool;
|
|
case F128_EQ_SIGNALING:
|
|
function_ab_f128_z_bool = f128M_eq_signaling;
|
|
goto time_ab_f128_z_bool;
|
|
case F128_LE_QUIET:
|
|
function_ab_f128_z_bool = f128M_le_quiet;
|
|
goto time_ab_f128_z_bool;
|
|
case F128_LT_QUIET:
|
|
function_ab_f128_z_bool = f128M_lt_quiet;
|
|
time_ab_f128_z_bool:
|
|
time_ab_f128_z_bool( function_ab_f128_z_bool );
|
|
break;
|
|
#endif
|
|
}
|
|
|
|
}
|
|
|
|
/*
 * Codes for the `exact' dimension of a timing run.  Both codes are nonzero,
 * which leaves 0 free to be used by this file as "no exact code selected".
 */
enum {
    EXACT_FALSE = 1,
    EXACT_TRUE  = 2
};
|
|
|
|
static
|
|
void
|
|
timeFunction(
|
|
int functionCode,
|
|
uint_fast8_t roundingPrecisionIn,
|
|
int roundingCodeIn,
|
|
int tininessCodeIn,
|
|
int exactCodeIn
|
|
)
|
|
{
|
|
int functionAttribs, exactCode;
|
|
uint_fast8_t roundingMode, tininessMode;
|
|
|
|
functionNamePtr = functionInfos[functionCode].namePtr;
|
|
functionAttribs = functionInfos[functionCode].attribs;
|
|
roundingPrecision = 32;
|
|
for (;;) {
|
|
if ( functionAttribs & FUNC_EFF_ROUNDINGPRECISION ) {
|
|
if ( roundingPrecisionIn ) roundingPrecision = roundingPrecisionIn;
|
|
} else {
|
|
roundingPrecision = 0;
|
|
}
|
|
#ifdef EXTFLOAT80
|
|
if ( roundingPrecision ) extF80_roundingPrecision = roundingPrecision;
|
|
#endif
|
|
for (
|
|
roundingCode = 1; roundingCode < NUM_ROUNDINGMODES; ++roundingCode
|
|
) {
|
|
if (
|
|
functionAttribs
|
|
& (FUNC_ARG_ROUNDINGMODE | FUNC_EFF_ROUNDINGMODE)
|
|
) {
|
|
if ( roundingCodeIn ) roundingCode = roundingCodeIn;
|
|
} else {
|
|
roundingCode = 0;
|
|
}
|
|
if ( roundingCode ) {
|
|
roundingMode = roundingModes[roundingCode];
|
|
if ( functionAttribs & FUNC_EFF_ROUNDINGMODE ) {
|
|
softfloat_roundingMode = roundingMode;
|
|
}
|
|
}
|
|
for (
|
|
exactCode = EXACT_FALSE; exactCode <= EXACT_TRUE; ++exactCode
|
|
) {
|
|
if ( functionAttribs & FUNC_ARG_EXACT ) {
|
|
if ( exactCodeIn ) exactCode = exactCodeIn;
|
|
} else {
|
|
exactCode = 0;
|
|
}
|
|
exact = (exactCode == EXACT_TRUE );
|
|
usesExact = (exactCode != 0 );
|
|
for (
|
|
tininessCode = 1;
|
|
tininessCode < NUM_TININESSMODES;
|
|
++tininessCode
|
|
) {
|
|
if (
|
|
(functionAttribs & FUNC_EFF_TININESSMODE)
|
|
|| ((functionAttribs
|
|
& FUNC_EFF_TININESSMODE_REDUCEDPREC)
|
|
&& roundingPrecision
|
|
&& (roundingPrecision < 80))
|
|
) {
|
|
if ( tininessCodeIn ) tininessCode = tininessCodeIn;
|
|
} else {
|
|
tininessCode = 0;
|
|
}
|
|
if ( tininessCode ) {
|
|
tininessMode = tininessModes[tininessCode];
|
|
softfloat_detectTininess = tininessMode;
|
|
}
|
|
timeFunctionInstance( functionCode, roundingMode, exact );
|
|
if ( tininessCodeIn || ! tininessCode ) break;
|
|
}
|
|
if ( exactCodeIn || ! exactCode ) break;
|
|
}
|
|
if ( roundingCodeIn || ! roundingCode ) break;
|
|
}
|
|
if ( roundingPrecisionIn || ! roundingPrecision ) break;
|
|
if ( roundingPrecision == 80 ) {
|
|
break;
|
|
} else if ( roundingPrecision == 64 ) {
|
|
roundingPrecision = 80;
|
|
} else if ( roundingPrecision == 32 ) {
|
|
roundingPrecision = 64;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
int main( int argc, char *argv[] )
|
|
{
|
|
bool haveFunctionArg;
|
|
int functionCode, numOperands;
|
|
uint_fast8_t roundingPrecision;
|
|
int roundingCode, tininessCode, exactCode;
|
|
const char *argPtr;
|
|
|
|
fail_programName = "timesoftfloat";
|
|
if ( argc <= 1 ) goto writeHelpMessage;
|
|
haveFunctionArg = false;
|
|
functionCode = 0;
|
|
numOperands = 0;
|
|
roundingPrecision = 0;
|
|
roundingCode = 0;
|
|
tininessCode = 0;
|
|
exactCode = 0;
|
|
for (;;) {
|
|
--argc;
|
|
if ( ! argc ) break;
|
|
argPtr = *++argv;
|
|
if ( ! argPtr ) break;
|
|
if ( argPtr[0] == '-' ) ++argPtr;
|
|
if (
|
|
! strcmp( argPtr, "help" ) || ! strcmp( argPtr, "-help" )
|
|
|| ! strcmp( argPtr, "h" )
|
|
) {
|
|
writeHelpMessage:
|
|
fputs(
|
|
"timesoftfloat [<option>...] <function>\n"
|
|
" <option>: (* is default)\n"
|
|
" -help --Write this message and exit.\n"
|
|
#ifdef EXTFLOAT80
|
|
" -precision32 --For extF80, time only 32-bit rounding precision.\n"
|
|
" -precision64 --For extF80, time only 64-bit rounding precision.\n"
|
|
" -precision80 --For extF80, time only 80-bit rounding precision.\n"
|
|
#endif
|
|
" -rnear_even --Time only rounding to nearest/even.\n"
|
|
" -rminMag --Time only rounding to minimum magnitude (toward zero).\n"
|
|
" -rmin --Time only rounding to minimum (down).\n"
|
|
" -rmax --Time only rounding to maximum (up).\n"
|
|
" -rnear_maxMag --Time only rounding to nearest/maximum magnitude\n"
|
|
" (nearest/away).\n"
|
|
#ifdef FLOAT_ROUND_ODD
|
|
" -rodd --Time only rounding to odd (jamming).\n"
|
|
#endif
|
|
" -tininessbefore --Time only underflow tininess detected before rounding.\n"
|
|
" -tininessafter --Time only underflow tininess detected after rounding.\n"
|
|
" -notexact --Time only non-exact rounding to integer (no inexact\n"
|
|
" exception).\n"
|
|
" -exact --Time only exact rounding to integer (allow inexact\n"
|
|
" exception).\n"
|
|
" <function>:\n"
|
|
" <int>_to_<float> <float>_add <float>_eq\n"
|
|
" <float>_to_<int> <float>_sub <float>_le\n"
|
|
" <float>_to_<int>_r_minMag <float>_mul <float>_lt\n"
|
|
" <float>_to_<float> <float>_mulAdd <float>_eq_signaling\n"
|
|
" <float>_roundToInt <float>_div <float>_le_quiet\n"
|
|
" <float>_rem <float>_lt_quiet\n"
|
|
" <float>_sqrt\n"
|
|
" -all1 --All unary functions.\n"
|
|
" -all2 --All binary functions.\n"
|
|
" -all --All functions.\n"
|
|
" <int>:\n"
|
|
" ui32 --Unsigned 32-bit integer.\n"
|
|
" ui64 --Unsigned 64-bit integer.\n"
|
|
" i32 --Signed 32-bit integer.\n"
|
|
" i64 --Signed 64-bit integer.\n"
|
|
" <float>:\n"
|
|
#ifdef FLOAT16
|
|
" f16 --Binary 16-bit floating-point (half-precision).\n"
|
|
#endif
|
|
" f32 --Binary 32-bit floating-point (single-precision).\n"
|
|
#ifdef FLOAT64
|
|
" f64 --Binary 64-bit floating-point (double-precision).\n"
|
|
#endif
|
|
#ifdef EXTFLOAT80
|
|
" extF80 --Binary 80-bit extended floating-point.\n"
|
|
#endif
|
|
#ifdef FLOAT128
|
|
" f128 --Binary 128-bit floating-point (quadruple-precision).\n"
|
|
#endif
|
|
,
|
|
stdout
|
|
);
|
|
return EXIT_SUCCESS;
|
|
#ifdef EXTFLOAT80
|
|
} else if ( ! strcmp( argPtr, "precision32" ) ) {
|
|
roundingPrecision = 32;
|
|
} else if ( ! strcmp( argPtr, "precision64" ) ) {
|
|
roundingPrecision = 64;
|
|
} else if ( ! strcmp( argPtr, "precision80" ) ) {
|
|
roundingPrecision = 80;
|
|
#endif
|
|
} else if (
|
|
! strcmp( argPtr, "rnear_even" )
|
|
|| ! strcmp( argPtr, "rneareven" )
|
|
|| ! strcmp( argPtr, "rnearest_even" )
|
|
) {
|
|
roundingCode = ROUND_NEAR_EVEN;
|
|
} else if (
|
|
! strcmp( argPtr, "rminmag" ) || ! strcmp( argPtr, "rminMag" )
|
|
) {
|
|
roundingCode = ROUND_MINMAG;
|
|
} else if ( ! strcmp( argPtr, "rmin" ) ) {
|
|
roundingCode = ROUND_MIN;
|
|
} else if ( ! strcmp( argPtr, "rmax" ) ) {
|
|
roundingCode = ROUND_MAX;
|
|
} else if (
|
|
! strcmp( argPtr, "rnear_maxmag" )
|
|
|| ! strcmp( argPtr, "rnear_maxMag" )
|
|
|| ! strcmp( argPtr, "rnearmaxmag" )
|
|
|| ! strcmp( argPtr, "rnearest_maxmag" )
|
|
|| ! strcmp( argPtr, "rnearest_maxMag" )
|
|
) {
|
|
roundingCode = ROUND_NEAR_MAXMAG;
|
|
#ifdef FLOAT_ROUND_ODD
|
|
} else if ( ! strcmp( argPtr, "rodd" ) ) {
|
|
roundingCode = ROUND_ODD;
|
|
#endif
|
|
} else if ( ! strcmp( argPtr, "tininessbefore" ) ) {
|
|
tininessCode = TININESS_BEFORE_ROUNDING;
|
|
} else if ( ! strcmp( argPtr, "tininessafter" ) ) {
|
|
tininessCode = TININESS_AFTER_ROUNDING;
|
|
} else if ( ! strcmp( argPtr, "notexact" ) ) {
|
|
exactCode = EXACT_FALSE;
|
|
} else if ( ! strcmp( argPtr, "exact" ) ) {
|
|
exactCode = EXACT_TRUE;
|
|
} else if ( ! strcmp( argPtr, "all1" ) ) {
|
|
haveFunctionArg = true;
|
|
functionCode = 0;
|
|
numOperands = 1;
|
|
} else if ( ! strcmp( argPtr, "all2" ) ) {
|
|
haveFunctionArg = true;
|
|
functionCode = 0;
|
|
numOperands = 2;
|
|
} else if ( ! strcmp( argPtr, "all" ) ) {
|
|
haveFunctionArg = true;
|
|
functionCode = 0;
|
|
numOperands = 0;
|
|
} else {
|
|
functionCode = 1;
|
|
while ( strcmp( argPtr, functionInfos[functionCode].namePtr ) ) {
|
|
++functionCode;
|
|
if ( functionCode == NUM_FUNCTIONS ) {
|
|
fail( "Invalid argument '%s'", *argv );
|
|
}
|
|
}
|
|
haveFunctionArg = true;
|
|
}
|
|
}
|
|
if ( ! haveFunctionArg ) fail( "Function argument required" );
|
|
if ( functionCode ) {
|
|
timeFunction(
|
|
functionCode,
|
|
roundingPrecision,
|
|
roundingCode,
|
|
tininessCode,
|
|
exactCode
|
|
);
|
|
} else {
|
|
for (
|
|
functionCode = 1; functionCode < NUM_FUNCTIONS; ++functionCode
|
|
) {
|
|
if (
|
|
! numOperands
|
|
|| (functionInfos[functionCode].attribs
|
|
& ((numOperands == 1) ? FUNC_ARG_UNARY
|
|
: FUNC_ARG_BINARY))
|
|
) {
|
|
timeFunction(
|
|
functionCode,
|
|
roundingPrecision,
|
|
roundingCode,
|
|
tininessCode,
|
|
exactCode
|
|
);
|
|
}
|
|
}
|
|
}
|
|
return EXIT_SUCCESS;
|
|
|
|
}
|
|
|