| version 1.1.1.1, 2000/09/09 14:13:19 |
version 1.1.1.2, 2003/08/25 16:06:38 |
|
|
| /* Time routines for speed measurements. */ |
/* Time routines for speed measurements. |
| |
|
| /* |
Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. |
| Copyright (C) 1999, 2000 Free Software Foundation, Inc. |
|
| |
|
| This file is part of the GNU MP Library. |
This file is part of the GNU MP Library. |
| |
|
| Line 18 License for more details. |
|
| Line 17 License for more details. |
|
| You should have received a copy of the GNU Lesser General Public License |
You should have received a copy of the GNU Lesser General Public License |
| along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
| the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
| MA 02111-1307, USA. |
MA 02111-1307, USA. */ |
| */ |
|
| |
|
| |
|
| /* speed_time_init() - initialize timing things. speed_starttime() calls |
/* speed_time_init() - initialize timing things. speed_starttime() calls |
| this if it hasn't been done yet, so you only need to call this explicitly |
this if it hasn't been done yet, so you only need to call this explicitly |
| if you want to use the global variables before the first measurement. |
if you want to use the global variables before the first measurement. |
| |
|
| speed_starttime() - start a time measurement. |
speed_starttime() - start a time measurement. |
| |
|
| speed_endtime() - end a time measurement, return time taken, in seconds. |
speed_endtime() - end a time measurement, return time taken (seconds or |
| |
cycles). |
| |
|
| |
speed_time_string - a string describing the time method in use. |
| |
|
| speed_unittime - global variable with the unit of time measurement |
speed_unittime - global variable with the unit of time measurement |
| accuracy, in seconds. |
accuracy (seconds or cycles). |
| |
|
| speed_precision - global variable which is the intended accuracy of time |
speed_precision - global variable which is the intended accuracy of time |
| measurements. speed_measure() for instance runs target routines with |
measurements. speed_measure() for instance runs target routines with |
| enough repetitions so it takes at least speed_unittime*speed_precision |
enough repetitions so it takes at least speed_unittime*speed_precision. |
| seconds. A program can provide an option so the user can set this. |
A program can provide an option so the user can set this, otherwise it |
| |
gets a default based on the measuring method chosen. |
| |
|
| speed_cycletime - the time in seconds for each CPU cycle, for example on |
speed_cycletime - the time in seconds for each CPU cycle, for example on |
| a 100 MHz CPU this would be 1.0e-8. If the CPU frequency is unknown, |
a 100 MHz CPU this would be 1.0e-8. If the CPU frequency is unknown this |
| speed_cycletime is 1.0. See speed_cycletime_init(). |
is 0.0 if the time base is in seconds, or 1.0 if it's in cycles. |
| |
|
| speed_time_string - a string describing the time method in use. |
|
| |
|
| |
speed_endtime() and speed_unittime are normally in seconds, but if a |
| |
cycle counter is being used to measure and the CPU frequency is unknown, |
| |
then speed_endtime() returns cycles and speed_cycletime and |
| |
speed_unittime are 1.0. |
| |
|
| |
Notice that speed_unittime*speed_precision is the target duration for |
| |
speed_endtime(), irrespective of whether that's in seconds or cycles. |
| |
|
| |
Call speed_cycletime_need_seconds() to demand that speed_endtime() is in |
| |
seconds and not perhaps in cycles. |
| |
|
| |
Call speed_cycletime_need_cycles() to demand that speed_cycletime is |
| |
non-zero, so that speed_endtime()/speed_cycletime will work to give times |
| |
in cycles. |
| |
|
| |
|
| |
Notes: |
| |
|
| |
Various combinations of cycle counter, read_real_time(), getrusage(), |
| |
gettimeofday() and times() can arise, according to which are available |
| |
and their precision. |
| |
|
| |
|
| |
Allowing speed_endtime() to return either seconds or cycles is only a |
| |
slight complication and makes it possible for the speed program to do |
| |
some sensible things without demanding the CPU frequency. If seconds are |
| |
being measured then it can always print seconds, and if cycles are being |
| |
measured then it can always print them without needing to know how long |
| |
they are. Also the tune program doesn't care at all what the units are. |
| |
|
| |
GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c |
| |
fail. This will be needed if times in seconds are wanted but a cycle |
| |
counter is being used, or if times in cycles are wanted but getrusage or |
| |
another seconds based timer is in use. |
| |
|
| |
If the measuring method uses a cycle counter but supplements it with |
| |
getrusage or the like, then knowing the CPU frequency is mandatory since |
| |
the code compares values from the two. |
| |
|
| |
|
| |
Solaris gethrtime() seems no more than a slow way to access the Sparc V9 |
| |
cycle counter. gethrvtime() seems to be relevant only to LWP, it doesn't |
| |
for instance give nanosecond virtual time. So neither of these are used. |
| |
|
| |
|
| |
Bugs: |
| |
|
| |
getrusage_microseconds_p is fundamentally flawed, getrusage and |
| |
gettimeofday can have resolutions other than clock ticks or microseconds, |
| |
for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms. |
| |
|
| Enhancements: |
Enhancements: |
| |
|
| Add support for accurate timing on more CPUs, machines and systems. |
The SGI hardware counter has 64 bits on some machines, which could be |
| |
used when available. But perhaps 32 bits is enough range, and then rely |
| |
on the getrusage supplement. |
| |
|
| Extend automatic CPU frequency determination to more kernels and systems. |
Maybe getrusage (or times) should be used as a supplement for any |
| |
wall-clock measuring method. Currently a wall clock with a good range |
| |
(eg. a 64-bit cycle counter) is used without a supplement. |
| |
|
| */ |
On PowerPC the timebase registers could be used, but would have to do |
| |
something to find out the speed. On 6xx chips it's normally 1/4 bus |
| |
speed, on 4xx chips it's either that or an external clock. Measuring |
| |
against gettimeofday might be ok. */ |
| |
|
| |
|
| |
#include "config.h" |
| |
|
| |
#include <errno.h> |
| |
#include <setjmp.h> |
| |
#include <signal.h> |
| |
#include <stddef.h> |
| #include <stdio.h> |
#include <stdio.h> |
| #include <stdlib.h> /* for getenv */ |
#include <string.h> |
| |
#include <stdlib.h> /* for getenv() */ |
| |
|
| |
#if HAVE_FCNTL_H |
| |
#include <fcntl.h> /* for open() */ |
| |
#endif |
| |
|
| |
#if HAVE_STDINT_H |
| |
#include <stdint.h> /* for uint64_t */ |
| |
#endif |
| |
|
| #if HAVE_UNISTD_H |
#if HAVE_UNISTD_H |
| #include <unistd.h> |
#include <unistd.h> /* for sysconf() */ |
| #endif |
#endif |
| |
|
| #include <sys/types.h> |
#include <sys/types.h> |
| #if HAVE_SYS_SYSCTL_H |
|
| #include <sys/sysctl.h> |
#if TIME_WITH_SYS_TIME |
| |
# include <sys/time.h> /* for struct timeval */ |
| |
# include <time.h> |
| |
#else |
| |
# if HAVE_SYS_TIME_H |
| |
# include <sys/time.h> |
| |
# else |
| |
# include <time.h> |
| |
# endif |
| #endif |
#endif |
| |
|
| |
#if HAVE_SYS_MMAN_H |
| |
#include <sys/mman.h> /* for mmap() */ |
| |
#endif |
| |
|
| |
#if HAVE_SYS_RESOURCE_H |
| |
#include <sys/resource.h> /* for struct rusage */ |
| |
#endif |
| |
|
| |
#if HAVE_SYS_SYSSGI_H |
| |
#include <sys/syssgi.h> /* for syssgi() */ |
| |
#endif |
| |
|
| |
#if HAVE_SYS_SYSTEMCFG_H |
| |
#include <sys/systemcfg.h> /* for RTC_POWER on AIX */ |
| |
#endif |
| |
|
| |
#if HAVE_SYS_TIMES_H |
| |
#include <sys/times.h> /* for times() and struct tms */ |
| |
#endif |
| |
|
| #include "gmp.h" |
#include "gmp.h" |
| #include "gmp-impl.h" |
#include "gmp-impl.h" |
| #include "longlong.h" |
|
| |
|
| #include "speed.h" |
#include "speed.h" |
| |
|
| |
|
| |
|
| |
char speed_time_string[256]; |
| |
int speed_precision = 0; |
| |
double speed_unittime; |
| |
double speed_cycletime = 0.0; |
| |
|
| |
|
| |
/* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4 |
| |
native cc */ |
| |
#define M_2POWU (((double) INT_MAX + 1.0) * 2.0) |
| |
|
| |
#define M_2POW32 4294967296.0 |
| |
#define M_2POW64 (M_2POW32 * M_2POW32) |
| |
|
| |
|
| |
/* Conditionals for the time functions available are done with normal C |
| |
code, which is a lot easier than wildly nested preprocessor directives. |
| |
|
| |
The choice of what to use is partly made at run-time, according to |
| |
whether the cycle counter works and the measured accuracy of getrusage |
| |
and gettimeofday. |
| |
|
| |
A routine that's not available won't be getting called, but is an abort() |
| |
to be sure it isn't called mistakenly. |
| |
|
| |
It can be assumed that if a function exists then its data type will, but |
| |
if the function doesn't then the data type might or might not exist, so |
| |
the type can't be used unconditionally. The "struct_rusage" etc macros |
| |
provide dummies when the respective function doesn't exist. */ |
| |
|
| |
|
| #if HAVE_SPEED_CYCLECOUNTER |
#if HAVE_SPEED_CYCLECOUNTER |
| #define SPEED_USE_CYCLECOUNTER 1 |
static const int have_cycles = HAVE_SPEED_CYCLECOUNTER; |
| #else |
#else |
| #define SPEED_USE_MICROSECOND_GETRUSAGE 0 |
static const int have_cycles = 0; |
| #define SPEED_USE_MICROSECOND_GETTIMEOFDAY 1 |
#define speed_cyclecounter(p) ASSERT_FAIL (speed_cyclecounter not available) |
| #define SPEED_USE_TMS_UTIME 0 |
|
| #endif |
#endif |
| |
|
| |
/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12 |
| |
microseconds. Same #ifdefs here as in longlong.h. */ |
| |
#if defined (__GNUC__) && ! defined (NO_ASM) \ |
| |
&& (defined (__i370__) || defined (__s390__) || defined (__mvs__)) |
| |
static const int have_stck = 1; |
| |
static const int use_stck = 1; /* always use when available */ |
| |
typedef uint64_t stck_t; /* gcc for s390 is quite new, always has uint64_t */ |
| |
#define STCK(timestamp) \ |
| |
do { \ |
| |
asm ("stck %0" : "=m" (timestamp)); \ |
| |
} while (0) |
| |
#else |
| |
static const int have_stck = 0; |
| |
static const int use_stck = 0; |
| |
typedef unsigned long stck_t; /* dummy */ |
| |
#define STCK(timestamp) ASSERT_FAIL (stck instruction not available) |
| |
#endif |
| |
#define STCK_PERIOD (1.0 / 4096e6) /* 2^-12 microseconds */ |
| |
|
| #define TIMEVAL_SECS(tp) \ |
/* Unicos 10.X has syssgi(), but not mmap(). */ |
| ((double) (tp)->tv_sec + (double) (tp)->tv_usec * 1.0e-6) |
#if HAVE_SYSSGI && HAVE_MMAP |
| |
static const int have_sgi = 1; |
| |
#else |
| |
static const int have_sgi = 0; |
| |
#endif |
| |
|
| |
#if HAVE_READ_REAL_TIME |
| |
static const int have_rrt = 1; |
| |
#else |
| |
static const int have_rrt = 0; |
| |
#define read_real_time(t,s) ASSERT_FAIL (read_real_time not available) |
| |
#define time_base_to_time(t,s) ASSERT_FAIL (time_base_to_time not available) |
| |
#define RTC_POWER 1 |
| |
#define RTC_POWER_PC 2 |
| |
#define timebasestruct_t struct timebasestruct_dummy |
| |
struct timebasestruct_dummy { |
| |
int flag; |
| |
unsigned int tb_high; |
| |
unsigned int tb_low; |
| |
}; |
| |
#endif |
| |
|
| /* Look for an environment variable for CPU clock frequency. |
#if HAVE_CLOCK_GETTIME |
| GMP_CPU_FREQUENCY should be in Hertz, in floating point form, |
static const int have_cgt = 1; |
| eg. "450e6". */ |
#define struct_timespec struct timespec |
| int |
#else |
| speed_cpu_frequency_environment (void) |
static const int have_cgt = 0; |
| |
#define struct_timespec struct timespec_dummy |
| |
#define clock_gettime(id,ts) (ASSERT_FAIL (clock_gettime not available), -1) |
| |
#define clock_getres(id,ts) (ASSERT_FAIL (clock_getres not available), -1) |
| |
#endif |
| |
|
| |
#if HAVE_GETRUSAGE |
| |
static const int have_grus = 1; |
| |
#define struct_rusage struct rusage |
| |
#else |
| |
static const int have_grus = 0; |
| |
#define getrusage(n,ru) ASSERT_FAIL (getrusage not available) |
| |
#define struct_rusage struct rusage_dummy |
| |
#endif |
| |
|
| |
#if HAVE_GETTIMEOFDAY |
| |
static const int have_gtod = 1; |
| |
#define struct_timeval struct timeval |
| |
#else |
| |
static const int have_gtod = 0; |
| |
#define gettimeofday(tv,tz) ASSERT_FAIL (gettimeofday not available) |
| |
#define struct_timeval struct timeval_dummy |
| |
#endif |
| |
|
| |
#if HAVE_TIMES |
| |
static const int have_times = 1; |
| |
#define struct_tms struct tms |
| |
#else |
| |
static const int have_times = 0; |
| |
#define times(tms) ASSERT_FAIL (times not available) |
| |
#define struct_tms struct tms_dummy |
| |
#endif |
| |
|
| |
struct tms_dummy { |
| |
long tms_utime; |
| |
}; |
| |
struct timeval_dummy { |
| |
long tv_sec; |
| |
long tv_usec; |
| |
}; |
| |
struct rusage_dummy { |
| |
struct_timeval ru_utime; |
| |
}; |
| |
struct timespec_dummy { |
| |
long tv_sec; |
| |
long tv_nsec; |
| |
}; |
| |
|
| |
static int use_cycles; |
| |
static int use_sgi; |
| |
static int use_rrt; |
| |
static int use_cgt; |
| |
static int use_gtod; |
| |
static int use_grus; |
| |
static int use_times; |
| |
static int use_tick_boundary; |
| |
|
| |
static unsigned start_cycles[2]; |
| |
static stck_t start_stck; |
| |
static unsigned start_sgi; |
| |
static timebasestruct_t start_rrt; |
| |
static struct_timespec start_cgt; |
| |
static struct_rusage start_grus; |
| |
static struct_timeval start_gtod; |
| |
static struct_tms start_times; |
| |
|
| |
static double cycles_limit = 1e100; |
| |
static double sgi_unittime; |
| |
static double cgt_unittime; |
| |
static double grus_unittime; |
| |
static double gtod_unittime; |
| |
static double times_unittime; |
| |
|
| |
/* for RTC_POWER format, ie. seconds and nanoseconds */ |
| |
#define TIMEBASESTRUCT_SECS(t) ((t)->tb_high + (t)->tb_low * 1e-9) |
| |
|
| |
|
| |
/* Return a string representing a time in seconds, nicely formatted. |
| |
Eg. "10.25ms". */ |
| |
char * |
| |
unittime_string (double t) |
| { |
{ |
| char *e; |
static char buf[128]; |
| |
|
| |
const char *unit; |
| |
int prec; |
| |
|
| e = getenv ("GMP_CPU_FREQUENCY"); |
/* choose units and scale */ |
| if (e == NULL) |
if (t < 1e-6) |
| return 0; |
t *= 1e9, unit = "ns"; |
| |
else if (t < 1e-3) |
| |
t *= 1e6, unit = "us"; |
| |
else if (t < 1.0) |
| |
t *= 1e3, unit = "ms"; |
| |
else |
| |
unit = "s"; |
| |
|
| speed_cycletime = 1.0 / atof (e); |
/* want 4 significant figures */ |
| return 1; |
if (t < 1.0) |
| |
prec = 4; |
| |
else if (t < 10.0) |
| |
prec = 3; |
| |
else if (t < 100.0) |
| |
prec = 2; |
| |
else |
| |
prec = 1; |
| |
|
| |
sprintf (buf, "%.*f%s", prec, t, unit); |
| |
return buf; |
| } |
} |
| |
|
| |
|
| /* On FreeBSD 3.3 the headers have #defines like CPU_WALLCLOCK under |
static jmp_buf cycles_works_buf; |
| CTL_MACHDEP but don't seem to have anything for machdep.tsc_freq or |
|
| machdep.i586_freq. Using the string forms with sysctlbyname() works |
|
| though, and lets libc worry about the defines and headers. |
|
| |
|
| FreeBSD 3.3 has tsc_freq, FreeBSD 2.2.8 has i586_freq instead. |
static RETSIGTYPE |
| The "sysctl -a" command prints everything available. */ |
cycles_works_handler (int sig) |
| |
{ |
| |
longjmp (cycles_works_buf, 1); |
| |
} |
| |
|
| #if HAVE_SYSCTLBYNAME |
|
| int |
int |
| speed_cpu_frequency_sysctlbyname (void) |
cycles_works_p (void) |
| { |
{ |
| unsigned val; |
static int result = -1; |
| size_t valsize; |
RETSIGTYPE (*old_handler) _PROTO ((int)); |
| |
unsigned cycles[2]; |
| |
|
| valsize = sizeof(val); |
/* suppress a warning about cycles[] unused */ |
| if (sysctlbyname ("machdep.tsc_freq", &val, &valsize, NULL, 0) != 0 |
cycles[0] = 0; |
| || valsize != sizeof(val)) |
|
| |
if (result != -1) |
| |
goto done; |
| |
|
| |
#ifdef SIGILL |
| |
old_handler = signal (SIGILL, cycles_works_handler); |
| |
if (old_handler == SIG_ERR) |
| { |
{ |
| valsize = sizeof(val); |
if (speed_option_verbose) |
| if (sysctlbyname ("machdep.i586_freq", &val, &valsize, NULL, 0) != 0 |
printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n"); |
| || valsize != sizeof(val)) |
goto yes; |
| return 0; |
|
| } |
} |
| |
if (setjmp (cycles_works_buf)) |
| |
{ |
| |
if (speed_option_verbose) |
| |
printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n"); |
| |
result = 0; |
| |
goto done; |
| |
} |
| |
speed_cyclecounter (cycles); |
| |
signal (SIGILL, old_handler); |
| |
if (speed_option_verbose) |
| |
printf ("cycles_works_p(): speed_cyclecounter() works\n"); |
| |
#else |
| |
|
| speed_cycletime = 1.0 / (double) val; |
if (speed_option_verbose) |
| return 1; |
printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n"); |
| } |
|
| #endif |
#endif |
| |
|
| |
yes: |
| |
result = 1; |
| |
|
| /* Linux doesn't seem to have any system call to get the CPU frequency, at |
done: |
| least not in 2.0.x or 2.2.x, so it's necessary to read /proc/cpuinfo. |
return result; |
| |
} |
| |
|
| i386 2.0.36 - "bogomips" is the CPU frequency. |
|
| |
|
| i386 2.2.13 - has both "cpu MHz" and "bogomips", and it's "cpu MHz" which |
/* The number of clock ticks per second, but looking at sysconf rather than |
| is the frequency. |
just CLK_TCK, where possible. */ |
| |
long |
| alpha 2.2.5 - "cycle frequency [Hz]" seems to be right, "BogoMIPS" is |
clk_tck (void) |
| very slightly different. */ |
|
| |
|
| int |
|
| speed_cpu_frequency_proc_cpuinfo (void) |
|
| { |
{ |
| FILE *fp; |
static long result = -1L; |
| char buf[128]; |
if (result != -1L) |
| double val; |
return result; |
| int ret = 0; |
|
| |
|
| if ((fp = fopen ("/proc/cpuinfo", "r")) != NULL) |
#if HAVE_SYSCONF |
| |
result = sysconf (_SC_CLK_TCK); |
| |
if (result != -1L) |
| { |
{ |
| while (fgets (buf, sizeof (buf), fp) != NULL) |
if (speed_option_verbose) |
| { |
printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", result); |
| if (sscanf (buf, "cycle frequency [Hz] : %lf est.\n", &val) == 1) |
return result; |
| { |
|
| speed_cycletime = 1.0 / val; |
|
| ret = 1; |
|
| break; |
|
| } |
|
| if (sscanf (buf, "cpu MHz : %lf\n", &val) == 1) |
|
| { |
|
| speed_cycletime = 1e-6 / val; |
|
| ret = 1; |
|
| break; |
|
| } |
|
| if (sscanf (buf, "bogomips : %lf\n", &val) == 1) |
|
| { |
|
| speed_cycletime = 1e-6 / val; |
|
| ret = 1; |
|
| break; |
|
| } |
|
| } |
|
| fclose (fp); |
|
| } |
} |
| return ret; |
|
| |
fprintf (stderr, |
| |
"sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n"); |
| |
#endif |
| |
|
| |
#ifdef CLK_TCK |
| |
result = CLK_TCK; |
| |
if (speed_option_verbose) |
| |
printf ("CLK_TCK is %ld per second\n", result); |
| |
return result; |
| |
#else |
| |
fprintf (stderr, "CLK_TCK not defined, cannot continue\n"); |
| |
abort (); |
| |
#endif |
| } |
} |
| |
|
| |
|
| /* SunOS /bin/sysinfo prints a line like: |
/* If two times can be observed less than half a clock tick apart, then |
| cpu0 is a "75 MHz TI,TMS390Z55" CPU */ |
assume "get" is microsecond accurate. |
| |
|
| #if HAVE_POPEN |
Two times only 1 microsecond apart are not believed, since some kernels |
| |
take it upon themselves to ensure gettimeofday doesn't return the same |
| |
value twice, for the benefit of applications using it for a timestamp. |
| |
This is obviously very stupid given the speed of CPUs these days. |
| |
|
| |
Making "reps" calls to noop_1() is designed to waste some CPU, with a |
| |
view to getting measurements 2 microseconds (or more) apart. "reps" is |
| |
increased progressively until such a period is seen. |
| |
|
| |
The outer loop "attempts" are just to allow for any random nonsense or |
| |
system load upsetting the measurements (ie. making two successive calls |
| |
to "get" come out as a longer interval than normal). |
| |
|
| |
Bugs: |
| |
|
| |
The assumption that any interval less than a half tick implies |
| |
microsecond resolution is obviously fairly rash, the true resolution |
| |
could be anything between a microsecond and that half tick. Perhaps |
| |
something special would have to be done on a system where this is the |
| |
case, since there's no obvious reliable way to detect it |
| |
automatically. */ |
| |
|
| |
#define MICROSECONDS_P(name, type, get, sec, usec) \ |
| |
{ \ |
| |
static int result = -1; \ |
| |
type st, et; \ |
| |
long dt, half_tick; \ |
| |
unsigned attempt, reps, i, j; \ |
| |
\ |
| |
if (result != -1) \ |
| |
return result; \ |
| |
\ |
| |
result = 0; \ |
| |
half_tick = (1000000L / clk_tck ()) / 2; \ |
| |
\ |
| |
for (attempt = 0; attempt < 5; attempt++) \ |
| |
{ \ |
| |
reps = 0; \ |
| |
for (;;) \ |
| |
{ \ |
| |
get (st); \ |
| |
for (i = 0; i < reps; i++) \ |
| |
for (j = 0; j < 100; j++) \ |
| |
noop_1 (CNST_LIMB(0)); \ |
| |
get (et); \ |
| |
\ |
| |
dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st); \ |
| |
\ |
| |
if (speed_option_verbose >= 2) \ |
| |
printf ("%s attempt=%u, reps=%u, dt=%ld\n", \ |
| |
name, attempt, reps, dt); \ |
| |
\ |
| |
if (dt >= 2) \ |
| |
break; \ |
| |
\ |
| |
reps = (reps == 0 ? 1 : 2*reps); \ |
| |
if (reps == 0) \ |
| |
break; /* uint overflow, not normal */ \ |
| |
} \ |
| |
\ |
| |
if (dt < half_tick) \ |
| |
{ \ |
| |
result = 1; \ |
| |
break; \ |
| |
} \ |
| |
} \ |
| |
\ |
| |
if (speed_option_verbose) \ |
| |
{ \ |
| |
if (result) \ |
| |
printf ("%s is microsecond accurate\n", name); \ |
| |
else \ |
| |
printf ("%s is only %s clock tick accurate\n", \ |
| |
name, unittime_string (1.0/clk_tck())); \ |
| |
} \ |
| |
return result; \ |
| |
} |
| |
|
| |
|
| int |
int |
| speed_cpu_frequency_sunos_sysinfo (void) |
gettimeofday_microseconds_p (void) |
| { |
{ |
| FILE *fp; |
#define call_gettimeofday(t) gettimeofday (&(t), NULL) |
| char buf[128]; |
#define timeval_tv_sec(t) ((t).tv_sec) |
| double val; |
#define timeval_tv_usec(t) ((t).tv_usec) |
| int ret = 0; |
MICROSECONDS_P ("gettimeofday", struct_timeval, |
| |
call_gettimeofday, timeval_tv_sec, timeval_tv_usec); |
| |
} |
| |
|
| /* Error messages are sent to /dev/null in case /bin/sysinfo doesn't |
int |
| exist. The brackets are necessary for some shells (eg. ash). */ |
getrusage_microseconds_p (void) |
| if ((fp = popen ("(/bin/sysinfo) 2>/dev/null", "r")) != NULL) |
{ |
| { |
#define call_getrusage(t) getrusage (0, &(t)) |
| while (fgets (buf, sizeof (buf), fp) != NULL) |
#define rusage_tv_sec(t) ((t).ru_utime.tv_sec) |
| { |
#define rusage_tv_usec(t) ((t).ru_utime.tv_usec) |
| if (sscanf (buf, " cpu0 is a \"%lf MHz", &val) == 1) |
MICROSECONDS_P ("getrusage", struct_rusage, |
| { |
call_getrusage, rusage_tv_sec, rusage_tv_usec); |
| speed_cycletime = 1e-6 / val; |
|
| ret = 1; |
|
| break; |
|
| } |
|
| } |
|
| pclose (fp); |
|
| } |
|
| return ret; |
|
| } |
} |
| #endif |
|
| |
|
| |
|
| /* This is for Solaris. "psrinfo" is the command-line interface to |
/* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version |
| processor_info(). "prtconf -vp" gives similar information. */ |
of glibc (some time post 2.2). |
| |
|
| #if HAVE_PROCESSOR_INFO |
CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes |
| #include <sys/unistd.h> /* for _SC_NPROCESSORS_CONF */ |
defined, but returning -1 for an error). */ |
| #include <sys/processor.h> /* for processor_info_t */ |
|
| |
#ifdef CLOCK_PROCESS_CPUTIME_ID |
| |
# define CGT_ID CLOCK_PROCESS_CPUTIME_ID |
| |
#else |
| |
# ifdef CLOCK_VIRTUAL |
| |
# define CGT_ID CLOCK_VIRTUAL |
| |
# endif |
| |
#endif |
| |
#ifdef CGT_ID |
| |
# define HAVE_CGT_ID 1 |
| |
#else |
| |
# define HAVE_CGT_ID 0 |
| |
# define CGT_ID (ASSERT_FAIL (CGT_ID not determined), -1) |
| |
#endif |
| |
|
| int |
int |
| speed_cpu_frequency_processor_info (void) |
cgt_works_p (void) |
| { |
{ |
| processor_info_t p; |
static int result = -1; |
| int i, n, mhz = 0; |
struct_timespec unit; |
| |
|
| n = sysconf (_SC_NPROCESSORS_CONF); |
if (! have_cgt) |
| for (i = 0; i < n; i++) |
return 0; |
| |
|
| |
if (! HAVE_CGT_ID) |
| { |
{ |
| if (processor_info (i, &p) != 0) |
if (speed_option_verbose) |
| continue; |
printf ("clock_gettime don't know what ID to use\n"); |
| if (p.pi_state != P_ONLINE) |
result = 0; |
| continue; |
return result; |
| |
} |
| |
|
| if (mhz != 0 && p.pi_clock != mhz) |
if (result != -1) |
| { |
return result; |
| fprintf (stderr, |
|
| "speed_cpu_frequency_processor_info(): There's more than one CPU and they have different clock speeds\n"); |
|
| return 0; |
|
| } |
|
| |
|
| mhz = p.pi_clock; |
/* trial run to see if it works */ |
| |
if (clock_gettime (CGT_ID, &unit) != 0) |
| |
{ |
| |
if (speed_option_verbose) |
| |
printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno)); |
| |
result = 0; |
| |
return result; |
| } |
} |
| |
|
| speed_cycletime = 1.0e-6 / (double) mhz; |
/* get the resolution */ |
| return 1; |
if (clock_getres (CGT_ID, &unit) != 0) |
| |
{ |
| |
if (speed_option_verbose) |
| |
printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno)); |
| |
result = 0; |
| |
return result; |
| |
} |
| |
|
| |
cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9; |
| |
printf ("clock_gettime is %s accurate\n", |
| |
unittime_string (cgt_unittime)); |
| |
result = 1; |
| |
return result; |
| } |
} |
| #endif |
|
| |
|
| |
|
| /* Each function returns 1 if it succeeds in setting speed_cycletime, or 0 |
volatile unsigned *sgi_addr; |
| if not. */ |
|
| |
|
| static const struct { |
int |
| int (*fun) _PROTO ((void)); |
sgi_works_p (void) |
| const char *description; |
{ |
| |
#if HAVE_SYSSGI && HAVE_MMAP |
| |
static int result = -1; |
| |
|
| } speed_cpu_frequency_table[] = { |
size_t pagesize, offset; |
| |
__psunsigned_t phys, physpage; |
| |
void *virtpage; |
| |
unsigned period_picoseconds; |
| |
int size, fd; |
| |
|
| /* This should be first, so an environment variable can override anything |
if (result != -1) |
| the system gives. */ |
return result; |
| { speed_cpu_frequency_environment, |
|
| "environment variable GMP_CPU_FREQUENCY (in Hertz)" }, |
|
| |
|
| #if HAVE_SYSCTLBYNAME |
phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds); |
| { speed_cpu_frequency_sysctlbyname, |
if (phys == (__psunsigned_t) -1) |
| "sysctlbyname() machdep.tsc_freq or machdep.i586_freq" }, |
{ |
| #endif |
/* ENODEV is the error when a counter is not available */ |
| |
if (speed_option_verbose) |
| |
printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno)); |
| |
result = 0; |
| |
return result; |
| |
} |
| |
sgi_unittime = period_picoseconds * 1e-12; |
| |
|
| #if HAVE_PROCESSOR_INFO |
/* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case. |
| { speed_cpu_frequency_processor_info, |
Challenge/ONYX hardware has a 64 bit byte counter, but there seems no |
| "processor_info() pi_clock" }, |
obvious way to identify that without SGI_CYCLECNTR_SIZE. */ |
| |
#ifdef SGI_CYCLECNTR_SIZE |
| |
size = syssgi (SGI_CYCLECNTR_SIZE); |
| |
if (size == -1) |
| |
{ |
| |
if (speed_option_verbose) |
| |
{ |
| |
printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno)); |
| |
printf (" will assume size==4\n"); |
| |
} |
| |
size = 32; |
| |
} |
| |
#else |
| |
size = 32; |
| #endif |
#endif |
| |
|
| { speed_cpu_frequency_proc_cpuinfo, |
if (size < 32) |
| "linux kernel /proc/cpuinfo file, cpu MHz or bogomips" }, |
{ |
| |
printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size); |
| |
result = 0; |
| |
return result; |
| |
} |
| |
|
| #if HAVE_POPEN |
pagesize = getpagesize(); |
| { speed_cpu_frequency_sunos_sysinfo, |
offset = (size_t) phys & (pagesize-1); |
| "SunOS /bin/sysinfo program cpu0 output" }, |
physpage = phys - offset; |
| #endif |
|
| }; |
|
| |
|
| |
/* shouldn't cross over a page boundary */ |
| |
ASSERT_ALWAYS (offset + size/8 <= pagesize); |
| |
|
| int |
fd = open("/dev/mmem", O_RDONLY); |
| speed_cycletime_init (void) |
if (fd == -1) |
| { |
{ |
| int i; |
if (speed_option_verbose) |
| |
printf ("open /dev/mmem: %s\n", strerror (errno)); |
| |
result = 0; |
| |
return result; |
| |
} |
| |
|
| for (i = 0; i < numberof (speed_cpu_frequency_table); i++) |
virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage); |
| if ((*speed_cpu_frequency_table[i].fun)()) |
if (virtpage == (void *) -1) |
| return 1; |
{ |
| |
if (speed_option_verbose) |
| |
printf ("mmap /dev/mmem: %s\n", strerror (errno)); |
| |
result = 0; |
| |
return result; |
| |
} |
| |
|
| fprintf (stderr, |
/* address of least significant 4 bytes, knowing mips is big endian */ |
| "Cannot determine CPU frequency, need one of the following\n"); |
sgi_addr = (unsigned *) ((char *) virtpage + offset |
| for (i = 0; i < numberof (speed_cpu_frequency_table); i++) |
+ size/8 - sizeof(unsigned)); |
| fprintf (stderr, "\t- %s\n", speed_cpu_frequency_table[i].description); |
result = 1; |
| |
return result; |
| |
|
| |
#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */ |
| return 0; |
return 0; |
| |
#endif |
| } |
} |
| |
|
| |
|
| /* ---------------------------------------------------------------------- */ |
#define DEFAULT(var,n) \ |
| #if SPEED_USE_CYCLECOUNTER |
do { \ |
| |
if (! (var)) \ |
| |
(var) = (n); \ |
| |
} while (0) |
| |
|
| const char *speed_time_string |
|
| = "Time measurements using CPU cycle counter.\n"; |
|
| |
|
| /* bigish value because we have a fast timer */ |
|
| int speed_precision = 10000; |
|
| |
|
| double speed_unittime; |
|
| double speed_cycletime; |
|
| |
|
| static int speed_time_initialized = 0; |
|
| static unsigned speed_starttime_save[2]; |
|
| |
|
| /* Knowing the CPU frequency is mandatory, so cycles can be converted to |
|
| seconds. */ |
|
| void |
void |
| speed_time_init (void) |
speed_time_init (void) |
| { |
{ |
| |
double supplement_unittime = 0.0; |
| |
|
| |
static int speed_time_initialized = 0; |
| if (speed_time_initialized) |
if (speed_time_initialized) |
| return; |
return; |
| speed_time_initialized = 1; |
speed_time_initialized = 1; |
| |
|
| if (!speed_cycletime_init ()) |
speed_cycletime_init (); |
| exit (1); |
|
| |
|
| speed_unittime = speed_cycletime; |
if (have_cycles && cycles_works_p ()) |
| |
{ |
| |
use_cycles = 1; |
| |
DEFAULT (speed_cycletime, 1.0); |
| |
speed_unittime = speed_cycletime; |
| |
DEFAULT (speed_precision, 10000); |
| |
strcpy (speed_time_string, "CPU cycle counter"); |
| |
|
| |
/* only used if a supplementary method is chosen below */ |
| |
cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0 |
| |
* speed_cycletime; |
| |
|
| |
if (have_grus && getrusage_microseconds_p()) |
| |
{ |
| |
/* this is a good combination */ |
| |
use_grus = 1; |
| |
supplement_unittime = grus_unittime = 1.0e-6; |
| |
strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()"); |
| |
} |
| |
else if (have_cycles == 1) |
| |
{ |
| |
/* When speed_cyclecounter has a limited range, look for something |
| |
to supplement it. */ |
| |
if (have_gtod && gettimeofday_microseconds_p()) |
| |
{ |
| |
use_gtod = 1; |
| |
supplement_unittime = gtod_unittime = 1.0e-6; |
| |
strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()"); |
| |
} |
| |
else if (have_grus) |
| |
{ |
| |
use_grus = 1; |
| |
supplement_unittime = grus_unittime = 1.0 / (double) clk_tck (); |
| |
sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime)); |
| |
} |
| |
else if (have_times) |
| |
{ |
| |
use_times = 1; |
| |
supplement_unittime = times_unittime = 1.0 / (double) clk_tck (); |
| |
sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime)); |
| |
} |
| |
else if (have_gtod) |
| |
{ |
| |
use_gtod = 1; |
| |
supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck (); |
| |
sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime)); |
| |
} |
| |
else |
| |
{ |
| |
fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n"); |
| |
fprintf (stderr, " Wraparounds may produce bad results on long measurements.\n"); |
| |
} |
| |
} |
| |
|
| |
if (use_grus || use_times || use_gtod) |
| |
{ |
| |
/* must know cycle period to compare cycles to the other measuring |
| |
methods (via cycles_limit) */ |
| |
speed_cycletime_need_seconds (); |
| |
|
| |
if (speed_precision * supplement_unittime > cycles_limit) |
| |
{ |
| |
fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n"); |
| |
fprintf (stderr, " cycle counter and limited precision supplemental method\n"); |
| |
fprintf (stderr, " (%s)\n", speed_time_string); |
| |
} |
| |
} |
| |
} |
| |
else if (have_stck) |
| |
{ |
| |
strcpy (speed_time_string, "STCK timestamp"); |
| |
/* stck is in units of 2^-12 microseconds, which is very likely higher |
| |
resolution than a cpu cycle */ |
| |
if (speed_cycletime == 0.0) |
| |
speed_cycletime_fail |
| |
("Need to know CPU frequency for effective stck unit"); |
| |
speed_unittime = MAX (speed_cycletime, STCK_PERIOD); |
| |
DEFAULT (speed_precision, 10000); |
| |
} |
| |
else if (have_sgi && sgi_works_p ()) |
| |
{ |
| |
use_sgi = 1; |
| |
DEFAULT (speed_precision, 10000); |
| |
speed_unittime = sgi_unittime; |
| |
sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()", |
| |
unittime_string (speed_unittime)); |
| |
/* supplemented with getrusage, which we assume to have 1ms resolution */ |
| |
use_grus = 1; |
| |
supplement_unittime = 1e-3; |
| |
} |
| |
else if (have_rrt) |
| |
{ |
| |
timebasestruct_t t; |
| |
use_rrt = 1; |
| |
DEFAULT (speed_precision, 10000); |
| |
read_real_time (&t, sizeof(t)); |
| |
switch (t.flag) { |
| |
case RTC_POWER: |
| |
/* FIXME: What's the actual RTC resolution? */ |
| |
speed_unittime = 1e-7; |
| |
strcpy (speed_time_string, "read_real_time() power nanoseconds"); |
| |
break; |
| |
case RTC_POWER_PC: |
| |
t.tb_high = 1; |
| |
t.tb_low = 0; |
| |
time_base_to_time (&t, sizeof(t)); |
| |
speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32; |
| |
sprintf (speed_time_string, "%s read_real_time() powerpc ticks", |
| |
unittime_string (speed_unittime)); |
| |
break; |
| |
default: |
| |
fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n", |
| |
t.flag); |
| |
abort (); |
| |
} |
| |
} |
| |
else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6) |
| |
{ |
| |
/* use clock_gettime if microsecond or better resolution */ |
| |
choose_cgt: |
| |
use_cgt = 1; |
| |
speed_unittime = cgt_unittime; |
| |
DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000)); |
| |
strcpy (speed_time_string, "microsecond accurate getrusage()"); |
| |
} |
| |
else if (have_grus && getrusage_microseconds_p()) |
| |
{ |
| |
use_grus = 1; |
| |
speed_unittime = grus_unittime = 1.0e-6; |
| |
DEFAULT (speed_precision, 1000); |
| |
strcpy (speed_time_string, "microsecond accurate getrusage()"); |
| |
} |
| |
else if (have_gtod && gettimeofday_microseconds_p()) |
| |
{ |
| |
use_gtod = 1; |
| |
speed_unittime = gtod_unittime = 1.0e-6; |
| |
DEFAULT (speed_precision, 1000); |
| |
strcpy (speed_time_string, "microsecond accurate gettimeofday()"); |
| |
} |
| |
else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck()) |
| |
{ |
| |
/* use clock_gettime if 1 tick or better resolution */ |
| |
goto choose_cgt; |
| |
} |
| |
else if (have_times) |
| |
{ |
| |
use_times = 1; |
| |
use_tick_boundary = 1; |
| |
speed_unittime = times_unittime = 1.0 / (double) clk_tck (); |
| |
DEFAULT (speed_precision, 200); |
| |
sprintf (speed_time_string, "%s clock tick times()", |
| |
unittime_string (speed_unittime)); |
| |
} |
| |
else if (have_grus) |
| |
{ |
| |
use_grus = 1; |
| |
use_tick_boundary = 1; |
| |
speed_unittime = grus_unittime = 1.0 / (double) clk_tck (); |
| |
DEFAULT (speed_precision, 200); |
| |
sprintf (speed_time_string, "%s clock tick getrusage()\n", |
| |
unittime_string (speed_unittime)); |
| |
} |
| |
else if (have_gtod) |
| |
{ |
| |
use_gtod = 1; |
| |
use_tick_boundary = 1; |
| |
speed_unittime = gtod_unittime = 1.0 / (double) clk_tck (); |
| |
DEFAULT (speed_precision, 200); |
| |
sprintf (speed_time_string, "%s clock tick gettimeofday()", |
| |
unittime_string (speed_unittime)); |
| |
} |
| |
else |
| |
{ |
| |
fprintf (stderr, "No time measuring method available\n"); |
| |
fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n"); |
| |
abort (); |
| |
} |
| |
|
| |
if (speed_option_verbose) |
| |
{ |
| |
printf ("speed_time_init: %s\n", speed_time_string); |
| |
printf (" speed_precision %d\n", speed_precision); |
| |
printf (" speed_unittime %.2g\n", speed_unittime); |
| |
if (supplement_unittime) |
| |
printf (" supplement_unittime %.2g\n", supplement_unittime); |
| |
printf (" use_tick_boundary %d\n", use_tick_boundary); |
| |
if (have_cycles) |
| |
printf (" cycles_limit %.2g seconds\n", cycles_limit); |
| |
} |
| } |
} |
| |
|
| |
|
| |
|
| |
/* Burn up CPU until a clock tick boundary, for greater accuracy. Set the |
| |
corresponding "start_foo" appropriately too. */ |
| |
|
| void |
void |
| speed_starttime (void) |
grus_tick_boundary (void) |
| { |
{ |
| if (!speed_time_initialized) |
struct_rusage prev; |
| speed_time_init (); |
getrusage (0, &prev); |
| speed_cyclecounter (speed_starttime_save); |
do { |
| |
getrusage (0, &start_grus); |
| |
} while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec); |
| } |
} |
| |
|
| #define M_2POWU ((double) (1L << (BITS_PER_INT-2)) * 4.0) |
void |
| #define M_2POW32 4294967296.0 |
gtod_tick_boundary (void) |
| |
{ |
| |
struct_timeval prev; |
| |
gettimeofday (&prev, NULL); |
| |
do { |
| |
gettimeofday (&start_gtod, NULL); |
| |
} while (start_gtod.tv_usec == prev.tv_usec); |
| |
} |
| |
|
| double |
void |
| speed_endtime (void) |
times_tick_boundary (void) |
| { |
{ |
| unsigned endtime[2], e0; |
struct_tms prev; |
| double t; |
times (&prev); |
| |
do |
| |
times (&start_times); |
| |
while (start_times.tms_utime == prev.tms_utime); |
| |
} |
| |
|
| speed_cyclecounter (endtime); |
|
| |
|
| /* This still works even if speed_cyclecounter() puts a value bigger than |
/* "have_" values are tested to let unused code go dead. */ |
| 32-bits in the low word. The start and end values are allowed to |
|
| cancel in uints in case a uint is more than the 53 bits that will |
|
| normally fit in a double. */ |
|
| e0 = endtime[0] - speed_starttime_save[0]; |
|
| t = e0 - (e0 > endtime[0] ? M_2POWU : 0); |
|
| t += (endtime[1] - speed_starttime_save[1]) * M_2POW32; |
|
| |
|
| return t * speed_unittime; |
void |
| } |
speed_starttime (void) |
| |
{ |
| |
speed_time_init (); |
| |
|
| #endif |
if (have_grus && use_grus) |
| |
{ |
| |
if (use_tick_boundary) |
| |
grus_tick_boundary (); |
| |
else |
| |
getrusage (0, &start_grus); |
| |
} |
| |
|
| |
if (have_gtod && use_gtod) |
| |
{ |
| |
if (use_tick_boundary) |
| |
gtod_tick_boundary (); |
| |
else |
| |
gettimeofday (&start_gtod, NULL); |
| |
} |
| |
|
| /* ---------------------------------------------------------------------- */ |
if (have_times && use_times) |
| #if SPEED_USE_MICROSECOND_GETRUSAGE |
{ |
| #include <sys/types.h> |
if (use_tick_boundary) |
| #include <sys/time.h> |
times_tick_boundary (); |
| #include <sys/resource.h> |
else |
| |
times (&start_times); |
| |
} |
| |
|
| const char *speed_time_string |
if (have_cgt && use_cgt) |
| = "Time measurements using microsecond accurate getrusage.\n"; |
clock_gettime (CGT_ID, &start_cgt); |
| |
|
| int speed_precision = 1000; |
if (have_rrt && use_rrt) |
| |
read_real_time (&start_rrt, sizeof(start_rrt)); |
| |
|
| double speed_unittime = 1.0e-6; |
if (have_sgi && use_sgi) |
| double speed_cycletime = 1.0; |
start_sgi = *sgi_addr; |
| |
|
| static struct rusage speed_starttime_save; |
if (have_stck && use_stck) |
| static int speed_time_initialized = 0; |
STCK (start_stck); |
| |
|
| void |
/* Cycles sampled last for maximum accuracy. */ |
| speed_time_init (void) |
if (have_cycles && use_cycles) |
| { |
speed_cyclecounter (start_cycles); |
| if (speed_time_initialized) |
|
| return; |
|
| speed_time_initialized = 1; |
|
| |
|
| speed_cycletime_init (); |
|
| } |
} |
| |
|
| void |
|
| speed_starttime (void) |
|
| { |
|
| if (!speed_time_initialized) |
|
| speed_time_init (); |
|
| |
|
| getrusage (0, &speed_starttime_save); |
/* Calculate the difference between two cycle counter samples, as a "double" |
| } |
counter of cycles. |
| |
|
| |
The start and end values are allowed to cancel in integers in case the |
| |
counter values are bigger than the 53 bits that normally fit in a double. |
| |
|
| |
This works even if speed_cyclecounter() puts a value bigger than 32-bits |
| |
in the low word (the high word always gets a 2**32 multiplier though). */ |
| |
|
| double |
double |
| speed_endtime (void) |
speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2]) |
| { |
{ |
| struct rusage r; |
unsigned d; |
| |
double t; |
| |
|
| getrusage (0, &r); |
if (have_cycles == 1) |
| return TIMEVAL_SECS (&r.ru_utime) |
{ |
| - TIMEVAL_SECS (&speed_starttime_save.ru_utime); |
t = (end[0] - start[0]); |
| |
} |
| |
else |
| |
{ |
| |
d = end[0] - start[0]; |
| |
t = d - (d > end[0] ? M_2POWU : 0.0); |
| |
t += (end[1] - start[1]) * M_2POW32; |
| |
} |
| |
return t; |
| } |
} |
| #endif |
|
| |
|
| |
|
| /* ---------------------------------------------------------------------- */ |
/* Calculate the difference between "start" and "end" using fields "sec" and |
| #if SPEED_USE_MICROSECOND_GETTIMEOFDAY |
"psec", where each "psec" is a "punit" of a second. |
| /* This method is for systems with a microsecond accurate gettimeofday(). |
|
| |
|
| A dummy timezone parameter is always given to gettimeofday(), in case it |
The seconds parts are allowed to cancel before being combined with the |
| doesn't allow NULL. */ |
psec parts, in case a simple "sec+psec*punit" exceeds the precision of a |
| |
double. |
| |
|
| #include <sys/time.h> |
Total time is only calculated in a "double" since an integer count of |
| |
psecs might overflow. 2^32 microseconds is only a bit over an hour, or |
| |
2^32 nanoseconds only about 4 seconds. |
| |
|
| const char *speed_time_string |
The casts to "long" are for the benefit of timebasestruct_t, where the |
| = "Time measurements using microsecond accurate gettimeofday.\n"; |
fields are only "unsigned int", but we want a signed difference. */ |
| |
|
| /* highish value because we have an accurate timer */ |
#define DIFF_SECS_ROUTINE(sec, psec, punit) \ |
| int speed_precision = 1000; |
{ \ |
| |
long sec_diff, psec_diff; \ |
| |
sec_diff = (long) end->sec - (long) start->sec; \ |
| |
psec_diff = (long) end->psec - (long) start->psec; \ |
| |
return (double) sec_diff + punit * (double) psec_diff; \ |
| |
} |
| |
|
| double speed_unittime = 1.0e-6; |
double |
| double speed_cycletime = 1.0; |
timeval_diff_secs (const struct_timeval *end, const struct_timeval *start) |
| |
|
| static struct timeval speed_starttime_save; |
|
| static int speed_time_initialized = 0; |
|
| |
|
| void |
|
| speed_time_init (void) |
|
| { |
{ |
| if (speed_time_initialized) |
DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6); |
| return; |
} |
| speed_time_initialized = 1; |
|
| |
|
| speed_cycletime_init (); |
double |
| |
rusage_diff_secs (const struct_rusage *end, const struct_rusage *start) |
| |
{ |
| |
DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6); |
| } |
} |
| |
|
| void |
double |
| speed_starttime (void) |
timespec_diff_secs (const struct_timespec *end, const struct_timespec *start) |
| { |
{ |
| struct timezone tz; |
DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9); |
| if (!speed_time_initialized) |
} |
| speed_time_init (); |
|
| |
|
| gettimeofday (&speed_starttime_save, &tz); |
/* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */ |
| |
double |
| |
timebasestruct_diff_secs (const timebasestruct_t *end, |
| |
const timebasestruct_t *start) |
| |
{ |
| |
DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9); |
| } |
} |
| |
|
| |
|
| double |
double |
| speed_endtime (void) |
speed_endtime (void) |
| { |
{ |
| struct timeval t; |
#define END_USE(name,value) \ |
| struct timezone tz; |
do { \ |
| |
if (speed_option_verbose >= 3) \ |
| |
printf ("speed_endtime(): used %s\n", name); \ |
| |
result = value; \ |
| |
goto done; \ |
| |
} while (0) |
| |
|
| gettimeofday (&t, &tz); |
#define END_ENOUGH(name,value) \ |
| return TIMEVAL_SECS (&t) - TIMEVAL_SECS (&speed_starttime_save); |
do { \ |
| } |
if (speed_option_verbose >= 3) \ |
| |
printf ("speed_endtime(): %s gives enough precision\n", name); \ |
| |
result = value; \ |
| |
goto done; \ |
| |
} while (0) |
| |
|
| #endif |
#define END_EXCEED(name,value) \ |
| |
do { \ |
| |
if (speed_option_verbose >= 3) \ |
| |
printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \ |
| |
name); \ |
| |
result = value; \ |
| |
goto done; \ |
| |
} while (0) |
| |
|
| |
unsigned end_cycles[2]; |
| |
stck_t end_stck; |
| |
unsigned end_sgi; |
| |
timebasestruct_t end_rrt; |
| |
struct_timespec end_cgt; |
| |
struct_timeval end_gtod; |
| |
struct_rusage end_grus; |
| |
struct_tms end_times; |
| |
double t_gtod, t_grus, t_times, t_cgt; |
| |
double t_rrt, t_sgi, t_stck, t_cycles; |
| |
double result; |
| |
|
| /* ---------------------------------------------------------------------- */ |
/* Cycles sampled first for maximum accuracy. |
| #if SPEED_USE_TMS_UTIME |
"have_" values tested to let unused code go dead. */ |
| /* You're in trouble if you have to use this method. Speed measurements and |
|
| threshold tuning are going to take a long time. */ |
|
| |
|
| #if STDC_HEADERS |
if (have_cycles && use_cycles) speed_cyclecounter (end_cycles); |
| #include <errno.h> /* for errno */ |
if (have_stck && use_stck) STCK (end_stck); |
| #include <string.h> /* for strerror */ |
if (have_sgi && use_sgi) end_sgi = *sgi_addr; |
| #endif |
if (have_rrt && use_rrt) read_real_time (&end_rrt, sizeof(end_rrt)); |
| #if HAVE_UNISTD_H |
if (have_cgt && use_cgt) clock_gettime (CGT_ID, &end_cgt); |
| #include <unistd.h> /* for sysconf */ |
if (have_gtod && use_gtod) gettimeofday (&end_gtod, NULL); |
| #endif |
if (have_grus && use_grus) getrusage (0, &end_grus); |
| #include <sys/times.h> /* for times */ |
if (have_times && use_times) times (&end_times); |
| |
|
| const char *speed_time_string |
result = -1.0; |
| = "Time measurements using tms_utime.\n"; |
|
| |
|
| |
if (speed_option_verbose >= 4) |
| |
{ |
| |
printf ("speed_endtime():\n"); |
| |
if (use_cycles) |
| |
printf (" cycles 0x%X,0x%X -> 0x%X,0x%X\n", |
| |
start_cycles[1], start_cycles[0], |
| |
end_cycles[1], end_cycles[0]); |
| |
|
| /* lowish default value so we don't take days and days to do tuning */ |
if (use_stck) |
| int speed_precision = 200; |
printf (" stck 0x%lX -> 0x%lX\n", start_stck, end_stck); |
| |
|
| double speed_unittime; |
if (use_sgi) |
| double speed_cycletime = 1.0; |
printf (" sgi 0x%X -> 0x%X\n", start_sgi, end_sgi); |
| |
|
| static struct tms speed_starttime_save; |
if (use_rrt) |
| static int speed_time_initialized = 0; |
printf (" read_real_time (%d)%u,%u -> (%d)%u,%u\n", |
| |
start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low, |
| |
end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low); |
| |
|
| void |
if (use_cgt) |
| speed_time_init (void) |
printf (" clock_gettime %ld.%09ld -> %ld.%09ld\n", |
| { |
start_cgt.tv_sec, start_cgt.tv_nsec, |
| long clk_tck; |
end_cgt.tv_sec, end_cgt.tv_nsec); |
| |
|
| if (speed_time_initialized) |
if (use_gtod) |
| return; |
printf (" gettimeofday %ld.%06ld -> %ld.%06ld\n", |
| speed_time_initialized = 1; |
start_gtod.tv_sec, start_gtod.tv_usec, |
| |
end_gtod.tv_sec, end_gtod.tv_usec); |
| |
|
| speed_cycletime_init (); |
if (use_grus) |
| |
printf (" getrusage %ld.%06ld -> %ld.%06ld\n", |
| |
start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec, |
| |
end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec); |
| |
|
| #if HAVE_SYSCONF |
if (use_times) |
| clk_tck = sysconf (_SC_CLK_TCK); |
printf (" times %ld -> %ld\n", |
| if (clk_tck == -1L) |
start_times.tms_utime, end_times.tms_utime); |
| |
} |
| |
|
| |
if (use_rrt) |
| { |
{ |
| fprintf (stderr, "sysconf(_SC_CLK_TCK) not available: %s\n", |
time_base_to_time (&start_rrt, sizeof(start_rrt)); |
| strerror(errno)); |
time_base_to_time (&end_rrt, sizeof(end_rrt)); |
| fprintf (stderr, "\tusing CLK_TCK instead\n"); |
t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt); |
| clk_tck = CLK_TCK; |
END_USE ("read_real_time()", t_rrt); |
| } |
} |
| #else |
|
| clk_tck = CLK_TCK; |
|
| #endif |
|
| |
|
| speed_unittime = 1.0 / (double) clk_tck; |
if (use_cgt) |
| } |
{ |
| |
t_cgt = timespec_diff_secs (&end_cgt, &start_cgt); |
| |
END_USE ("clock_gettime()", t_cgt); |
| |
} |
| |
|
| /* Burn up CPU until a times() tms_utime tick boundary. |
if (use_grus) |
| Doing so lets you know a measurement has started on a tick boundary, |
{ |
| effectively halving the uncertainty in the measurement. |
t_grus = rusage_diff_secs (&end_grus, &start_grus); |
| *t1 gets the start times() values the caller should use. */ |
|
| void |
|
| times_utime_boundary (struct tms *t1) |
|
| { |
|
| struct tms t2; |
|
| times (&t2); |
|
| do |
|
| times (t1); |
|
| while (t1->tms_utime == t2.tms_utime); |
|
| } |
|
| |
|
| void |
/* Use getrusage() if the cycle counter limit would be exceeded, or if |
| speed_starttime (void) |
it provides enough accuracy already. */ |
| { |
if (use_cycles) |
| if (!speed_time_initialized) |
{ |
| speed_time_init (); |
if (t_grus >= speed_precision*grus_unittime) |
| times_utime_boundary (&speed_starttime_save); |
END_ENOUGH ("getrusage()", t_grus); |
| } |
if (t_grus >= cycles_limit) |
| |
END_EXCEED ("getrusage()", t_grus); |
| |
} |
| |
} |
| |
|
| double |
if (use_times) |
| speed_endtime (void) |
{ |
| { |
t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime; |
| struct tms t; |
|
| times (&t); |
|
| return (t.tms_utime - speed_starttime_save.tms_utime) * speed_unittime; |
|
| } |
|
| |
|
| #endif |
/* Use times() if the cycle counter limit would be exceeded, or if |
| |
it provides enough accuracy already. */ |
| |
if (use_cycles) |
| |
{ |
| |
if (t_times >= speed_precision*times_unittime) |
| |
END_ENOUGH ("times()", t_times); |
| |
if (t_times >= cycles_limit) |
| |
END_EXCEED ("times()", t_times); |
| |
} |
| |
} |
| |
|
| |
if (use_gtod) |
| |
{ |
| |
t_gtod = timeval_diff_secs (&end_gtod, &start_gtod); |
| |
|
| |
/* Use gettimeofday() if it measured a value bigger than the cycle |
| |
counter can handle. */ |
| |
if (use_cycles) |
| |
{ |
| |
if (t_gtod >= cycles_limit) |
| |
END_EXCEED ("gettimeofday()", t_gtod); |
| |
} |
| |
} |
| |
|
| |
if (use_stck) |
| |
{ |
| |
t_stck = (end_stck - start_stck) * STCK_PERIOD; |
| |
END_USE ("stck", t_stck); |
| |
} |
| |
|
| |
if (use_sgi) |
| |
{ |
| |
t_sgi = (end_sgi - start_sgi) * sgi_unittime; |
| |
END_USE ("SGI hardware counter", t_sgi); |
| |
} |
| |
|
| |
if (use_cycles) |
| |
{ |
| |
t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles) |
| |
* speed_cycletime; |
| |
END_USE ("cycle counter", t_cycles); |
| |
} |
| |
|
| |
if (use_grus && getrusage_microseconds_p()) |
| |
END_USE ("getrusage()", t_grus); |
| |
|
| |
if (use_gtod && gettimeofday_microseconds_p()) |
| |
END_USE ("gettimeofday()", t_gtod); |
| |
|
| |
if (use_times) END_USE ("times()", t_times); |
| |
if (use_grus) END_USE ("getrusage()", t_grus); |
| |
if (use_gtod) END_USE ("gettimeofday()", t_gtod); |
| |
|
| |
fprintf (stderr, "speed_endtime(): oops, no time method available\n"); |
| |
abort (); |
| |
|
| |
done: |
| |
if (result < 0.0) |
| |
{ |
| |
fprintf (stderr, |
| |
"speed_endtime(): fatal error: negative time measured: %.9f\n", |
| |
result); |
| |
abort (); |
| |
} |
| |
return result; |
| |
} |