| version 1.1, 2000/09/09 14:13:19 |
version 1.1.1.2, 2003/08/25 16:06:37 |
|
|
| /* Shared speed subroutines. */ |
/* Shared speed subroutines. |
| |
|
| /* |
Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. |
| Copyright (C) 1999, 2000 Free Software Foundation, Inc. |
|
| |
|
| This file is part of the GNU MP Library. |
This file is part of the GNU MP Library. |
| |
|
| Line 18 License for more details. |
|
| Line 17 License for more details. |
|
| You should have received a copy of the GNU Lesser General Public License |
You should have received a copy of the GNU Lesser General Public License |
| along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
| the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
| MA 02111-1307, USA. |
MA 02111-1307, USA. */ |
| */ |
|
| |
|
| #include <errno.h> |
#include <errno.h> |
| #include <fcntl.h> |
#include <fcntl.h> |
| Line 36 MA 02111-1307, USA. |
|
| Line 34 MA 02111-1307, USA. |
|
| #include "gmp-impl.h" |
#include "gmp-impl.h" |
| #include "longlong.h" |
#include "longlong.h" |
| |
|
| |
#include "tests.h" |
| #include "speed.h" |
#include "speed.h" |
| |
|
| /* Change this to "#define TRACE(x) x" to get traces. */ |
|
| #define TRACE(x) |
|
| |
|
| |
int speed_option_addrs = 0; |
| |
int speed_option_verbose = 0; |
| |
|
| typedef int (*qsort_function_t) _PROTO ((const void *, const void *)); |
|
| |
|
| |
/* Provide __clz_tab even if it's not required, for the benefit of new code |
| |
being tested with many.pl. */ |
| |
#ifndef COUNT_LEADING_ZEROS_NEED_CLZ_TAB |
| |
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB |
| |
#include "mp_clz_tab.c" |
| |
#undef COUNT_LEADING_ZEROS_NEED_CLZ_TAB |
| |
#endif |
| |
|
| int speed_option_addrs = 0; |
|
| |
|
| |
|
| void |
void |
| pentium_wbinvd(void) |
pentium_wbinvd(void) |
| { |
{ |
| Line 90 pentium_wbinvd(void) |
|
| Line 93 pentium_wbinvd(void) |
|
| #endif |
#endif |
| } |
} |
| |
|
| static int |
|
| |
int |
| double_cmp_ptr (const double *p, const double *q) |
double_cmp_ptr (const double *p, const double *q) |
| { |
{ |
| if (*p > *q) return 1; |
if (*p > *q) return 1; |
| Line 125 speed_measure (double (*fun) _PROTO ((struct speed_par |
|
| Line 129 speed_measure (double (*fun) _PROTO ((struct speed_par |
|
| int i, j, e; |
int i, j, e; |
| double t[30]; |
double t[30]; |
| double t_unsorted[30]; |
double t_unsorted[30]; |
| |
double reps_d; |
| |
|
| /* Use dummy parameters if caller doesn't provide any. Only a few special |
/* Use dummy parameters if caller doesn't provide any. Only a few special |
| "fun"s will cope with this, speed_noop() is one. */ |
"fun"s will cope with this, speed_noop() is one. */ |
| Line 144 speed_measure (double (*fun) _PROTO ((struct speed_par |
|
| Line 149 speed_measure (double (*fun) _PROTO ((struct speed_par |
|
| s->dst_num = 0; |
s->dst_num = 0; |
| |
|
| t[i] = (*fun) (s); |
t[i] = (*fun) (s); |
| t_unsorted[i] = t[i]; |
|
| |
|
| TRACE (printf("size=%ld reps=%u r=%d attempt=%d %.9f\n", |
if (speed_option_verbose >= 3) |
| s->size, s->reps, s->r, i, t[i])); |
printf("size=%ld reps=%u r=%ld attempt=%d %.9f\n", |
| |
s->size, s->reps, s->r, i, t[i]); |
| |
|
| if (t[i] == -1.0) |
if (t[i] == -1.0) |
| return -1.0; |
return -1.0; |
| Line 156 speed_measure (double (*fun) _PROTO ((struct speed_par |
|
| Line 161 speed_measure (double (*fun) _PROTO ((struct speed_par |
|
| break; |
break; |
| |
|
| /* go to a value of reps to make t[i] >= precision */ |
/* go to a value of reps to make t[i] >= precision */ |
| s->reps = (unsigned) ceil (1.1 * s->reps |
reps_d = ceil (1.1 * s->reps |
| * speed_unittime * speed_precision |
* speed_unittime * speed_precision |
| / MAX (t[i], speed_unittime)); |
/ MAX (t[i], speed_unittime)); |
| |
if (reps_d > 2e9 || reps_d < 1.0) |
| |
{ |
| |
fprintf (stderr, "Fatal error: new reps bad: %.2f\n", reps_d); |
| |
fprintf (stderr, " (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\n", |
| |
s->reps, speed_unittime, speed_precision, t[i]); |
| |
abort (); |
| |
} |
| |
s->reps = (unsigned) reps_d; |
| } |
} |
| t[i] /= s->reps; |
t[i] /= s->reps; |
| |
t_unsorted[i] = t[i]; |
| |
|
| if (speed_precision == 0) |
if (speed_precision == 0) |
| return t[i]; |
return t[i]; |
| Line 184 speed_measure (double (*fun) _PROTO ((struct speed_par |
|
| Line 198 speed_measure (double (*fun) _PROTO ((struct speed_par |
|
| |
|
| fprintf (stderr, "speed_measure() could not get %d results within %.1f%%\n", |
fprintf (stderr, "speed_measure() could not get %d results within %.1f%%\n", |
| e, (TOLERANCE-1.0)*100.0); |
e, (TOLERANCE-1.0)*100.0); |
| fprintf (stderr, " %.12f is about 0.5%%\n", t[0]*(TOLERANCE-1.0)); |
fprintf (stderr, " unsorted sorted\n"); |
| |
fprintf (stderr, " %.12f %.12f is about 0.5%%\n", |
| |
t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0)); |
| for (i = 0; i < numberof (t); i++) |
for (i = 0; i < numberof (t); i++) |
| fprintf (stderr, " %.09f\n", t_unsorted[i]); |
fprintf (stderr, " %.09f %.09f\n", t_unsorted[i], t[i]); |
| |
|
| return -1.0; |
return -1.0; |
| } |
} |
| Line 317 speed_cache_fill (struct speed_params *s) |
|
| Line 333 speed_cache_fill (struct speed_params *s) |
|
| } |
} |
| |
|
| |
|
| /* Return p advanced to the next multiple of "align" bytes. "align" must be |
|
| a power of 2. Care is taken not to assume sizeof(int)==sizeof(pointer). |
|
| Using "unsigned long" avoids a warning on hpux. */ |
|
| void * |
|
| align_pointer (void *p, size_t align) |
|
| { |
|
| unsigned long d; |
|
| d = ((unsigned long) p) & (align-1); |
|
| d = (d != 0 ? align-d : 0); |
|
| return (void *) (((char *) p) + d); |
|
| } |
|
| |
|
| /* Note that memory allocated with this function can never be freed, because |
|
| the start address of the block allocated is discarded. */ |
|
| void * |
|
| _mp_allocate_func_aligned (size_t bytes, size_t align) |
|
| { |
|
| return align_pointer ((*_mp_allocate_func) (bytes + align-1), align); |
|
| } |
|
| |
|
| |
|
| void * |
|
| _mp_allocate_or_reallocate (void *ptr, size_t oldsize, size_t newsize) |
|
| { |
|
| if (ptr == NULL) |
|
| return (*_mp_allocate_func) (newsize); |
|
| else |
|
| return (*_mp_reallocate_func) (ptr, oldsize, newsize); |
|
| } |
|
| |
|
| |
|
| /* Adjust ptr to align to CACHE_LINE_SIZE bytes plus "align" limbs. ptr |
/* Adjust ptr to align to CACHE_LINE_SIZE bytes plus "align" limbs. ptr |
| needs to have room for up to CACHE_LINE_SIZE-4 extra bytes. */ |
needs to have room for up to CACHE_LINE_SIZE-4 extra bytes. */ |
| |
|
| Line 368 speed_tmp_alloc_adjust (void *ptr, mp_size_t align) |
|
| Line 353 speed_tmp_alloc_adjust (void *ptr, mp_size_t align) |
|
| } |
} |
| |
|
| |
|
| void |
|
| mpz_set_n (mpz_ptr z, mp_srcptr p, mp_size_t size) |
|
| { |
|
| ASSERT (size >= 0); |
|
| MPN_NORMALIZE (p, size); |
|
| MPZ_REALLOC (z, size); |
|
| MPN_COPY (PTR(z), p, size); |
|
| SIZ(z) = size; |
|
| } |
|
| |
|
| |
|
| /* Miscellaneous options accepted by tune and speed programs under -o. */ |
/* Miscellaneous options accepted by tune and speed programs under -o. */ |
| |
|
| void |
void |
| speed_option_set (const char *s) |
speed_option_set (const char *s) |
| { |
{ |
| if (strcmp (s, "addrs") == 0) speed_option_addrs = 1; |
int n; |
| |
|
| |
if (strcmp (s, "addrs") == 0) |
| |
{ |
| |
speed_option_addrs = 1; |
| |
} |
| |
else if (strcmp (s, "verbose") == 0) |
| |
{ |
| |
speed_option_verbose++; |
| |
} |
| |
else if (sscanf (s, "verbose=%d", &n) == 1) |
| |
{ |
| |
speed_option_verbose = n; |
| |
} |
| else |
else |
| { |
{ |
| printf ("Unrecognised -o option: %s\n", s); |
printf ("Unrecognised -o option: %s\n", s); |
| Line 439 speed_option_set (const char *s) |
|
| Line 426 speed_option_set (const char *s) |
|
| double |
double |
| speed_MPN_COPY (struct speed_params *s) |
speed_MPN_COPY (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_COPY_CALL (MPN_COPY (wp, s->xp, s->size)); |
SPEED_ROUTINE_MPN_COPY (MPN_COPY); |
| } |
} |
| double |
double |
| speed_MPN_COPY_INCR (struct speed_params *s) |
speed_MPN_COPY_INCR (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_COPY_CALL (MPN_COPY_INCR (wp, s->xp, s->size)); |
SPEED_ROUTINE_MPN_COPY (MPN_COPY_INCR); |
| } |
} |
| double |
double |
| speed_MPN_COPY_DECR (struct speed_params *s) |
speed_MPN_COPY_DECR (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_COPY_CALL (MPN_COPY_DECR (wp, s->xp, s->size)); |
SPEED_ROUTINE_MPN_COPY (MPN_COPY_DECR); |
| } |
} |
| |
#if HAVE_NATIVE_mpn_copyi |
| double |
double |
| |
speed_mpn_copyi (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_COPY (mpn_copyi); |
| |
} |
| |
#endif |
| |
#if HAVE_NATIVE_mpn_copyd |
| |
double |
| |
speed_mpn_copyd (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_COPY (mpn_copyd); |
| |
} |
| |
#endif |
| |
double |
| speed_memcpy (struct speed_params *s) |
speed_memcpy (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_COPY_CALL |
SPEED_ROUTINE_MPN_COPY_BYTES (memcpy); |
| (memcpy (wp, s->xp, s->size * BYTES_PER_MP_LIMB)); |
|
| } |
} |
| |
double |
| |
speed_mpn_com_n (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_COPY (mpn_com_n); |
| |
} |
| |
|
| |
|
| double |
double |
| Line 476 speed_mpn_mul_1 (struct speed_params *s) |
|
| Line 481 speed_mpn_mul_1 (struct speed_params *s) |
|
| { |
{ |
| SPEED_ROUTINE_MPN_UNARY_1 (mpn_mul_1); |
SPEED_ROUTINE_MPN_UNARY_1 (mpn_mul_1); |
| } |
} |
| |
double |
| |
speed_mpn_mul_1_inplace (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_mul_1); |
| |
} |
| |
|
| |
#if HAVE_NATIVE_mpn_mul_2 |
| |
double |
| |
speed_mpn_mul_2 (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_MUL_2 (mpn_mul_2); |
| |
} |
| |
#endif |
| |
|
| |
|
| double |
double |
| speed_mpn_lshift (struct speed_params *s) |
speed_mpn_lshift (struct speed_params *s) |
| { |
{ |
| Line 517 speed_mpn_divrem_1cf (struct speed_params *s) |
|
| Line 535 speed_mpn_divrem_1cf (struct speed_params *s) |
|
| #endif |
#endif |
| |
|
| double |
double |
| |
speed_mpn_divrem_1_div (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_div); |
| |
} |
| |
double |
| |
speed_mpn_divrem_1f_div (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_div); |
| |
} |
| |
double |
| |
speed_mpn_divrem_1_inv (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_inv); |
| |
} |
| |
double |
| |
speed_mpn_divrem_1f_inv (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_inv); |
| |
} |
| |
double |
| |
speed_mpn_mod_1_div (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_div); |
| |
} |
| |
double |
| |
speed_mpn_mod_1_inv (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_inv); |
| |
} |
| |
|
| |
double |
| |
speed_mpn_preinv_divrem_1 (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_PREINV_DIVREM_1 (mpn_preinv_divrem_1); |
| |
} |
| |
double |
| |
speed_mpn_preinv_divrem_1f (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_PREINV_DIVREM_1F (mpn_preinv_divrem_1); |
| |
} |
| |
|
| |
double |
| |
speed_mpn_mod_34lsub1 (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_MOD_34LSUB1 (mpn_mod_34lsub1); |
| |
} |
| |
|
| |
double |
| speed_mpn_divrem_2 (struct speed_params *s) |
speed_mpn_divrem_2 (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2); |
SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2); |
| } |
} |
| |
double |
| |
speed_mpn_divrem_2_div (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_div); |
| |
} |
| |
double |
| |
speed_mpn_divrem_2_inv (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_inv); |
| |
} |
| |
|
| double |
double |
| speed_mpn_mod_1 (struct speed_params *s) |
speed_mpn_mod_1 (struct speed_params *s) |
| Line 534 speed_mpn_mod_1c (struct speed_params *s) |
|
| Line 610 speed_mpn_mod_1c (struct speed_params *s) |
|
| SPEED_ROUTINE_MPN_MOD_1C (mpn_mod_1c); |
SPEED_ROUTINE_MPN_MOD_1C (mpn_mod_1c); |
| } |
} |
| #endif |
#endif |
| |
double |
| |
speed_mpn_preinv_mod_1 (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_PREINV_MOD_1 (mpn_preinv_mod_1); |
| |
} |
| |
|
| double |
double |
| |
speed_mpn_divexact_1 (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_DIVEXACT_1 (mpn_divexact_1); |
| |
} |
| |
|
| |
double |
| speed_mpn_divexact_by3 (struct speed_params *s) |
speed_mpn_divexact_by3 (struct speed_params *s) |
| { |
{ |
| /* mpn_divexact_by3 is a macro, so the _CALL form is necessary */ |
SPEED_ROUTINE_MPN_COPY (mpn_divexact_by3); |
| SPEED_ROUTINE_MPN_COPY_CALL(mpn_divexact_by3 (wp, s->xp, s->size)); |
|
| } |
} |
| |
|
| |
#if HAVE_NATIVE_mpn_modexact_1_odd |
| |
double |
| |
speed_mpn_modexact_1_odd (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_MODEXACT_1_ODD (mpn_modexact_1_odd); |
| |
} |
| |
#endif |
| |
|
| double |
double |
| speed_mpn_bz_divrem_n (struct speed_params *s) |
speed_mpn_modexact_1c_odd (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_BZ_DIVREM_N (mpn_bz_divrem_n); |
SPEED_ROUTINE_MPN_MODEXACT_1C_ODD (mpn_modexact_1c_odd); |
| } |
} |
| |
|
| |
|
| double |
double |
| speed_mpn_bz_divrem_sb (struct speed_params *s) |
speed_mpn_dc_tdiv_qr (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_BZ_DIVREM_SB (mpn_sb_divrem_mn); |
SPEED_ROUTINE_MPN_DC_TDIV_QR (mpn_tdiv_qr); |
| } |
} |
| double |
double |
| speed_mpn_bz_tdiv_qr (struct speed_params *s) |
speed_mpn_dc_divrem_n (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_BZ_TDIV_QR (mpn_tdiv_qr); |
SPEED_ROUTINE_MPN_DC_DIVREM_N (mpn_dc_divrem_n); |
| } |
} |
| |
double |
| |
speed_mpn_dc_divrem_sb (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn); |
| |
} |
| |
double |
| |
speed_mpn_dc_divrem_sb_div (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn_div); |
| |
} |
| |
double |
| |
speed_mpn_dc_divrem_sb_inv (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn_inv); |
| |
} |
| |
|
| |
double |
| |
speed_mpn_sb_divrem_m3 (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn); |
| |
} |
| |
double |
| |
speed_mpn_sb_divrem_m3_div (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn_div); |
| |
} |
| |
double |
| |
speed_mpn_sb_divrem_m3_inv (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn_inv); |
| |
} |
| |
|
| double |
double |
| |
speed_mpz_mod (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPZ_MOD (mpz_mod); |
| |
} |
| |
double |
| |
speed_redc (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_REDC (redc); |
| |
} |
| |
|
| |
|
| |
double |
| speed_mpn_popcount (struct speed_params *s) |
speed_mpn_popcount (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_POPCOUNT (mpn_popcount); |
SPEED_ROUTINE_MPN_POPCOUNT (mpn_popcount); |
| Line 582 speed_mpn_sub_n (struct speed_params *s) |
|
| Line 719 speed_mpn_sub_n (struct speed_params *s) |
|
| { |
{ |
| SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n); |
SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n); |
| } |
} |
| double |
|
| speed_mpn_add_n_self (struct speed_params *s) |
|
| { |
|
| SPEED_ROUTINE_MPN_BINARY_N_SELF (mpn_add_n); |
|
| } |
|
| double |
|
| speed_mpn_add_n_inplace (struct speed_params *s) |
|
| { |
|
| SPEED_ROUTINE_MPN_BINARY_N_INPLACE (mpn_add_n); |
|
| } |
|
| |
|
| |
|
| /* mpn_and_n etc can be macros and so have to be handled with |
/* mpn_and_n etc can be macros and so have to be handled with |
| Line 666 speed_mpn_sqr_basecase (struct speed_params *s) |
|
| Line 793 speed_mpn_sqr_basecase (struct speed_params *s) |
|
| SPEED_ROUTINE_MPN_SQR (mpn_sqr_basecase); |
SPEED_ROUTINE_MPN_SQR (mpn_sqr_basecase); |
| } |
} |
| |
|
| |
#if HAVE_NATIVE_mpn_sqr_diagonal |
| double |
double |
| |
speed_mpn_sqr_diagonal (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_SQR (mpn_sqr_diagonal); |
| |
} |
| |
#endif |
| |
|
| |
double |
| speed_mpn_kara_mul_n (struct speed_params *s) |
speed_mpn_kara_mul_n (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_KARA_MUL_N (mpn_kara_mul_n); |
SPEED_ROUTINE_MPN_KARA_MUL_N (mpn_kara_mul_n); |
| Line 689 speed_mpn_toom3_sqr_n (struct speed_params *s) |
|
| Line 824 speed_mpn_toom3_sqr_n (struct speed_params *s) |
|
| } |
} |
| |
|
| double |
double |
| |
speed_mpn_toom3_mul_n_mpn (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_TOOM3_MUL_N (mpn_toom3_mul_n_mpn); |
| |
} |
| |
double |
| |
speed_mpn_toom3_mul_n_open (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_TOOM3_MUL_N (mpn_toom3_mul_n_open); |
| |
} |
| |
double |
| |
speed_mpn_toom3_sqr_n_mpn (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_TOOM3_SQR_N (mpn_toom3_sqr_n_mpn); |
| |
} |
| |
double |
| |
speed_mpn_toom3_sqr_n_open (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_TOOM3_SQR_N (mpn_toom3_sqr_n_open); |
| |
} |
| |
|
| |
double |
| speed_mpn_mul_fft_full (struct speed_params *s) |
speed_mpn_mul_fft_full (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_MUL_N_CALL |
SPEED_ROUTINE_MPN_MUL_N_CALL |
| Line 764 speed_mpn_gcd (struct speed_params *s) |
|
| Line 920 speed_mpn_gcd (struct speed_params *s) |
|
| SPEED_ROUTINE_MPN_GCD (mpn_gcd); |
SPEED_ROUTINE_MPN_GCD (mpn_gcd); |
| } |
} |
| double |
double |
| |
speed_mpn_gcd_binary (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_GCD (mpn_gcd_binary); |
| |
} |
| |
|
| |
#if HAVE_NATIVE_mpn_gcd_finda |
| |
double |
| |
speed_mpn_gcd_finda (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_GCD_FINDA (mpn_gcd_finda); |
| |
} |
| |
#endif |
| |
|
| |
|
| |
double |
| speed_mpn_gcdext (struct speed_params *s) |
speed_mpn_gcdext (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext); |
SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext); |
| } |
} |
| double |
double |
| |
speed_mpn_gcdext_single (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_single); |
| |
} |
| |
double |
| |
speed_mpn_gcdext_double (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_double); |
| |
} |
| |
double |
| |
speed_mpn_gcdext_one_single (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_single); |
| |
} |
| |
double |
| |
speed_mpn_gcdext_one_double (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_double); |
| |
} |
| |
double |
| speed_mpn_gcd_1 (struct speed_params *s) |
speed_mpn_gcd_1 (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_GCD_1 (mpn_gcd_1); |
SPEED_ROUTINE_MPN_GCD_1 (mpn_gcd_1); |
| } |
} |
| |
double |
| |
speed_mpn_gcd_1N (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_GCD_1N (mpn_gcd_1); |
| |
} |
| |
|
| |
|
| double |
double |
| |
speed_mpz_jacobi (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPZ_JACOBI (mpz_jacobi); |
| |
} |
| |
double |
| speed_mpn_jacobi_base (struct speed_params *s) |
speed_mpn_jacobi_base (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base); |
SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base); |
| } |
} |
| |
double |
| |
speed_mpn_jacobi_base_1 (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_1); |
| |
} |
| |
double |
| |
speed_mpn_jacobi_base_2 (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_2); |
| |
} |
| |
double |
| |
speed_mpn_jacobi_base_3 (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_3); |
| |
} |
| |
|
| |
|
| double |
double |
| |
speed_mpn_sqrtrem (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_SQRTREM (mpn_sqrtrem); |
| |
} |
| |
|
| |
|
| |
double |
| speed_mpz_fac_ui (struct speed_params *s) |
speed_mpz_fac_ui (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPZ_UI (mpz_fac_ui); |
SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui); |
| } |
} |
| |
|
| |
|
| double |
double |
| |
speed_mpn_fib2_ui (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_FIB2_UI (mpn_fib2_ui); |
| |
} |
| |
double |
| speed_mpz_fib_ui (struct speed_params *s) |
speed_mpz_fib_ui (struct speed_params *s) |
| { |
{ |
| SPEED_ROUTINE_MPZ_UI (mpz_fib_ui); |
SPEED_ROUTINE_MPZ_FIB_UI (mpz_fib_ui); |
| } |
} |
| |
double |
| |
speed_mpz_fib2_ui (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPZ_FIB2_UI (mpz_fib2_ui); |
| |
} |
| |
double |
| |
speed_mpz_lucnum_ui (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPZ_LUCNUM_UI (mpz_lucnum_ui); |
| |
} |
| |
double |
| |
speed_mpz_lucnum2_ui (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPZ_LUCNUM2_UI (mpz_lucnum2_ui); |
| |
} |
| |
|
| |
|
| double |
double |
| Line 799 speed_mpz_powm (struct speed_params *s) |
|
| Line 1044 speed_mpz_powm (struct speed_params *s) |
|
| { |
{ |
| SPEED_ROUTINE_MPZ_POWM (mpz_powm); |
SPEED_ROUTINE_MPZ_POWM (mpz_powm); |
| } |
} |
| |
double |
| |
speed_mpz_powm_mod (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPZ_POWM (mpz_powm_mod); |
| |
} |
| |
double |
| |
speed_mpz_powm_redc (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPZ_POWM (mpz_powm_redc); |
| |
} |
| |
double |
| |
speed_mpz_powm_ui (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPZ_POWM_UI (mpz_powm_ui); |
| |
} |
| |
|
| |
|
| double |
double |
| Line 883 speed_noop_wxys (struct speed_params *s) |
|
| Line 1143 speed_noop_wxys (struct speed_params *s) |
|
| |
|
| |
|
| /* Compare these to see how much malloc/free costs and then how much |
/* Compare these to see how much malloc/free costs and then how much |
| _mp_default_allocate/free and mpz_init/clear add. mpz_init/clear or |
__gmp_default_allocate/free and mpz_init/clear add. mpz_init/clear or |
| mpq_init/clear will be doing a 1 limb allocate, so use that as the size |
mpq_init/clear will be doing a 1 limb allocate, so use that as the size |
| when including them in comparisons. */ |
when including them in comparisons. */ |
| |
|
| Line 907 speed_malloc_realloc_free (struct speed_params *s) |
|
| Line 1167 speed_malloc_realloc_free (struct speed_params *s) |
|
| } |
} |
| |
|
| double |
double |
| speed_mp_allocate_free (struct speed_params *s) |
speed_gmp_allocate_free (struct speed_params *s) |
| { |
{ |
| size_t bytes = s->size * BYTES_PER_MP_LIMB; |
size_t bytes = s->size * BYTES_PER_MP_LIMB; |
| SPEED_ROUTINE_ALLOC_FREE (void *p, |
SPEED_ROUTINE_ALLOC_FREE (void *p, |
| p = (*_mp_allocate_func) (bytes); |
p = (*__gmp_allocate_func) (bytes); |
| (*_mp_free_func) (p, bytes)); |
(*__gmp_free_func) (p, bytes)); |
| } |
} |
| |
|
| double |
double |
| speed_mp_allocate_reallocate_free (struct speed_params *s) |
speed_gmp_allocate_reallocate_free (struct speed_params *s) |
| { |
{ |
| size_t bytes = s->size * BYTES_PER_MP_LIMB; |
size_t bytes = s->size * BYTES_PER_MP_LIMB; |
| SPEED_ROUTINE_ALLOC_FREE |
SPEED_ROUTINE_ALLOC_FREE |
| (void *p, |
(void *p, |
| p = (*_mp_allocate_func) (BYTES_PER_MP_LIMB); |
p = (*__gmp_allocate_func) (BYTES_PER_MP_LIMB); |
| p = (*_mp_reallocate_func) (p, bytes, BYTES_PER_MP_LIMB); |
p = (*__gmp_reallocate_func) (p, bytes, BYTES_PER_MP_LIMB); |
| (*_mp_free_func) (p, bytes)); |
(*__gmp_free_func) (p, bytes)); |
| } |
} |
| |
|
| double |
double |
| Line 1058 speed_mpz_bin_uiui (struct speed_params *s) |
|
| Line 1318 speed_mpz_bin_uiui (struct speed_params *s) |
|
| h = s->xp[0]; \ |
h = s->xp[0]; \ |
| l = s->yp[0]; \ |
l = s->yp[0]; \ |
| \ |
\ |
| switch (s->r) { \ |
if (s->r == 1) \ |
| case 1: \ |
{ \ |
| speed_starttime (); \ |
speed_starttime (); \ |
| i = s->reps; \ |
i = s->reps; \ |
| do \ |
do \ |
| { |
{ |
| |
|
| #define SPEED_MACRO_UMUL_PPMM_B \ |
#define SPEED_MACRO_UMUL_PPMM_B \ |
| } \ |
} \ |
| while (--i != 0); \ |
while (--i != 0); \ |
| t = speed_endtime (); \ |
t = speed_endtime (); \ |
| break; \ |
} \ |
| \ |
else \ |
| default: \ |
{ \ |
| speed_starttime (); \ |
speed_starttime (); \ |
| i = s->reps; \ |
i = s->reps; \ |
| do \ |
do \ |
| { |
{ |
| |
|
| #define SPEED_MACRO_UMUL_PPMM_C \ |
#define SPEED_MACRO_UMUL_PPMM_C \ |
| } \ |
} \ |
| while (--i != 0); \ |
while (--i != 0); \ |
| t = speed_endtime (); \ |
t = speed_endtime (); \ |
| break; \ |
} \ |
| } \ |
|
| \ |
\ |
| /* stop the compiler optimizing away the whole calculation! */ \ |
/* stop the compiler optimizing away the whole calculation! */ \ |
| noop_1 (h); \ |
noop_1 (h); \ |
| Line 1202 speed_mpn_umul_ppmm (struct speed_params *s) |
|
| Line 1461 speed_mpn_umul_ppmm (struct speed_params *s) |
|
| /* divisor from "r" parameter, or a default */ \ |
/* divisor from "r" parameter, or a default */ \ |
| d = s->r; \ |
d = s->r; \ |
| if (d == 0) \ |
if (d == 0) \ |
| d = 0x12345678; \ |
d = __mp_bases[10].big_base; \ |
| \ |
\ |
| if (normalize) \ |
if (normalize) \ |
| { \ |
{ \ |
| Line 1289 speed_udiv_qrnnd_preinv2norm (struct speed_params *s) |
|
| Line 1548 speed_udiv_qrnnd_preinv2norm (struct speed_params *s) |
|
| SPEED_ROUTINE_UDIV_QRNND_B; |
SPEED_ROUTINE_UDIV_QRNND_B; |
| } |
} |
| |
|
| |
double |
| |
speed_udiv_qrnnd_c (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_UDIV_QRNND_A (1); |
| |
{ |
| |
__udiv_qrnnd_c (q, r, r, q, d); |
| |
__udiv_qrnnd_c (q, r, r, q, d); |
| |
__udiv_qrnnd_c (q, r, r, q, d); |
| |
__udiv_qrnnd_c (q, r, r, q, d); |
| |
__udiv_qrnnd_c (q, r, r, q, d); |
| |
__udiv_qrnnd_c (q, r, r, q, d); |
| |
__udiv_qrnnd_c (q, r, r, q, d); |
| |
__udiv_qrnnd_c (q, r, r, q, d); |
| |
__udiv_qrnnd_c (q, r, r, q, d); |
| |
__udiv_qrnnd_c (q, r, r, q, d); |
| |
} |
| |
SPEED_ROUTINE_UDIV_QRNND_B; |
| |
} |
| |
|
| #if HAVE_NATIVE_mpn_udiv_qrnnd |
#if HAVE_NATIVE_mpn_udiv_qrnnd |
| |
|
| #if defined (__hppa) && W_TYPE_SIZE == 64 |
#if defined (__hppa) && W_TYPE_SIZE == 64 |
| Line 1300 speed_udiv_qrnnd_preinv2norm (struct speed_params *s) |
|
| Line 1578 speed_udiv_qrnnd_preinv2norm (struct speed_params *s) |
|
| double |
double |
| speed_mpn_udiv_qrnnd (struct speed_params *s) |
speed_mpn_udiv_qrnnd (struct speed_params *s) |
| { |
{ |
| |
|
| SPEED_ROUTINE_UDIV_QRNND_A (1); |
SPEED_ROUTINE_UDIV_QRNND_A (1); |
| { |
{ |
| CALL_MPN_UDIV_QRNND; |
CALL_MPN_UDIV_QRNND; |
| Line 1317 speed_mpn_udiv_qrnnd (struct speed_params *s) |
|
| Line 1594 speed_mpn_udiv_qrnnd (struct speed_params *s) |
|
| SPEED_ROUTINE_UDIV_QRNND_B; |
SPEED_ROUTINE_UDIV_QRNND_B; |
| } |
} |
| #endif |
#endif |
| |
|
| |
|
| |
double |
| |
speed_invert_limb (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_INVERT_LIMB_CALL (invert_limb (dinv, d)); |
| |
} |
| |
|
| |
|
| |
/* xp[0] might not be particularly random, but should give an indication of how |
| |
"/" runs. Same for speed_operator_mod below. */ |
| |
double |
| |
speed_operator_div (struct speed_params *s) |
| |
{ |
| |
double t; |
| |
unsigned i; |
| |
mp_limb_t x, q, d; |
| |
|
| |
s->time_divisor = 10; |
| |
|
| |
/* divisor from "r" parameter, or a default */ |
| |
d = s->r; |
| |
if (d == 0) |
| |
d = __mp_bases[10].big_base; |
| |
|
| |
x = s->xp[0]; |
| |
q = 0; |
| |
|
| |
speed_starttime (); |
| |
i = s->reps; |
| |
do |
| |
{ |
| |
q ^= x; q /= d; |
| |
q ^= x; q /= d; |
| |
q ^= x; q /= d; |
| |
q ^= x; q /= d; |
| |
q ^= x; q /= d; |
| |
q ^= x; q /= d; |
| |
q ^= x; q /= d; |
| |
q ^= x; q /= d; |
| |
q ^= x; q /= d; |
| |
q ^= x; q /= d; |
| |
} |
| |
while (--i != 0); |
| |
t = speed_endtime (); |
| |
|
| |
/* stop the compiler optimizing away the whole calculation! */ |
| |
noop_1 (q); |
| |
|
| |
return t; |
| |
} |
| |
|
| |
double |
| |
speed_operator_mod (struct speed_params *s) |
| |
{ |
| |
double t; |
| |
unsigned i; |
| |
mp_limb_t x, r, d; |
| |
|
| |
s->time_divisor = 10; |
| |
|
| |
/* divisor from "r" parameter, or a default */ |
| |
d = s->r; |
| |
if (d == 0) |
| |
d = __mp_bases[10].big_base; |
| |
|
| |
x = s->xp[0]; |
| |
r = 0; |
| |
|
| |
speed_starttime (); |
| |
i = s->reps; |
| |
do |
| |
{ |
| |
r ^= x; r %= d; |
| |
r ^= x; r %= d; |
| |
r ^= x; r %= d; |
| |
r ^= x; r %= d; |
| |
r ^= x; r %= d; |
| |
r ^= x; r %= d; |
| |
r ^= x; r %= d; |
| |
r ^= x; r %= d; |
| |
r ^= x; r %= d; |
| |
r ^= x; r %= d; |
| |
} |
| |
while (--i != 0); |
| |
t = speed_endtime (); |
| |
|
| |
/* stop the compiler optimizing away the whole calculation! */ |
| |
noop_1 (r); |
| |
|
| |
return t; |
| |
} |
| |
|
| |
|
| |
/* r==0 measures on data with the values uniformly distributed. This will |
| |
be typical for count_trailing_zeros in a GCD etc. |
| |
|
| |
r==1 measures on data with the resultant count uniformly distributed |
| |
between 0 and BITS_PER_MP_LIMB-1. This is probably sensible for |
| |
count_leading_zeros on the high limbs of divisors. */ |
| |
|
| |
int |
| |
speed_routine_count_zeros_setup (struct speed_params *s, |
| |
mp_ptr xp, int leading, int zero) |
| |
{ |
| |
int i, c; |
| |
mp_limb_t n; |
| |
|
| |
if (s->r == 0) |
| |
{ |
| |
/* Make uniformly distributed data. If zero isn't allowed then change |
| |
it to 1 for leading, or 0x800..00 for trailing. */ |
| |
MPN_COPY (xp, s->xp_block, SPEED_BLOCK_SIZE); |
| |
if (! zero) |
| |
for (i = 0; i < SPEED_BLOCK_SIZE; i++) |
| |
if (xp[i] == 0) |
| |
xp[i] = leading ? 1 : GMP_LIMB_HIGHBIT; |
| |
} |
| |
else if (s->r == 1) |
| |
{ |
| |
/* Make counts uniformly distributed. A randomly chosen bit is set, and |
| |
for leading the rest above it are cleared, or for trailing then the |
| |
rest below. */ |
| |
for (i = 0; i < SPEED_BLOCK_SIZE; i++) |
| |
{ |
| |
mp_limb_t set = CNST_LIMB(1) << (s->yp_block[i] % BITS_PER_MP_LIMB); |
| |
mp_limb_t keep_below = set-1; |
| |
mp_limb_t keep_above = MP_LIMB_T_MAX ^ keep_below; |
| |
mp_limb_t keep = (leading ? keep_below : keep_above); |
| |
xp[i] = (s->xp_block[i] & keep) | set; |
| |
} |
| |
} |
| |
else |
| |
{ |
| |
return 0; |
| |
} |
| |
|
| |
/* Account for the effect of n^=c. */ |
| |
c = 0; |
| |
for (i = 0; i < SPEED_BLOCK_SIZE; i++) |
| |
{ |
| |
n = xp[i]; |
| |
xp[i] ^= c; |
| |
|
| |
if (leading) |
| |
count_leading_zeros (c, n); |
| |
else |
| |
count_trailing_zeros (c, n); |
| |
} |
| |
|
| |
return 1; |
| |
} |
| |
|
| |
double |
| |
speed_count_leading_zeros (struct speed_params *s) |
| |
{ |
| |
#ifdef COUNT_LEADING_ZEROS_0 |
| |
#define COUNT_LEADING_ZEROS_0_ALLOWED 1 |
| |
#else |
| |
#define COUNT_LEADING_ZEROS_0_ALLOWED 0 |
| |
#endif |
| |
|
| |
SPEED_ROUTINE_COUNT_ZEROS_A (1, COUNT_LEADING_ZEROS_0_ALLOWED); |
| |
count_leading_zeros (c, n); |
| |
SPEED_ROUTINE_COUNT_ZEROS_B (); |
| |
} |
| |
double |
| |
speed_count_trailing_zeros (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_COUNT_ZEROS_A (0, 0); |
| |
count_trailing_zeros (c, n); |
| |
SPEED_ROUTINE_COUNT_ZEROS_B (); |
| |
} |
| |
|
| |
|
| |
double |
| |
speed_mpn_get_str (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_GET_STR (mpn_get_str); |
| |
} |
| |
|
| |
double |
| |
speed_mpn_set_str (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_SET_STR (mpn_set_str); |
| |
} |
| |
double |
| |
speed_mpn_set_str_basecase (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_SET_STR (mpn_set_str_basecase); |
| |
} |
| |
double |
| |
speed_mpn_set_str_subquad (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_SET_STR (mpn_set_str_subquad); |
| |
} |
| |
|
| |
|
| |
double |
| |
speed_MPN_ZERO (struct speed_params *s) |
| |
{ |
| |
SPEED_ROUTINE_MPN_ZERO_CALL (MPN_ZERO (wp, s->size)); |
| |
} |