=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/x86/k7/mmx/Attic/popham.asm,v retrieving revision 1.1.1.2 retrieving revision 1.1.1.3 diff -u -p -r1.1.1.2 -r1.1.1.3 --- OpenXM_contrib/gmp/mpn/x86/k7/mmx/Attic/popham.asm 2000/12/01 05:45:02 1.1.1.2 +++ OpenXM_contrib/gmp/mpn/x86/k7/mmx/Attic/popham.asm 2003/08/25 16:06:29 1.1.1.3 @@ -1,10 +1,7 @@ dnl AMD K7 mpn_popcount, mpn_hamdist -- population count and hamming dnl distance. -dnl -dnl K7: popcount 5.0 cycles/limb, hamdist 6.0 cycles/limb - -dnl Copyright (C) 2000 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -23,36 +20,12 @@ dnl License along with the GNU MP Library; see the fi dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. - include(`../config.m4') -dnl Only recent versions of gas know psadbw, in particular gas 2.9.1 on -dnl FreeBSD 3.3 and 3.4 doesn't recognise it. +C K7: popcount 5.0 cycles/limb, hamdist 6.0 cycles/limb -define(psadbw_mm4_mm0, -`ifelse(m4_ifdef_anyof_p(`HAVE_TARGET_CPU_athlon', - `HAVE_TARGET_CPU_pentium3'),1, - `.byte 0x0f,0xf6,0xc4 C psadbw %mm4, %mm0', -`m4_warning(`warning, using simulated and only partly functional psadbw, use for testing only -') C this works enough for the sum of bytes done below, making it - C possible to test on an older cpu - leal -8(%esp), %esp - movq %mm4, (%esp) - movq %mm0, %mm4 -forloop(i,1,7, -` psrlq $ 8, %mm4 - paddb %mm4, %mm0 -') - pushl $ 0 - pushl $ 0xFF - pand (%esp), %mm0 - movq 8(%esp), %mm4 - leal 16(%esp), %esp -')') - - C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size); C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size); C @@ -96,35 +69,29 @@ MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist) ifdef(`PIC',,` dnl non-PIC - DATA + RODATA ALIGN(8) -define(LS, -m4_assert_numargs(1) -`LF(M4_function,`$1')') - -LS(rodata_AAAAAAAAAAAAAAAA): +L(rodata_AAAAAAAAAAAAAAAA): .long 0xAAAAAAAA .long 0xAAAAAAAA -LS(rodata_3333333333333333): +L(rodata_3333333333333333): .long 0x33333333 .long 0x33333333 -LS(rodata_0F0F0F0F0F0F0F0F): +L(rodata_0F0F0F0F0F0F0F0F): .long 0x0F0F0F0F .long 0x0F0F0F0F ') - .text + TEXT ALIGN(32) PROLOGUE(M4_function) deflit(`FRAME',0) movl PARAM_SIZE, %ecx - orl %ecx, %ecx - jz L(zero) ifdef(`PIC',` movl $0xAAAAAAAA, %eax @@ -144,9 +111,9 @@ ifdef(`PIC',` punpckldq %mm5, %mm5 ',` - movq LS(rodata_AAAAAAAAAAAAAAAA), %mm7 - movq LS(rodata_3333333333333333), %mm6 - movq LS(rodata_0F0F0F0F0F0F0F0F), %mm5 + movq L(rodata_AAAAAAAAAAAAAAAA), %mm7 + movq L(rodata_3333333333333333), %mm6 + movq L(rodata_0F0F0F0F0F0F0F0F), %mm5 ') pxor %mm4, %mm4 @@ -166,7 +133,7 @@ HAM(` movl PARAM_SRC2, %edx') movd (%eax,%ecx,8), %mm1 -HAM(` movd 0(%edx,%ecx,8), %mm0 +HAM(` movd (%edx,%ecx,8), %mm0 pxor %mm0, %mm1 ') orl %ecx, %ecx @@ -221,7 +188,7 @@ L(loaded): paddd %mm1, %mm0 C bytes - psadbw_mm4_mm0 + psadbw( %mm4, %mm0) paddd %mm0, %mm2 C add to total jnz L(top) @@ -229,11 +196,6 @@ L(loaded): movd %mm2, %eax emms - ret - - -L(zero): - movl $0, %eax ret EPILOGUE()