diff options
| author | scuri <scuri> | 2008-10-17 06:10:15 +0000 | 
|---|---|---|
| committer | scuri <scuri> | 2008-10-17 06:10:15 +0000 | 
| commit | 5a422aba704c375a307a902bafe658342e209906 (patch) | |
| tree | 5005011e086bb863d8fb587ad3319bbec59b2447 /src/fftw3/kernel | |
First commit - moving from LuaForge to SourceForge
Diffstat (limited to 'src/fftw3/kernel')
40 files changed, 5400 insertions, 0 deletions
diff --git a/src/fftw3/kernel/align.c b/src/fftw3/kernel/align.c new file mode 100644 index 0000000..7f475d5 --- /dev/null +++ b/src/fftw3/kernel/align.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: align.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +#if HAVE_3DNOW +#  define ALGN 8 +#elif HAVE_SIMD +#  define ALGN 16 +#elif HAVE_K7 +#  define ALGN 8 +#else +#  define ALGN (sizeof(R)) +#endif + +/* NONPORTABLE */ +int X(alignment_of)(R *p) +{ +     return (int)(((uintptr_t) p) % ALGN); +} diff --git a/src/fftw3/kernel/alloc.c b/src/fftw3/kernel/alloc.c new file mode 100644 index 0000000..a95e0e8 --- /dev/null +++ b/src/fftw3/kernel/alloc.c @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: alloc.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +#if defined(HAVE_DECL_MEMALIGN) && !HAVE_DECL_MEMALIGN +#  if defined(HAVE_MALLOC_H) +#    include <malloc.h> +#  else +extern void *memalign(size_t, size_t); +#  endif +#endif + +#if defined(HAVE_DECL_POSIX_MEMALIGN) && !HAVE_DECL_POSIX_MEMALIGN +extern int posix_memalign(void **, size_t, size_t); +#endif + +#if defined(macintosh) /* MacOS 9 */ +#  include <Multiprocessing.h> +#endif + +#define real_malloc X(malloc) +#define real_free free /* memalign and malloc use ordinary free */ + +#if defined(WITH_OUR_MALLOC16) && (MIN_ALIGNMENT == 16) +/* Our own 16-byte aligned malloc/free.  Assumes sizeof(void*) is a +   power of two <= 8 and that malloc is at least sizeof(void*)-aligned. + +   The main reason for this routine is that, as of this writing, +   Windows does not include any aligned allocation routines in its +   system libraries, and instead provides an implementation with a +   Visual C++ "Processor Pack" that you have to statically link into +   your program.  We do not want to require users to have VC++ +   (e.g. gcc/MinGW should be fine).  Our code should be at least as good +   as the MS _aligned_malloc, in any case, according to second-hand +   reports of the algorithm it employs (also based on plain malloc). */ +static void *our_malloc16(size_t n) +{ +     void *p0, *p; +     if (!(p0 = malloc(n + 16))) return (void *) 0; +     p = (void *) (((uintptr_t) p0 + 16) & (~((uintptr_t) 15))); +     *((void **) p - 1) = p0; +     return p; +} +static void our_free16(void *p) +{ +     if (p) free(*((void **) p - 1)); +} +#endif + +/* part of user-callable API */ +void *X(malloc)(size_t n) +{ +     void *p; + +#if defined(MIN_ALIGNMENT) + +#  if defined(WITH_OUR_MALLOC16) && (MIN_ALIGNMENT == 16) +     p = our_malloc16(n); +#    undef real_free +#    define real_free our_free16 + +#  elif defined(HAVE_MEMALIGN) +     p = memalign(MIN_ALIGNMENT, n); + +#  elif defined(HAVE_POSIX_MEMALIGN) +     /* note: posix_memalign is broken in glibc 2.2.5: it constrains +	the size, not the alignment, to be (power of two) * sizeof(void*). +        The bug seems to have been fixed as of glibc 2.3.1. */ +     if (posix_memalign(&p, MIN_ALIGNMENT, n)) +	  p = (void*) 0; + +#  elif defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC) +     /* Intel's C compiler defines _mm_malloc and _mm_free intrinsics */ +     p = (void *) _mm_malloc(n, MIN_ALIGNMENT); +#    undef real_free +#    define real_free _mm_free + +#  elif defined(_MSC_VER) +     /* MS Visual C++ 6.0 with a "Processor Pack" supports SIMD +	and _aligned_malloc/free (uses malloc.h) */ +     p = (void *) _aligned_malloc(n, MIN_ALIGNMENT); +#    undef real_free +#    define real_free _aligned_free + +#  elif (defined(__MACOSX__) || defined(__APPLE__)) && (MIN_ALIGNMENT <= 16) +     /* MacOS X malloc is already 16-byte aligned */ +     p = malloc(n); + +#  elif defined(macintosh) /* MacOS 9 */ +     p = (void *) MPAllocateAligned(n, +#    if MIN_ALIGNMENT == 8 +				    kMPAllocate8ByteAligned, +#    elif MIN_ALIGNMENT == 16 +				    kMPAllocate16ByteAligned, +#    elif MIN_ALIGNMENT == 32 +				    kMPAllocate32ByteAligned, +#    else +#      error "Unknown alignment for MPAllocateAligned" +#    endif +				    0); +#    undef real_free +#    define real_free MPFree + +#  else +     /* Add your machine here and send a patch to fftw@fftw.org  +        or (e.g. for Windows) configure --with-our-malloc16 */ +#    error "Don't know how to malloc() aligned memory." +#  endif + +#else /* !defined(MIN_ALIGMENT) */ +     p = malloc(n); +#endif + +     return p; +} + +/* part of user-callable API */ +void X(free)(void *p) +{ +     real_free(p); +} + +/********************************************************** + *   DEBUGGING CODE + **********************************************************/ +#if defined(FFTW_DEBUG_MALLOC) + +#include <stdio.h> + +/* +  debugging malloc/free.  +  +  1) Initialize every malloced and freed area to random values, just +  to make sure we are not using uninitialized pointers. +  +  2) check for blocks freed twice. +  +  3) Check for writes past the ends of allocated blocks +  +  4) destroy contents of freed blocks in order to detect incorrect reuse. +  +  5) keep track of who allocates what and report memory leaks +  +  This code is a quick and dirty hack.  May be nonportable.  +  Use at your own risk. +  +*/ + +#define MAGIC ((size_t)0xABadCafe) +#define PAD_FACTOR 2 +#define SZ_HEADER (4 * sizeof(size_t)) +#define HASHSZ 1031 + +static unsigned int hashaddr(void *p) +{ +     return ((unsigned long)p) % HASHSZ; +} + +struct mstat { +     int siz; +     int maxsiz; +     int cnt; +     int maxcnt; +}; + +static struct mstat mstat[MALLOC_WHAT_LAST]; + +struct minfo { +     const char *file; +     int line; +     size_t n; +     void *p; +     struct minfo *next; +}; + +static struct minfo *minfo[HASHSZ] = {0}; + +#ifdef HAVE_THREADS +int X(in_thread) = 0; +#endif + +void *X(malloc_debug)(size_t n, enum malloc_tag what, +                      const char *file, int line) +{ +     char *p; +     size_t i; +     struct minfo *info; +     struct mstat *stat = mstat + what; +     struct mstat *estat = mstat + EVERYTHING; + +     if (n == 0) +          n = 1; + +     if (!IN_THREAD) { +	  stat->siz += n; +	  if (stat->siz > stat->maxsiz) +	       stat->maxsiz = stat->siz; +	  estat->siz += n; +	  if (estat->siz > estat->maxsiz) +	       estat->maxsiz = estat->siz; +     } + +     p = (char *) real_malloc(PAD_FACTOR * n + SZ_HEADER); +     A(p); + +     /* store the sz in a known position */ +     ((size_t *) p)[0] = n; +     ((size_t *) p)[1] = MAGIC; +     ((size_t *) p)[2] = what; + +     /* fill with junk */ +     for (i = 0; i < PAD_FACTOR * n; i++) +          p[i + SZ_HEADER] = (char) (i ^ 0xEF); + +     if (!IN_THREAD) { +	  ++stat->cnt; +	  ++estat->cnt; +	   +	  if (stat->cnt > stat->maxcnt) +	       stat->maxcnt = stat->cnt; +	  if (estat->cnt > estat->maxcnt) +	       estat->maxcnt = estat->cnt; +     } + +     /* skip the info we stored previously */ +     p = p + SZ_HEADER; + +     if (!IN_THREAD) { +	  unsigned int h = hashaddr(p); +	  /* record allocation in allocation list */ +	  info = (struct minfo *) malloc(sizeof(struct minfo)); +	  info->n = n; +	  info->file = file; +	  info->line = line; +	  info->p = p; +	  info->next = minfo[h]; +	  minfo[h] = info; +     } + +     return (void *) p; +} + +void X(ifree)(void *p) +{ +     char *q; + +     A(p); + +     q = ((char *) p) - SZ_HEADER; +     A(q); + +     { +          size_t n = ((size_t *) q)[0]; +          size_t magic = ((size_t *) q)[1]; +          int what = ((size_t *) q)[2]; +          size_t i; +          struct mstat *stat = mstat + what; +          struct mstat *estat = mstat + EVERYTHING; + +          /* set to zero to detect duplicate free's */ +          ((size_t *) q)[0] = 0; + +          A(magic == MAGIC); +          ((size_t *) q)[1] = ~MAGIC; + +	  if (!IN_THREAD) { +	       stat->siz -= n; +	       A(stat->siz >= 0); +	       estat->siz -= n; +	       A(estat->siz >= 0); +	  } + +          /* check for writing past end of array: */ +          for (i = n; i < PAD_FACTOR * n; ++i) +               if (q[i + SZ_HEADER] != (char) (i ^ 0xEF)) { +                    A(0 /* array bounds overwritten */ ); +               } +          for (i = 0; i < PAD_FACTOR * n; ++i) +               q[i + SZ_HEADER] = (char) (i ^ 0xAD); + +	  if (!IN_THREAD) { +	       --stat->cnt; +	       --estat->cnt; +	        +	       A(stat->cnt >= 0); +	       A((stat->cnt == 0 && stat->siz == 0) || +		 (stat->cnt > 0 && stat->siz > 0)); +	       A(estat->cnt >= 0); +	       A((estat->cnt == 0 && estat->siz == 0) || +		 (estat->cnt > 0 && estat->siz > 0)); +	  } + +          real_free(q); +     } + +     if (!IN_THREAD) { +          /* delete minfo entry */ +	  unsigned int h = hashaddr(p); +          struct minfo **i; + +          for (i = minfo + h; *i; i = &((*i)->next)) { +               if ((*i)->p == p) { +                    struct minfo *i0 = (*i)->next; +                    free(*i); +                    *i = i0; +                    return; +               } +          } + +          A(0 /* no entry in minfo list */ ); +     } +} + +void X(malloc_print_minfo)(int verbose) +{ +     struct minfo *info; +     int what; +     unsigned int h; + +     if (verbose) { +	  static const char *names[MALLOC_WHAT_LAST] = { +	       "EVERYTHING", +	       "PLANS", "SOLVERS", "PROBLEMS", "BUFFERS", +	       "HASHT", "TENSORS", "PLANNERS", "SLVDSC", "TWIDDLES", +	       "STRIDES", "OTHER" +	  }; + +	  printf("%12s %8s %8s %10s %10s\n", +		 "what", "cnt", "maxcnt", "siz", "maxsiz"); + +	  for (what = 0; what < MALLOC_WHAT_LAST; ++what) { +	       struct mstat *stat = mstat + what; +	       printf("%12s %8d %8d %10d %10d\n", +		      names[what], stat->cnt, stat->maxcnt, +		      stat->siz, stat->maxsiz); +	  } +     } + +     for (h = 0; h < HASHSZ; ++h)  +	  if (minfo[h]) { +	       printf("\nUnfreed allocations:\n"); +	       break; +	  } + +     for (h = 0; h < HASHSZ; ++h)  +	  for (info = minfo[h]; info; info = info->next) { +	       printf("%s:%d:  %d bytes at %p\n", +		      info->file, info->line, info->n, info->p); +	  } +} + +#else +/********************************************************** + *   NON DEBUGGING CODE + **********************************************************/ +/* production version, no hacks */ + +void *X(malloc_plain)(size_t n) +{ +     void *p; +     if (n == 0) +          n = 1; +     p = real_malloc(n); +     CK(p); + +#ifdef MIN_ALIGMENT +     A((((uintptr_t)p) % MIN_ALIGNMENT) == 0); +#endif + +     return p; +} + +void X(ifree)(void *p) +{ +     real_free(p); +} + +#endif + +void X(ifree0)(void *p) +{ +     /* common pattern */ +     if (p) X(ifree)(p); +} diff --git a/src/fftw3/kernel/assert.c b/src/fftw3/kernel/assert.c new file mode 100644 index 0000000..a066db0 --- /dev/null +++ b/src/fftw3/kernel/assert.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: assert.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ +#include "ifftw.h" +#include <stdio.h> +#include <stdlib.h> + +void X(assertion_failed)(const char *s, int line, const char *file) +{ +     fflush(stdout); +     fprintf(stderr, "fftw: %s:%d: assertion failed: %s\n", file, line, s); +     exit(EXIT_FAILURE); +} diff --git a/src/fftw3/kernel/awake.c b/src/fftw3/kernel/awake.c new file mode 100644 index 0000000..7ee716f --- /dev/null +++ b/src/fftw3/kernel/awake.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: awake.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +void X(null_awake)(plan *ego, int awake) +{ +     UNUSED(ego); +     UNUSED(awake); +     /* do nothing */ +} diff --git a/src/fftw3/kernel/cycle.h b/src/fftw3/kernel/cycle.h new file mode 100644 index 0000000..a324c2a --- /dev/null +++ b/src/fftw3/kernel/cycle.h @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* $Id: cycle.h,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +/* machine-dependent cycle counters code. Needs to be inlined. */ + +/***************************************************************************/ +/* To use the cycle counters in your code, simply #include "cycle.h" (this +   file), and then use the functions/macros: + +                 ticks getticks(void); + +   ticks is an opaque typedef defined below, representing the current time. +   You extract the elapsed time between two calls to gettick() via: + +                 double elapsed(ticks t1, ticks t0); + +   which returns a double-precision variable in arbitrary units.  You +   are not expected to convert this into human units like seconds; it +   is intended only for *comparisons* of time intervals. + +   (In order to use some of the OS-dependent timer routines like +   Solaris' gethrtime, you need to paste the autoconf snippet below +   into your configure.ac file and #include "config.h" before cycle.h, +   or define the relevant macros manually if you are not using autoconf.) +*/ + +/***************************************************************************/ +/* This file uses macros like HAVE_GETHRTIME that are assumed to be +   defined according to whether the corresponding function/type/header +   is available on your system.  The necessary macros are most +   conveniently defined if you are using GNU autoconf, via the tests: +    +   dnl --------------------------------------------------------------------- + +   AC_C_INLINE +   AC_HEADER_TIME +   AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h]) + +   AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H +#include <sys/time.h> +#endif]) + +   AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime]) + +   dnl Cray UNICOS _rtc() (real-time clock) intrinsic +   AC_MSG_CHECKING([for _rtc intrinsic]) +   rtc_ok=yes +   AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H +#include <intrinsics.h> +#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no]) +   AC_MSG_RESULT($rtc_ok) + +   dnl --------------------------------------------------------------------- +*/ + +/***************************************************************************/ + +#if TIME_WITH_SYS_TIME +# include <sys/time.h> +# include <time.h> +#else +# if HAVE_SYS_TIME_H +#  include <sys/time.h> +# else +#  include <time.h> +# endif +#endif + +#define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \ +{									  \ +     return (double)(t1 - t0);						  \ +} + +/*----------------------------------------------------------------*/ +/* Solaris */ +#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER) +typedef hrtime_t ticks; + +#define getticks gethrtime + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* AIX v. 4+ routines to read the real-time clock or time-base register */ +#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER) +typedef timebasestruct_t ticks; + +static inline ticks getticks(void) +{ +     ticks t; +     read_real_time(&t, TIMEBASE_SZ); +     return t; +} + +static inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */ +{ +     time_base_to_time(&t1, TIMEBASE_SZ); +     time_base_to_time(&t0, TIMEBASE_SZ); +     return ((t1.tb_high - t0.tb_high) * 1e9 + (t1.tb_low - t0.tb_low)); +} + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * PowerPC ``cycle'' counter using the time base register. + */ +#if ((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))  && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; + +static __inline__ ticks getticks(void) +{ +     unsigned int tbl, tbu0, tbu1; + +     do { +	  __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0)); +	  __asm__ __volatile__ ("mftb %0" : "=r"(tbl)); +	  __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1)); +     } while (tbu0 != tbu1); + +     return (((unsigned long long)tbu0) << 32) | tbl; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif +/*----------------------------------------------------------------*/ +/* + * Pentium cycle counter  + */ +#if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__)  && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; + +static __inline__ ticks getticks(void) +{ +     ticks ret; + +     __asm__ __volatile__("rdtsc": "=A" (ret)); +     /* no input, nothing else clobbered */ +     return ret; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/* Visual C++ -- thanks to Morten Nissov for his help with this */ +#if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER) +#include <windows.h> +typedef LARGE_INTEGER ticks; +#define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */ + +static __inline ticks getticks(void) +{ +     ticks ret; + +     __asm { +	  RDTSC +	  mov ret.HighPart, edx +	  mov ret.LowPart, eax +     } +     return ret; +} + +static __inline double elapsed(ticks t1, ticks t0) +{   +     return (double)(t1.QuadPart - t0.QuadPart); +}   + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * X86-64 cycle counter + */ +#if defined(__GNUC__) && defined(__x86_64__)  && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; + +static __inline__ ticks getticks(void) +{ +     unsigned a, d;  +     asm volatile("rdtsc" : "=a" (a), "=d" (d));  +     return ((ticks)a) | (((ticks)d) << 32);  +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/* Visual C++ (FIXME: how to detect compilation for x86-64?) */ +#if _MSC_VER >= 1400 && !defined(HAVE_TICK_COUNTER) +typedef ULONG64 ticks; + +#define getticks __rdtsc + +INLINE_ELAPSED(__inline) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * IA64 cycle counter + */ +#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; + +static __inline__ ticks getticks(void) +{ +     ticks ret; + +     __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret)); +     return ret; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */ +#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER) +#include <machine/sys/inline.h> +typedef unsigned long ticks; + +static inline ticks getticks(void) +{ +     ticks ret; + +     ret = _Asm_mov_from_ar (_AREG_ITC); +     return ret; +} + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + +/* intel's ecc compiler */ +#if defined(__ECC) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; +#include <ia64intrin.h> + +static __inline__ ticks getticks(void) +{ +     return __getReg(_IA64_REG_AR_ITC); +} +  +INLINE_ELAPSED(__inline__) +  +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * PA-RISC cycle counter  + */ +#if defined(__hppa__) || defined(__hppa) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; + +#  ifdef __GNUC__ +static __inline__ ticks getticks(void) +{ +     ticks ret; + +     __asm__ __volatile__("mfctl 16, %0": "=r" (ret)); +     /* no input, nothing else clobbered */ +     return ret; +} +#  else +#  include <machine/inline.h> +static inline unsigned long getticks(void) +{ +     register ticks ret; +     _MFCTL(16, ret); +     return ret; +} +#  endif + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* S390, courtesy of James Treacy */ +#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; + +static __inline__ ticks getticks(void) +{ +     ticks cycles; +     __asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc"); +     return cycles; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif +/*----------------------------------------------------------------*/ +#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER) +/* + * The 32-bit cycle counter on alpha overflows pretty quickly,  + * unfortunately.  A 1GHz machine overflows in 4 seconds. + */ +typedef unsigned int ticks; + +static __inline__ ticks getticks(void) +{ +     unsigned long cc; +     __asm__ __volatile__ ("rpcc %0" : "=r"(cc)); +     return (cc & 0xFFFFFFFF); +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; + +static __inline__ ticks getticks(void) +{ +     ticks ret; +     __asm__("rd %%tick, %0" : "=r" (ret)); +     return ret; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +#if defined(__DECC) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER) +#  include <c_asm.h> +typedef unsigned int ticks; + +static __inline ticks getticks(void) +{ +     unsigned long cc; +     cc = asm("rpcc %v0"); +     return (cc & 0xFFFFFFFF); +} + +INLINE_ELAPSED(__inline) + +#define HAVE_TICK_COUNTER +#endif +/*----------------------------------------------------------------*/ +/* SGI/Irix */ +#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER) +typedef struct timespec ticks; + +static inline ticks getticks(void) +{ +     struct timespec t; +     clock_gettime(CLOCK_SGI_CYCLE, &t); +     return t; +} + +static inline double elapsed(ticks t1, ticks t0) +{ +     return (double)(t1.tv_sec - t0.tv_sec) * 1.0E9 + +	  (double)(t1.tv_nsec - t0.tv_nsec); +} +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* Cray UNICOS _rtc() intrinsic function */ +#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER) +#ifdef HAVE_INTRINSICS_H +#  include <intrinsics.h> +#endif + +typedef long long ticks; + +#define getticks _rtc + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + diff --git a/src/fftw3/kernel/debug.c b/src/fftw3/kernel/debug.c new file mode 100644 index 0000000..63e4a22 --- /dev/null +++ b/src/fftw3/kernel/debug.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: debug.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ +#include "ifftw.h" + +#ifdef FFTW_DEBUG +#include <stdio.h> + +typedef struct { +     printer super; +     FILE *f; +} P_file; + +static void putchr_file(printer *p_, char c) +{ +     P_file *p = (P_file *) p_; +     fputc(c, p->f); +} + +static printer *mkprinter_file(FILE *f) +{ +     P_file *p = (P_file *) X(mkprinter)(sizeof(P_file), putchr_file, 0); +     p->f = f; +     return &p->super; +} + +void X(debug)(const char *format, ...) +{ +     va_list ap; +     printer *p = mkprinter_file(stderr); +     va_start(ap, format); +     p->vprint(p, format, ap); +     va_end(ap); +     X(printer_destroy)(p); +} +#endif diff --git a/src/fftw3/kernel/hash.c b/src/fftw3/kernel/hash.c new file mode 100644 index 0000000..f12d5a4 --- /dev/null +++ b/src/fftw3/kernel/hash.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +#include "ifftw.h" + +unsigned X(hash)(const char *s) +{ +     unsigned h = 0xDEADBEEFu; +     do { +	  h = h * 17 + (int)*s; +     } while (*s++); +     return h; +} + diff --git a/src/fftw3/kernel/iabs.c b/src/fftw3/kernel/iabs.c new file mode 100644 index 0000000..d2bdb7c --- /dev/null +++ b/src/fftw3/kernel/iabs.c @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: iabs.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +int X(iabs)(int a) +{ +     return (int) (a < 0 ? -a : a); +} diff --git a/src/fftw3/kernel/ifftw.h b/src/fftw3/kernel/ifftw.h new file mode 100644 index 0000000..0269e18 --- /dev/null +++ b/src/fftw3/kernel/ifftw.h @@ -0,0 +1,848 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: ifftw.h,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +/* FFTW internal header file */ +#ifndef __IFFTW_H__ +#define __IFFTW_H__ + +#include "config.h" + +#include <stdlib.h>		/* size_t */ +#include <stdarg.h>		/* va_list */ +#include <stddef.h>             /* ptrdiff_t */ + +#if HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif + +#if HAVE_STDINT_H +# include <stdint.h>             /* uintptr_t, maybe */ +#endif + +#if HAVE_INTTYPES_H +# include <inttypes.h>           /* uintptr_t, maybe */ +#endif + +/* determine precision and name-mangling scheme */ +#define CONCAT(prefix, name) prefix ## name +#if defined(FFTW_SINGLE) +typedef float R; +#define X(name) CONCAT(fftwf_, name) +#elif defined(FFTW_LDOUBLE) +typedef long double R; +#define X(name) CONCAT(fftwl_, name) +#else +typedef double R; +#define X(name) CONCAT(fftw_, name) +#endif + +/* dummy use of unused parameters to silence compiler warnings */ +#define UNUSED(x) (void)x + +#define FFT_SIGN (-1)  /* sign convention for forward transforms */ + +/* get rid of that object-oriented stink: */ +#define REGISTER_SOLVER(p, s) X(solver_register)(p, s) + +#define STRINGIZEx(x) #x +#define STRINGIZE(x) STRINGIZEx(x) + +#ifndef HAVE_K7 +#define HAVE_K7 0 +#endif + +#if defined(HAVE_SSE) || defined(HAVE_SSE2) || defined(HAVE_ALTIVEC) || defined(HAVE_3DNOW) +#define HAVE_SIMD 1 +#else +#define HAVE_SIMD 0 +#endif + +/* forward declarations */ +typedef struct problem_s problem; +typedef struct plan_s plan; +typedef struct solver_s solver; +typedef struct planner_s planner; +typedef struct printer_s printer; +typedef struct scanner_s scanner; + +/*-----------------------------------------------------------------------*/ +/* alloca: */ +#if HAVE_SIMD +#define MIN_ALIGNMENT 16 +#endif + +#ifdef HAVE_ALLOCA +   /* use alloca if available */ + +#ifndef alloca +#ifdef __GNUC__ +# define alloca __builtin_alloca +#else +# ifdef _MSC_VER +#  include <malloc.h> +#  define alloca _alloca +# else +#  if HAVE_ALLOCA_H +#   include <alloca.h> +#  else +#   ifdef _AIX + #pragma alloca +#   else +#    ifndef alloca /* predefined by HP cc +Olibcalls */ +void *alloca(size_t); +#    endif +#   endif +#  endif +# endif +#endif +#endif + +#  ifdef MIN_ALIGNMENT +#    define STACK_MALLOC(T, p, x)				\ +     {								\ +         p = (T)alloca((x) + MIN_ALIGNMENT);			\ +         p = (T)(((uintptr_t)p + (MIN_ALIGNMENT - 1)) &	\ +               (~(uintptr_t)(MIN_ALIGNMENT - 1)));		\ +     } +#    define STACK_FREE(x)  +#  else /* HAVE_ALLOCA && !defined(MIN_ALIGNMENT) */ +#    define STACK_MALLOC(T, p, x) p = (T)alloca(x)  +#    define STACK_FREE(x)  +#  endif + +#else /* ! HAVE_ALLOCA */ +   /* use malloc instead of alloca */ +#  define STACK_MALLOC(T, p, x) p = (T)MALLOC(x, OTHER) +#  define STACK_FREE(x) X(ifree)(x) +#endif /* ! HAVE_ALLOCA */ + +/*-----------------------------------------------------------------------*/ +/* define uintptr_t if it is not already defined */ + +#ifndef HAVE_UINTPTR_T +#  if SIZEOF_VOID_P == 0 +#    error sizeof void* is unknown! +#  elif SIZEOF_UNSIGNED_INT == SIZEOF_VOID_P +     typedef unsigned int uintptr_t; +#  elif SIZEOF_UNSIGNED_LONG == SIZEOF_VOID_P +     typedef unsigned long uintptr_t; +#  elif SIZEOF_UNSIGNED_LONG_LONG == SIZEOF_VOID_P +     typedef unsigned long long uintptr_t; +#  else +#    error no unsigned integer type matches void* sizeof! +#  endif +#endif + +/*-----------------------------------------------------------------------*/ +/* assert.c: */ +extern void X(assertion_failed)(const char *s, int line, const char *file); + +/* always check */ +#define CK(ex)						 \ +      (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0)) + +#ifdef FFTW_DEBUG +/* check only if debug enabled */ +#define A(ex)						 \ +      (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0)) +#else +#define A(ex) /* nothing */ +#endif + +extern void X(debug)(const char *format, ...); +#define D X(debug) + +/*-----------------------------------------------------------------------*/ +/* alloc.c: */ + +/* objects allocated by malloc, for statistical purposes */ +enum malloc_tag { +     EVERYTHING, +     PLANS, +     SOLVERS, +     PROBLEMS, +     BUFFERS, +     HASHT, +     TENSORS, +     PLANNERS, +     SLVDESCS, +     TWIDDLES, +     STRIDES, +     OTHER, +     MALLOC_WHAT_LAST		/* must be last */ +}; + +extern void X(ifree)(void *ptr); +extern void X(ifree0)(void *ptr); + +#ifdef FFTW_DEBUG_MALLOC + +extern void *X(malloc_debug)(size_t n, enum malloc_tag what, +			     const char *file, int line); +#define MALLOC(n, what) X(malloc_debug)(n, what, __FILE__, __LINE__) +#define NATIVE_MALLOC(n, what) MALLOC(n, what) +void X(malloc_print_minfo)(int vrbose); + +#else /* ! FFTW_DEBUG_MALLOC */ + +extern void *X(malloc_plain)(size_t sz); +#define MALLOC(n, what)  X(malloc_plain)(n) +#define NATIVE_MALLOC(n, what) malloc(n) + +#endif + +#if defined(FFTW_DEBUG) && defined(FFTW_DEBUG_MALLOC) && defined(HAVE_THREADS) +extern int X(in_thread); +#  define IN_THREAD X(in_thread) +#  define THREAD_ON { int in_thread_save = X(in_thread); X(in_thread) = 1 +#  define THREAD_OFF X(in_thread) = in_thread_save; } +#else +#  define IN_THREAD 0 +#  define THREAD_ON  +#  define THREAD_OFF  +#endif + +/*-----------------------------------------------------------------------*/ +/* ops.c: */ +/* + * ops counter.  The total number of additions is add + fma + * and the total number of multiplications is mul + fma. + * Total flops = add + mul + 2 * fma + */ +typedef struct { +     double add; +     double mul; +     double fma; +     double other; +} opcnt; + +void X(ops_zero)(opcnt *dst); +void X(ops_other)(int o, opcnt *dst); +void X(ops_cpy)(const opcnt *src, opcnt *dst); + +void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst); +void X(ops_add2)(const opcnt *a, opcnt *dst); + +/* dst = m * a + b */ +void X(ops_madd)(int m, const opcnt *a, const opcnt *b, opcnt *dst); + +/* dst += m * a */ +void X(ops_madd2)(int m, const opcnt *a, opcnt *dst); + + +/*-----------------------------------------------------------------------*/ +/* minmax.c: */ +int X(imax)(int a, int b); +int X(imin)(int a, int b); + +/*-----------------------------------------------------------------------*/ +/* iabs.c: */ +int X(iabs)(int a); + +/*-----------------------------------------------------------------------*/ +/* md5.c */ + +#if SIZEOF_UNSIGNED_INT >= 4 +typedef unsigned int md5uint; +#else +typedef unsigned long md5uint; /* at least 32 bits as per C standard */ +#endif + +typedef md5uint md5sig[4]; + +typedef struct { +     md5sig s; /* state and signature */ + +     /* fields not meant to be used outside md5.c: */ +     unsigned char c[64]; /* stuff not yet processed */ +     unsigned l;  /* total length.  Should be 64 bits long, but this is +		     good enough for us */ +} md5; + +void X(md5begin)(md5 *p); +void X(md5putb)(md5 *p, const void *d_, int len); +void X(md5puts)(md5 *p, const char *s); +void X(md5putc)(md5 *p, unsigned char c); +void X(md5int)(md5 *p, int i); +void X(md5unsigned)(md5 *p, unsigned i); +void X(md5ptrdiff)(md5 *p, ptrdiff_t d); +void X(md5end)(md5 *p); + +/*-----------------------------------------------------------------------*/ +/* tensor.c: */ +#define STRUCT_HACK_KR +#undef STRUCT_HACK_C99 + +typedef struct { +     int n; +     int is;			/* input stride */ +     int os;			/* output stride */ +} iodim; + +typedef struct { +     int rnk; +#if defined(STRUCT_HACK_KR) +     iodim dims[1]; +#elif defined(STRUCT_HACK_C99) +     iodim dims[]; +#else +     iodim *dims; +#endif +} tensor; + +/* +  Definition of rank -infinity. +  This definition has the property that if you want rank 0 or 1, +  you can simply test for rank <= 1.  This is a common case. +  +  A tensor of rank -infinity has size 0. +*/ +#define RNK_MINFTY  ((int)(((unsigned) -1) >> 1)) +#define FINITE_RNK(rnk) ((rnk) != RNK_MINFTY) + +typedef enum { INPLACE_IS, INPLACE_OS } inplace_kind; + +tensor *X(mktensor)(int rnk); +tensor *X(mktensor_0d)(void); +tensor *X(mktensor_1d)(int n, int is, int os); +tensor *X(mktensor_2d)(int n0, int is0, int os0, +                      int n1, int is1, int os1); +int X(tensor_sz)(const tensor *sz); +void X(tensor_md5)(md5 *p, const tensor *t); +int X(tensor_max_index)(const tensor *sz); +int X(tensor_min_istride)(const tensor *sz); +int X(tensor_min_ostride)(const tensor *sz); +int X(tensor_min_stride)(const tensor *sz); +int X(tensor_inplace_strides)(const tensor *sz); +int X(tensor_inplace_strides2)(const tensor *a, const tensor *b); +tensor *X(tensor_copy)(const tensor *sz); +int X(tensor_kosherp)(const tensor *x); + +tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k); +tensor *X(tensor_copy_except)(const tensor *sz, int except_dim); +tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk); +tensor *X(tensor_compress)(const tensor *sz); +tensor *X(tensor_compress_contiguous)(const tensor *sz); +tensor *X(tensor_append)(const tensor *a, const tensor *b); +void X(tensor_split)(const tensor *sz, tensor **a, int a_rnk, tensor **b); +int X(tensor_tornk1)(const tensor *t, int *n, int *is, int *os); +void X(tensor_destroy)(tensor *sz); +void X(tensor_destroy2)(tensor *a, tensor *b); +void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d); +void X(tensor_print)(const tensor *sz, printer *p); +int X(dimcmp)(const iodim *a, const iodim *b); + +/*-----------------------------------------------------------------------*/ +/* problem.c: */ +typedef struct { +     void (*hash) (const problem *ego, md5 *p); +     void (*zero) (const problem *ego); +     void (*print) (problem *ego, printer *p); +     void (*destroy) (problem *ego); +} problem_adt; + +struct problem_s { +     const problem_adt *adt; +}; + +problem *X(mkproblem)(size_t sz, const problem_adt *adt); +void X(problem_destroy)(problem *ego); + +/*-----------------------------------------------------------------------*/ +/* print.c */ +struct printer_s { +     void (*print)(printer *p, const char *format, ...); +     void (*vprint)(printer *p, const char *format, va_list ap); +     void (*putchr)(printer *p, char c); +     void (*cleanup)(printer *p); +     int indent; +     int indent_incr; +}; + +printer *X(mkprinter)(size_t size,  +		      void (*putchr)(printer *p, char c), +		      void (*cleanup)(printer *p)); +void X(printer_destroy)(printer *p); + +/*-----------------------------------------------------------------------*/ +/* scan.c */ +struct scanner_s { +     int (*scan)(scanner *sc, const char *format, ...); +     int (*vscan)(scanner *sc, const char *format, va_list ap); +     int (*getchr)(scanner *sc); +     int ungotc; +}; + +scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc)); +void X(scanner_destroy)(scanner *sc); + +/*-----------------------------------------------------------------------*/ +/* plan.c: */ +typedef struct { +     void (*solve)(const plan *ego, const problem *p); +     void (*awake)(plan *ego, int flag); +     void (*print)(const plan *ego, printer *p); +     void (*destroy)(plan *ego); +} plan_adt; + +struct plan_s { +     const plan_adt *adt; +     int awake_refcnt; +     opcnt ops; +     double pcost; +}; + +plan *X(mkplan)(size_t size, const plan_adt *adt); +void X(plan_destroy_internal)(plan *ego); +void X(plan_awake)(plan *ego, int flag); +#define AWAKE(plan, flag) X(plan_awake)(plan, flag) +void X(plan_null_destroy)(plan *ego); + +/*-----------------------------------------------------------------------*/ +/* solver.c: */ +typedef struct { +     plan *(*mkplan)(const solver *ego, const problem *p, planner *plnr); +} solver_adt; + +struct solver_s { +     const solver_adt *adt; +     int refcnt; +}; + +solver *X(mksolver)(size_t size, const solver_adt *adt); +void X(solver_use)(solver *ego); +void X(solver_destroy)(solver *ego); +void X(solver_register)(planner *plnr, solver *s); + +/* shorthand */ +#define MKSOLVER(type, adt) (type *)X(mksolver)(sizeof(type), adt) + +/*-----------------------------------------------------------------------*/ +/* planner.c */ + +typedef struct slvdesc_s { +     solver *slv; +     const char *reg_nam; +     unsigned nam_hash; +     int reg_id; +} slvdesc; + +typedef struct solution_s solution; /* opaque */ + +/* values for problem_flags: */ +enum {  +     DESTROY_INPUT = 0x1, +     NO_SIMD = 0x2, +     CONSERVE_MEMORY = 0x4, +     NO_DHT_R2HC = 0x8 +}; + +#define DESTROY_INPUTP(plnr) ((plnr)->problem_flags & DESTROY_INPUT) +#define NO_SIMDP(plnr) ((plnr)->problem_flags & NO_SIMD) +#define CONSERVE_MEMORYP(plnr) ((plnr)->problem_flags & CONSERVE_MEMORY) +#define NO_DHT_R2HCP(plnr) ((plnr)->problem_flags & NO_DHT_R2HC) + +/* values for planner_flags: */ +enum { +     /* impatience flags  */ + +     BELIEVE_PCOST = 0x1, +     DFT_R2HC_ICKY = 0x2, +     NONTHREADED_ICKY = 0x4, +     NO_BUFFERING = 0x8, +     NO_EXHAUSTIVE = 0x10, +     NO_INDIRECT_OP = 0x20, +     NO_LARGE_GENERIC = 0x40, +     NO_RANK_SPLITS = 0x80, +     NO_VRANK_SPLITS = 0x100, +     NO_VRECURSE = 0x200, +      +     /* flags that control the search */ +     NO_UGLY = 0x400,  /* avoid plans we are 99% sure are suboptimal */ +     NO_SEARCH = 0x800,  /* avoid searching altogether---use wisdom entries  +			    only */ + +     ESTIMATE = 0x1000, +     IMPATIENCE_FLAGS = (ESTIMATE | (ESTIMATE - 1)), +      +     BLESSING = 0x4000,  /* save this entry */ +     H_VALID = 0x8000,    /* valid hastable entry */ +     NONIMPATIENCE_FLAGS = BLESSING +}; + +#define BELIEVE_PCOSTP(plnr) ((plnr)->planner_flags & BELIEVE_PCOST) +#define DFT_R2HC_ICKYP(plnr) ((plnr)->planner_flags & DFT_R2HC_ICKY) +#define ESTIMATEP(plnr) ((plnr)->planner_flags & ESTIMATE) +#define NONTHREADED_ICKYP(plnr) (((plnr)->planner_flags & NONTHREADED_ICKY) \ +                                  && (plnr)->nthr > 1) +#define NO_BUFFERINGP(plnr) ((plnr)->planner_flags & NO_BUFFERING) +#define NO_EXHAUSTIVEP(plnr) ((plnr)->planner_flags & NO_EXHAUSTIVE) +#define NO_INDIRECT_OP_P(plnr) ((plnr)->planner_flags & NO_INDIRECT_OP) +#define NO_LARGE_GENERICP(plnr) ((plnr)->planner_flags & NO_LARGE_GENERIC) +#define NO_RANK_SPLITSP(plnr) ((plnr)->planner_flags & NO_RANK_SPLITS) +#define NO_UGLYP(plnr) ((plnr)->planner_flags & NO_UGLY) +#define NO_SEARCHP(plnr) ((plnr)->planner_flags & NO_SEARCH) +#define NO_VRANK_SPLITSP(plnr) ((plnr)->planner_flags & NO_VRANK_SPLITS) +#define NO_VRECURSEP(plnr) ((plnr)->planner_flags & NO_VRECURSE) + +typedef enum { FORGET_ACCURSED, FORGET_EVERYTHING } amnesia; + +typedef struct { +     void (*register_solver)(planner *ego, solver *s); +     plan *(*mkplan)(planner *ego, problem *p); +     void (*forget)(planner *ego, amnesia a); +     void (*exprt)(planner *ego, printer *p); /* ``export'' is a reserved +						 word in C++. */ +     int (*imprt)(planner *ego, scanner *sc); +} planner_adt; + +struct planner_s { +     const planner_adt *adt; +     void (*hook)(plan *pln, const problem *p, int optimalp); + +     /* solver descriptors */ +     slvdesc *slvdescs; +     unsigned nslvdesc, slvdescsiz; +     const char *cur_reg_nam; +     int cur_reg_id; + +     /* hash table of solutions */ +     solution *solutions; +     unsigned hashsiz, nelem; + +     int nthr; +     unsigned problem_flags; +     unsigned short planner_flags; /* matches type of solution.flags in +				      planner.c */ +     /* various statistics */ +     int nplan;    /* number of plans evaluated */ +     double pcost, epcost; /* total pcost of measured/estimated plans */ +     int nprob;    /* number of problems evaluated */ +     int lookup, succ_lookup, lookup_iter; +     int insert, insert_iter, insert_unknown; +     int nrehash; +}; + +planner *X(mkplanner)(void); +void X(planner_destroy)(planner *ego); + +#ifdef FFTW_DEBUG +void X(planner_dump)(planner *ego, int vrbose); +#endif + +/* +  Iterate over all solvers.   Read: +  +  @article{ baker93iterators, +  author = "Henry G. Baker, Jr.", +  title = "Iterators: Signs of Weakness in Object-Oriented Languages", +  journal = "{ACM} {OOPS} Messenger", +  volume = "4", +  number = "3", +  pages = "18--25" +  } +*/ +#define FORALL_SOLVERS(ego, s, p, what)			\ +{							\ +     unsigned _cnt;					\ +     for (_cnt = 0; _cnt < ego->nslvdesc; ++_cnt) {	\ +	  slvdesc *p = ego->slvdescs + _cnt;		\ +	  solver *s = p->slv;				\ +	  what;						\ +     }							\ +} + +/* make plan, destroy problem */ +plan *X(mkplan_d)(planner *ego, problem *p); + +/*-----------------------------------------------------------------------*/ +/* stride.c: */ + +/* If PRECOMPUTE_ARRAY_INDICES is defined, precompute all strides. */ +#if (defined(__i386__) || _M_IX86 >= 500) && !HAVE_K7 && !defined(FFTW_LDOUBLE) +#define PRECOMPUTE_ARRAY_INDICES +#endif + +#ifdef PRECOMPUTE_ARRAY_INDICES +typedef int *stride; +#define WS(stride, i)  (stride[i]) +extern stride X(mkstride)(int n, int s); +void X(stride_destroy)(stride p); + +#else + +typedef int stride; +#define WS(stride, i)  (stride * i) +#define fftwf_mkstride(n, stride) stride +#define fftw_mkstride(n, stride) stride +#define fftwl_mkstride(n, stride) stride +#define fftwf_stride_destroy(p) ((void) p) +#define fftw_stride_destroy(p) ((void) p) +#define fftwl_stride_destroy(p) ((void) p) + +#endif /* PRECOMPUTE_ARRAY_INDICES */ + +/*-----------------------------------------------------------------------*/ +/* solvtab.c */ + +struct solvtab_s { void (*reg)(planner *); const char *reg_nam; }; +typedef struct solvtab_s solvtab[]; +void X(solvtab_exec)(solvtab tbl, planner *p); +#define SOLVTAB(s) { s, STRINGIZE(s) } +#define SOLVTAB_END { 0, 0 } + +/*-----------------------------------------------------------------------*/ +/* pickdim.c */ +int X(pickdim)(int which_dim, const int *buddies, int nbuddies, +	       const tensor *sz, int oop, int *dp); + +/*-----------------------------------------------------------------------*/ +/* twiddle.c */ +/* little language to express twiddle factors computation */ +enum { TW_COS = 0, TW_SIN = 1, TW_TAN = 2, TW_NEXT = 3, +       TW_FULL = 4, TW_GENERIC = 5 }; + +typedef struct { +     unsigned char op; +     unsigned char v; +     short i; +} tw_instr; + +typedef struct twid_s { +     R *W;                     /* array of twiddle factors */ +     int n, r, m;                /* transform order, radix, # twiddle rows */ +     int refcnt; +     const tw_instr *instr; +     struct twid_s *cdr; +} twid; + +void X(mktwiddle)(twid **pp, const tw_instr *instr, int n, int r, int m); +void X(twiddle_destroy)(twid **pp); +int X(twiddle_length)(int r, const tw_instr *p); +void X(twiddle_awake)(int flg, twid **pp,  +		      const tw_instr *instr, int n, int r, int m); + +/*-----------------------------------------------------------------------*/ +/* trig.c */ +#ifdef FFTW_LDOUBLE +typedef long double trigreal; +#else +typedef double trigreal; +#endif + +extern trigreal X(cos2pi)(int, int); +extern trigreal X(sin2pi)(int, int); +extern trigreal X(tan2pi)(int, int); +extern trigreal X(sincos)(trigreal m, trigreal n, int sinp); + +/*-----------------------------------------------------------------------*/ +/* primes.c: */ + +#if defined(FFTW_ENABLE_UNSAFE_MULMOD) +#  define MULMOD(x,y,p) (((x) * (y)) % (p)) +#elif ((SIZEOF_INT != 0) && (SIZEOF_LONG >= 2 * SIZEOF_INT)) +#  define MULMOD(x,y,p) ((int) ((((long) (x)) * ((long) (y))) % ((long) (p)))) +#elif ((SIZEOF_INT != 0) && (SIZEOF_LONG_LONG >= 2 * SIZEOF_INT)) +#  define MULMOD(x,y,p) ((int) ((((long long) (x)) * ((long long) (y))) \ +				 % ((long long) (p)))) +#elif defined(_MSC_VER) +#  define MULMOD(x,y,p) ((int) ((((__int64) (x)) * ((__int64) (y))) \ +                                 % ((__int64) (p)))) +#else /* 'long long' unavailable */ +#  define SAFE_MULMOD 1 +int X(safe_mulmod)(int x, int y, int p); +#  define MULMOD(x,y,p) X(safe_mulmod)(x,y,p) +#endif + +int X(power_mod)(int n, int m, int p); +int X(find_generator)(int p); +int X(first_divisor)(int n); +int X(is_prime)(int n); +int X(next_prime)(int n); + +#define GENERIC_MIN_BAD 71 /* min prime for which generic becomes bad */ + +/*-----------------------------------------------------------------------*/ +/* rader.c: */ +typedef struct rader_tls rader_tl; + +void X(rader_tl_insert)(int k1, int k2, int k3, R *W, rader_tl **tl); +R *X(rader_tl_find)(int k1, int k2, int k3, rader_tl *t); +void X(rader_tl_delete)(R *W, rader_tl **tl); + +/*-----------------------------------------------------------------------*/ +/* transpose.c: */ + +void X(transpose)(R *A, int n, int m, int d, int N, R *buf); +void X(transpose_slow)(R *a, int nx, int ny, int N, +		       char *move, int move_size, R *buf); +int X(transposable)(const iodim *a, const iodim *b, +		    int vl, int s, R *ri, R *ii); +void X(transpose_dims)(const iodim *a, const iodim *b, +                       int *n, int *m, int *d, int *nd, int *md); +int X(transpose_simplep)(const iodim *a, const iodim *b, int vl, int s, +			 R *ri, R *ii); +int X(transpose_slowp)(const iodim *a, const iodim *b, int N); + +/*-----------------------------------------------------------------------*/ +/* misc stuff */ +void X(null_awake)(plan *ego, int awake); +int X(square)(int x); +double X(measure_execution_time)(plan *pln, const problem *p); +int X(alignment_of)(R *p); +unsigned X(hash)(const char *s); +int X(compute_nbuf)(int n, int vl, int nbuf, int maxbufsz); +int X(ct_uglyp)(int min_n, int n, int r); + +#if HAVE_SIMD +R *X(taint)(R *p, int s); +R *X(join_taint)(R *p1, R *p2); +#define TAINT(p, s) X(taint)(p, s) +#define UNTAINT(p) ((R *) (((uintptr_t) (p)) & ~(uintptr_t)3)) +#define TAINTOF(p) (((uintptr_t)(p)) & 3) +#define JOIN_TAINT(p1, p2) X(join_taint)(p1, p2) +#else +#define TAINT(p, s) (p) +#define UNTAINT(p) (p) +#define TAINTOF(p) 0 +#define JOIN_TAINT(p1, p2) p1 +#endif + +#ifdef FFTW_DEBUG_ALIGNMENT +#  define ASSERT_ALIGNED_DOUBLE {		\ +     double __foo;				\ +     CK(!(((uintptr_t) &__foo) & 0x7));		\ +} +#else +#  define ASSERT_ALIGNED_DOUBLE  +#endif /* FFTW_DEBUG_ALIGNMENT */ + + + +/*-----------------------------------------------------------------------*/ +/* macros used in codelets to reduce source code size */ + +typedef R E;  /* internal precision of codelets. */ + +#ifdef FFTW_LDOUBLE +#  define K(x) ((E) x##L) +#else +#  define K(x) ((E) x) +#endif +#define DK(name, value) const E name = K(value) + +/* FMA macros */ + +#if defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__)) +/* this peculiar coding seems to do the right thing on all of +   gcc-2.95, gcc-3.1, and gcc-3.2.   + +   The obvious expression a * b + c does not work.  If both x = a * b +   + c and y = a * b - c appear in the source, gcc computes t = a * b, +   x = t + c, y = t - c, thus destroying the fma. +*/ +static __inline__ E FMA(E a, E b, E c) +{ +     E x = a * b; +     x = x + c; +     return x; +} + +static __inline__ E FMS(E a, E b, E c) +{ +     E x = a * b; +     x = x - c; +     return x; +} + +static __inline__ E FNMA(E a, E b, E c) +{ +     E x = a * b; +     x = - (x + c); +     return x; +} + +static __inline__ E FNMS(E a, E b, E c) +{ +     E x = a * b; +     x = - (x - c); +     return x; +} +#else +#define FMA(a, b, c) (((a) * (b)) + (c)) +#define FMS(a, b, c) (((a) * (b)) - (c)) +#define FNMA(a, b, c) (- (((a) * (b)) + (c))) +#define FNMS(a, b, c) ((c) - ((a) * (b))) +#endif + + +/* stack-alignment hackery */ +#if defined(__GNUC__) && defined(__i386__) +/* + * horrible hack to align the stack to a 16-byte boundary. + * + * We assume a gcc version >= 2.95 so that + * -mpreferred-stack-boundary works.  Otherwise, all bets are + * off.  However, -mpreferred-stack-boundary does not create a + * stack alignment, but it only preserves it.  Unfortunately, + * many versions of libc on linux call main() with the wrong + * initial stack alignment, with the result that the code is now + * pessimally aligned instead of having a 50% chance of being + * correct. + */ + +#define WITH_ALIGNED_STACK(what)				\ +{								\ +     /*								\ +      * Use alloca to allocate some memory on the stack.	\ +      * This alerts gcc that something funny is going		\ +      * on, so that it does not omit the frame pointer		\ +      * etc.							\ +      */							\ +     (void)__builtin_alloca(16);				\ +								\ +     /*								\ +      * Now align the stack pointer				\ +      */							\ +     __asm__ __volatile__ ("andl $-16, %esp");			\ +								\ +     what							\ +} +#endif + +#ifdef __ICC /* Intel's compiler for ia32 */ +#define WITH_ALIGNED_STACK(what)				\ +{								\ +     /*								\ +      * Simply calling alloca seems to do the right thing.	\ +      * The size of the allocated block seems to be irrelevant.	\ +      */							\ +     _alloca(16);						\ +     what							\ +} +#endif + +#ifndef WITH_ALIGNED_STACK +#define WITH_ALIGNED_STACK(what) what +#endif + +#endif /* __IFFTW_H__ */ diff --git a/src/fftw3/kernel/kbuffered.c b/src/fftw3/kernel/kbuffered.c new file mode 100644 index 0000000..d20403f --- /dev/null +++ b/src/fftw3/kernel/kbuffered.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* routines shared by the various buffered solvers */ + +#include "ifftw.h" + +int X(compute_nbuf)(int n, int vl, int nbuf, int maxbufsz) +{ +     int i;  + +     if (nbuf * n > maxbufsz) +          nbuf = X(imax)((int)1, maxbufsz / n); + +     /* +      * Look for a buffer number (not too big) that divides the +      * vector length, in order that we only need one child plan: +      */ +     for (i = nbuf; i < vl && i < 2 * nbuf; ++i) +          if (vl % i == 0) +               return i; + +     /* whatever... */ +     nbuf = X(imin)(nbuf, vl); +     return nbuf; +} + diff --git a/src/fftw3/kernel/kct.c b/src/fftw3/kernel/kct.c new file mode 100644 index 0000000..ba3bf68 --- /dev/null +++ b/src/fftw3/kernel/kct.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* common routines for Cooley-Tukey algorithms */ + +#include "ifftw.h" + +#define POW2P(n) (((n) > 0) && (((n) & ((n) - 1)) == 0)) + +/* TRUE if radix-r is ugly for size n */ +int X(ct_uglyp)(int min_n, int n, int r) +{ +     return (n <= min_n) || (POW2P(n) && (n / r) <= 4); +} diff --git a/src/fftw3/kernel/kplan.c b/src/fftw3/kernel/kplan.c new file mode 100644 index 0000000..4c17107 --- /dev/null +++ b/src/fftw3/kernel/kplan.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: kplan.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +/* "Plan: To bother about the best method of accomplishing an +   accidental result."  (Ambrose Bierce, The Enlarged Devil's +   Dictionary). */ + +plan *X(mkplan)(size_t size, const plan_adt *adt) +{ +     plan *p = (plan *)MALLOC(size, PLANS); + +     A(adt->destroy); +     p->awake_refcnt = 0; +     p->adt = adt; +     X(ops_zero)(&p->ops); +     p->pcost = 0.0; +      +     return p; +} + +/* + * destroy a plan + */ +void X(plan_destroy_internal)(plan *ego) +{ +     if (ego) { +	  if (ego->awake_refcnt > 0) +	       ego->adt->awake(ego, 0); +          ego->adt->destroy(ego); +	  X(ifree)(ego); +     } +} + +/* dummy destroy routine for plans with no local state */ +void X(plan_null_destroy)(plan *ego) +{ +     UNUSED(ego); +     /* nothing */ +} + +void X(plan_awake)(plan *ego, int flag) +{ +     if (flag) { +	  if (!ego->awake_refcnt) +	       ego->adt->awake(ego, flag); +	  ++ego->awake_refcnt; +     } else { +	  --ego->awake_refcnt; +	  if (!ego->awake_refcnt) +	       ego->adt->awake(ego, flag); +     } +} + diff --git a/src/fftw3/kernel/kproblem.c b/src/fftw3/kernel/kproblem.c new file mode 100644 index 0000000..4686692 --- /dev/null +++ b/src/fftw3/kernel/kproblem.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: kproblem.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +/* constructor */ +problem *X(mkproblem)(size_t sz, const problem_adt *adt) +{ +     problem *p = (problem *)MALLOC(sz, PROBLEMS); + +     p->adt = adt; +     return p; +} + +/* destructor */ +void X(problem_destroy)(problem *ego) +{ +     if (ego) +	  ego->adt->destroy(ego); +} + diff --git a/src/fftw3/kernel/krader.c b/src/fftw3/kernel/krader.c new file mode 100644 index 0000000..354edfc --- /dev/null +++ b/src/fftw3/kernel/krader.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +#include "ifftw.h" + +/* +  common routines for Rader solvers  +*/ + + +/* shared twiddle and omega lists, keyed by two/three integers. */ +struct rader_tls { +     int k1, k2, k3; +     R *W; +     int refcnt; +     rader_tl *cdr;  +}; + +void X(rader_tl_insert)(int k1, int k2, int k3, R *W, rader_tl **tl) +{ +     rader_tl *t = (rader_tl *) MALLOC(sizeof(rader_tl), TWIDDLES); +     t->k1 = k1; t->k2 = k2; t->k3 = k3; t->W = W; +     t->refcnt = 1; t->cdr = *tl; *tl = t; +} + +R *X(rader_tl_find)(int k1, int k2, int k3, rader_tl *t) +{ +     while (t && (t->k1 != k1 || t->k2 != k2 || t->k3 != k3)) +	  t = t->cdr; +     if (t) { +	  ++t->refcnt; +	  return t->W; +     } else  +	  return 0; +} + +void X(rader_tl_delete)(R *W, rader_tl **tl) +{ +     if (W) { +	  rader_tl **tp, *t; + +	  for (tp = tl; (t = *tp) && t->W != W; tp = &t->cdr) +	       ; + +	  if (t && --t->refcnt <= 0) { +	       *tp = t->cdr; +	       X(ifree)(t->W); +	       X(ifree)(t); +	  } +     } +} diff --git a/src/fftw3/kernel/md5-1.c b/src/fftw3/kernel/md5-1.c new file mode 100644 index 0000000..a2e87a1 --- /dev/null +++ b/src/fftw3/kernel/md5-1.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +#include "ifftw.h" + + +void X(md5putb)(md5 *p, const void *d_, int len) +{ +     int i; +     const unsigned char *d = (const unsigned char *)d_; +     for (i = 0; i < len; ++i) +	  X(md5putc)(p, d[i]); +} + +void X(md5puts)(md5 *p, const char *s) +{ +     /* also hash final '\0' */ +     do { +	  X(md5putc)(p, *s); +     } while(*s++); +} + +void X(md5int)(md5 *p, int i) +{ +     X(md5putb)(p, &i, sizeof(i)); +} + +void X(md5unsigned)(md5 *p, unsigned i) +{ +     X(md5putb)(p, &i, sizeof(i)); +} + +void X(md5ptrdiff)(md5 *p, ptrdiff_t d) +{ +     X(md5putb)(p, &d, sizeof(d)); +} + diff --git a/src/fftw3/kernel/md5.c b/src/fftw3/kernel/md5.c new file mode 100644 index 0000000..edbd811 --- /dev/null +++ b/src/fftw3/kernel/md5.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/*  +   independent implementation of Ron Rivest's MD5 message-digest +   algorithm, based on rfc 1321. + +   Optimized for small code size, not speed.  Works as long as +   sizeof(md5uint) >= 4. +*/ + +#include "ifftw.h" + +/* sintab[i] = 4294967296.0 * abs(sin((double)(i + 1))) */ +static const md5uint sintab[64] = { +     0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, +     0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, +     0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, +     0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, +     0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, +     0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, +     0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, +     0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, +     0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, +     0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, +     0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, +     0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, +     0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, +     0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, +     0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, +     0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 +};  + +/* see rfc 1321 section 3.4 */ +static const struct roundtab { +     char k;  +     char s; +} roundtab[64] = { +     {  0,  7}, {  1, 12}, {  2, 17}, {  3, 22}, +     {  4,  7}, {  5, 12}, {  6, 17}, {  7, 22}, +     {  8,  7}, {  9, 12}, { 10, 17}, { 11, 22}, +     { 12,  7}, { 13, 12}, { 14, 17}, { 15, 22}, +     {  1,  5}, {  6,  9}, { 11, 14}, {  0, 20}, +     {  5,  5}, { 10,  9}, { 15, 14}, {  4, 20}, +     {  9,  5}, { 14,  9}, {  3, 14}, {  8, 20}, +     { 13,  5}, {  2,  9}, {  7, 14}, { 12, 20}, +     {  5,  4}, {  8, 11}, { 11, 16}, { 14, 23}, +     {  1,  4}, {  4, 11}, {  7, 16}, { 10, 23}, +     { 13,  4}, {  0, 11}, {  3, 16}, {  6, 23}, +     {  9,  4}, { 12, 11}, { 15, 16}, {  2, 23}, +     {  0,  6}, {  7, 10}, { 14, 15}, {  5, 21}, +     { 12,  6}, {  3, 10}, { 10, 15}, {  1, 21}, +     {  8,  6}, { 15, 10}, {  6, 15}, { 13, 21}, +     {  4,  6}, { 11, 10}, {  2, 15}, {  9, 21} +}; + +#define rol(a, s) ((a << (int)(s)) | (a >> (32 - (int)(s)))) + +static void doblock(md5sig state, const unsigned char *data) +{ +     md5uint a, b, c, d, t, x[16]; +     const md5uint msk = 0xffffffffUL; +     int i; + +     /* encode input bytes into md5uint */ +     for (i = 0; i < 16; ++i) { +	  const unsigned char *p = data + 4 * i; +	  x[i] = p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24); +     } + +     a = state[0]; b = state[1]; c = state[2]; d = state[3]; +     for (i = 0; i < 64; ++i) { +	  const struct roundtab *p = roundtab + i; +	  int round = i / 16; +	  switch (round) { +	      case 0: a += (b & c) | (~b & d); break; +	      case 1: a += (b & d) | (c & ~d); break; +	      case 2: a += b ^ c ^ d; break; +	      case 3: a += c ^ (b | ~d); break; +	  } +	  a += sintab[i]; +	  a += x[(int)(p->k)]; +	  a &= msk; +	  t = b + rol(a, p->s); +	  a = d; d = c; c = b; b = t; +     } +     state[0] = (state[0] + a) & msk; +     state[1] = (state[1] + b) & msk; +     state[2] = (state[2] + c) & msk; +     state[3] = (state[3] + d) & msk; +} + + +void X(md5begin)(md5 *p) +{ +     p->s[0] = 0x67452301; +     p->s[1] = 0xefcdab89; +     p->s[2] = 0x98badcfe; +     p->s[3] = 0x10325476; +     p->l = 0; +} + +void X(md5putc)(md5 *p, unsigned char c) +{ +     p->c[p->l % 64] = c; +     if (((++p->l) % 64) == 0) doblock(p->s, p->c); +} + +void X(md5end)(md5 *p) +{ +     unsigned l, i; + +     l = 8 * p->l; /* length before padding, in bits */ + +     /* rfc 1321 section 3.1: padding */ +     X(md5putc)(p, 0x80); +     while ((p->l % 64) != 56) X(md5putc)(p, 0x00); + +     /* rfc 1321 section 3.2: length (little endian) */ +     for (i = 0; i < 8; ++i) { +	  X(md5putc)(p, l & 0xFF); +	  l = l >> 8; +     } + +     /* Now p->l % 64 == 0 and signature is in p->s */ +} diff --git a/src/fftw3/kernel/minmax.c b/src/fftw3/kernel/minmax.c new file mode 100644 index 0000000..fc6f3a3 --- /dev/null +++ b/src/fftw3/kernel/minmax.c @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: minmax.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +int X(imax)(int a, int b) +{ +     return (a > b) ? a : b; +} + +int X(imin)(int a, int b) +{ +     return (a < b) ? a : b; +} diff --git a/src/fftw3/kernel/ops.c b/src/fftw3/kernel/ops.c new file mode 100644 index 0000000..9e927c1 --- /dev/null +++ b/src/fftw3/kernel/ops.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: ops.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +void X(ops_zero)(opcnt *dst) +{ +     dst->add = dst->mul = dst->fma = dst->other = 0; +} + +void X(ops_cpy)(const opcnt *src, opcnt *dst) +{ +     *dst = *src; +} + +void X(ops_other)(int o, opcnt *dst) +{ +     X(ops_zero)(dst); +     dst->other = o; +} + +void X(ops_madd)(int m, const opcnt *a, const opcnt *b, opcnt *dst) +{ +     dst->add = m * a->add + b->add; +     dst->mul = m * a->mul + b->mul; +     dst->fma = m * a->fma + b->fma; +     dst->other = m * a->other + b->other; +} + +void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst) +{ +     X(ops_madd)(1, a, b, dst); +} + +void X(ops_add2)(const opcnt *a, opcnt *dst) +{ +     X(ops_add)(a, dst, dst); +} + +void X(ops_madd2)(int m, const opcnt *a, opcnt *dst) +{ +     X(ops_madd)(m, a, dst, dst); +} + diff --git a/src/fftw3/kernel/pickdim.c b/src/fftw3/kernel/pickdim.c new file mode 100644 index 0000000..034e6db --- /dev/null +++ b/src/fftw3/kernel/pickdim.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +#include "ifftw.h" + +/* $Id: pickdim.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +/* Given a solver which_dim, a vector sz, and whether or not the +   transform is out-of-place, return the actual dimension index that +   it corresponds to.  The basic idea here is that we return the +   which_dim'th valid dimension, starting from the end if +   which_dim < 0. */ +static int really_pickdim(int which_dim, const tensor *sz, int oop, int *dp) +{ +     int i; +     int count_ok = 0; +     if (which_dim > 0) { +          for (i = 0; i < sz->rnk; ++i) { +               if (oop || sz->dims[i].is == sz->dims[i].os) +                    if (++count_ok == which_dim) { +                         *dp = i; +                         return 1; +                    } +          } +     } +     else if (which_dim < 0) { +          for (i = sz->rnk; i > 0; --i) { +               if (oop || sz->dims[i - 1].is == sz->dims[i - 1].os) +                    if (++count_ok == -which_dim) { +                         *dp = i - 1; +                         return 1; +                    } +          } +     } +     else { /* zero: pick the middle, if valid */ +	  i = sz->rnk / 2 - 1; +	  if (i < sz->rnk && (oop || sz->dims[i].is == sz->dims[i].os)) { +	       *dp = i; +	       return 1; +	  } +     } +     return 0; +} + +/* Like really_pickdim, but only returns 1 if no previous "buddy" +   which_dim in the buddies list would give the same dim. */ +int X(pickdim)(int which_dim, const int *buddies, int nbuddies, +	       const tensor *sz, int oop, int *dp) +{ +     int i, d1; + +     if (!really_pickdim(which_dim, sz, oop, dp)) +          return 0; + +     /* check whether some buddy solver would produce the same dim. +        If so, consider this solver unapplicable and let the buddy +        take care of it.  The smallest-indexed buddy is applicable. */ +     for (i = 0; i < nbuddies; ++i) { +          if (buddies[i] == which_dim) +               break;  /* found self */ +          if (really_pickdim(buddies[i], sz, oop, &d1) && *dp == d1) +               return 0; /* found equivalent buddy */ +     } +     return 1; +} diff --git a/src/fftw3/kernel/planner.c b/src/fftw3/kernel/planner.c new file mode 100644 index 0000000..1d01cb9 --- /dev/null +++ b/src/fftw3/kernel/planner.c @@ -0,0 +1,695 @@ +/* + * Copyright (c) 2000 Matteo Frigo + * Copyright (c) 2000 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: planner.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ +#include "ifftw.h" +#include <string.h> + +/* GNU Coding Standards, Sec. 5.2: "Please write the comments in a GNU +   program in English, because English is the one language that nearly +   all programmers in all countries can read." + +                    ingemisco tanquam reus +		    culpa rubet vultus meus +		    supplicanti parce [rms] +*/ + +#define BLESSEDP(solution) ((solution)->flags & BLESSING) +#define VALIDP(solution) ((solution)->flags & H_VALID) +#define UNBLESS(flags)  flags &= ~BLESSING + +#define MAXNAM 64  /* maximum length of registrar's name. +		      Used for reading wisdom.  There is no point +		      in doing this right */ + +/* Flags f1 subsumes flags f2 iff f1 is less/equally impatient than +   f2, defining a partial ordering. */ +#define IMPATIENCE(flags) ((flags) & IMPATIENCE_FLAGS) +#define NONIMPATIENCE(flags) ((flags) & NONIMPATIENCE_FLAGS) +#define ORDERED(f1, f2) (SUBSUMES(f1, f2) || SUBSUMES(f2, f1)) +#define SUBSUMES(f1, f2) ((IMPATIENCE(f1) & (f2)) == IMPATIENCE(f1)) + +static unsigned addmod(unsigned a, unsigned b, unsigned p) +{ +     /* gcc-2.95/sparc produces incorrect code for the fast version below. */ +#if defined(__sparc__) && defined(__GNUC__) +     /* slow version  */ +     return (a + b) % p; +#else +     /* faster version */ +     unsigned c = a + b; +     return c >= p ? c - p : c; +#endif +} + +/* +  slvdesc management: +*/ +static void sgrow(planner *ego) +{ +     unsigned osiz = ego->slvdescsiz, nsiz = 1 + osiz + osiz / 4; +     slvdesc *ntab = (slvdesc *)MALLOC(nsiz * sizeof(slvdesc), SLVDESCS); +     slvdesc *otab = ego->slvdescs; +     unsigned i; + +     ego->slvdescs = ntab; +     ego->slvdescsiz = nsiz; +     for (i = 0; i < osiz; ++i) +	  ntab[i] = otab[i]; +     X(ifree0)(otab); +} + +static void register_solver(planner *ego, solver *s) +{ +     slvdesc *n; +     if (s) { /* add s to solver list */ +	  X(solver_use)(s); + +	  if (ego->nslvdesc >= ego->slvdescsiz) +	       sgrow(ego); + +	  n = ego->slvdescs + ego->nslvdesc++; + +	  n->slv = s; +	  n->reg_nam = ego->cur_reg_nam; +	  n->reg_id = ego->cur_reg_id++; +	   +	  A(strlen(n->reg_nam) < MAXNAM); +	  n->nam_hash = X(hash)(n->reg_nam); +     } +} + +static int slookup(planner *ego, char *nam, int id) +{ +     unsigned h = X(hash)(nam); /* used to avoid strcmp in the common case */ +     FORALL_SOLVERS(ego, s, sp, { +	  UNUSED(s); +	  if (sp->reg_id == id && sp->nam_hash == h +	      && !strcmp(sp->reg_nam, nam)) +	       return sp - ego->slvdescs; +     }); +     return -1; +} + +/* +  md5-related stuff: +*/ + +/* first hash function */ +static unsigned h1(planner *ego, const md5sig s) +{ +     return s[0] % ego->hashsiz; +} + +/* second hash function (for double hashing) */ +static unsigned h2(planner *ego, const md5sig s) +{ +     return 1U + s[1] % (ego->hashsiz - 1); +} + +static void md5hash(md5 *m, const problem *p, const planner *plnr) +{ +     X(md5begin)(m); +     X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */ +     X(md5unsigned)(m, plnr->problem_flags); +     X(md5int)(m, plnr->nthr); +     p->adt->hash(p, m); +     X(md5end)(m); +} + +static int md5eq(const md5sig a, const md5sig b) +{ +     return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3]; +} + +static void sigcpy(const md5sig a, md5sig b) +{ +     b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3]; +} + +/* +  memoization routines : +*/ + +/* +   liber scriptus proferetur +   in quo totum continetur +   unde mundus iudicetur +*/ +struct solution_s { +     md5sig s; +     unsigned short flags; +     short slvndx; +}; + +static solution *hlookup(planner *ego, const md5sig s, unsigned short flags) +{ +     unsigned g, h = h1(ego, s), d = h2(ego, s); + +     ++ego->lookup; + +     for (g = h; ; g = addmod(g, d, ego->hashsiz)) { +	  solution *l = ego->solutions + g; +	  ++ego->lookup_iter; +	  if (VALIDP(l)) { +	       if (md5eq(s, l->s) && ORDERED(l->flags, flags)) {  +		    ++ego->succ_lookup; +		    return l;  +	       } +	  } else { +	       return 0; +	  } +	  A((g + d) % ego->hashsiz != h); +     } +} + + +static void hinsert0(planner *ego, const md5sig s, unsigned short flags, +		     int slvndx, solution *l) +{ +     ++ego->insert; +     if (!l) { 	  +	  /* search for nonfull slot */ +	  unsigned g, h = h1(ego, s), d = h2(ego, s);  +	  ++ego->insert_unknown; +	  for (g = h; ; g = addmod(g, d, ego->hashsiz)) { +	       ++ego->insert_iter; +	       l = ego->solutions + g; +	       if (!VALIDP(l)) break; +	       A((g + d) % ego->hashsiz != h); +	  } +     } + +     /* fill slot */ +     l->flags = flags | H_VALID; +     l->slvndx = (short)slvndx; +     sigcpy(s, l->s); +} + +static void rehash(planner *ego, unsigned nsiz) +{ +     unsigned osiz = ego->hashsiz, h; +     solution *osol = ego->solutions, *nsol; + +     nsiz = (unsigned)X(next_prime)((int)nsiz); +     nsol = (solution *)MALLOC(nsiz * sizeof(solution), HASHT); +     ++ego->nrehash; + +     /* init new table */ +     for (h = 0; h < nsiz; ++h)  +	  nsol[h].flags = 0; + +     /* install new table */ +     ego->hashsiz = nsiz; +     ego->solutions = nsol; + +     /* copy table */ +     for (h = 0; h < osiz; ++h) { +	  solution *l = osol + h; +	  if (VALIDP(l)) +	       hinsert0(ego, l->s, l->flags, l->slvndx, 0); +     } + +     X(ifree0)(osol); +} + +static unsigned minsz(unsigned nelem) +{ +     return 1U + nelem + nelem / 8U; +} + +static unsigned nextsz(unsigned nelem) +{ +     return minsz(minsz(nelem)); +} + +static void hgrow(planner *ego) +{ +     unsigned nelem = ego->nelem; +     if (minsz(nelem) >= ego->hashsiz) +	  rehash(ego, nextsz(nelem)); +} + +static void hshrink(planner *ego) +{ +     unsigned nelem = ego->nelem; +     /* always rehash after deletions */ +     rehash(ego, nextsz(nelem)); +} + +/* inherit blessing, but only if the solver is the same */ +static unsigned short merge_flags(unsigned short dstflags, int dstndx, +				  unsigned short srcflags, int srcndx) +{ +     if (srcndx == dstndx) +	  dstflags |= (srcflags & BLESSING); /* ne me perdas illa die */ +     return dstflags; +} + +static void hinsert(planner *ego, const md5sig s,  +		    unsigned short flags, int slvndx) +{ +     solution *l; + +     if ((l = hlookup(ego, s, flags))) { +	  if (SUBSUMES(flags, l->flags)) { +	       /* overwrite old solution */ +	       flags = merge_flags(flags, slvndx, l->flags, l->slvndx); +	  } else { +	       A(SUBSUMES(l->flags, flags)); +	       l->flags = merge_flags(l->flags, l->slvndx, flags, slvndx); +	       return; +	  } +     } else { +	  ++ego->nelem; +	  hgrow(ego); +     } +     hinsert0(ego, s, flags, slvndx, l); +} + +static void hcurse_subsumed(planner *ego) +{ +     unsigned h; + +     /* unbless any entries that are unreachable because they +        are subsumed by less-impatient ones.  */ +     for (h = 0; h < ego->hashsiz; ++h) { +	  solution *l = ego->solutions + h; +	  if (VALIDP(l)) { +	       unsigned d = h2(ego, l->s), g = addmod(h, d, ego->hashsiz); +	       for (; ; g = addmod(g, d, ego->hashsiz)) { +		    solution *m = ego->solutions + g; +		    if (VALIDP(m)) { +			 if (md5eq(l->s, m->s) && +			     SUBSUMES(l->flags, m->flags)) { +			      /* ne cadant in obscurum */ +			      l->flags = merge_flags(l->flags, l->slvndx, +						     m->flags, m->slvndx); + +			      /* cum vix justus sit securus */ +			      UNBLESS(m->flags); +			 } +		    } +		    else break; +		    A((g + d) % ego->hashsiz != h); +	       } +	  } +     } +} + + +static void invoke_hook(planner *ego, plan *pln, const problem *p,  +			int optimalp) +{ +     if (ego->hook) +	  ego->hook(pln, p, optimalp); +} + +static void evaluate_plan(planner *ego, plan *pln, const problem *p) +{ +     if (!BELIEVE_PCOSTP(ego) || pln->pcost == 0.0) { +	  ego->nplan++; + +	  if (ESTIMATEP(ego)) { +	  estimate: +	       /* heuristic */ +	       pln->pcost = 0.0 +		    + pln->ops.add +		    + pln->ops.mul +		    + 2 * pln->ops.fma +		    + pln->ops.other; +	       ego->epcost += pln->pcost; +	  } else { +	       double t = X(measure_execution_time)(pln, p); + +	       if (t < 0) {  /* unavailable cycle counter */ +		    /* Real programmers can write FORTRAN in any language */ +		    goto estimate; +	       } + +	       pln->pcost = t; +	       ego->pcost += t; +	  } +     } +      +     invoke_hook(ego, pln, p, 0); +} + +/* maintain dynamic scoping of flags, nthr: */ +static plan *invoke_solver(planner *ego, problem *p, solver *s,  +			   unsigned short nflags) +{ +     unsigned short planner_flags = ego->planner_flags; +     unsigned problem_flags = ego->problem_flags; +     int nthr = ego->nthr; +     plan *pln; +     ego->planner_flags = nflags; +     pln = s->adt->mkplan(s, p, ego); +     ego->problem_flags = problem_flags; +     ego->nthr = nthr; +     ego->planner_flags = planner_flags; +     return pln; +} + +static plan *search(planner *ego, problem *p, slvdesc **descp) +{ +     plan *best = 0; +     int best_not_yet_timed = 1; +     int pass; + +     if (NO_SEARCHP(ego)) { +	  /* D("invalid search for %P %x\n", p, ego->planner_flags); */ +	  return 0; +     } + +     for (pass = 0; pass < 2; ++pass) { +	  unsigned short nflags = ego->planner_flags; +	   +	  if (best) break; + +	  switch (pass) { +	      case 0:  +		   /* skip pass 0 during exhaustive search */ +		   if (!NO_EXHAUSTIVEP(ego)) continue; +		   nflags |= NO_UGLY; +		   break; +	      case 1: +		   /* skip pass 1 if NO_UGLY */ +		   if (NO_UGLYP(ego)) continue; +		   break; +	  } + +          FORALL_SOLVERS(ego, s, sp, { +	       plan *pln = invoke_solver(ego, p, s, nflags); + +	       if (pln) { +		    if (best) { +			 if (best_not_yet_timed) { +			      evaluate_plan(ego, best, p); +			      best_not_yet_timed = 0; +			 } +			 evaluate_plan(ego, pln, p); +			 if (pln->pcost < best->pcost) { +			      X(plan_destroy_internal)(best); +			      best = pln; +			      *descp = sp; +			 } else { +			      X(plan_destroy_internal)(pln); +			 } +		    } else { +			 best = pln; +			 *descp = sp; +		    } +	       } +	  }); +     } + +     return best; +} + +static plan *mkplan(planner *ego, problem *p) +{ +     plan *pln; +     md5 m; +     slvdesc *sp; +     unsigned short flags; +     ASSERT_ALIGNED_DOUBLE; + +     /* Canonical form. */ +     if (!NO_EXHAUSTIVEP(ego)) ego->planner_flags &= ~NO_UGLY; +	   +     ++ego->nprob; +     md5hash(&m, p, ego); + +     pln = 0; + +     { +	  solution *sol; /* new scope for sol */ + +	  if ((sol = hlookup(ego, m.s, ego->planner_flags))) { +	       if (SUBSUMES(sol->flags, ego->planner_flags)) { +		    /* wisdom is acceptable */ +		    if (sol->slvndx < 0)  +			 return 0;   /* known to be infeasible */ + +		    /* use solver to obtain a plan */ +		    sp = ego->slvdescs + sol->slvndx; +		    pln =  +			 invoke_solver(ego, p, sp->slv,  +				       (0 +					| NO_SEARCH  +					| IMPATIENCE(sol->flags) +					| NONIMPATIENCE(ego->planner_flags) )); + +		    /* if (!pln) then the entry is bogus, but +		       we currently do nothing about it. */ +		    /* CAVEAS: Do not use ``sol'' here, because the +		       pointer is possibly dangling after the call to +		       invoke_solver(). */ +	       } else { +		    A(SUBSUMES(ego->planner_flags, sol->flags)); +	       } +	  } +     } + + +     if (!pln) +	  pln = search(ego, p, &sp); + +     flags = ego->planner_flags; + +     if (pln) { +	  /* Postulate de iure that NO_UGLY subsumes ~NO_UGLY if the +	     problem is feasible. Also postulate that NO_SEARCH +	     subsumes ~NO_SEARCH. */ +	  flags &= ~(NO_UGLY | NO_SEARCH); +     } + +     hinsert(ego, m.s, flags, pln ? sp - ego->slvdescs : -1); + +     if (pln) +	  invoke_hook(ego, pln, p, 1); +     return pln; +} + +/* destroy hash table entries.  If FORGET_EVERYTHING, destroy the whole +   table.  If FORGET_ACCURSED, then destroy entries that are not blessed. */ +static void forget(planner *ego, amnesia a) +{ +     unsigned h; + +     /* garbage-collect while we are at it */  +     if (a != FORGET_EVERYTHING) +	  hcurse_subsumed(ego); + +     for (h = 0; h < ego->hashsiz; ++h) { +	  solution *l = ego->solutions + h; +	  if (VALIDP(l)) { +	       if (a == FORGET_EVERYTHING || +		   (a == FORGET_ACCURSED && !BLESSEDP(l))) { +		    /* confutatis maledictis +		       flammis acribus addictis */ +		    l->flags &= ~H_VALID; +		    --ego->nelem; +	       } +	  } +     } +     /* nil inultum remanebit */ + +     hshrink(ego); +} + +static void htab_destroy(planner *ego) +{ +     forget(ego, FORGET_EVERYTHING); +     X(ifree)(ego->solutions); +     ego->nelem = 0U; +} + +/* FIXME: what sort of version information should we write? */ +#define WISDOM_PREAMBLE PACKAGE "-" VERSION " " STRINGIZE(X(wisdom)) + +/* tantus labor non sit cassus */ +static void exprt(planner *ego, printer *p) +{ +     unsigned h; + +     hcurse_subsumed(ego); + +     p->print(p, "(" WISDOM_PREAMBLE "%("); +     for (h = 0; h < ego->hashsiz; ++h) { +	  solution *l = ego->solutions + h; +	  if (VALIDP(l) && BLESSEDP(l) && l->slvndx >= 0) { +	       slvdesc *sp = ego->slvdescs + l->slvndx; +	       /* qui salvandos salvas gratis +		  salva me fons pietatis */ +	       p->print(p, "(%s %d #x%x #x%M #x%M #x%M #x%M)\n", +			sp->reg_nam, sp->reg_id, (int)l->flags, +			l->s[0], l->s[1], l->s[2], l->s[3]); +	  } +     } +     p->print(p, "%))\n"); +} + +/* mors stupebit et natura +   cum resurget creatura */ +static int imprt(planner *ego, scanner *sc) +{ +     char buf[MAXNAM + 1]; +     md5uint sig[4]; +     int flags; +     int reg_id; +     int slvndx; +     solution *sol; + +     if (!sc->scan(sc, "(" WISDOM_PREAMBLE)) +	  return 0; /* don't need to restore hashtable */ + +     /* make a backup copy of the hash table (cache the hash) */ +     { +	  unsigned h, hsiz = ego->hashsiz; +	  sol = (solution *)MALLOC(hsiz * sizeof(solution), HASHT); +	  for (h = 0; h < hsiz; ++h) +	       sol[h] = ego->solutions[h]; +     } + +     while (1) { +	  if (sc->scan(sc, ")")) +	       break; + +	  /* qua resurget ex favilla */ +	  if (!sc->scan(sc, "(%*s %d #x%x #x%M #x%M #x%M #x%M)", +			MAXNAM, buf, ®_id, &flags,  +			sig + 0, sig + 1, sig + 2, sig + 3)) +	       goto bad; + +	  if ((slvndx = slookup(ego, buf, reg_id)) < 0) +	       goto bad; + +	  /* inter oves locum praesta */ +	  hinsert(ego, sig, (unsigned short)flags, slvndx); +     } + +     X(ifree0)(sol); +     return 1; + + bad: +     /* ``The wisdom of FFTW must be above suspicion.'' */ +     X(ifree0)(ego->solutions); +     ego->solutions = sol; +     return 0; +} + +/* + * create a planner + */ +planner *X(mkplanner)(void) +{ +     static const planner_adt padt = { +	  register_solver, mkplan, forget, exprt, imprt +     }; + +     planner *p = (planner *) MALLOC(sizeof(planner), PLANNERS); + +     p->adt = &padt; +     p->nplan = p->nprob = p->nrehash = 0; +     p->pcost = p->epcost = 0.0; +     p->succ_lookup = p->lookup = p->lookup_iter = 0; +     p->insert = p->insert_iter = p->insert_unknown = 0; +     p->hook = 0; +     p->cur_reg_nam = 0; + +     p->slvdescs = 0; +     p->nslvdesc = p->slvdescsiz = 0; + +     p->solutions = 0; +     p->hashsiz = p->nelem = 0U; + +     p->problem_flags = 0; +     p->planner_flags = 0; +     p->nthr = 1; + +     hgrow(p);			/* so that hashsiz > 0 */ + +     return p; +} + +void X(planner_destroy)(planner *ego) +{ +     /* destroy hash table */ +     htab_destroy(ego); + +     /* destroy solvdesc table */ +     FORALL_SOLVERS(ego, s, sp, { +	  UNUSED(sp); +	  X(solver_destroy)(s); +     }); + +     X(ifree0)(ego->slvdescs); +     X(ifree)(ego); /* dona eis requiem */ +} + +plan *X(mkplan_d)(planner *ego, problem *p) +{ +     plan *pln = ego->adt->mkplan(ego, p); +     X(problem_destroy)(p); +     return pln; +} + +/* + * Debugging code: + */ +#ifdef FFTW_DEBUG + +void X(planner_dump)(planner *ego, int verbose) +{ +     unsigned valid = 0, empty = 0, infeasible = 0; +     unsigned h; +     UNUSED(verbose); /* historical */ + +     for (h = 0; h < ego->hashsiz; ++h) { +	  solution *l = ego->solutions + h;  +	  if (VALIDP(l)) { +	       ++valid;  +	       if (l->slvndx < 0) ++infeasible; +	  } else +	       ++empty; +	   +     } + +     D("nplan = %d\n", ego->nplan); +     D("nprob = %d\n", ego->nprob); +     D("pcost = %g\n", ego->pcost); +     D("epcost = %g\n", ego->epcost); +     D("lookup = %d\n", ego->lookup); +     D("succ_lookup = %d\n", ego->succ_lookup); +     D("lookup_iter = %d\n", ego->lookup_iter); +     D("insert = %d\n", ego->insert); +     D("insert_iter = %d\n", ego->insert_iter); +     D("insert_unknown = %d\n", ego->insert_unknown); +     D("nrehash = %d\n", ego->nrehash); +     D("hashsiz = %u\n", ego->hashsiz); +     D("empty = %d\n", empty); +     D("valid = %d\n", valid); +     D("infeasible = %d\n", infeasible); +     A(ego->nelem == valid); +} + +#endif diff --git a/src/fftw3/kernel/primes.c b/src/fftw3/kernel/primes.c new file mode 100644 index 0000000..608e51c --- /dev/null +++ b/src/fftw3/kernel/primes.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: primes.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +/***************************************************************************/ + +/* Rader's algorithm requires lots of modular arithmetic, and if we +   aren't careful we can have errors due to integer overflows. */ + +#ifdef SAFE_MULMOD + +#  include <limits.h> + +/* compute (x * y) mod p, but watch out for integer overflows; we must +   have x, y >= 0, p > 0.  This routine is slow. */ +int X(safe_mulmod)(int x, int y, int p) +{ +     if (y == 0 || x <= INT_MAX / y) +	  return((x * y) % p); +     else { +	  int y2 = y/2; +	  return((X(safe_mulmod)(x, y2, p) + +		  X(safe_mulmod)(x, y - y2, p)) % p); +     } +} +#endif /* safe_mulmod ('long long' unavailable) */ + +/***************************************************************************/ + +/* Compute n^m mod p, where m >= 0 and p > 0.  If we really cared, we +   could make this tail-recursive. */ +int X(power_mod)(int n, int m, int p) +{ +     A(p > 0); +     if (m == 0) +	  return 1; +     else if (m % 2 == 0) { +	  int x = X(power_mod)(n, m / 2, p); +	  return MULMOD(x, x, p); +     } +     else +	  return MULMOD(n, X(power_mod)(n, m - 1, p), p); +} + +/* the following two routines were contributed by Greg Dionne. */ +static int get_prime_factors(int n, int *primef) +{ +     int i; +     int size = 0; + +     primef[size++] = 2; +     do +	  n >>= 1; +     while ((n & 1) == 0); + +     if (n == 1) +	  return size; + +     for (i = 3; i * i <= n; i += 2) +	  if (!(n % i)) { +	       primef[size++] = i; +	       do +		    n /= i; +	       while (!(n % i)); +	  } +     if (n == 1) +	  return size; +     primef[size++] = n; +     return size; +} + +int X(find_generator)(int p) +{ +    int n, i, size; +    int primef[16];     /* smallest number = 32589158477190044730 > 2^64 */ +    int pm1 = p - 1; + +    if (p == 2) +	 return 1; + +    size = get_prime_factors(pm1, primef); +    n = 2; +    for (i = 0; i < size; i++) +        if (X(power_mod)(n, pm1 / primef[i], p) == 1) { +            i = -1; +            n++; +        } +    return n; +} + +/* Return first prime divisor of n  (It would be at best slightly faster to +   search a static table of primes; there are 6542 primes < 2^16.)  */ +int X(first_divisor)(int n) +{ +     int i; +     if (n <= 1) +	  return n; +     if (n % 2 == 0) +	  return 2; +     for (i = 3; i*i <= n; i += 2) +	  if (n % i == 0) +	       return i; +     return n; +} + +int X(is_prime)(int n) +{ +     return(n > 1 && X(first_divisor)(n) == n); +} + +int X(next_prime)(int n) +{ +     while (!X(is_prime)(n)) ++n; +     return n; +} diff --git a/src/fftw3/kernel/print.c b/src/fftw3/kernel/print.c new file mode 100644 index 0000000..314be7c --- /dev/null +++ b/src/fftw3/kernel/print.c @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: print.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" +#include <stddef.h> +#include <stdarg.h> +#include <stdio.h> + +#define BSZ 64 + +static void myputs(printer *p, const char *s) +{ +     char c; +     while ((c = *s++)) +          p->putchr(p, c); +} + +static void vprint(printer *p, const char *format, va_list ap) +{ +     char buf[BSZ]; +     const char *s = format; +     char c; +     int i; + +     for (i = 0; i < p->indent; ++i) +          p->putchr(p, ' '); + +     while ((c = *s++)) { +          switch (c) { +	      case '%': +		   switch ((c = *s++)) { +		       case 'M': { +			    /* md5 value */ +			    md5uint x = va_arg(ap, md5uint); +			    x = 0xffffffffUL & x; +			    sprintf(buf, "%8.8lx", (unsigned long)x); +			    goto putbuf; +		       } +		       case 'c': { +			    int x = va_arg(ap, int); +			    p->putchr(p, x); +			    break; +		       } +		       case 's': { +			    char *x = va_arg(ap, char *); +			    if (x) +				 myputs(p, x); +			    else +				 goto putnull; +			    break; +		       } +		       case 'd': { +			    int x = va_arg(ap, int); +			    sprintf(buf, "%d", x); +			    goto putbuf; +		       } +		       case 't': { +			    ptrdiff_t x; +			    A(*s == 'd'); +			    s += 1; +			    x = va_arg(ap, ptrdiff_t); +			    /* should use C99 %td here, but +			       this is not yet widespread enough */ +			    sprintf(buf, "%ld", (long) x); +			    goto putbuf; +		       } +		       case 'f': case 'e': case 'g': { +			    char fmt[3] = "%x"; +			    double x = va_arg(ap, double); +			    fmt[1] = c; +			    sprintf(buf, fmt, x); +			    goto putbuf; +		       } +		       case 'v': { +			    /* print optional vector length */ +			    int x = va_arg(ap, int); +			    if (x > 1) { +				 sprintf(buf, "-x%d", x); +				 goto putbuf; +			    } +			    break; +		       } +		       case 'o': { +			    /* integer option.  Usage: %oNAME= */ +			    int x = va_arg(ap, int); +			    if (x) +				 p->putchr(p, '/'); +			    while ((c = *s++) != '=') +				 if (x) +				      p->putchr(p, c); +			    if (x) { +				 sprintf(buf, "=%d", x); +				 goto putbuf; +			    } +			    break; +		       } +		       case 'u': { +			    unsigned x = va_arg(ap, unsigned); +			    sprintf(buf, "%u", x); +			    goto putbuf; +		       } +		       case 'x': { +			    unsigned x = va_arg(ap, unsigned); +			    sprintf(buf, "%x", x); +			    goto putbuf; +		       } +		       case '(': { +			    /* newline, augment indent level */ +			    p->putchr(p, '\n'); +			    p->indent += p->indent_incr; +			    break; +		       } +		       case ')': { +			    /* decrement indent level */ +			    p->indent -= p->indent_incr; +			    break; +		       } +		       case 'p': {  /* note difference from C's %p */ +			    /* print plan */ +			    plan *x = va_arg(ap, plan *); +			    if (x)  +				 x->adt->print(x, p); +			    else  +				 goto putnull; +			    break; +		       } +		       case 'P': { +			    /* print problem */ +			    problem *x = va_arg(ap, problem *); +			    if (x) +				 x->adt->print(x, p); +			    else +				 goto putnull; +			    break; +		       } +		       case 'T': { +			    /* print tensor */ +			    tensor *x = va_arg(ap, tensor *); +			    if (x) +				 X(tensor_print)(x, p); +			    else +				 goto putnull; +			    break; +		       } +		       default: +			    A(0 /* unknown format */); +			    break; + +		   putbuf: +			    myputs(p, buf); +			    break; +		   putnull: +			    myputs(p, "(null)"); +			    break; +		   } +		   break; +	      default: +		   p->putchr(p, c); +		   break; +          } +     } +} + +static void print(printer *p, const char *format, ...) +{ +     va_list ap; +     va_start(ap, format); +     vprint(p, format, ap); +     va_end(ap); +} + +printer *X(mkprinter)(size_t size,  +		      void (*putchr)(printer *p, char c), +		      void (*cleanup)(printer *p)) +{ +     printer *s = (printer *)MALLOC(size, OTHER); +     s->print = print; +     s->vprint = vprint; +     s->putchr = putchr; +     s->cleanup = cleanup; +     s->indent = 0; +     s->indent_incr = 2; +     return s; +} + +void X(printer_destroy)(printer *p) +{ +     if (p->cleanup) +	  p->cleanup(p); +     X(ifree)(p); +} diff --git a/src/fftw3/kernel/scan.c b/src/fftw3/kernel/scan.c new file mode 100644 index 0000000..f5fa6e1 --- /dev/null +++ b/src/fftw3/kernel/scan.c @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: scan.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" +#include <string.h> +#include <stddef.h> +#include <stdarg.h> +#include <stdio.h> + +#ifdef USE_CTYPE +#include <ctype.h> +#else +/* Screw ctype. On linux, the is* functions call a routine that gets +   the ctype map in the current locale.  Because this operation is +   expensive, the map is cached on a per-thread basis.  I am not +   willing to link this crap with FFTW.  Not over my dead body. + +   Sic transit gloria mundi. +*/ +#undef isspace +#define isspace(x) ((x) >= 0 && (x) <= ' ') +#undef isdigit +#define isdigit(x) ((x) >= '0' && (x) <= '9') +#undef isupper +#define isupper(x) ((x) >= 'A' && (x) <= 'Z') +#undef islower +#define islower(x) ((x) >= 'a' && (x) <= 'z') +#endif + +static int mygetc(scanner *sc) +{ +     if (sc->ungotc != EOF) { +	  int c = sc->ungotc; +	  sc->ungotc = EOF; +	  return c; +     } +     return(sc->getchr(sc)); +} + +#define GETCHR(sc) mygetc(sc) + +static void myungetc(scanner *sc, int c) +{ +     sc->ungotc = c; +} + +#define UNGETCHR(sc, c) myungetc(sc, c) + +static void eat_blanks(scanner *sc) +{ +     int ch; +     while (ch = GETCHR(sc), isspace(ch)) +          ; +     UNGETCHR(sc, ch); +} + +static void mygets(scanner *sc, char *s, size_t maxlen) +{ +     char *s0 = s; +     int ch; + +     A(maxlen > 0); +     while ((ch = GETCHR(sc)) != EOF && !isspace(ch) +	    && ch != ')' && ch != '(' && s < s0 + maxlen) +	  *s++ = ch; +     *s = 0; +     UNGETCHR(sc, ch); +} + +static long getlong(scanner *sc, int base, int *ret) +{ +     int sign = 1, ch, count; +     long x = 0;      + +     ch = GETCHR(sc); +     if (ch == '-' || ch == '+') { +	  sign = ch == '-' ? -1 : 1; +	  ch = GETCHR(sc); +     } +     for (count = 0; ; ++count) { +	  if (isdigit(ch))  +	       ch -= '0'; +	  else if (isupper(ch)) +	       ch -= 'A' - 10; +	  else if (islower(ch)) +	       ch -= 'a' - 10; +	  else +	       break; +	  x = x * base + ch; +	  ch = GETCHR(sc); +     } +     x *= sign; +     UNGETCHR(sc, ch); +     *ret = count > 0; +     return x; +} + +/* vscan is mostly scanf-like, with our additional format specifiers, +   but with a few twists.  It returns simply 0 or 1 indicating whether +   the match was successful. '(' and ')' in the format string match +   those characters preceded by any whitespace.  Finally, if a +   character match fails, it will ungetchr() the last character back +   onto the stream. */ +static int vscan(scanner *sc, const char *format, va_list ap) +{ +     const char *s = format; +     char c; +     int ch = 0; +     size_t fmt_len; + +     while ((c = *s++)) { +	  fmt_len = 0; +          switch (c) { +	      case '%': +	  getformat: +		   switch ((c = *s++)) { +		       case 's': { +			    char *x = va_arg(ap, char *); +			    mygets(sc, x, fmt_len); +			    break; +		       } +		       case 'd': { +			    int *x = va_arg(ap, int *); +			    *x = (int) getlong(sc, 10, &ch); +			    if (!ch) return 0; +			    break; +		       } +		       case 'x': { +			    int *x = va_arg(ap, int *); +			    *x = (int) getlong(sc, 16, &ch); +			    if (!ch) return 0; +			    break; +		       } +		       case 'M': { +			    md5uint *x = va_arg(ap, md5uint *); +			    *x = 0xffffffffUL & getlong(sc, 16, &ch); +			    if (!ch) return 0; +			    break; +		       } +		       case '*': { +			    if ((fmt_len = va_arg(ap, int)) <= 0) return 0; +			    goto getformat; +		       } +		       default: +			    A(0 /* unknown format */); +			    break; +		   } +		   break; +	      default: +		   if (isspace(c) || c == '(' || c == ')') +			eat_blanks(sc); +		   if (!isspace(c) && (ch = GETCHR(sc)) != c) { +			UNGETCHR(sc, ch); +			return 0; +		   } +		   break; +          } +     } +     return 1; +} + +static int scan(scanner *sc, const char *format, ...) +{ +     int ret; +     va_list ap; +     va_start(ap, format); +     ret = vscan(sc, format, ap); +     va_end(ap); +     return ret; +} + +scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc)) +{ +     scanner *s = (scanner *)MALLOC(size, OTHER); +     s->scan = scan; +     s->vscan = vscan; +     s->getchr = getchr; +     s->ungotc = EOF; +     return s; +} + +void X(scanner_destroy)(scanner *sc) +{ +     X(ifree)(sc); +} diff --git a/src/fftw3/kernel/solver.c b/src/fftw3/kernel/solver.c new file mode 100644 index 0000000..4bfb899 --- /dev/null +++ b/src/fftw3/kernel/solver.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: solver.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +solver *X(mksolver)(size_t size, const solver_adt *adt) +{ +     solver *s = (solver *)MALLOC(size, SOLVERS); + +     s->adt = adt; +     s->refcnt = 0; +     return s; +} + +void X(solver_use)(solver *ego) +{ +     ++ego->refcnt; +} + +void X(solver_destroy)(solver *ego) +{ +     if ((--ego->refcnt) == 0) +          X(ifree)(ego); +} + +void X(solver_register)(planner *plnr, solver *s) +{ +     plnr->adt->register_solver(plnr, s); +} diff --git a/src/fftw3/kernel/solvtab.c b/src/fftw3/kernel/solvtab.c new file mode 100644 index 0000000..496915e --- /dev/null +++ b/src/fftw3/kernel/solvtab.c @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: solvtab.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +void X(solvtab_exec)(solvtab tbl, planner *p) +{ +     for (; tbl->reg_nam; ++tbl) { +	  p->cur_reg_nam = tbl->reg_nam; +	  p->cur_reg_id = 0; +	  tbl->reg(p); +     } +     p->cur_reg_nam = 0; +} diff --git a/src/fftw3/kernel/square.c b/src/fftw3/kernel/square.c new file mode 100644 index 0000000..4b5afab --- /dev/null +++ b/src/fftw3/kernel/square.c @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: square.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +int X(square)(int x) +{ +     return x * x; +} diff --git a/src/fftw3/kernel/stride.c b/src/fftw3/kernel/stride.c new file mode 100644 index 0000000..fda049f --- /dev/null +++ b/src/fftw3/kernel/stride.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: stride.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ +#include "ifftw.h" + +#ifdef PRECOMPUTE_ARRAY_INDICES +stride X(mkstride)(int n, int s) +{ +     int i; +     int *p = (int *) MALLOC(n * sizeof(int), STRIDES); + +     for (i = 0; i < n; ++i) +          p[i] = s * i; + +     return p; +} + +void X(stride_destroy)(stride p) +{ +     X(ifree0)(p); +} + +#endif diff --git a/src/fftw3/kernel/tensor.c b/src/fftw3/kernel/tensor.c new file mode 100644 index 0000000..1161963 --- /dev/null +++ b/src/fftw3/kernel/tensor.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: tensor.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +tensor *X(mktensor)(int rnk)  +{ +     tensor *x; + +     A(rnk >= 0); + +#if defined(STRUCT_HACK_KR) +     if (FINITE_RNK(rnk) && rnk > 1) +	  x = (tensor *)MALLOC(sizeof(tensor) + (rnk - 1) * sizeof(iodim), +				    TENSORS); +     else +	  x = (tensor *)MALLOC(sizeof(tensor), TENSORS); +#elif defined(STRUCT_HACK_C99) +     if (FINITE_RNK(rnk)) +	  x = (tensor *)MALLOC(sizeof(tensor) + rnk * sizeof(iodim), +				    TENSORS); +     else +	  x = (tensor *)MALLOC(sizeof(tensor), TENSORS); +#else +     x = (tensor *)MALLOC(sizeof(tensor), TENSORS); +     if (FINITE_RNK(rnk) && rnk > 0) +          x->dims = (iodim *)MALLOC(sizeof(iodim) * rnk, TENSORS); +     else +          x->dims = 0; +#endif + +     x->rnk = rnk; +     return x; +} + +void X(tensor_destroy)(tensor *sz) +{ +#if !defined(STRUCT_HACK_C99) && !defined(STRUCT_HACK_KR) +     X(ifree0)(sz->dims); +#endif +     X(ifree)(sz); +} + +int X(tensor_sz)(const tensor *sz) +{ +     int i, n = 1; + +     if (!FINITE_RNK(sz->rnk)) +          return 0; + +     for (i = 0; i < sz->rnk; ++i) +          n *= sz->dims[i].n; +     return n; +} + +void X(tensor_md5)(md5 *p, const tensor *t) +{ +     int i; +     X(md5int)(p, t->rnk); +     if (FINITE_RNK(t->rnk)) { +	  for (i = 0; i < t->rnk; ++i) { +	       const iodim *q = t->dims + i; +	       X(md5int)(p, q->n); +	       X(md5int)(p, q->is); +	       X(md5int)(p, q->os); +	  } +     } +} + +/* treat a (rank <= 1)-tensor as a rank-1 tensor, extracting +   appropriate n, is, and os components */ +int X(tensor_tornk1)(const tensor *t, int *n, int *is, int *os) +{ +     A(t->rnk <= 1); +     if (t->rnk == 1) { +	  const iodim *vd = t->dims; +          *n = vd[0].n; +          *is = vd[0].is; +          *os = vd[0].os; +     } else { +          *n = 1; +          *is = *os = 0; +     } +     return 1; +} + +void X(tensor_print)(const tensor *x, printer *p) +{ +     if (FINITE_RNK(x->rnk)) { +	  int i; +	  int first = 1; +	  p->print(p, "("); +	  for (i = 0; i < x->rnk; ++i) { +	       const iodim *d = x->dims + i; +	       p->print(p, "%s(%d %d %d)",  +			first ? "" : " ", +			d->n, d->is, d->os); +	       first = 0; +	  } +	  p->print(p, ")"); +     } else { +	  p->print(p, "rank-minfty");  +     } +} diff --git a/src/fftw3/kernel/tensor1.c b/src/fftw3/kernel/tensor1.c new file mode 100644 index 0000000..06ad4dc --- /dev/null +++ b/src/fftw3/kernel/tensor1.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: tensor1.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +tensor *X(mktensor_0d)(void) +{ +     return X(mktensor(0)); +} + +tensor *X(mktensor_1d)(int n, int is, int os) +{ +     tensor *x = X(mktensor)(1); +     x->dims[0].n = n; +     x->dims[0].is = is; +     x->dims[0].os = os; +     return x; +} diff --git a/src/fftw3/kernel/tensor2.c b/src/fftw3/kernel/tensor2.c new file mode 100644 index 0000000..a45e164 --- /dev/null +++ b/src/fftw3/kernel/tensor2.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: tensor2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +tensor *X(mktensor_2d)(int n0, int is0, int os0, +                      int n1, int is1, int os1) +{ +     tensor *x = X(mktensor)(2); +     x->dims[0].n = n0; +     x->dims[0].is = is0; +     x->dims[0].os = os0; +     x->dims[1].n = n1; +     x->dims[1].is = is1; +     x->dims[1].os = os1; +     return x; +} + diff --git a/src/fftw3/kernel/tensor4.c b/src/fftw3/kernel/tensor4.c new file mode 100644 index 0000000..9b6cd28 --- /dev/null +++ b/src/fftw3/kernel/tensor4.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: tensor4.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +int X(tensor_max_index)(const tensor *sz) +{ +     int i; +     int n = 0; + +     A(FINITE_RNK(sz->rnk)); +     for (i = 0; i < sz->rnk; ++i) { +          const iodim *p = sz->dims + i; +          n += (p->n - 1) * X(imax)(X(iabs)(p->is), X(iabs)(p->os)); +     } +     return n; +} + +#define tensor_min_xstride(sz, xs) {			\ +     A(FINITE_RNK(sz->rnk));				\ +     if (sz->rnk == 0) return 0;			\ +     else {						\ +          int i;					\ +          int s = X(iabs)(sz->dims[0].xs);		\ +          for (i = 1; i < sz->rnk; ++i)			\ +               s = X(imin)(s, X(iabs)(sz->dims[i].xs));	\ +          return s;					\ +     }							\ +} + +int X(tensor_min_istride)(const tensor *sz) tensor_min_xstride(sz, is) +int X(tensor_min_ostride)(const tensor *sz) tensor_min_xstride(sz, os) + +int X(tensor_min_stride)(const tensor *sz) +{ +     return X(imin)(X(tensor_min_istride)(sz), X(tensor_min_ostride)(sz)); +} + +int X(tensor_inplace_strides)(const tensor *sz) +{ +     int i; +     A(FINITE_RNK(sz->rnk)); +     for (i = 0; i < sz->rnk; ++i) { +          const iodim *p = sz->dims + i; +          if (p->is != p->os) +               return 0; +     } +     return 1; +} + +int X(tensor_inplace_strides2)(const tensor *a, const tensor *b) +{ +     return X(tensor_inplace_strides(a)) && X(tensor_inplace_strides(b)); +} diff --git a/src/fftw3/kernel/tensor5.c b/src/fftw3/kernel/tensor5.c new file mode 100644 index 0000000..8144930 --- /dev/null +++ b/src/fftw3/kernel/tensor5.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: tensor5.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +static void dimcpy(iodim *dst, const iodim *src, int rnk) +{ +     int i; +     if (FINITE_RNK(rnk)) +          for (i = 0; i < rnk; ++i) +               dst[i] = src[i]; +} + +tensor *X(tensor_copy)(const tensor *sz) +{ +     tensor *x = X(mktensor)(sz->rnk); +     dimcpy(x->dims, sz->dims, sz->rnk); +     return x; +} + +/* like X(tensor_copy), but makes strides in-place by +   setting os = is if k == INPLACE_IS or is = os if k == INPLACE_OS. */ +tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k) +{ +     tensor *x = X(tensor_copy)(sz); +     if (FINITE_RNK(x->rnk)) { +	  int i; +	  if (k == INPLACE_OS) +	       for (i = 0; i < x->rnk; ++i) +		    x->dims[i].is = x->dims[i].os; +	  else +	       for (i = 0; i < x->rnk; ++i) +		    x->dims[i].os = x->dims[i].is; +     } +     return x; +} + +/* Like X(tensor_copy), but copy all of the dimensions *except* +   except_dim. */ +tensor *X(tensor_copy_except)(const tensor *sz, int except_dim) +{ +     tensor *x; + +     A(FINITE_RNK(sz->rnk) && sz->rnk >= 1 && except_dim < sz->rnk); +     x = X(mktensor)(sz->rnk - 1); +     dimcpy(x->dims, sz->dims, except_dim); +     dimcpy(x->dims + except_dim, sz->dims + except_dim + 1, +            x->rnk - except_dim); +     return x; +} + +/* Like X(tensor_copy), but copy only rnk dimensions starting +   with start_dim. */ +tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk) +{ +     tensor *x; + +     A(FINITE_RNK(sz->rnk) && start_dim + rnk <= sz->rnk); +     x = X(mktensor)(rnk); +     dimcpy(x->dims, sz->dims + start_dim, rnk); +     return x; +} + +tensor *X(tensor_append)(const tensor *a, const tensor *b) +{ +     if (!FINITE_RNK(a->rnk) || !FINITE_RNK(b->rnk)) { +          return X(mktensor)(RNK_MINFTY); +     } else { +	  tensor *x = X(mktensor)(a->rnk + b->rnk); +          dimcpy(x->dims, a->dims, a->rnk); +          dimcpy(x->dims + a->rnk, b->dims, b->rnk); +	  return x; +     } +} diff --git a/src/fftw3/kernel/tensor7.c b/src/fftw3/kernel/tensor7.c new file mode 100644 index 0000000..ceae2c4 --- /dev/null +++ b/src/fftw3/kernel/tensor7.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: tensor7.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +/* total order among iodim's */ +int X(dimcmp)(const iodim *a, const iodim *b) +{ +     if (b->is != a->is) +          return (b->is - a->is);	/* shorter strides go later */ +     if (b->os != a->os) +          return (b->os - a->os);	/* shorter strides go later */ +     return (int)(a->n - b->n);	        /* larger n's go later */ +} + +/* Like tensor_copy, but eliminate n == 1 dimensions, which +   never affect any transform or transform vector. +  +   Also, we sort the tensor into a canonical order of decreasing +   is.  In general, processing a loop/array in order of +   decreasing stride will improve locality; sorting also makes the +   analysis in fftw_tensor_contiguous (below) easier.  The choice +   of is over os is mostly arbitrary, and hopefully +   shouldn't affect things much.  Normally, either the os will be +   in the same order as is (for e.g. multi-dimensional +   transforms) or will be in opposite order (e.g. for Cooley-Tukey +   recursion).  (Both forward and backwards traversal of the tensor +   are considered e.g. by vrank-geq1, so sorting in increasing +   vs. decreasing order is not really important.) */ +tensor *X(tensor_compress)(const tensor *sz) +{ +     int i, rnk; +     tensor *x; + +     A(FINITE_RNK(sz->rnk)); +     for (i = rnk = 0; i < sz->rnk; ++i) { +          A(sz->dims[i].n > 0); +          if (sz->dims[i].n != 1) +               ++rnk; +     } + +     x = X(mktensor)(rnk); +     for (i = rnk = 0; i < sz->rnk; ++i) { +          if (sz->dims[i].n != 1) +               x->dims[rnk++] = sz->dims[i]; +     } + +     if (rnk) { +	  /* God knows how qsort() behaves if n==0 */ +	  qsort(x->dims, (size_t)x->rnk, sizeof(iodim), +		(int (*)(const void *, const void *))X(dimcmp)); +     } + +     return x; +} + +/* Return whether the strides of a and b are such that they form an +   effective contiguous 1d array.  Assumes that a.is >= b.is. */ +static int strides_contig(iodim *a, iodim *b) +{ +     return (a->is == b->is * (int)b->n && +             a->os == b->os * (int)b->n); +} + +/* Like tensor_compress, but also compress into one dimension any +   group of dimensions that form a contiguous block of indices with +   some stride.  (This can safely be done for transform vector sizes.) */ +tensor *X(tensor_compress_contiguous)(const tensor *sz) +{ +     int i, rnk; +     tensor *sz2, *x; + +     if (X(tensor_sz)(sz) == 0)  +	  return X(mktensor)(RNK_MINFTY); + +     sz2 = X(tensor_compress)(sz); +     A(FINITE_RNK(sz2->rnk)); + +     if (sz2->rnk < 2)		/* nothing to compress */ +          return sz2; + +     for (i = rnk = 1; i < sz2->rnk; ++i) +          if (!strides_contig(sz2->dims + i - 1, sz2->dims + i)) +               ++rnk; + +     x = X(mktensor)(rnk); +     x->dims[0] = sz2->dims[0]; +     for (i = rnk = 1; i < sz2->rnk; ++i) { +          if (strides_contig(sz2->dims + i - 1, sz2->dims + i)) { +               x->dims[rnk - 1].n *= sz2->dims[i].n; +               x->dims[rnk - 1].is = sz2->dims[i].is; +               x->dims[rnk - 1].os = sz2->dims[i].os; +          } else { +               A(rnk < x->rnk); +               x->dims[rnk++] = sz2->dims[i]; +          } +     } + +     X(tensor_destroy)(sz2); +     return x; +} + +/* The inverse of X(tensor_append): splits the sz tensor into +   tensor a followed by tensor b, where a's rank is arnk. */ +void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b) +{ +     A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk)); + +     *a = X(tensor_copy_sub)(sz, 0, arnk); +     *b = X(tensor_copy_sub)(sz, arnk, sz->rnk - arnk); +} diff --git a/src/fftw3/kernel/tensor8.c b/src/fftw3/kernel/tensor8.c new file mode 100644 index 0000000..05e9b47 --- /dev/null +++ b/src/fftw3/kernel/tensor8.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: tensor8.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +void X(tensor_destroy2)(tensor *a, tensor *b) +{ +     X(tensor_destroy)(a); +     X(tensor_destroy)(b); +} + +void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d) +{ +     X(tensor_destroy2)(a, b); +     X(tensor_destroy2)(c, d); +} diff --git a/src/fftw3/kernel/tensor9.c b/src/fftw3/kernel/tensor9.c new file mode 100644 index 0000000..33ddf45 --- /dev/null +++ b/src/fftw3/kernel/tensor9.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: tensor9.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +int X(tensor_kosherp)(const tensor *x) +{ +     int i; + +     if (x->rnk < 0) return 0; + +     if (FINITE_RNK(x->rnk)) { +	  for (i = 0; i < x->rnk; ++i) +	       if (x->dims[i].n < 0) +		    return 0; +     } +     return 1; +} diff --git a/src/fftw3/kernel/timer.c b/src/fftw3/kernel/timer.c new file mode 100644 index 0000000..72969ad --- /dev/null +++ b/src/fftw3/kernel/timer.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: timer.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +#include "ifftw.h" + +#ifdef HAVE_UNISTD_H +#  include <unistd.h> +#endif + +#ifndef WITH_SLOW_TIMER +#  include "cycle.h" +#else +#  if TIME_WITH_SYS_TIME +#   include <sys/time.h> +#   include <time.h> +#  else +#   if HAVE_SYS_TIME_H +#    include <sys/time.h> +#   else +#    include <time.h> +#   endif +#  endif +#endif + +#ifndef FFTW_TIME_LIMIT +#define FFTW_TIME_LIMIT 2.0  /* don't run for more than two seconds */ +#endif + +#ifdef HAVE_BSDGETTIMEOFDAY +#ifndef HAVE_GETTIMEOFDAY +#define gettimeofday BSDgettimeofday +#define HAVE_GETTIMEOFDAY 1 +#endif +#endif + +#if defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_SECONDS_TIMER) +typedef struct timeval seconds; + +static seconds getseconds(void) +{ +     struct timeval tv; +     gettimeofday(&tv, 0); +     return tv; +} + +static double elapsed_sec(seconds t1, seconds t0) +{ +     return (double)(t1.tv_sec - t0.tv_sec) + +	  (double)(t1.tv_usec - t0.tv_usec) * 1.0E-6; +} + +#  define TIME_MIN_SEC 1.0e-2 /* from fftw2 */ +#  define HAVE_SECONDS_TIMER +#endif + +#ifndef HAVE_SECONDS_TIMER +#  include <time.h> + +typedef clock_t seconds; + +static seconds getseconds(void) { return clock(); } + +static double elapsed_sec(seconds t1, seconds t0) +{ +     return ((double) (t1 - t0)) / CLOCKS_PER_SEC; +} + +#  define TIME_MIN_SEC 2.0e-1 /* from fftw2 */ +#  define HAVE_SECONDS_TIMER +#endif + +#ifdef WITH_SLOW_TIMER +/* excruciatingly slow; only use this if there is no choice! */ +typedef seconds ticks; +#  define getticks getseconds +#  define elapsed elapsed_sec +#  define TIME_MIN TIME_MIN_SEC +#  define TIME_REPEAT 4 /* from fftw2 */ +#  define HAVE_TICK_COUNTER +#endif + +#ifdef HAVE_TICK_COUNTER + +#  ifndef TIME_MIN +#    define TIME_MIN 100.0 +#  endif + +#  ifndef TIME_REPEAT +#    define TIME_REPEAT 8 +#  endif + +  static double measure(plan *pln, const problem *p, int iter) +  { +       ticks t0, t1; +       int i; + +       t0 = getticks(); +       for (i = 0; i < iter; ++i)  +	    pln->adt->solve(pln, p); +       t1 = getticks(); +       return elapsed(t1, t0); +  } + + +  double X(measure_execution_time)(plan *pln, const problem *p) +  { +       seconds begin, now; +       double t, tmax, tmin; +       int iter; +       int repeat; + +       AWAKE(pln, 1); +       p->adt->zero(p); + +  start_over: +       for (iter = 1; iter; iter *= 2) { +	    tmin = 1.0E10; +	    tmax = -1.0E10; + +	    begin = getseconds(); +	    /* repeat the measurement TIME_REPEAT times */ +	    for (repeat = 0; repeat < TIME_REPEAT; ++repeat) { +		 t = measure(pln, p, iter); + +		 if (t < 0) +		      goto start_over; + +		 if (t < tmin) +		      tmin = t; +		 if (t > tmax) +		      tmax = t; + +		 /* do not run for too long */ +		 now = getseconds(); +		 t = elapsed_sec(now, begin); + +		 if (t > FFTW_TIME_LIMIT) +		      break; +	    } + +	    if (tmin >= TIME_MIN) { +		 tmin /= (double) iter; +		 tmax /= (double) iter; +		 AWAKE(pln, 0); +		 return tmin; +	    } +       } +       goto start_over; /* may happen if timer is screwed up */ +  } + +#else /* no cycle counter */ + +  double X(measure_execution_time)(plan *pln, const problem *p) +  { +       UNUSED(p); +       UNUSED(pln); +       return -1.0; +  } + +#endif diff --git a/src/fftw3/kernel/transpose.c b/src/fftw3/kernel/transpose.c new file mode 100644 index 0000000..fb489bd --- /dev/null +++ b/src/fftw3/kernel/transpose.c @@ -0,0 +1,430 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* transposes of unit-stride arrays, including arrays of N-tuples and +   non-square matrices, using cache-oblivious recursive algorithms */ + +#include "ifftw.h" +#include <string.h> /* memcpy */ + +#define CUTOFF 8 /* size below which we do a naive transpose */ + +/*************************************************************************/ +/* some utilities for the solvers */ + +static int Ntuple_transposable(const iodim *a, const iodim *b, +			       int vl, int s, R *ri, R *ii) +{ +     return(2 == s && (ii == ri + 1 || ri == ii + 1) +	    && +	    ((a->is == b->os && a->is == (vl*2) +	      && a->os == b->n * (vl*2) && b->is == a->n * (vl*2)) +	     || +	     (a->os == b->is && a->os == (vl*2) +	      && a->is == b->n * (vl*2) && b->os == a->n * (vl*2)))); +} + + +/* our solvers' transpose routines work for square matrices of arbitrary +   stride, or for non-square matrices of a given vl*vl2 corresponding +   to the N of the Ntuple with vl2 == s. */ +int X(transposable)(const iodim *a, const iodim *b, +		    int vl, int s, R *ri, R *ii) +{ +     return ((a->n == b->n && a->os == b->is && a->is == b->os) +	     || Ntuple_transposable(a, b, vl, s, ri, ii)); +} + +static int gcd(int a, int b) +{ +     int r; +     do { +	  r = a % b; +	  a = b; +	  b = r; +     } while (r != 0); +      +     return a; +} + +/* all of the solvers need to extract n, m, d, n/d, and m/d from the +   two iodims, so we put it here to save code space */ +void X(transpose_dims)(const iodim *a, const iodim *b, +		       int *n, int *m, int *d, int *nd, int *md) +{ +     int n0, m0, d0; +     /* matrix should be n x m, row-major */ +     if (a->is < b->is) { +	  *n = n0 = b->n; +	  *m = m0 = a->n; +     } +     else { +	  *n = n0 = a->n; +	  *m = m0 = b->n; +     } +     *d = d0 = gcd(n0, m0); +     *nd = n0 / d0; +     *md = m0 / d0; +} + +/* use the simple square transpose in the solver for square matrices +   that aren't too big or which have the wrong stride */ +int X(transpose_simplep)(const iodim *a, const iodim *b, int vl, int s, +			 R *ri, R *ii) +{ +     return (a->n == b->n && +	     (a->n*(vl*2) < CUTOFF  +	      ||  !Ntuple_transposable(a, b, vl, s, ri, ii))); +} + +/* use the slow general transpose if the buffer would be more than 1/8 +   the whole transpose and the transpose is fairly big. +   (FIXME: use the CONSERVE_MEMORY flag?) */ +int X(transpose_slowp)(const iodim *a, const iodim *b, int N) +{ +     int d = gcd(a->n, b->n); +     return (d < 8 && (a->n * b->n * N) / d > 65536); +} + +/*************************************************************************/ +/* Out-of-place transposes: */ + +/* Transpose A (n x m) to B (m x n), where A and B are stored +   as n x fda and m x fda arrays, respectively, operating on N-tuples: */ +static void rec_transpose_Ntuple(R *A, R *B, int n, int m, int fda, int fdb, +			  int N) +{ +     if (n == 1 || m == 1 || (n + m) * N < CUTOFF*2) { +	  int i, j, k; +	  for (i = 0; i < n; ++i) { +	       for (j = 0; j < m; ++j) { +		    for (k = 0; k < N; ++k) { /* FIXME: unroll */ +			 B[(j*fdb + i) * N + k] = A[(i*fda + j) * N + k]; +		    } +	       } +	  } +     } +     else if (n > m) { +	  int n2 = n / 2; +	  rec_transpose_Ntuple(A, B, n2, m, fda, fdb, N); +	  rec_transpose_Ntuple(A + n2*N*fda, B + n2*N, n - n2, m, fda, fdb, N); +     } +     else { +	  int m2 = m / 2; +	  rec_transpose_Ntuple(A, B, n, m2, fda, fdb, N); +	  rec_transpose_Ntuple(A + m2*N, B + m2*N*fdb, n, m - m2, fda, fdb, N); +     } +} + +/*************************************************************************/ +/* In-place transposes of square matrices of N-tuples: */ + +/* Transpose both A and B, where A is n x m and B is m x n, storing +   the transpose of A in B and the transpose of B in A.  A and B +   are actually stored as n x fda and m x fda arrays. */ +static void rec_transpose_swap_Ntuple(R *A, R *B, int n, int m, int fda, int N) +{ +     if (n == 1 || m == 1 || (n + m) * N <= CUTOFF*2) { +	  switch (N) { +	      case 1: { +		   int i, j; +		   for (i = 0; i < n; ++i) { +			for (j = 0; j < m; ++j) { +			     R a = A[(i*fda + j)]; +			     A[(i*fda + j)] = B[(j*fda + i)]; +			     B[(j*fda + i)] = a; +			} +		   } +		   break; +	      } +	      case 2: { +		   int i, j; +		   for (i = 0; i < n; ++i) { +			for (j = 0; j < m; ++j) { +			     R a0 = A[(i*fda + j) * 2 + 0]; +			     R a1 = A[(i*fda + j) * 2 + 1]; +			     A[(i*fda + j) * 2 + 0] = B[(j*fda + i) * 2 + 0]; +			     A[(i*fda + j) * 2 + 1] = B[(j*fda + i) * 2 + 1]; +			     B[(j*fda + i) * 2 + 0] = a0; +			     B[(j*fda + i) * 2 + 1] = a1; +			} +		   } +		   break; +	      } +	      default: { +		   int i, j, k; +		   for (i = 0; i < n; ++i) { +			for (j = 0; j < m; ++j) { +			     for (k = 0; k < N; ++k) { +				  R a = A[(i*fda + j) * N + k]; +				  A[(i*fda + j) * N + k] =  +				       B[(j*fda + i) * N + k]; +				  B[(j*fda + i) * N + k] = a; +			     } +			} +		   } +	      } +	  } +     } else if (n > m) { +	  int n2 = n / 2; +	  rec_transpose_swap_Ntuple(A, B, n2, m, fda, N); +	  rec_transpose_swap_Ntuple(A + n2*N*fda, B + n2*N, n - n2, m, fda, N); +     } +     else { +	  int m2 = m / 2; +	  rec_transpose_swap_Ntuple(A, B, n, m2, fda, N); +	  rec_transpose_swap_Ntuple(A + m2*N, B + m2*N*fda, n, m - m2, fda, N); +     } +} + +/* Transpose A, an n x n matrix (stored as n x fda), in-place. */ +static void rec_transpose_sq_ip_Ntuple(R *A, int n, int fda, int N) +{ +     if (n == 1) +	  return; +     else if (n*N <= CUTOFF) { +	  switch (N) { +	      case 1: { +		   int i, j; +		   for (i = 0; i < n; ++i) { +			for (j = i + 1; j < n; ++j) { +			     R a = A[(i*fda + j)]; +			     A[(i*fda + j)] = A[(j*fda + i)]; +			     A[(j*fda + i)] = a; +			} +		   } +		   break; +	      } +	      case 2: { +		   int i, j; +		   for (i = 0; i < n; ++i) { +			for (j = i + 1; j < n; ++j) { +			     R a0 = A[(i*fda + j) * 2 + 0]; +			     R a1 = A[(i*fda + j) * 2 + 1]; +			     A[(i*fda + j) * 2 + 0] = A[(j*fda + i) * 2 + 0]; +			     A[(i*fda + j) * 2 + 1] = A[(j*fda + i) * 2 + 1]; +			     A[(j*fda + i) * 2 + 0] = a0; +			     A[(j*fda + i) * 2 + 1] = a1; +			} +		   } +		   break; +	      } +	      default: { +		   int i, j, k; +		   for (i = 0; i < n; ++i) { +			for (j = i + 1; j < n; ++j) { +			     for (k = 0; k < N; ++k) { +				  R a = A[(i*fda + j) * N + k]; +				  A[(i*fda + j) * N + k] =  +				       A[(j*fda + i) * N + k]; +				  A[(j*fda + i) * N + k] = a; +			     } +			} +		   } +	      } +	  } +     } else { +	  int n2 = n / 2; +	  rec_transpose_sq_ip_Ntuple(A, n2, fda, N); +	  rec_transpose_sq_ip_Ntuple((A + n2*N) + n2*N*fda, n - n2, fda, N); +	  rec_transpose_swap_Ntuple(A + n2*N, A + n2*N*fda, n2, n - n2, fda,N); +     } +} + +/*************************************************************************/ +/* In-place transposes of non-square matrices: */ + +/* Transpose the matrix A in-place, where A is an (n*d) x (m*d) matrix +   of N-tuples and buf contains at least n*m*d*N elements.  In +   general, to transpose a p x q matrix, you should call this routine +   with d = gcd(p, q), n = p/d, and m = q/d. */ +void X(transpose)(R *A, int n, int m, int d, int N, R *buf) +{ +     A(n > 0 && m > 0 && N > 0 && d > 0); +     if (d == 1) { +	  rec_transpose_Ntuple(A, buf, n,m, m,n, N); +	  memcpy(A, buf, m*n*N*sizeof(R)); +     } +     else if (n*m == 1) { +	  rec_transpose_sq_ip_Ntuple(A, d, d, N); +     } +     else { +	  int i, num_el = n*m*d*N; + +	  /* treat as (d x n) x (d' x m) matrix.  (d' = d) */ + +	  /* First, transpose d x (n x d') x m to d x (d' x n) x m, +	     using the buf matrix.  This consists of d transposes +	     of contiguous n x d' matrices of m-tuples. */ +	  if (n > 1) { +	       for (i = 0; i < d; ++i) { +		    rec_transpose_Ntuple(A + i*num_el, buf, +					 n,d, d,n, m*N); +		    memcpy(A + i*num_el, buf, num_el*sizeof(R)); +	       } +	  } +	   +	  /* Now, transpose (d x d') x (n x m) to (d' x d) x (n x m), which +	     is a square in-place transpose of n*m-tuples: */ +	  rec_transpose_sq_ip_Ntuple(A, d, d, n*m*N); + +	  /* Finally, transpose d' x ((d x n) x m) to d' x (m x (d x n)), +	     using the buf matrix.  This consists of d' transposes +	     of contiguous d*n x m matrices. */ +	  if (m > 1) { +	       for (i = 0; i < d; ++i) { +		    rec_transpose_Ntuple(A + i*num_el, buf, +					 d*n,m, m,d*n, N); +		    memcpy(A + i*num_el, buf, num_el*sizeof(R)); +	       } +	  } +     } +} + +/*************************************************************************/ +/* In-place transpose routine from TOMS.  This routine is much slower +   than the cache-oblivious algorithm above, but is has the advantage +   of requiring less buffer space for the case of gcd(nx,ny) small. */ + +/* + * TOMS Transpose.  Revised version of algorithm 380. + *  + * These routines do in-place transposes of arrays. + *  + * [ Cate, E.G. and Twigg, D.W., ACM Transactions on Mathematical Software,  + *   vol. 3, no. 1, 104-110 (1977) ] + *  + * C version by Steven G. Johnson. February 1997. + */ + +/* + * "a" is a 1D array of length ny*nx*N which constains the nx x ny + * matrix of N-tuples to be transposed.  "a" is stored in row-major + * order (last index varies fastest).  move is a 1D array of length + * move_size used to store information to speed up the process.  The + * value move_size=(ny+nx)/2 is recommended.  buf should be an array + * of length 2*N. + *  + */ + +void X(transpose_slow)(R *a, int nx, int ny, int N, +		       char *move, int move_size, R *buf) +{ +     int i, j, im, mn; +     R *b, *c, *d; +     int ncount; +     int k; +      +     /* check arguments and initialize: */ +     A(ny > 0 && nx > 0 && N > 0 && move_size > 0); +      +     b = buf; +      +     if (ny == nx) { +	  /* +	   * if matrix is square, exchange elements a(i,j) and a(j,i): +	   */ +	  for (i = 0; i < nx; ++i) +	       for (j = i + 1; j < nx; ++j) { +		    memcpy(b, &a[N * (i + j * nx)], N * sizeof(R)); +		    memcpy(&a[N * (i + j * nx)], &a[N * (j + i * nx)], N * sizeof(R)); +		    memcpy(&a[N * (j + i * nx)], b, N * sizeof(R)); +	       } +	  return; +     } +     c = buf + N; +     ncount = 2;		/* always at least 2 fixed points */ +     k = (mn = ny * nx) - 1; +      +     for (i = 0; i < move_size; ++i) +	  move[i] = 0; +      +     if (ny >= 3 && nx >= 3) +	  ncount += gcd(ny - 1, nx - 1) - 1;	/* # fixed points */ +      +     i = 1; +     im = ny; +      +     while (1) { +	  int i1, i2, i1c, i2c; +	  int kmi; +	   +	  /** Rearrange the elements of a loop +	      and its companion loop: **/ +	   +	  i1 = i; +	  kmi = k - i; +	  memcpy(b, &a[N * i1], N * sizeof(R)); +	  i1c = kmi; +	  memcpy(c, &a[N * i1c], N * sizeof(R)); +	   +	  while (1) { +	       i2 = ny * i1 - k * (i1 / nx); +	       i2c = k - i2; +	       if (i1 < move_size) +		    move[i1] = 1; +	       if (i1c < move_size) +		    move[i1c] = 1; +	       ncount += 2; +	       if (i2 == i) +		    break; +	       if (i2 == kmi) { +		    d = b; +		    b = c; +		    c = d; +		    break; +	       } +	       memcpy(&a[N * i1], &a[N * i2],  +		      N * sizeof(R)); +	       memcpy(&a[N * i1c], &a[N * i2c],  +		      N * sizeof(R)); +	       i1 = i2; +	       i1c = i2c; +	  } +	  memcpy(&a[N * i1], b, N * sizeof(R)); +	  memcpy(&a[N * i1c], c, N * sizeof(R)); +	   +	  if (ncount >= mn) +	       break;	/* we've moved all elements */ +	   +	  /** Search for loops to rearrange: **/ +	   +	  while (1) { +	       int max = k - i; +	       ++i; +	       A(i <= max); +	       im += ny; +	       if (im > k) +		    im -= k; +	       i2 = im; +	       if (i == i2) +		    continue; +	       if (i >= move_size) { +		    while (i2 > i && i2 < max) { +			 i1 = i2; +			 i2 = ny * i1 - k * (i1 / nx); +		    } +		    if (i2 == i) +			 break; +	       } else if (!move[i]) +		    break; +	  } +     } +} diff --git a/src/fftw3/kernel/trig.c b/src/fftw3/kernel/trig.c new file mode 100644 index 0000000..0d4c3b2 --- /dev/null +++ b/src/fftw3/kernel/trig.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: trig.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +/* trigonometric functions */ +#include "ifftw.h" +#include <math.h> + +trigreal X(cos2pi)(int m, int n) +{ +     return X(sincos)((trigreal)m, (trigreal)n, 0); +} + +trigreal X(sin2pi)(int m, int n) +{ +     return X(sincos)((trigreal)m, (trigreal)n, 1); +} + +trigreal X(tan2pi)(int m, int n) +{ +#if 0      /* unimplemented, unused */ +     trigreal dm = m, dn = n; +     return TAN(by2pi(dm, dn)); +#endif +     UNUSED(m); UNUSED(n); +     return 0.0; +} diff --git a/src/fftw3/kernel/trig1.c b/src/fftw3/kernel/trig1.c new file mode 100644 index 0000000..b86362c --- /dev/null +++ b/src/fftw3/kernel/trig1.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: trig1.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +/* trigonometric functions */ +#include "ifftw.h" +#include <math.h> + +#ifdef FFTW_LDOUBLE +#  define COS cosl +#  define SIN sinl +#  define TAN tanl +#  define KTRIG(x) (x##L) +#else +#  define COS cos +#  define SIN sin +#  define TAN tan +#  define KTRIG(x) (x) +#endif + +static const trigreal K2PI = +    KTRIG(6.2831853071795864769252867665590057683943388); +#define by2pi(m, n) ((K2PI * (m)) / (n)) + +/* + * Improve accuracy by reducing x to range [0..1/8] + * before multiplication by 2 * PI. + */ + +trigreal X(sincos)(trigreal m, trigreal n, int sinp) +{ +     /* waiting for C to get tail recursion... */ +     trigreal half_n = n * KTRIG(0.5); +     trigreal quarter_n = half_n * KTRIG(0.5); +     trigreal eighth_n = quarter_n * KTRIG(0.5); +     trigreal sgn = KTRIG(1.0); + +     if (sinp) goto sin; + cos: +     if (m < 0) { m = -m; /* goto cos; */ } +     if (m > half_n) { m = n - m; goto cos; } +     if (m > eighth_n) { m = quarter_n - m; goto sin; } +     return sgn * COS(by2pi(m, n)); + + msin: +     sgn = -sgn; + sin: +     if (m < 0) { m = -m; goto msin; } +     if (m > half_n) { m = n - m; goto msin; } +     if (m > eighth_n) { m = quarter_n - m; goto cos; } +     return sgn * SIN(by2pi(m, n)); +} diff --git a/src/fftw3/kernel/twiddle.c b/src/fftw3/kernel/twiddle.c new file mode 100644 index 0000000..785bdd7 --- /dev/null +++ b/src/fftw3/kernel/twiddle.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * + */ + +/* $Id: twiddle.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ + +/* Twiddle manipulation */ + +#include "ifftw.h" +#include <math.h> + +/* table of known twiddle factors */ +static twid *twlist = (twid *) 0; + +static int equal_instr(const tw_instr *p, const tw_instr *q) +{ +     if (p == q) +          return 1; + +     for (;; ++p, ++q) { +          if (p->op != q->op || p->v != q->v || p->i != q->i) +               return 0; +          if (p->op == TW_NEXT)  /* == q->op */ +               return 1; +     } +     A(0 /* can't happen */); +} + +static int ok_twid(const twid *t, const tw_instr *q, int n, int r, int m) +{ +     return (n == t->n && r == t->r && m <= t->m && equal_instr(t->instr, q)); +} + +static twid *lookup(const tw_instr *q, int n, int r, int m) +{ +     twid *p; + +     for (p = twlist; p && !ok_twid(p, q, n, r, m); p = p->cdr) +          ; +     return p; +} + +static int twlen0(int r, const tw_instr **pp) +{ +     int ntwiddle = 0; +     const tw_instr *p = *pp; + +     /* compute length of bytecode program */ +     A(r > 0); +     for ( ; p->op != TW_NEXT; ++p) { +	  switch (p->op) { +	      case TW_FULL: +		   ntwiddle += (r - 1) * 2; +		   break; +	      case TW_GENERIC: +		   ntwiddle += r * 2; +		   break; +	      default: +		   ++ntwiddle; +	  } +     } + +     *pp = p; +     return ntwiddle; +} + +int X(twiddle_length)(int r, const tw_instr *p) +{ +     return twlen0(r, &p); +} + +static R *compute(const tw_instr *instr, int n, int r, int m) +{ +     int ntwiddle, j; +     R *W, *W0; +     const tw_instr *p; + +     static trigreal (*const f[])(int, int) = {  +	  X(cos2pi), X(sin2pi), X(tan2pi)  +     }; + +     p = instr; +     ntwiddle = twlen0(r, &p); + +     W0 = W = (R *)MALLOC(ntwiddle * (m / p->v) * sizeof(R), TWIDDLES); + +     for (j = 0; j < m; j += p->v) { +          for (p = instr; p->op != TW_NEXT; ++p) { +	       switch (p->op) { +		   case TW_FULL: +		   { +			int i; +			A((int)p->i == r); /* consistency check */ +			for (i = 1; i < r; ++i) { +			     *W++ = f[TW_COS]((j + p->v) * i, n); +			     *W++ = f[TW_SIN]((j + p->v) * i, n); +			} +			break; +		   } + +		   case TW_GENERIC: +		   { +			int i; +			A(p->v == 0); /* unused */ +			A(p->i == 0); /* unused */ +			for (i = 0; i < r; ++i) { +			     int k = j * r + i; +			     *W++ = f[TW_COS](k, n); +			     *W++ = FFT_SIGN * f[TW_SIN](k, n); +			} +			break; +		   } +		    +		   default: +			*W++ = f[p->op](((signed int)(j + p->v)) * p->i, n); +			break; +	       } +	  } +          A(m % p->v == 0); +     } + +     return W0; +} + +void X(mktwiddle)(twid **pp, const tw_instr *instr, int n, int r, int m) +{ +     twid *p; + +     if (*pp) return;  /* already created */ + +     if ((p = lookup(instr, n, r, m))) { +          ++p->refcnt; +	  goto done; +     } + +     p = (twid *) MALLOC(sizeof(twid), TWIDDLES); +     p->n = n; +     p->r = r; +     p->m = m; +     p->instr = instr; +     p->refcnt = 1; +     p->W = compute(instr, n, r, m); + +     /* cons! onto twlist */ +     p->cdr = twlist; +     twlist = p; + + done: +     *pp = p; +     return; +} + +void X(twiddle_destroy)(twid **pp) +{ +     twid *p = *pp; +     if (p) { +          twid **q; +          if ((--p->refcnt) == 0) { +               /* remove p from twiddle list */ +               for (q = &twlist; *q; q = &((*q)->cdr)) { +                    if (*q == p) { +                         *q = p->cdr; +                         X(ifree)(p->W); +                         X(ifree)(p); +			 goto done; +                    } +               } +               A(0 /* can't happen */ ); +          } +     } + done: +     *pp = 0; /* destroy pointer */ +     return; +} + + +void X(twiddle_awake)(int flg, twid **pp,  +		      const tw_instr *instr, int n, int r, int m) +{ +     if (flg)  +	  X(mktwiddle)(pp, instr, n, r, m); +     else  +	  X(twiddle_destroy)(pp); +}  | 
