diff options
author | scuri <scuri> | 2009-08-20 12:35:06 +0000 |
---|---|---|
committer | scuri <scuri> | 2009-08-20 12:35:06 +0000 |
commit | 5d735255ddd3cb2f547abd3d03969af3fb7eb04e (patch) | |
tree | 8fb66510bc625bb1b08ccb41f1b83fb0f7cb8f19 /src/fftw3/kernel | |
parent | 35733b87eed86e5228f12fa10c98a3d9d22a6073 (diff) |
*** empty log message ***
Diffstat (limited to 'src/fftw3/kernel')
40 files changed, 0 insertions, 5400 deletions
diff --git a/src/fftw3/kernel/align.c b/src/fftw3/kernel/align.c deleted file mode 100644 index 7f475d5..0000000 --- a/src/fftw3/kernel/align.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: align.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -#if HAVE_3DNOW -# define ALGN 8 -#elif HAVE_SIMD -# define ALGN 16 -#elif HAVE_K7 -# define ALGN 8 -#else -# define ALGN (sizeof(R)) -#endif - -/* NONPORTABLE */ -int X(alignment_of)(R *p) -{ - return (int)(((uintptr_t) p) % ALGN); -} diff --git a/src/fftw3/kernel/alloc.c b/src/fftw3/kernel/alloc.c deleted file mode 100644 index a95e0e8..0000000 --- a/src/fftw3/kernel/alloc.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: alloc.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -#if defined(HAVE_DECL_MEMALIGN) && !HAVE_DECL_MEMALIGN -# if defined(HAVE_MALLOC_H) -# include <malloc.h> -# else -extern void *memalign(size_t, size_t); -# endif -#endif - -#if defined(HAVE_DECL_POSIX_MEMALIGN) && !HAVE_DECL_POSIX_MEMALIGN -extern int posix_memalign(void **, size_t, size_t); -#endif - -#if defined(macintosh) /* MacOS 9 */ -# include <Multiprocessing.h> -#endif - -#define real_malloc X(malloc) -#define real_free free /* memalign and malloc use ordinary free */ - -#if defined(WITH_OUR_MALLOC16) && (MIN_ALIGNMENT == 16) -/* Our own 16-byte aligned malloc/free. Assumes sizeof(void*) is a - power of two <= 8 and that malloc is at least sizeof(void*)-aligned. - - The main reason for this routine is that, as of this writing, - Windows does not include any aligned allocation routines in its - system libraries, and instead provides an implementation with a - Visual C++ "Processor Pack" that you have to statically link into - your program. We do not want to require users to have VC++ - (e.g. gcc/MinGW should be fine). Our code should be at least as good - as the MS _aligned_malloc, in any case, according to second-hand - reports of the algorithm it employs (also based on plain malloc). */ -static void *our_malloc16(size_t n) -{ - void *p0, *p; - if (!(p0 = malloc(n + 16))) return (void *) 0; - p = (void *) (((uintptr_t) p0 + 16) & (~((uintptr_t) 15))); - *((void **) p - 1) = p0; - return p; -} -static void our_free16(void *p) -{ - if (p) free(*((void **) p - 1)); -} -#endif - -/* part of user-callable API */ -void *X(malloc)(size_t n) -{ - void *p; - -#if defined(MIN_ALIGNMENT) - -# if defined(WITH_OUR_MALLOC16) && (MIN_ALIGNMENT == 16) - p = our_malloc16(n); -# undef real_free -# define real_free our_free16 - -# elif defined(HAVE_MEMALIGN) - p = memalign(MIN_ALIGNMENT, n); - -# elif defined(HAVE_POSIX_MEMALIGN) - /* note: posix_memalign is broken in glibc 2.2.5: it constrains - the size, not the alignment, to be (power of two) * sizeof(void*). - The bug seems to have been fixed as of glibc 2.3.1. */ - if (posix_memalign(&p, MIN_ALIGNMENT, n)) - p = (void*) 0; - -# elif defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC) - /* Intel's C compiler defines _mm_malloc and _mm_free intrinsics */ - p = (void *) _mm_malloc(n, MIN_ALIGNMENT); -# undef real_free -# define real_free _mm_free - -# elif defined(_MSC_VER) - /* MS Visual C++ 6.0 with a "Processor Pack" supports SIMD - and _aligned_malloc/free (uses malloc.h) */ - p = (void *) _aligned_malloc(n, MIN_ALIGNMENT); -# undef real_free -# define real_free _aligned_free - -# elif (defined(__MACOSX__) || defined(__APPLE__)) && (MIN_ALIGNMENT <= 16) - /* MacOS X malloc is already 16-byte aligned */ - p = malloc(n); - -# elif defined(macintosh) /* MacOS 9 */ - p = (void *) MPAllocateAligned(n, -# if MIN_ALIGNMENT == 8 - kMPAllocate8ByteAligned, -# elif MIN_ALIGNMENT == 16 - kMPAllocate16ByteAligned, -# elif MIN_ALIGNMENT == 32 - kMPAllocate32ByteAligned, -# else -# error "Unknown alignment for MPAllocateAligned" -# endif - 0); -# undef real_free -# define real_free MPFree - -# else - /* Add your machine here and send a patch to fftw@fftw.org - or (e.g. for Windows) configure --with-our-malloc16 */ -# error "Don't know how to malloc() aligned memory." -# endif - -#else /* !defined(MIN_ALIGMENT) */ - p = malloc(n); -#endif - - return p; -} - -/* part of user-callable API */ -void X(free)(void *p) -{ - real_free(p); -} - -/********************************************************** - * DEBUGGING CODE - **********************************************************/ -#if defined(FFTW_DEBUG_MALLOC) - -#include <stdio.h> - -/* - debugging malloc/free. - - 1) Initialize every malloced and freed area to random values, just - to make sure we are not using uninitialized pointers. - - 2) check for blocks freed twice. - - 3) Check for writes past the ends of allocated blocks - - 4) destroy contents of freed blocks in order to detect incorrect reuse. - - 5) keep track of who allocates what and report memory leaks - - This code is a quick and dirty hack. May be nonportable. - Use at your own risk. - -*/ - -#define MAGIC ((size_t)0xABadCafe) -#define PAD_FACTOR 2 -#define SZ_HEADER (4 * sizeof(size_t)) -#define HASHSZ 1031 - -static unsigned int hashaddr(void *p) -{ - return ((unsigned long)p) % HASHSZ; -} - -struct mstat { - int siz; - int maxsiz; - int cnt; - int maxcnt; -}; - -static struct mstat mstat[MALLOC_WHAT_LAST]; - -struct minfo { - const char *file; - int line; - size_t n; - void *p; - struct minfo *next; -}; - -static struct minfo *minfo[HASHSZ] = {0}; - -#ifdef HAVE_THREADS -int X(in_thread) = 0; -#endif - -void *X(malloc_debug)(size_t n, enum malloc_tag what, - const char *file, int line) -{ - char *p; - size_t i; - struct minfo *info; - struct mstat *stat = mstat + what; - struct mstat *estat = mstat + EVERYTHING; - - if (n == 0) - n = 1; - - if (!IN_THREAD) { - stat->siz += n; - if (stat->siz > stat->maxsiz) - stat->maxsiz = stat->siz; - estat->siz += n; - if (estat->siz > estat->maxsiz) - estat->maxsiz = estat->siz; - } - - p = (char *) real_malloc(PAD_FACTOR * n + SZ_HEADER); - A(p); - - /* store the sz in a known position */ - ((size_t *) p)[0] = n; - ((size_t *) p)[1] = MAGIC; - ((size_t *) p)[2] = what; - - /* fill with junk */ - for (i = 0; i < PAD_FACTOR * n; i++) - p[i + SZ_HEADER] = (char) (i ^ 0xEF); - - if (!IN_THREAD) { - ++stat->cnt; - ++estat->cnt; - - if (stat->cnt > stat->maxcnt) - stat->maxcnt = stat->cnt; - if (estat->cnt > estat->maxcnt) - estat->maxcnt = estat->cnt; - } - - /* skip the info we stored previously */ - p = p + SZ_HEADER; - - if (!IN_THREAD) { - unsigned int h = hashaddr(p); - /* record allocation in allocation list */ - info = (struct minfo *) malloc(sizeof(struct minfo)); - info->n = n; - info->file = file; - info->line = line; - info->p = p; - info->next = minfo[h]; - minfo[h] = info; - } - - return (void *) p; -} - -void X(ifree)(void *p) -{ - char *q; - - A(p); - - q = ((char *) p) - SZ_HEADER; - A(q); - - { - size_t n = ((size_t *) q)[0]; - size_t magic = ((size_t *) q)[1]; - int what = ((size_t *) q)[2]; - size_t i; - struct mstat *stat = mstat + what; - struct mstat *estat = mstat + EVERYTHING; - - /* set to zero to detect duplicate free's */ - ((size_t *) q)[0] = 0; - - A(magic == MAGIC); - ((size_t *) q)[1] = ~MAGIC; - - if (!IN_THREAD) { - stat->siz -= n; - A(stat->siz >= 0); - estat->siz -= n; - A(estat->siz >= 0); - } - - /* check for writing past end of array: */ - for (i = n; i < PAD_FACTOR * n; ++i) - if (q[i + SZ_HEADER] != (char) (i ^ 0xEF)) { - A(0 /* array bounds overwritten */ ); - } - for (i = 0; i < PAD_FACTOR * n; ++i) - q[i + SZ_HEADER] = (char) (i ^ 0xAD); - - if (!IN_THREAD) { - --stat->cnt; - --estat->cnt; - - A(stat->cnt >= 0); - A((stat->cnt == 0 && stat->siz == 0) || - (stat->cnt > 0 && stat->siz > 0)); - A(estat->cnt >= 0); - A((estat->cnt == 0 && estat->siz == 0) || - (estat->cnt > 0 && estat->siz > 0)); - } - - real_free(q); - } - - if (!IN_THREAD) { - /* delete minfo entry */ - unsigned int h = hashaddr(p); - struct minfo **i; - - for (i = minfo + h; *i; i = &((*i)->next)) { - if ((*i)->p == p) { - struct minfo *i0 = (*i)->next; - free(*i); - *i = i0; - return; - } - } - - A(0 /* no entry in minfo list */ ); - } -} - -void X(malloc_print_minfo)(int verbose) -{ - struct minfo *info; - int what; - unsigned int h; - - if (verbose) { - static const char *names[MALLOC_WHAT_LAST] = { - "EVERYTHING", - "PLANS", "SOLVERS", "PROBLEMS", "BUFFERS", - "HASHT", "TENSORS", "PLANNERS", "SLVDSC", "TWIDDLES", - "STRIDES", "OTHER" - }; - - printf("%12s %8s %8s %10s %10s\n", - "what", "cnt", "maxcnt", "siz", "maxsiz"); - - for (what = 0; what < MALLOC_WHAT_LAST; ++what) { - struct mstat *stat = mstat + what; - printf("%12s %8d %8d %10d %10d\n", - names[what], stat->cnt, stat->maxcnt, - stat->siz, stat->maxsiz); - } - } - - for (h = 0; h < HASHSZ; ++h) - if (minfo[h]) { - printf("\nUnfreed allocations:\n"); - break; - } - - for (h = 0; h < HASHSZ; ++h) - for (info = minfo[h]; info; info = info->next) { - printf("%s:%d: %d bytes at %p\n", - info->file, info->line, info->n, info->p); - } -} - -#else -/********************************************************** - * NON DEBUGGING CODE - **********************************************************/ -/* production version, no hacks */ - -void *X(malloc_plain)(size_t n) -{ - void *p; - if (n == 0) - n = 1; - p = real_malloc(n); - CK(p); - -#ifdef MIN_ALIGMENT - A((((uintptr_t)p) % MIN_ALIGNMENT) == 0); -#endif - - return p; -} - -void X(ifree)(void *p) -{ - real_free(p); -} - -#endif - -void X(ifree0)(void *p) -{ - /* common pattern */ - if (p) X(ifree)(p); -} diff --git a/src/fftw3/kernel/assert.c b/src/fftw3/kernel/assert.c deleted file mode 100644 index a066db0..0000000 --- a/src/fftw3/kernel/assert.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: assert.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ -#include "ifftw.h" -#include <stdio.h> -#include <stdlib.h> - -void X(assertion_failed)(const char *s, int line, const char *file) -{ - fflush(stdout); - fprintf(stderr, "fftw: %s:%d: assertion failed: %s\n", file, line, s); - exit(EXIT_FAILURE); -} diff --git a/src/fftw3/kernel/awake.c b/src/fftw3/kernel/awake.c deleted file mode 100644 index 7ee716f..0000000 --- a/src/fftw3/kernel/awake.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: awake.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -void X(null_awake)(plan *ego, int awake) -{ - UNUSED(ego); - UNUSED(awake); - /* do nothing */ -} diff --git a/src/fftw3/kernel/cycle.h b/src/fftw3/kernel/cycle.h deleted file mode 100644 index a324c2a..0000000 --- a/src/fftw3/kernel/cycle.h +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/* $Id: cycle.h,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -/* machine-dependent cycle counters code. Needs to be inlined. */ - -/***************************************************************************/ -/* To use the cycle counters in your code, simply #include "cycle.h" (this - file), and then use the functions/macros: - - ticks getticks(void); - - ticks is an opaque typedef defined below, representing the current time. - You extract the elapsed time between two calls to gettick() via: - - double elapsed(ticks t1, ticks t0); - - which returns a double-precision variable in arbitrary units. You - are not expected to convert this into human units like seconds; it - is intended only for *comparisons* of time intervals. - - (In order to use some of the OS-dependent timer routines like - Solaris' gethrtime, you need to paste the autoconf snippet below - into your configure.ac file and #include "config.h" before cycle.h, - or define the relevant macros manually if you are not using autoconf.) -*/ - -/***************************************************************************/ -/* This file uses macros like HAVE_GETHRTIME that are assumed to be - defined according to whether the corresponding function/type/header - is available on your system. The necessary macros are most - conveniently defined if you are using GNU autoconf, via the tests: - - dnl --------------------------------------------------------------------- - - AC_C_INLINE - AC_HEADER_TIME - AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h]) - - AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H -#include <sys/time.h> -#endif]) - - AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime]) - - dnl Cray UNICOS _rtc() (real-time clock) intrinsic - AC_MSG_CHECKING([for _rtc intrinsic]) - rtc_ok=yes - AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H -#include <intrinsics.h> -#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no]) - AC_MSG_RESULT($rtc_ok) - - dnl --------------------------------------------------------------------- -*/ - -/***************************************************************************/ - -#if TIME_WITH_SYS_TIME -# include <sys/time.h> -# include <time.h> -#else -# if HAVE_SYS_TIME_H -# include <sys/time.h> -# else -# include <time.h> -# endif -#endif - -#define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \ -{ \ - return (double)(t1 - t0); \ -} - -/*----------------------------------------------------------------*/ -/* Solaris */ -#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER) -typedef hrtime_t ticks; - -#define getticks gethrtime - -INLINE_ELAPSED(inline) - -#define HAVE_TICK_COUNTER -#endif - -/*----------------------------------------------------------------*/ -/* AIX v. 4+ routines to read the real-time clock or time-base register */ -#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER) -typedef timebasestruct_t ticks; - -static inline ticks getticks(void) -{ - ticks t; - read_real_time(&t, TIMEBASE_SZ); - return t; -} - -static inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */ -{ - time_base_to_time(&t1, TIMEBASE_SZ); - time_base_to_time(&t0, TIMEBASE_SZ); - return ((t1.tb_high - t0.tb_high) * 1e9 + (t1.tb_low - t0.tb_low)); -} - -#define HAVE_TICK_COUNTER -#endif - -/*----------------------------------------------------------------*/ -/* - * PowerPC ``cycle'' counter using the time base register. - */ -#if ((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh))) && !defined(HAVE_TICK_COUNTER) -typedef unsigned long long ticks; - -static __inline__ ticks getticks(void) -{ - unsigned int tbl, tbu0, tbu1; - - do { - __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0)); - __asm__ __volatile__ ("mftb %0" : "=r"(tbl)); - __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1)); - } while (tbu0 != tbu1); - - return (((unsigned long long)tbu0) << 32) | tbl; -} - -INLINE_ELAPSED(__inline__) - -#define HAVE_TICK_COUNTER -#endif -/*----------------------------------------------------------------*/ -/* - * Pentium cycle counter - */ -#if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__) && !defined(HAVE_TICK_COUNTER) -typedef unsigned long long ticks; - -static __inline__ ticks getticks(void) -{ - ticks ret; - - __asm__ __volatile__("rdtsc": "=A" (ret)); - /* no input, nothing else clobbered */ - return ret; -} - -INLINE_ELAPSED(__inline__) - -#define HAVE_TICK_COUNTER -#endif - -/* Visual C++ -- thanks to Morten Nissov for his help with this */ -#if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER) -#include <windows.h> -typedef LARGE_INTEGER ticks; -#define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */ - -static __inline ticks getticks(void) -{ - ticks ret; - - __asm { - RDTSC - mov ret.HighPart, edx - mov ret.LowPart, eax - } - return ret; -} - -static __inline double elapsed(ticks t1, ticks t0) -{ - return (double)(t1.QuadPart - t0.QuadPart); -} - -#define HAVE_TICK_COUNTER -#endif - -/*----------------------------------------------------------------*/ -/* - * X86-64 cycle counter - */ -#if defined(__GNUC__) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) -typedef unsigned long long ticks; - -static __inline__ ticks getticks(void) -{ - unsigned a, d; - asm volatile("rdtsc" : "=a" (a), "=d" (d)); - return ((ticks)a) | (((ticks)d) << 32); -} - -INLINE_ELAPSED(__inline__) - -#define HAVE_TICK_COUNTER -#endif - -/* Visual C++ (FIXME: how to detect compilation for x86-64?) */ -#if _MSC_VER >= 1400 && !defined(HAVE_TICK_COUNTER) -typedef ULONG64 ticks; - -#define getticks __rdtsc - -INLINE_ELAPSED(__inline) - -#define HAVE_TICK_COUNTER -#endif - -/*----------------------------------------------------------------*/ -/* - * IA64 cycle counter - */ -#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER) -typedef unsigned long ticks; - -static __inline__ ticks getticks(void) -{ - ticks ret; - - __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret)); - return ret; -} - -INLINE_ELAPSED(__inline__) - -#define HAVE_TICK_COUNTER -#endif - -/* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */ -#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER) -#include <machine/sys/inline.h> -typedef unsigned long ticks; - -static inline ticks getticks(void) -{ - ticks ret; - - ret = _Asm_mov_from_ar (_AREG_ITC); - return ret; -} - -INLINE_ELAPSED(inline) - -#define HAVE_TICK_COUNTER -#endif - -/* intel's ecc compiler */ -#if defined(__ECC) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER) -typedef unsigned long ticks; -#include <ia64intrin.h> - -static __inline__ ticks getticks(void) -{ - return __getReg(_IA64_REG_AR_ITC); -} - -INLINE_ELAPSED(__inline__) - -#define HAVE_TICK_COUNTER -#endif - -/*----------------------------------------------------------------*/ -/* - * PA-RISC cycle counter - */ -#if defined(__hppa__) || defined(__hppa) && !defined(HAVE_TICK_COUNTER) -typedef unsigned long ticks; - -# ifdef __GNUC__ -static __inline__ ticks getticks(void) -{ - ticks ret; - - __asm__ __volatile__("mfctl 16, %0": "=r" (ret)); - /* no input, nothing else clobbered */ - return ret; -} -# else -# include <machine/inline.h> -static inline unsigned long getticks(void) -{ - register ticks ret; - _MFCTL(16, ret); - return ret; -} -# endif - -INLINE_ELAPSED(inline) - -#define HAVE_TICK_COUNTER -#endif - -/*----------------------------------------------------------------*/ -/* S390, courtesy of James Treacy */ -#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER) -typedef unsigned long long ticks; - -static __inline__ ticks getticks(void) -{ - ticks cycles; - __asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc"); - return cycles; -} - -INLINE_ELAPSED(__inline__) - -#define HAVE_TICK_COUNTER -#endif -/*----------------------------------------------------------------*/ -#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER) -/* - * The 32-bit cycle counter on alpha overflows pretty quickly, - * unfortunately. A 1GHz machine overflows in 4 seconds. - */ -typedef unsigned int ticks; - -static __inline__ ticks getticks(void) -{ - unsigned long cc; - __asm__ __volatile__ ("rpcc %0" : "=r"(cc)); - return (cc & 0xFFFFFFFF); -} - -INLINE_ELAPSED(__inline__) - -#define HAVE_TICK_COUNTER -#endif - -/*----------------------------------------------------------------*/ -#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER) -typedef unsigned long ticks; - -static __inline__ ticks getticks(void) -{ - ticks ret; - __asm__("rd %%tick, %0" : "=r" (ret)); - return ret; -} - -INLINE_ELAPSED(__inline__) - -#define HAVE_TICK_COUNTER -#endif - -/*----------------------------------------------------------------*/ -#if defined(__DECC) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER) -# include <c_asm.h> -typedef unsigned int ticks; - -static __inline ticks getticks(void) -{ - unsigned long cc; - cc = asm("rpcc %v0"); - return (cc & 0xFFFFFFFF); -} - -INLINE_ELAPSED(__inline) - -#define HAVE_TICK_COUNTER -#endif -/*----------------------------------------------------------------*/ -/* SGI/Irix */ -#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER) -typedef struct timespec ticks; - -static inline ticks getticks(void) -{ - struct timespec t; - clock_gettime(CLOCK_SGI_CYCLE, &t); - return t; -} - -static inline double elapsed(ticks t1, ticks t0) -{ - return (double)(t1.tv_sec - t0.tv_sec) * 1.0E9 + - (double)(t1.tv_nsec - t0.tv_nsec); -} -#define HAVE_TICK_COUNTER -#endif - -/*----------------------------------------------------------------*/ -/* Cray UNICOS _rtc() intrinsic function */ -#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER) -#ifdef HAVE_INTRINSICS_H -# include <intrinsics.h> -#endif - -typedef long long ticks; - -#define getticks _rtc - -INLINE_ELAPSED(inline) - -#define HAVE_TICK_COUNTER -#endif - diff --git a/src/fftw3/kernel/debug.c b/src/fftw3/kernel/debug.c deleted file mode 100644 index 63e4a22..0000000 --- a/src/fftw3/kernel/debug.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: debug.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ -#include "ifftw.h" - -#ifdef FFTW_DEBUG -#include <stdio.h> - -typedef struct { - printer super; - FILE *f; -} P_file; - -static void putchr_file(printer *p_, char c) -{ - P_file *p = (P_file *) p_; - fputc(c, p->f); -} - -static printer *mkprinter_file(FILE *f) -{ - P_file *p = (P_file *) X(mkprinter)(sizeof(P_file), putchr_file, 0); - p->f = f; - return &p->super; -} - -void X(debug)(const char *format, ...) -{ - va_list ap; - printer *p = mkprinter_file(stderr); - va_start(ap, format); - p->vprint(p, format, ap); - va_end(ap); - X(printer_destroy)(p); -} -#endif diff --git a/src/fftw3/kernel/hash.c b/src/fftw3/kernel/hash.c deleted file mode 100644 index f12d5a4..0000000 --- a/src/fftw3/kernel/hash.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "ifftw.h" - -unsigned X(hash)(const char *s) -{ - unsigned h = 0xDEADBEEFu; - do { - h = h * 17 + (int)*s; - } while (*s++); - return h; -} - diff --git a/src/fftw3/kernel/iabs.c b/src/fftw3/kernel/iabs.c deleted file mode 100644 index d2bdb7c..0000000 --- a/src/fftw3/kernel/iabs.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: iabs.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -int X(iabs)(int a) -{ - return (int) (a < 0 ? -a : a); -} diff --git a/src/fftw3/kernel/ifftw.h b/src/fftw3/kernel/ifftw.h deleted file mode 100644 index 0269e18..0000000 --- a/src/fftw3/kernel/ifftw.h +++ /dev/null @@ -1,848 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: ifftw.h,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -/* FFTW internal header file */ -#ifndef __IFFTW_H__ -#define __IFFTW_H__ - -#include "config.h" - -#include <stdlib.h> /* size_t */ -#include <stdarg.h> /* va_list */ -#include <stddef.h> /* ptrdiff_t */ - -#if HAVE_SYS_TYPES_H -# include <sys/types.h> -#endif - -#if HAVE_STDINT_H -# include <stdint.h> /* uintptr_t, maybe */ -#endif - -#if HAVE_INTTYPES_H -# include <inttypes.h> /* uintptr_t, maybe */ -#endif - -/* determine precision and name-mangling scheme */ -#define CONCAT(prefix, name) prefix ## name -#if defined(FFTW_SINGLE) -typedef float R; -#define X(name) CONCAT(fftwf_, name) -#elif defined(FFTW_LDOUBLE) -typedef long double R; -#define X(name) CONCAT(fftwl_, name) -#else -typedef double R; -#define X(name) CONCAT(fftw_, name) -#endif - -/* dummy use of unused parameters to silence compiler warnings */ -#define UNUSED(x) (void)x - -#define FFT_SIGN (-1) /* sign convention for forward transforms */ - -/* get rid of that object-oriented stink: */ -#define REGISTER_SOLVER(p, s) X(solver_register)(p, s) - -#define STRINGIZEx(x) #x -#define STRINGIZE(x) STRINGIZEx(x) - -#ifndef HAVE_K7 -#define HAVE_K7 0 -#endif - -#if defined(HAVE_SSE) || defined(HAVE_SSE2) || defined(HAVE_ALTIVEC) || defined(HAVE_3DNOW) -#define HAVE_SIMD 1 -#else -#define HAVE_SIMD 0 -#endif - -/* forward declarations */ -typedef struct problem_s problem; -typedef struct plan_s plan; -typedef struct solver_s solver; -typedef struct planner_s planner; -typedef struct printer_s printer; -typedef struct scanner_s scanner; - -/*-----------------------------------------------------------------------*/ -/* alloca: */ -#if HAVE_SIMD -#define MIN_ALIGNMENT 16 -#endif - -#ifdef HAVE_ALLOCA - /* use alloca if available */ - -#ifndef alloca -#ifdef __GNUC__ -# define alloca __builtin_alloca -#else -# ifdef _MSC_VER -# include <malloc.h> -# define alloca _alloca -# else -# if HAVE_ALLOCA_H -# include <alloca.h> -# else -# ifdef _AIX - #pragma alloca -# else -# ifndef alloca /* predefined by HP cc +Olibcalls */ -void *alloca(size_t); -# endif -# endif -# endif -# endif -#endif -#endif - -# ifdef MIN_ALIGNMENT -# define STACK_MALLOC(T, p, x) \ - { \ - p = (T)alloca((x) + MIN_ALIGNMENT); \ - p = (T)(((uintptr_t)p + (MIN_ALIGNMENT - 1)) & \ - (~(uintptr_t)(MIN_ALIGNMENT - 1))); \ - } -# define STACK_FREE(x) -# else /* HAVE_ALLOCA && !defined(MIN_ALIGNMENT) */ -# define STACK_MALLOC(T, p, x) p = (T)alloca(x) -# define STACK_FREE(x) -# endif - -#else /* ! HAVE_ALLOCA */ - /* use malloc instead of alloca */ -# define STACK_MALLOC(T, p, x) p = (T)MALLOC(x, OTHER) -# define STACK_FREE(x) X(ifree)(x) -#endif /* ! HAVE_ALLOCA */ - -/*-----------------------------------------------------------------------*/ -/* define uintptr_t if it is not already defined */ - -#ifndef HAVE_UINTPTR_T -# if SIZEOF_VOID_P == 0 -# error sizeof void* is unknown! -# elif SIZEOF_UNSIGNED_INT == SIZEOF_VOID_P - typedef unsigned int uintptr_t; -# elif SIZEOF_UNSIGNED_LONG == SIZEOF_VOID_P - typedef unsigned long uintptr_t; -# elif SIZEOF_UNSIGNED_LONG_LONG == SIZEOF_VOID_P - typedef unsigned long long uintptr_t; -# else -# error no unsigned integer type matches void* sizeof! -# endif -#endif - -/*-----------------------------------------------------------------------*/ -/* assert.c: */ -extern void X(assertion_failed)(const char *s, int line, const char *file); - -/* always check */ -#define CK(ex) \ - (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0)) - -#ifdef FFTW_DEBUG -/* check only if debug enabled */ -#define A(ex) \ - (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0)) -#else -#define A(ex) /* nothing */ -#endif - -extern void X(debug)(const char *format, ...); -#define D X(debug) - -/*-----------------------------------------------------------------------*/ -/* alloc.c: */ - -/* objects allocated by malloc, for statistical purposes */ -enum malloc_tag { - EVERYTHING, - PLANS, - SOLVERS, - PROBLEMS, - BUFFERS, - HASHT, - TENSORS, - PLANNERS, - SLVDESCS, - TWIDDLES, - STRIDES, - OTHER, - MALLOC_WHAT_LAST /* must be last */ -}; - -extern void X(ifree)(void *ptr); -extern void X(ifree0)(void *ptr); - -#ifdef FFTW_DEBUG_MALLOC - -extern void *X(malloc_debug)(size_t n, enum malloc_tag what, - const char *file, int line); -#define MALLOC(n, what) X(malloc_debug)(n, what, __FILE__, __LINE__) -#define NATIVE_MALLOC(n, what) MALLOC(n, what) -void X(malloc_print_minfo)(int vrbose); - -#else /* ! FFTW_DEBUG_MALLOC */ - -extern void *X(malloc_plain)(size_t sz); -#define MALLOC(n, what) X(malloc_plain)(n) -#define NATIVE_MALLOC(n, what) malloc(n) - -#endif - -#if defined(FFTW_DEBUG) && defined(FFTW_DEBUG_MALLOC) && defined(HAVE_THREADS) -extern int X(in_thread); -# define IN_THREAD X(in_thread) -# define THREAD_ON { int in_thread_save = X(in_thread); X(in_thread) = 1 -# define THREAD_OFF X(in_thread) = in_thread_save; } -#else -# define IN_THREAD 0 -# define THREAD_ON -# define THREAD_OFF -#endif - -/*-----------------------------------------------------------------------*/ -/* ops.c: */ -/* - * ops counter. The total number of additions is add + fma - * and the total number of multiplications is mul + fma. - * Total flops = add + mul + 2 * fma - */ -typedef struct { - double add; - double mul; - double fma; - double other; -} opcnt; - -void X(ops_zero)(opcnt *dst); -void X(ops_other)(int o, opcnt *dst); -void X(ops_cpy)(const opcnt *src, opcnt *dst); - -void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst); -void X(ops_add2)(const opcnt *a, opcnt *dst); - -/* dst = m * a + b */ -void X(ops_madd)(int m, const opcnt *a, const opcnt *b, opcnt *dst); - -/* dst += m * a */ -void X(ops_madd2)(int m, const opcnt *a, opcnt *dst); - - -/*-----------------------------------------------------------------------*/ -/* minmax.c: */ -int X(imax)(int a, int b); -int X(imin)(int a, int b); - -/*-----------------------------------------------------------------------*/ -/* iabs.c: */ -int X(iabs)(int a); - -/*-----------------------------------------------------------------------*/ -/* md5.c */ - -#if SIZEOF_UNSIGNED_INT >= 4 -typedef unsigned int md5uint; -#else -typedef unsigned long md5uint; /* at least 32 bits as per C standard */ -#endif - -typedef md5uint md5sig[4]; - -typedef struct { - md5sig s; /* state and signature */ - - /* fields not meant to be used outside md5.c: */ - unsigned char c[64]; /* stuff not yet processed */ - unsigned l; /* total length. Should be 64 bits long, but this is - good enough for us */ -} md5; - -void X(md5begin)(md5 *p); -void X(md5putb)(md5 *p, const void *d_, int len); -void X(md5puts)(md5 *p, const char *s); -void X(md5putc)(md5 *p, unsigned char c); -void X(md5int)(md5 *p, int i); -void X(md5unsigned)(md5 *p, unsigned i); -void X(md5ptrdiff)(md5 *p, ptrdiff_t d); -void X(md5end)(md5 *p); - -/*-----------------------------------------------------------------------*/ -/* tensor.c: */ -#define STRUCT_HACK_KR -#undef STRUCT_HACK_C99 - -typedef struct { - int n; - int is; /* input stride */ - int os; /* output stride */ -} iodim; - -typedef struct { - int rnk; -#if defined(STRUCT_HACK_KR) - iodim dims[1]; -#elif defined(STRUCT_HACK_C99) - iodim dims[]; -#else - iodim *dims; -#endif -} tensor; - -/* - Definition of rank -infinity. - This definition has the property that if you want rank 0 or 1, - you can simply test for rank <= 1. This is a common case. - - A tensor of rank -infinity has size 0. -*/ -#define RNK_MINFTY ((int)(((unsigned) -1) >> 1)) -#define FINITE_RNK(rnk) ((rnk) != RNK_MINFTY) - -typedef enum { INPLACE_IS, INPLACE_OS } inplace_kind; - -tensor *X(mktensor)(int rnk); -tensor *X(mktensor_0d)(void); -tensor *X(mktensor_1d)(int n, int is, int os); -tensor *X(mktensor_2d)(int n0, int is0, int os0, - int n1, int is1, int os1); -int X(tensor_sz)(const tensor *sz); -void X(tensor_md5)(md5 *p, const tensor *t); -int X(tensor_max_index)(const tensor *sz); -int X(tensor_min_istride)(const tensor *sz); -int X(tensor_min_ostride)(const tensor *sz); -int X(tensor_min_stride)(const tensor *sz); -int X(tensor_inplace_strides)(const tensor *sz); -int X(tensor_inplace_strides2)(const tensor *a, const tensor *b); -tensor *X(tensor_copy)(const tensor *sz); -int X(tensor_kosherp)(const tensor *x); - -tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k); -tensor *X(tensor_copy_except)(const tensor *sz, int except_dim); -tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk); -tensor *X(tensor_compress)(const tensor *sz); -tensor *X(tensor_compress_contiguous)(const tensor *sz); -tensor *X(tensor_append)(const tensor *a, const tensor *b); -void X(tensor_split)(const tensor *sz, tensor **a, int a_rnk, tensor **b); -int X(tensor_tornk1)(const tensor *t, int *n, int *is, int *os); -void X(tensor_destroy)(tensor *sz); -void X(tensor_destroy2)(tensor *a, tensor *b); -void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d); -void X(tensor_print)(const tensor *sz, printer *p); -int X(dimcmp)(const iodim *a, const iodim *b); - -/*-----------------------------------------------------------------------*/ -/* problem.c: */ -typedef struct { - void (*hash) (const problem *ego, md5 *p); - void (*zero) (const problem *ego); - void (*print) (problem *ego, printer *p); - void (*destroy) (problem *ego); -} problem_adt; - -struct problem_s { - const problem_adt *adt; -}; - -problem *X(mkproblem)(size_t sz, const problem_adt *adt); -void X(problem_destroy)(problem *ego); - -/*-----------------------------------------------------------------------*/ -/* print.c */ -struct printer_s { - void (*print)(printer *p, const char *format, ...); - void (*vprint)(printer *p, const char *format, va_list ap); - void (*putchr)(printer *p, char c); - void (*cleanup)(printer *p); - int indent; - int indent_incr; -}; - -printer *X(mkprinter)(size_t size, - void (*putchr)(printer *p, char c), - void (*cleanup)(printer *p)); -void X(printer_destroy)(printer *p); - -/*-----------------------------------------------------------------------*/ -/* scan.c */ -struct scanner_s { - int (*scan)(scanner *sc, const char *format, ...); - int (*vscan)(scanner *sc, const char *format, va_list ap); - int (*getchr)(scanner *sc); - int ungotc; -}; - -scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc)); -void X(scanner_destroy)(scanner *sc); - -/*-----------------------------------------------------------------------*/ -/* plan.c: */ -typedef struct { - void (*solve)(const plan *ego, const problem *p); - void (*awake)(plan *ego, int flag); - void (*print)(const plan *ego, printer *p); - void (*destroy)(plan *ego); -} plan_adt; - -struct plan_s { - const plan_adt *adt; - int awake_refcnt; - opcnt ops; - double pcost; -}; - -plan *X(mkplan)(size_t size, const plan_adt *adt); -void X(plan_destroy_internal)(plan *ego); -void X(plan_awake)(plan *ego, int flag); -#define AWAKE(plan, flag) X(plan_awake)(plan, flag) -void X(plan_null_destroy)(plan *ego); - -/*-----------------------------------------------------------------------*/ -/* solver.c: */ -typedef struct { - plan *(*mkplan)(const solver *ego, const problem *p, planner *plnr); -} solver_adt; - -struct solver_s { - const solver_adt *adt; - int refcnt; -}; - -solver *X(mksolver)(size_t size, const solver_adt *adt); -void X(solver_use)(solver *ego); -void X(solver_destroy)(solver *ego); -void X(solver_register)(planner *plnr, solver *s); - -/* shorthand */ -#define MKSOLVER(type, adt) (type *)X(mksolver)(sizeof(type), adt) - -/*-----------------------------------------------------------------------*/ -/* planner.c */ - -typedef struct slvdesc_s { - solver *slv; - const char *reg_nam; - unsigned nam_hash; - int reg_id; -} slvdesc; - -typedef struct solution_s solution; /* opaque */ - -/* values for problem_flags: */ -enum { - DESTROY_INPUT = 0x1, - NO_SIMD = 0x2, - CONSERVE_MEMORY = 0x4, - NO_DHT_R2HC = 0x8 -}; - -#define DESTROY_INPUTP(plnr) ((plnr)->problem_flags & DESTROY_INPUT) -#define NO_SIMDP(plnr) ((plnr)->problem_flags & NO_SIMD) -#define CONSERVE_MEMORYP(plnr) ((plnr)->problem_flags & CONSERVE_MEMORY) -#define NO_DHT_R2HCP(plnr) ((plnr)->problem_flags & NO_DHT_R2HC) - -/* values for planner_flags: */ -enum { - /* impatience flags */ - - BELIEVE_PCOST = 0x1, - DFT_R2HC_ICKY = 0x2, - NONTHREADED_ICKY = 0x4, - NO_BUFFERING = 0x8, - NO_EXHAUSTIVE = 0x10, - NO_INDIRECT_OP = 0x20, - NO_LARGE_GENERIC = 0x40, - NO_RANK_SPLITS = 0x80, - NO_VRANK_SPLITS = 0x100, - NO_VRECURSE = 0x200, - - /* flags that control the search */ - NO_UGLY = 0x400, /* avoid plans we are 99% sure are suboptimal */ - NO_SEARCH = 0x800, /* avoid searching altogether---use wisdom entries - only */ - - ESTIMATE = 0x1000, - IMPATIENCE_FLAGS = (ESTIMATE | (ESTIMATE - 1)), - - BLESSING = 0x4000, /* save this entry */ - H_VALID = 0x8000, /* valid hastable entry */ - NONIMPATIENCE_FLAGS = BLESSING -}; - -#define BELIEVE_PCOSTP(plnr) ((plnr)->planner_flags & BELIEVE_PCOST) -#define DFT_R2HC_ICKYP(plnr) ((plnr)->planner_flags & DFT_R2HC_ICKY) -#define ESTIMATEP(plnr) ((plnr)->planner_flags & ESTIMATE) -#define NONTHREADED_ICKYP(plnr) (((plnr)->planner_flags & NONTHREADED_ICKY) \ - && (plnr)->nthr > 1) -#define NO_BUFFERINGP(plnr) ((plnr)->planner_flags & NO_BUFFERING) -#define NO_EXHAUSTIVEP(plnr) ((plnr)->planner_flags & NO_EXHAUSTIVE) -#define NO_INDIRECT_OP_P(plnr) ((plnr)->planner_flags & NO_INDIRECT_OP) -#define NO_LARGE_GENERICP(plnr) ((plnr)->planner_flags & NO_LARGE_GENERIC) -#define NO_RANK_SPLITSP(plnr) ((plnr)->planner_flags & NO_RANK_SPLITS) -#define NO_UGLYP(plnr) ((plnr)->planner_flags & NO_UGLY) -#define NO_SEARCHP(plnr) ((plnr)->planner_flags & NO_SEARCH) -#define NO_VRANK_SPLITSP(plnr) ((plnr)->planner_flags & NO_VRANK_SPLITS) -#define NO_VRECURSEP(plnr) ((plnr)->planner_flags & NO_VRECURSE) - -typedef enum { FORGET_ACCURSED, FORGET_EVERYTHING } amnesia; - -typedef struct { - void (*register_solver)(planner *ego, solver *s); - plan *(*mkplan)(planner *ego, problem *p); - void (*forget)(planner *ego, amnesia a); - void (*exprt)(planner *ego, printer *p); /* ``export'' is a reserved - word in C++. */ - int (*imprt)(planner *ego, scanner *sc); -} planner_adt; - -struct planner_s { - const planner_adt *adt; - void (*hook)(plan *pln, const problem *p, int optimalp); - - /* solver descriptors */ - slvdesc *slvdescs; - unsigned nslvdesc, slvdescsiz; - const char *cur_reg_nam; - int cur_reg_id; - - /* hash table of solutions */ - solution *solutions; - unsigned hashsiz, nelem; - - int nthr; - unsigned problem_flags; - unsigned short planner_flags; /* matches type of solution.flags in - planner.c */ - /* various statistics */ - int nplan; /* number of plans evaluated */ - double pcost, epcost; /* total pcost of measured/estimated plans */ - int nprob; /* number of problems evaluated */ - int lookup, succ_lookup, lookup_iter; - int insert, insert_iter, insert_unknown; - int nrehash; -}; - -planner *X(mkplanner)(void); -void X(planner_destroy)(planner *ego); - -#ifdef FFTW_DEBUG -void X(planner_dump)(planner *ego, int vrbose); -#endif - -/* - Iterate over all solvers. Read: - - @article{ baker93iterators, - author = "Henry G. Baker, Jr.", - title = "Iterators: Signs of Weakness in Object-Oriented Languages", - journal = "{ACM} {OOPS} Messenger", - volume = "4", - number = "3", - pages = "18--25" - } -*/ -#define FORALL_SOLVERS(ego, s, p, what) \ -{ \ - unsigned _cnt; \ - for (_cnt = 0; _cnt < ego->nslvdesc; ++_cnt) { \ - slvdesc *p = ego->slvdescs + _cnt; \ - solver *s = p->slv; \ - what; \ - } \ -} - -/* make plan, destroy problem */ -plan *X(mkplan_d)(planner *ego, problem *p); - -/*-----------------------------------------------------------------------*/ -/* stride.c: */ - -/* If PRECOMPUTE_ARRAY_INDICES is defined, precompute all strides. */ -#if (defined(__i386__) || _M_IX86 >= 500) && !HAVE_K7 && !defined(FFTW_LDOUBLE) -#define PRECOMPUTE_ARRAY_INDICES -#endif - -#ifdef PRECOMPUTE_ARRAY_INDICES -typedef int *stride; -#define WS(stride, i) (stride[i]) -extern stride X(mkstride)(int n, int s); -void X(stride_destroy)(stride p); - -#else - -typedef int stride; -#define WS(stride, i) (stride * i) -#define fftwf_mkstride(n, stride) stride -#define fftw_mkstride(n, stride) stride -#define fftwl_mkstride(n, stride) stride -#define fftwf_stride_destroy(p) ((void) p) -#define fftw_stride_destroy(p) ((void) p) -#define fftwl_stride_destroy(p) ((void) p) - -#endif /* PRECOMPUTE_ARRAY_INDICES */ - -/*-----------------------------------------------------------------------*/ -/* solvtab.c */ - -struct solvtab_s { void (*reg)(planner *); const char *reg_nam; }; -typedef struct solvtab_s solvtab[]; -void X(solvtab_exec)(solvtab tbl, planner *p); -#define SOLVTAB(s) { s, STRINGIZE(s) } -#define SOLVTAB_END { 0, 0 } - -/*-----------------------------------------------------------------------*/ -/* pickdim.c */ -int X(pickdim)(int which_dim, const int *buddies, int nbuddies, - const tensor *sz, int oop, int *dp); - -/*-----------------------------------------------------------------------*/ -/* twiddle.c */ -/* little language to express twiddle factors computation */ -enum { TW_COS = 0, TW_SIN = 1, TW_TAN = 2, TW_NEXT = 3, - TW_FULL = 4, TW_GENERIC = 5 }; - -typedef struct { - unsigned char op; - unsigned char v; - short i; -} tw_instr; - -typedef struct twid_s { - R *W; /* array of twiddle factors */ - int n, r, m; /* transform order, radix, # twiddle rows */ - int refcnt; - const tw_instr *instr; - struct twid_s *cdr; -} twid; - -void X(mktwiddle)(twid **pp, const tw_instr *instr, int n, int r, int m); -void X(twiddle_destroy)(twid **pp); -int X(twiddle_length)(int r, const tw_instr *p); -void X(twiddle_awake)(int flg, twid **pp, - const tw_instr *instr, int n, int r, int m); - -/*-----------------------------------------------------------------------*/ -/* trig.c */ -#ifdef FFTW_LDOUBLE -typedef long double trigreal; -#else -typedef double trigreal; -#endif - -extern trigreal X(cos2pi)(int, int); -extern trigreal X(sin2pi)(int, int); -extern trigreal X(tan2pi)(int, int); -extern trigreal X(sincos)(trigreal m, trigreal n, int sinp); - -/*-----------------------------------------------------------------------*/ -/* primes.c: */ - -#if defined(FFTW_ENABLE_UNSAFE_MULMOD) -# define MULMOD(x,y,p) (((x) * (y)) % (p)) -#elif ((SIZEOF_INT != 0) && (SIZEOF_LONG >= 2 * SIZEOF_INT)) -# define MULMOD(x,y,p) ((int) ((((long) (x)) * ((long) (y))) % ((long) (p)))) -#elif ((SIZEOF_INT != 0) && (SIZEOF_LONG_LONG >= 2 * SIZEOF_INT)) -# define MULMOD(x,y,p) ((int) ((((long long) (x)) * ((long long) (y))) \ - % ((long long) (p)))) -#elif defined(_MSC_VER) -# define MULMOD(x,y,p) ((int) ((((__int64) (x)) * ((__int64) (y))) \ - % ((__int64) (p)))) -#else /* 'long long' unavailable */ -# define SAFE_MULMOD 1 -int X(safe_mulmod)(int x, int y, int p); -# define MULMOD(x,y,p) X(safe_mulmod)(x,y,p) -#endif - -int X(power_mod)(int n, int m, int p); -int X(find_generator)(int p); -int X(first_divisor)(int n); -int X(is_prime)(int n); -int X(next_prime)(int n); - -#define GENERIC_MIN_BAD 71 /* min prime for which generic becomes bad */ - -/*-----------------------------------------------------------------------*/ -/* rader.c: */ -typedef struct rader_tls rader_tl; - -void X(rader_tl_insert)(int k1, int k2, int k3, R *W, rader_tl **tl); -R *X(rader_tl_find)(int k1, int k2, int k3, rader_tl *t); -void X(rader_tl_delete)(R *W, rader_tl **tl); - -/*-----------------------------------------------------------------------*/ -/* transpose.c: */ - -void X(transpose)(R *A, int n, int m, int d, int N, R *buf); -void X(transpose_slow)(R *a, int nx, int ny, int N, - char *move, int move_size, R *buf); -int X(transposable)(const iodim *a, const iodim *b, - int vl, int s, R *ri, R *ii); -void X(transpose_dims)(const iodim *a, const iodim *b, - int *n, int *m, int *d, int *nd, int *md); -int X(transpose_simplep)(const iodim *a, const iodim *b, int vl, int s, - R *ri, R *ii); -int X(transpose_slowp)(const iodim *a, const iodim *b, int N); - -/*-----------------------------------------------------------------------*/ -/* misc stuff */ -void X(null_awake)(plan *ego, int awake); -int X(square)(int x); -double X(measure_execution_time)(plan *pln, const problem *p); -int X(alignment_of)(R *p); -unsigned X(hash)(const char *s); -int X(compute_nbuf)(int n, int vl, int nbuf, int maxbufsz); -int X(ct_uglyp)(int min_n, int n, int r); - -#if HAVE_SIMD -R *X(taint)(R *p, int s); -R *X(join_taint)(R *p1, R *p2); -#define TAINT(p, s) X(taint)(p, s) -#define UNTAINT(p) ((R *) (((uintptr_t) (p)) & ~(uintptr_t)3)) -#define TAINTOF(p) (((uintptr_t)(p)) & 3) -#define JOIN_TAINT(p1, p2) X(join_taint)(p1, p2) -#else -#define TAINT(p, s) (p) -#define UNTAINT(p) (p) -#define TAINTOF(p) 0 -#define JOIN_TAINT(p1, p2) p1 -#endif - -#ifdef FFTW_DEBUG_ALIGNMENT -# define ASSERT_ALIGNED_DOUBLE { \ - double __foo; \ - CK(!(((uintptr_t) &__foo) & 0x7)); \ -} -#else -# define ASSERT_ALIGNED_DOUBLE -#endif /* FFTW_DEBUG_ALIGNMENT */ - - - -/*-----------------------------------------------------------------------*/ -/* macros used in codelets to reduce source code size */ - -typedef R E; /* internal precision of codelets. */ - -#ifdef FFTW_LDOUBLE -# define K(x) ((E) x##L) -#else -# define K(x) ((E) x) -#endif -#define DK(name, value) const E name = K(value) - -/* FMA macros */ - -#if defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__)) -/* this peculiar coding seems to do the right thing on all of - gcc-2.95, gcc-3.1, and gcc-3.2. - - The obvious expression a * b + c does not work. If both x = a * b - + c and y = a * b - c appear in the source, gcc computes t = a * b, - x = t + c, y = t - c, thus destroying the fma. -*/ -static __inline__ E FMA(E a, E b, E c) -{ - E x = a * b; - x = x + c; - return x; -} - -static __inline__ E FMS(E a, E b, E c) -{ - E x = a * b; - x = x - c; - return x; -} - -static __inline__ E FNMA(E a, E b, E c) -{ - E x = a * b; - x = - (x + c); - return x; -} - -static __inline__ E FNMS(E a, E b, E c) -{ - E x = a * b; - x = - (x - c); - return x; -} -#else -#define FMA(a, b, c) (((a) * (b)) + (c)) -#define FMS(a, b, c) (((a) * (b)) - (c)) -#define FNMA(a, b, c) (- (((a) * (b)) + (c))) -#define FNMS(a, b, c) ((c) - ((a) * (b))) -#endif - - -/* stack-alignment hackery */ -#if defined(__GNUC__) && defined(__i386__) -/* - * horrible hack to align the stack to a 16-byte boundary. - * - * We assume a gcc version >= 2.95 so that - * -mpreferred-stack-boundary works. Otherwise, all bets are - * off. However, -mpreferred-stack-boundary does not create a - * stack alignment, but it only preserves it. Unfortunately, - * many versions of libc on linux call main() with the wrong - * initial stack alignment, with the result that the code is now - * pessimally aligned instead of having a 50% chance of being - * correct. - */ - -#define WITH_ALIGNED_STACK(what) \ -{ \ - /* \ - * Use alloca to allocate some memory on the stack. \ - * This alerts gcc that something funny is going \ - * on, so that it does not omit the frame pointer \ - * etc. \ - */ \ - (void)__builtin_alloca(16); \ - \ - /* \ - * Now align the stack pointer \ - */ \ - __asm__ __volatile__ ("andl $-16, %esp"); \ - \ - what \ -} -#endif - -#ifdef __ICC /* Intel's compiler for ia32 */ -#define WITH_ALIGNED_STACK(what) \ -{ \ - /* \ - * Simply calling alloca seems to do the right thing. \ - * The size of the allocated block seems to be irrelevant. \ - */ \ - _alloca(16); \ - what \ -} -#endif - -#ifndef WITH_ALIGNED_STACK -#define WITH_ALIGNED_STACK(what) what -#endif - -#endif /* __IFFTW_H__ */ diff --git a/src/fftw3/kernel/kbuffered.c b/src/fftw3/kernel/kbuffered.c deleted file mode 100644 index d20403f..0000000 --- a/src/fftw3/kernel/kbuffered.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* routines shared by the various buffered solvers */ - -#include "ifftw.h" - -int X(compute_nbuf)(int n, int vl, int nbuf, int maxbufsz) -{ - int i; - - if (nbuf * n > maxbufsz) - nbuf = X(imax)((int)1, maxbufsz / n); - - /* - * Look for a buffer number (not too big) that divides the - * vector length, in order that we only need one child plan: - */ - for (i = nbuf; i < vl && i < 2 * nbuf; ++i) - if (vl % i == 0) - return i; - - /* whatever... */ - nbuf = X(imin)(nbuf, vl); - return nbuf; -} - diff --git a/src/fftw3/kernel/kct.c b/src/fftw3/kernel/kct.c deleted file mode 100644 index ba3bf68..0000000 --- a/src/fftw3/kernel/kct.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* common routines for Cooley-Tukey algorithms */ - -#include "ifftw.h" - -#define POW2P(n) (((n) > 0) && (((n) & ((n) - 1)) == 0)) - -/* TRUE if radix-r is ugly for size n */ -int X(ct_uglyp)(int min_n, int n, int r) -{ - return (n <= min_n) || (POW2P(n) && (n / r) <= 4); -} diff --git a/src/fftw3/kernel/kplan.c b/src/fftw3/kernel/kplan.c deleted file mode 100644 index 4c17107..0000000 --- a/src/fftw3/kernel/kplan.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: kplan.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -/* "Plan: To bother about the best method of accomplishing an - accidental result." (Ambrose Bierce, The Enlarged Devil's - Dictionary). */ - -plan *X(mkplan)(size_t size, const plan_adt *adt) -{ - plan *p = (plan *)MALLOC(size, PLANS); - - A(adt->destroy); - p->awake_refcnt = 0; - p->adt = adt; - X(ops_zero)(&p->ops); - p->pcost = 0.0; - - return p; -} - -/* - * destroy a plan - */ -void X(plan_destroy_internal)(plan *ego) -{ - if (ego) { - if (ego->awake_refcnt > 0) - ego->adt->awake(ego, 0); - ego->adt->destroy(ego); - X(ifree)(ego); - } -} - -/* dummy destroy routine for plans with no local state */ -void X(plan_null_destroy)(plan *ego) -{ - UNUSED(ego); - /* nothing */ -} - -void X(plan_awake)(plan *ego, int flag) -{ - if (flag) { - if (!ego->awake_refcnt) - ego->adt->awake(ego, flag); - ++ego->awake_refcnt; - } else { - --ego->awake_refcnt; - if (!ego->awake_refcnt) - ego->adt->awake(ego, flag); - } -} - diff --git a/src/fftw3/kernel/kproblem.c b/src/fftw3/kernel/kproblem.c deleted file mode 100644 index 4686692..0000000 --- a/src/fftw3/kernel/kproblem.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: kproblem.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -/* constructor */ -problem *X(mkproblem)(size_t sz, const problem_adt *adt) -{ - problem *p = (problem *)MALLOC(sz, PROBLEMS); - - p->adt = adt; - return p; -} - -/* destructor */ -void X(problem_destroy)(problem *ego) -{ - if (ego) - ego->adt->destroy(ego); -} - diff --git a/src/fftw3/kernel/krader.c b/src/fftw3/kernel/krader.c deleted file mode 100644 index 354edfc..0000000 --- a/src/fftw3/kernel/krader.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "ifftw.h" - -/* - common routines for Rader solvers -*/ - - -/* shared twiddle and omega lists, keyed by two/three integers. */ -struct rader_tls { - int k1, k2, k3; - R *W; - int refcnt; - rader_tl *cdr; -}; - -void X(rader_tl_insert)(int k1, int k2, int k3, R *W, rader_tl **tl) -{ - rader_tl *t = (rader_tl *) MALLOC(sizeof(rader_tl), TWIDDLES); - t->k1 = k1; t->k2 = k2; t->k3 = k3; t->W = W; - t->refcnt = 1; t->cdr = *tl; *tl = t; -} - -R *X(rader_tl_find)(int k1, int k2, int k3, rader_tl *t) -{ - while (t && (t->k1 != k1 || t->k2 != k2 || t->k3 != k3)) - t = t->cdr; - if (t) { - ++t->refcnt; - return t->W; - } else - return 0; -} - -void X(rader_tl_delete)(R *W, rader_tl **tl) -{ - if (W) { - rader_tl **tp, *t; - - for (tp = tl; (t = *tp) && t->W != W; tp = &t->cdr) - ; - - if (t && --t->refcnt <= 0) { - *tp = t->cdr; - X(ifree)(t->W); - X(ifree)(t); - } - } -} diff --git a/src/fftw3/kernel/md5-1.c b/src/fftw3/kernel/md5-1.c deleted file mode 100644 index a2e87a1..0000000 --- a/src/fftw3/kernel/md5-1.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "ifftw.h" - - -void X(md5putb)(md5 *p, const void *d_, int len) -{ - int i; - const unsigned char *d = (const unsigned char *)d_; - for (i = 0; i < len; ++i) - X(md5putc)(p, d[i]); -} - -void X(md5puts)(md5 *p, const char *s) -{ - /* also hash final '\0' */ - do { - X(md5putc)(p, *s); - } while(*s++); -} - -void X(md5int)(md5 *p, int i) -{ - X(md5putb)(p, &i, sizeof(i)); -} - -void X(md5unsigned)(md5 *p, unsigned i) -{ - X(md5putb)(p, &i, sizeof(i)); -} - -void X(md5ptrdiff)(md5 *p, ptrdiff_t d) -{ - X(md5putb)(p, &d, sizeof(d)); -} - diff --git a/src/fftw3/kernel/md5.c b/src/fftw3/kernel/md5.c deleted file mode 100644 index edbd811..0000000 --- a/src/fftw3/kernel/md5.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* - independent implementation of Ron Rivest's MD5 message-digest - algorithm, based on rfc 1321. - - Optimized for small code size, not speed. Works as long as - sizeof(md5uint) >= 4. -*/ - -#include "ifftw.h" - -/* sintab[i] = 4294967296.0 * abs(sin((double)(i + 1))) */ -static const md5uint sintab[64] = { - 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, - 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, - 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, - 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, - 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, - 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, - 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, - 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, - 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, - 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, - 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, - 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, - 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, - 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, - 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, - 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 -}; - -/* see rfc 1321 section 3.4 */ -static const struct roundtab { - char k; - char s; -} roundtab[64] = { - { 0, 7}, { 1, 12}, { 2, 17}, { 3, 22}, - { 4, 7}, { 5, 12}, { 6, 17}, { 7, 22}, - { 8, 7}, { 9, 12}, { 10, 17}, { 11, 22}, - { 12, 7}, { 13, 12}, { 14, 17}, { 15, 22}, - { 1, 5}, { 6, 9}, { 11, 14}, { 0, 20}, - { 5, 5}, { 10, 9}, { 15, 14}, { 4, 20}, - { 9, 5}, { 14, 9}, { 3, 14}, { 8, 20}, - { 13, 5}, { 2, 9}, { 7, 14}, { 12, 20}, - { 5, 4}, { 8, 11}, { 11, 16}, { 14, 23}, - { 1, 4}, { 4, 11}, { 7, 16}, { 10, 23}, - { 13, 4}, { 0, 11}, { 3, 16}, { 6, 23}, - { 9, 4}, { 12, 11}, { 15, 16}, { 2, 23}, - { 0, 6}, { 7, 10}, { 14, 15}, { 5, 21}, - { 12, 6}, { 3, 10}, { 10, 15}, { 1, 21}, - { 8, 6}, { 15, 10}, { 6, 15}, { 13, 21}, - { 4, 6}, { 11, 10}, { 2, 15}, { 9, 21} -}; - -#define rol(a, s) ((a << (int)(s)) | (a >> (32 - (int)(s)))) - -static void doblock(md5sig state, const unsigned char *data) -{ - md5uint a, b, c, d, t, x[16]; - const md5uint msk = 0xffffffffUL; - int i; - - /* encode input bytes into md5uint */ - for (i = 0; i < 16; ++i) { - const unsigned char *p = data + 4 * i; - x[i] = p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24); - } - - a = state[0]; b = state[1]; c = state[2]; d = state[3]; - for (i = 0; i < 64; ++i) { - const struct roundtab *p = roundtab + i; - int round = i / 16; - switch (round) { - case 0: a += (b & c) | (~b & d); break; - case 1: a += (b & d) | (c & ~d); break; - case 2: a += b ^ c ^ d; break; - case 3: a += c ^ (b | ~d); break; - } - a += sintab[i]; - a += x[(int)(p->k)]; - a &= msk; - t = b + rol(a, p->s); - a = d; d = c; c = b; b = t; - } - state[0] = (state[0] + a) & msk; - state[1] = (state[1] + b) & msk; - state[2] = (state[2] + c) & msk; - state[3] = (state[3] + d) & msk; -} - - -void X(md5begin)(md5 *p) -{ - p->s[0] = 0x67452301; - p->s[1] = 0xefcdab89; - p->s[2] = 0x98badcfe; - p->s[3] = 0x10325476; - p->l = 0; -} - -void X(md5putc)(md5 *p, unsigned char c) -{ - p->c[p->l % 64] = c; - if (((++p->l) % 64) == 0) doblock(p->s, p->c); -} - -void X(md5end)(md5 *p) -{ - unsigned l, i; - - l = 8 * p->l; /* length before padding, in bits */ - - /* rfc 1321 section 3.1: padding */ - X(md5putc)(p, 0x80); - while ((p->l % 64) != 56) X(md5putc)(p, 0x00); - - /* rfc 1321 section 3.2: length (little endian) */ - for (i = 0; i < 8; ++i) { - X(md5putc)(p, l & 0xFF); - l = l >> 8; - } - - /* Now p->l % 64 == 0 and signature is in p->s */ -} diff --git a/src/fftw3/kernel/minmax.c b/src/fftw3/kernel/minmax.c deleted file mode 100644 index fc6f3a3..0000000 --- a/src/fftw3/kernel/minmax.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: minmax.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -int X(imax)(int a, int b) -{ - return (a > b) ? a : b; -} - -int X(imin)(int a, int b) -{ - return (a < b) ? a : b; -} diff --git a/src/fftw3/kernel/ops.c b/src/fftw3/kernel/ops.c deleted file mode 100644 index 9e927c1..0000000 --- a/src/fftw3/kernel/ops.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: ops.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -void X(ops_zero)(opcnt *dst) -{ - dst->add = dst->mul = dst->fma = dst->other = 0; -} - -void X(ops_cpy)(const opcnt *src, opcnt *dst) -{ - *dst = *src; -} - -void X(ops_other)(int o, opcnt *dst) -{ - X(ops_zero)(dst); - dst->other = o; -} - -void X(ops_madd)(int m, const opcnt *a, const opcnt *b, opcnt *dst) -{ - dst->add = m * a->add + b->add; - dst->mul = m * a->mul + b->mul; - dst->fma = m * a->fma + b->fma; - dst->other = m * a->other + b->other; -} - -void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst) -{ - X(ops_madd)(1, a, b, dst); -} - -void X(ops_add2)(const opcnt *a, opcnt *dst) -{ - X(ops_add)(a, dst, dst); -} - -void X(ops_madd2)(int m, const opcnt *a, opcnt *dst) -{ - X(ops_madd)(m, a, dst, dst); -} - diff --git a/src/fftw3/kernel/pickdim.c b/src/fftw3/kernel/pickdim.c deleted file mode 100644 index 034e6db..0000000 --- a/src/fftw3/kernel/pickdim.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "ifftw.h" - -/* $Id: pickdim.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -/* Given a solver which_dim, a vector sz, and whether or not the - transform is out-of-place, return the actual dimension index that - it corresponds to. The basic idea here is that we return the - which_dim'th valid dimension, starting from the end if - which_dim < 0. */ -static int really_pickdim(int which_dim, const tensor *sz, int oop, int *dp) -{ - int i; - int count_ok = 0; - if (which_dim > 0) { - for (i = 0; i < sz->rnk; ++i) { - if (oop || sz->dims[i].is == sz->dims[i].os) - if (++count_ok == which_dim) { - *dp = i; - return 1; - } - } - } - else if (which_dim < 0) { - for (i = sz->rnk; i > 0; --i) { - if (oop || sz->dims[i - 1].is == sz->dims[i - 1].os) - if (++count_ok == -which_dim) { - *dp = i - 1; - return 1; - } - } - } - else { /* zero: pick the middle, if valid */ - i = sz->rnk / 2 - 1; - if (i < sz->rnk && (oop || sz->dims[i].is == sz->dims[i].os)) { - *dp = i; - return 1; - } - } - return 0; -} - -/* Like really_pickdim, but only returns 1 if no previous "buddy" - which_dim in the buddies list would give the same dim. */ -int X(pickdim)(int which_dim, const int *buddies, int nbuddies, - const tensor *sz, int oop, int *dp) -{ - int i, d1; - - if (!really_pickdim(which_dim, sz, oop, dp)) - return 0; - - /* check whether some buddy solver would produce the same dim. - If so, consider this solver unapplicable and let the buddy - take care of it. The smallest-indexed buddy is applicable. */ - for (i = 0; i < nbuddies; ++i) { - if (buddies[i] == which_dim) - break; /* found self */ - if (really_pickdim(buddies[i], sz, oop, &d1) && *dp == d1) - return 0; /* found equivalent buddy */ - } - return 1; -} diff --git a/src/fftw3/kernel/planner.c b/src/fftw3/kernel/planner.c deleted file mode 100644 index 1d01cb9..0000000 --- a/src/fftw3/kernel/planner.c +++ /dev/null @@ -1,695 +0,0 @@ -/* - * Copyright (c) 2000 Matteo Frigo - * Copyright (c) 2000 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: planner.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ -#include "ifftw.h" -#include <string.h> - -/* GNU Coding Standards, Sec. 5.2: "Please write the comments in a GNU - program in English, because English is the one language that nearly - all programmers in all countries can read." - - ingemisco tanquam reus - culpa rubet vultus meus - supplicanti parce [rms] -*/ - -#define BLESSEDP(solution) ((solution)->flags & BLESSING) -#define VALIDP(solution) ((solution)->flags & H_VALID) -#define UNBLESS(flags) flags &= ~BLESSING - -#define MAXNAM 64 /* maximum length of registrar's name. - Used for reading wisdom. There is no point - in doing this right */ - -/* Flags f1 subsumes flags f2 iff f1 is less/equally impatient than - f2, defining a partial ordering. */ -#define IMPATIENCE(flags) ((flags) & IMPATIENCE_FLAGS) -#define NONIMPATIENCE(flags) ((flags) & NONIMPATIENCE_FLAGS) -#define ORDERED(f1, f2) (SUBSUMES(f1, f2) || SUBSUMES(f2, f1)) -#define SUBSUMES(f1, f2) ((IMPATIENCE(f1) & (f2)) == IMPATIENCE(f1)) - -static unsigned addmod(unsigned a, unsigned b, unsigned p) -{ - /* gcc-2.95/sparc produces incorrect code for the fast version below. */ -#if defined(__sparc__) && defined(__GNUC__) - /* slow version */ - return (a + b) % p; -#else - /* faster version */ - unsigned c = a + b; - return c >= p ? c - p : c; -#endif -} - -/* - slvdesc management: -*/ -static void sgrow(planner *ego) -{ - unsigned osiz = ego->slvdescsiz, nsiz = 1 + osiz + osiz / 4; - slvdesc *ntab = (slvdesc *)MALLOC(nsiz * sizeof(slvdesc), SLVDESCS); - slvdesc *otab = ego->slvdescs; - unsigned i; - - ego->slvdescs = ntab; - ego->slvdescsiz = nsiz; - for (i = 0; i < osiz; ++i) - ntab[i] = otab[i]; - X(ifree0)(otab); -} - -static void register_solver(planner *ego, solver *s) -{ - slvdesc *n; - if (s) { /* add s to solver list */ - X(solver_use)(s); - - if (ego->nslvdesc >= ego->slvdescsiz) - sgrow(ego); - - n = ego->slvdescs + ego->nslvdesc++; - - n->slv = s; - n->reg_nam = ego->cur_reg_nam; - n->reg_id = ego->cur_reg_id++; - - A(strlen(n->reg_nam) < MAXNAM); - n->nam_hash = X(hash)(n->reg_nam); - } -} - -static int slookup(planner *ego, char *nam, int id) -{ - unsigned h = X(hash)(nam); /* used to avoid strcmp in the common case */ - FORALL_SOLVERS(ego, s, sp, { - UNUSED(s); - if (sp->reg_id == id && sp->nam_hash == h - && !strcmp(sp->reg_nam, nam)) - return sp - ego->slvdescs; - }); - return -1; -} - -/* - md5-related stuff: -*/ - -/* first hash function */ -static unsigned h1(planner *ego, const md5sig s) -{ - return s[0] % ego->hashsiz; -} - -/* second hash function (for double hashing) */ -static unsigned h2(planner *ego, const md5sig s) -{ - return 1U + s[1] % (ego->hashsiz - 1); -} - -static void md5hash(md5 *m, const problem *p, const planner *plnr) -{ - X(md5begin)(m); - X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */ - X(md5unsigned)(m, plnr->problem_flags); - X(md5int)(m, plnr->nthr); - p->adt->hash(p, m); - X(md5end)(m); -} - -static int md5eq(const md5sig a, const md5sig b) -{ - return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3]; -} - -static void sigcpy(const md5sig a, md5sig b) -{ - b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3]; -} - -/* - memoization routines : -*/ - -/* - liber scriptus proferetur - in quo totum continetur - unde mundus iudicetur -*/ -struct solution_s { - md5sig s; - unsigned short flags; - short slvndx; -}; - -static solution *hlookup(planner *ego, const md5sig s, unsigned short flags) -{ - unsigned g, h = h1(ego, s), d = h2(ego, s); - - ++ego->lookup; - - for (g = h; ; g = addmod(g, d, ego->hashsiz)) { - solution *l = ego->solutions + g; - ++ego->lookup_iter; - if (VALIDP(l)) { - if (md5eq(s, l->s) && ORDERED(l->flags, flags)) { - ++ego->succ_lookup; - return l; - } - } else { - return 0; - } - A((g + d) % ego->hashsiz != h); - } -} - - -static void hinsert0(planner *ego, const md5sig s, unsigned short flags, - int slvndx, solution *l) -{ - ++ego->insert; - if (!l) { - /* search for nonfull slot */ - unsigned g, h = h1(ego, s), d = h2(ego, s); - ++ego->insert_unknown; - for (g = h; ; g = addmod(g, d, ego->hashsiz)) { - ++ego->insert_iter; - l = ego->solutions + g; - if (!VALIDP(l)) break; - A((g + d) % ego->hashsiz != h); - } - } - - /* fill slot */ - l->flags = flags | H_VALID; - l->slvndx = (short)slvndx; - sigcpy(s, l->s); -} - -static void rehash(planner *ego, unsigned nsiz) -{ - unsigned osiz = ego->hashsiz, h; - solution *osol = ego->solutions, *nsol; - - nsiz = (unsigned)X(next_prime)((int)nsiz); - nsol = (solution *)MALLOC(nsiz * sizeof(solution), HASHT); - ++ego->nrehash; - - /* init new table */ - for (h = 0; h < nsiz; ++h) - nsol[h].flags = 0; - - /* install new table */ - ego->hashsiz = nsiz; - ego->solutions = nsol; - - /* copy table */ - for (h = 0; h < osiz; ++h) { - solution *l = osol + h; - if (VALIDP(l)) - hinsert0(ego, l->s, l->flags, l->slvndx, 0); - } - - X(ifree0)(osol); -} - -static unsigned minsz(unsigned nelem) -{ - return 1U + nelem + nelem / 8U; -} - -static unsigned nextsz(unsigned nelem) -{ - return minsz(minsz(nelem)); -} - -static void hgrow(planner *ego) -{ - unsigned nelem = ego->nelem; - if (minsz(nelem) >= ego->hashsiz) - rehash(ego, nextsz(nelem)); -} - -static void hshrink(planner *ego) -{ - unsigned nelem = ego->nelem; - /* always rehash after deletions */ - rehash(ego, nextsz(nelem)); -} - -/* inherit blessing, but only if the solver is the same */ -static unsigned short merge_flags(unsigned short dstflags, int dstndx, - unsigned short srcflags, int srcndx) -{ - if (srcndx == dstndx) - dstflags |= (srcflags & BLESSING); /* ne me perdas illa die */ - return dstflags; -} - -static void hinsert(planner *ego, const md5sig s, - unsigned short flags, int slvndx) -{ - solution *l; - - if ((l = hlookup(ego, s, flags))) { - if (SUBSUMES(flags, l->flags)) { - /* overwrite old solution */ - flags = merge_flags(flags, slvndx, l->flags, l->slvndx); - } else { - A(SUBSUMES(l->flags, flags)); - l->flags = merge_flags(l->flags, l->slvndx, flags, slvndx); - return; - } - } else { - ++ego->nelem; - hgrow(ego); - } - hinsert0(ego, s, flags, slvndx, l); -} - -static void hcurse_subsumed(planner *ego) -{ - unsigned h; - - /* unbless any entries that are unreachable because they - are subsumed by less-impatient ones. */ - for (h = 0; h < ego->hashsiz; ++h) { - solution *l = ego->solutions + h; - if (VALIDP(l)) { - unsigned d = h2(ego, l->s), g = addmod(h, d, ego->hashsiz); - for (; ; g = addmod(g, d, ego->hashsiz)) { - solution *m = ego->solutions + g; - if (VALIDP(m)) { - if (md5eq(l->s, m->s) && - SUBSUMES(l->flags, m->flags)) { - /* ne cadant in obscurum */ - l->flags = merge_flags(l->flags, l->slvndx, - m->flags, m->slvndx); - - /* cum vix justus sit securus */ - UNBLESS(m->flags); - } - } - else break; - A((g + d) % ego->hashsiz != h); - } - } - } -} - - -static void invoke_hook(planner *ego, plan *pln, const problem *p, - int optimalp) -{ - if (ego->hook) - ego->hook(pln, p, optimalp); -} - -static void evaluate_plan(planner *ego, plan *pln, const problem *p) -{ - if (!BELIEVE_PCOSTP(ego) || pln->pcost == 0.0) { - ego->nplan++; - - if (ESTIMATEP(ego)) { - estimate: - /* heuristic */ - pln->pcost = 0.0 - + pln->ops.add - + pln->ops.mul - + 2 * pln->ops.fma - + pln->ops.other; - ego->epcost += pln->pcost; - } else { - double t = X(measure_execution_time)(pln, p); - - if (t < 0) { /* unavailable cycle counter */ - /* Real programmers can write FORTRAN in any language */ - goto estimate; - } - - pln->pcost = t; - ego->pcost += t; - } - } - - invoke_hook(ego, pln, p, 0); -} - -/* maintain dynamic scoping of flags, nthr: */ -static plan *invoke_solver(planner *ego, problem *p, solver *s, - unsigned short nflags) -{ - unsigned short planner_flags = ego->planner_flags; - unsigned problem_flags = ego->problem_flags; - int nthr = ego->nthr; - plan *pln; - ego->planner_flags = nflags; - pln = s->adt->mkplan(s, p, ego); - ego->problem_flags = problem_flags; - ego->nthr = nthr; - ego->planner_flags = planner_flags; - return pln; -} - -static plan *search(planner *ego, problem *p, slvdesc **descp) -{ - plan *best = 0; - int best_not_yet_timed = 1; - int pass; - - if (NO_SEARCHP(ego)) { - /* D("invalid search for %P %x\n", p, ego->planner_flags); */ - return 0; - } - - for (pass = 0; pass < 2; ++pass) { - unsigned short nflags = ego->planner_flags; - - if (best) break; - - switch (pass) { - case 0: - /* skip pass 0 during exhaustive search */ - if (!NO_EXHAUSTIVEP(ego)) continue; - nflags |= NO_UGLY; - break; - case 1: - /* skip pass 1 if NO_UGLY */ - if (NO_UGLYP(ego)) continue; - break; - } - - FORALL_SOLVERS(ego, s, sp, { - plan *pln = invoke_solver(ego, p, s, nflags); - - if (pln) { - if (best) { - if (best_not_yet_timed) { - evaluate_plan(ego, best, p); - best_not_yet_timed = 0; - } - evaluate_plan(ego, pln, p); - if (pln->pcost < best->pcost) { - X(plan_destroy_internal)(best); - best = pln; - *descp = sp; - } else { - X(plan_destroy_internal)(pln); - } - } else { - best = pln; - *descp = sp; - } - } - }); - } - - return best; -} - -static plan *mkplan(planner *ego, problem *p) -{ - plan *pln; - md5 m; - slvdesc *sp; - unsigned short flags; - ASSERT_ALIGNED_DOUBLE; - - /* Canonical form. */ - if (!NO_EXHAUSTIVEP(ego)) ego->planner_flags &= ~NO_UGLY; - - ++ego->nprob; - md5hash(&m, p, ego); - - pln = 0; - - { - solution *sol; /* new scope for sol */ - - if ((sol = hlookup(ego, m.s, ego->planner_flags))) { - if (SUBSUMES(sol->flags, ego->planner_flags)) { - /* wisdom is acceptable */ - if (sol->slvndx < 0) - return 0; /* known to be infeasible */ - - /* use solver to obtain a plan */ - sp = ego->slvdescs + sol->slvndx; - pln = - invoke_solver(ego, p, sp->slv, - (0 - | NO_SEARCH - | IMPATIENCE(sol->flags) - | NONIMPATIENCE(ego->planner_flags) )); - - /* if (!pln) then the entry is bogus, but - we currently do nothing about it. */ - /* CAVEAS: Do not use ``sol'' here, because the - pointer is possibly dangling after the call to - invoke_solver(). */ - } else { - A(SUBSUMES(ego->planner_flags, sol->flags)); - } - } - } - - - if (!pln) - pln = search(ego, p, &sp); - - flags = ego->planner_flags; - - if (pln) { - /* Postulate de iure that NO_UGLY subsumes ~NO_UGLY if the - problem is feasible. Also postulate that NO_SEARCH - subsumes ~NO_SEARCH. */ - flags &= ~(NO_UGLY | NO_SEARCH); - } - - hinsert(ego, m.s, flags, pln ? sp - ego->slvdescs : -1); - - if (pln) - invoke_hook(ego, pln, p, 1); - return pln; -} - -/* destroy hash table entries. If FORGET_EVERYTHING, destroy the whole - table. If FORGET_ACCURSED, then destroy entries that are not blessed. */ -static void forget(planner *ego, amnesia a) -{ - unsigned h; - - /* garbage-collect while we are at it */ - if (a != FORGET_EVERYTHING) - hcurse_subsumed(ego); - - for (h = 0; h < ego->hashsiz; ++h) { - solution *l = ego->solutions + h; - if (VALIDP(l)) { - if (a == FORGET_EVERYTHING || - (a == FORGET_ACCURSED && !BLESSEDP(l))) { - /* confutatis maledictis - flammis acribus addictis */ - l->flags &= ~H_VALID; - --ego->nelem; - } - } - } - /* nil inultum remanebit */ - - hshrink(ego); -} - -static void htab_destroy(planner *ego) -{ - forget(ego, FORGET_EVERYTHING); - X(ifree)(ego->solutions); - ego->nelem = 0U; -} - -/* FIXME: what sort of version information should we write? */ -#define WISDOM_PREAMBLE PACKAGE "-" VERSION " " STRINGIZE(X(wisdom)) - -/* tantus labor non sit cassus */ -static void exprt(planner *ego, printer *p) -{ - unsigned h; - - hcurse_subsumed(ego); - - p->print(p, "(" WISDOM_PREAMBLE "%("); - for (h = 0; h < ego->hashsiz; ++h) { - solution *l = ego->solutions + h; - if (VALIDP(l) && BLESSEDP(l) && l->slvndx >= 0) { - slvdesc *sp = ego->slvdescs + l->slvndx; - /* qui salvandos salvas gratis - salva me fons pietatis */ - p->print(p, "(%s %d #x%x #x%M #x%M #x%M #x%M)\n", - sp->reg_nam, sp->reg_id, (int)l->flags, - l->s[0], l->s[1], l->s[2], l->s[3]); - } - } - p->print(p, "%))\n"); -} - -/* mors stupebit et natura - cum resurget creatura */ -static int imprt(planner *ego, scanner *sc) -{ - char buf[MAXNAM + 1]; - md5uint sig[4]; - int flags; - int reg_id; - int slvndx; - solution *sol; - - if (!sc->scan(sc, "(" WISDOM_PREAMBLE)) - return 0; /* don't need to restore hashtable */ - - /* make a backup copy of the hash table (cache the hash) */ - { - unsigned h, hsiz = ego->hashsiz; - sol = (solution *)MALLOC(hsiz * sizeof(solution), HASHT); - for (h = 0; h < hsiz; ++h) - sol[h] = ego->solutions[h]; - } - - while (1) { - if (sc->scan(sc, ")")) - break; - - /* qua resurget ex favilla */ - if (!sc->scan(sc, "(%*s %d #x%x #x%M #x%M #x%M #x%M)", - MAXNAM, buf, ®_id, &flags, - sig + 0, sig + 1, sig + 2, sig + 3)) - goto bad; - - if ((slvndx = slookup(ego, buf, reg_id)) < 0) - goto bad; - - /* inter oves locum praesta */ - hinsert(ego, sig, (unsigned short)flags, slvndx); - } - - X(ifree0)(sol); - return 1; - - bad: - /* ``The wisdom of FFTW must be above suspicion.'' */ - X(ifree0)(ego->solutions); - ego->solutions = sol; - return 0; -} - -/* - * create a planner - */ -planner *X(mkplanner)(void) -{ - static const planner_adt padt = { - register_solver, mkplan, forget, exprt, imprt - }; - - planner *p = (planner *) MALLOC(sizeof(planner), PLANNERS); - - p->adt = &padt; - p->nplan = p->nprob = p->nrehash = 0; - p->pcost = p->epcost = 0.0; - p->succ_lookup = p->lookup = p->lookup_iter = 0; - p->insert = p->insert_iter = p->insert_unknown = 0; - p->hook = 0; - p->cur_reg_nam = 0; - - p->slvdescs = 0; - p->nslvdesc = p->slvdescsiz = 0; - - p->solutions = 0; - p->hashsiz = p->nelem = 0U; - - p->problem_flags = 0; - p->planner_flags = 0; - p->nthr = 1; - - hgrow(p); /* so that hashsiz > 0 */ - - return p; -} - -void X(planner_destroy)(planner *ego) -{ - /* destroy hash table */ - htab_destroy(ego); - - /* destroy solvdesc table */ - FORALL_SOLVERS(ego, s, sp, { - UNUSED(sp); - X(solver_destroy)(s); - }); - - X(ifree0)(ego->slvdescs); - X(ifree)(ego); /* dona eis requiem */ -} - -plan *X(mkplan_d)(planner *ego, problem *p) -{ - plan *pln = ego->adt->mkplan(ego, p); - X(problem_destroy)(p); - return pln; -} - -/* - * Debugging code: - */ -#ifdef FFTW_DEBUG - -void X(planner_dump)(planner *ego, int verbose) -{ - unsigned valid = 0, empty = 0, infeasible = 0; - unsigned h; - UNUSED(verbose); /* historical */ - - for (h = 0; h < ego->hashsiz; ++h) { - solution *l = ego->solutions + h; - if (VALIDP(l)) { - ++valid; - if (l->slvndx < 0) ++infeasible; - } else - ++empty; - - } - - D("nplan = %d\n", ego->nplan); - D("nprob = %d\n", ego->nprob); - D("pcost = %g\n", ego->pcost); - D("epcost = %g\n", ego->epcost); - D("lookup = %d\n", ego->lookup); - D("succ_lookup = %d\n", ego->succ_lookup); - D("lookup_iter = %d\n", ego->lookup_iter); - D("insert = %d\n", ego->insert); - D("insert_iter = %d\n", ego->insert_iter); - D("insert_unknown = %d\n", ego->insert_unknown); - D("nrehash = %d\n", ego->nrehash); - D("hashsiz = %u\n", ego->hashsiz); - D("empty = %d\n", empty); - D("valid = %d\n", valid); - D("infeasible = %d\n", infeasible); - A(ego->nelem == valid); -} - -#endif diff --git a/src/fftw3/kernel/primes.c b/src/fftw3/kernel/primes.c deleted file mode 100644 index 608e51c..0000000 --- a/src/fftw3/kernel/primes.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: primes.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -/***************************************************************************/ - -/* Rader's algorithm requires lots of modular arithmetic, and if we - aren't careful we can have errors due to integer overflows. */ - -#ifdef SAFE_MULMOD - -# include <limits.h> - -/* compute (x * y) mod p, but watch out for integer overflows; we must - have x, y >= 0, p > 0. This routine is slow. */ -int X(safe_mulmod)(int x, int y, int p) -{ - if (y == 0 || x <= INT_MAX / y) - return((x * y) % p); - else { - int y2 = y/2; - return((X(safe_mulmod)(x, y2, p) + - X(safe_mulmod)(x, y - y2, p)) % p); - } -} -#endif /* safe_mulmod ('long long' unavailable) */ - -/***************************************************************************/ - -/* Compute n^m mod p, where m >= 0 and p > 0. If we really cared, we - could make this tail-recursive. */ -int X(power_mod)(int n, int m, int p) -{ - A(p > 0); - if (m == 0) - return 1; - else if (m % 2 == 0) { - int x = X(power_mod)(n, m / 2, p); - return MULMOD(x, x, p); - } - else - return MULMOD(n, X(power_mod)(n, m - 1, p), p); -} - -/* the following two routines were contributed by Greg Dionne. */ -static int get_prime_factors(int n, int *primef) -{ - int i; - int size = 0; - - primef[size++] = 2; - do - n >>= 1; - while ((n & 1) == 0); - - if (n == 1) - return size; - - for (i = 3; i * i <= n; i += 2) - if (!(n % i)) { - primef[size++] = i; - do - n /= i; - while (!(n % i)); - } - if (n == 1) - return size; - primef[size++] = n; - return size; -} - -int X(find_generator)(int p) -{ - int n, i, size; - int primef[16]; /* smallest number = 32589158477190044730 > 2^64 */ - int pm1 = p - 1; - - if (p == 2) - return 1; - - size = get_prime_factors(pm1, primef); - n = 2; - for (i = 0; i < size; i++) - if (X(power_mod)(n, pm1 / primef[i], p) == 1) { - i = -1; - n++; - } - return n; -} - -/* Return first prime divisor of n (It would be at best slightly faster to - search a static table of primes; there are 6542 primes < 2^16.) */ -int X(first_divisor)(int n) -{ - int i; - if (n <= 1) - return n; - if (n % 2 == 0) - return 2; - for (i = 3; i*i <= n; i += 2) - if (n % i == 0) - return i; - return n; -} - -int X(is_prime)(int n) -{ - return(n > 1 && X(first_divisor)(n) == n); -} - -int X(next_prime)(int n) -{ - while (!X(is_prime)(n)) ++n; - return n; -} diff --git a/src/fftw3/kernel/print.c b/src/fftw3/kernel/print.c deleted file mode 100644 index 314be7c..0000000 --- a/src/fftw3/kernel/print.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: print.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" -#include <stddef.h> -#include <stdarg.h> -#include <stdio.h> - -#define BSZ 64 - -static void myputs(printer *p, const char *s) -{ - char c; - while ((c = *s++)) - p->putchr(p, c); -} - -static void vprint(printer *p, const char *format, va_list ap) -{ - char buf[BSZ]; - const char *s = format; - char c; - int i; - - for (i = 0; i < p->indent; ++i) - p->putchr(p, ' '); - - while ((c = *s++)) { - switch (c) { - case '%': - switch ((c = *s++)) { - case 'M': { - /* md5 value */ - md5uint x = va_arg(ap, md5uint); - x = 0xffffffffUL & x; - sprintf(buf, "%8.8lx", (unsigned long)x); - goto putbuf; - } - case 'c': { - int x = va_arg(ap, int); - p->putchr(p, x); - break; - } - case 's': { - char *x = va_arg(ap, char *); - if (x) - myputs(p, x); - else - goto putnull; - break; - } - case 'd': { - int x = va_arg(ap, int); - sprintf(buf, "%d", x); - goto putbuf; - } - case 't': { - ptrdiff_t x; - A(*s == 'd'); - s += 1; - x = va_arg(ap, ptrdiff_t); - /* should use C99 %td here, but - this is not yet widespread enough */ - sprintf(buf, "%ld", (long) x); - goto putbuf; - } - case 'f': case 'e': case 'g': { - char fmt[3] = "%x"; - double x = va_arg(ap, double); - fmt[1] = c; - sprintf(buf, fmt, x); - goto putbuf; - } - case 'v': { - /* print optional vector length */ - int x = va_arg(ap, int); - if (x > 1) { - sprintf(buf, "-x%d", x); - goto putbuf; - } - break; - } - case 'o': { - /* integer option. Usage: %oNAME= */ - int x = va_arg(ap, int); - if (x) - p->putchr(p, '/'); - while ((c = *s++) != '=') - if (x) - p->putchr(p, c); - if (x) { - sprintf(buf, "=%d", x); - goto putbuf; - } - break; - } - case 'u': { - unsigned x = va_arg(ap, unsigned); - sprintf(buf, "%u", x); - goto putbuf; - } - case 'x': { - unsigned x = va_arg(ap, unsigned); - sprintf(buf, "%x", x); - goto putbuf; - } - case '(': { - /* newline, augment indent level */ - p->putchr(p, '\n'); - p->indent += p->indent_incr; - break; - } - case ')': { - /* decrement indent level */ - p->indent -= p->indent_incr; - break; - } - case 'p': { /* note difference from C's %p */ - /* print plan */ - plan *x = va_arg(ap, plan *); - if (x) - x->adt->print(x, p); - else - goto putnull; - break; - } - case 'P': { - /* print problem */ - problem *x = va_arg(ap, problem *); - if (x) - x->adt->print(x, p); - else - goto putnull; - break; - } - case 'T': { - /* print tensor */ - tensor *x = va_arg(ap, tensor *); - if (x) - X(tensor_print)(x, p); - else - goto putnull; - break; - } - default: - A(0 /* unknown format */); - break; - - putbuf: - myputs(p, buf); - break; - putnull: - myputs(p, "(null)"); - break; - } - break; - default: - p->putchr(p, c); - break; - } - } -} - -static void print(printer *p, const char *format, ...) -{ - va_list ap; - va_start(ap, format); - vprint(p, format, ap); - va_end(ap); -} - -printer *X(mkprinter)(size_t size, - void (*putchr)(printer *p, char c), - void (*cleanup)(printer *p)) -{ - printer *s = (printer *)MALLOC(size, OTHER); - s->print = print; - s->vprint = vprint; - s->putchr = putchr; - s->cleanup = cleanup; - s->indent = 0; - s->indent_incr = 2; - return s; -} - -void X(printer_destroy)(printer *p) -{ - if (p->cleanup) - p->cleanup(p); - X(ifree)(p); -} diff --git a/src/fftw3/kernel/scan.c b/src/fftw3/kernel/scan.c deleted file mode 100644 index f5fa6e1..0000000 --- a/src/fftw3/kernel/scan.c +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: scan.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" -#include <string.h> -#include <stddef.h> -#include <stdarg.h> -#include <stdio.h> - -#ifdef USE_CTYPE -#include <ctype.h> -#else -/* Screw ctype. On linux, the is* functions call a routine that gets - the ctype map in the current locale. Because this operation is - expensive, the map is cached on a per-thread basis. I am not - willing to link this crap with FFTW. Not over my dead body. - - Sic transit gloria mundi. -*/ -#undef isspace -#define isspace(x) ((x) >= 0 && (x) <= ' ') -#undef isdigit -#define isdigit(x) ((x) >= '0' && (x) <= '9') -#undef isupper -#define isupper(x) ((x) >= 'A' && (x) <= 'Z') -#undef islower -#define islower(x) ((x) >= 'a' && (x) <= 'z') -#endif - -static int mygetc(scanner *sc) -{ - if (sc->ungotc != EOF) { - int c = sc->ungotc; - sc->ungotc = EOF; - return c; - } - return(sc->getchr(sc)); -} - -#define GETCHR(sc) mygetc(sc) - -static void myungetc(scanner *sc, int c) -{ - sc->ungotc = c; -} - -#define UNGETCHR(sc, c) myungetc(sc, c) - -static void eat_blanks(scanner *sc) -{ - int ch; - while (ch = GETCHR(sc), isspace(ch)) - ; - UNGETCHR(sc, ch); -} - -static void mygets(scanner *sc, char *s, size_t maxlen) -{ - char *s0 = s; - int ch; - - A(maxlen > 0); - while ((ch = GETCHR(sc)) != EOF && !isspace(ch) - && ch != ')' && ch != '(' && s < s0 + maxlen) - *s++ = ch; - *s = 0; - UNGETCHR(sc, ch); -} - -static long getlong(scanner *sc, int base, int *ret) -{ - int sign = 1, ch, count; - long x = 0; - - ch = GETCHR(sc); - if (ch == '-' || ch == '+') { - sign = ch == '-' ? -1 : 1; - ch = GETCHR(sc); - } - for (count = 0; ; ++count) { - if (isdigit(ch)) - ch -= '0'; - else if (isupper(ch)) - ch -= 'A' - 10; - else if (islower(ch)) - ch -= 'a' - 10; - else - break; - x = x * base + ch; - ch = GETCHR(sc); - } - x *= sign; - UNGETCHR(sc, ch); - *ret = count > 0; - return x; -} - -/* vscan is mostly scanf-like, with our additional format specifiers, - but with a few twists. It returns simply 0 or 1 indicating whether - the match was successful. '(' and ')' in the format string match - those characters preceded by any whitespace. Finally, if a - character match fails, it will ungetchr() the last character back - onto the stream. */ -static int vscan(scanner *sc, const char *format, va_list ap) -{ - const char *s = format; - char c; - int ch = 0; - size_t fmt_len; - - while ((c = *s++)) { - fmt_len = 0; - switch (c) { - case '%': - getformat: - switch ((c = *s++)) { - case 's': { - char *x = va_arg(ap, char *); - mygets(sc, x, fmt_len); - break; - } - case 'd': { - int *x = va_arg(ap, int *); - *x = (int) getlong(sc, 10, &ch); - if (!ch) return 0; - break; - } - case 'x': { - int *x = va_arg(ap, int *); - *x = (int) getlong(sc, 16, &ch); - if (!ch) return 0; - break; - } - case 'M': { - md5uint *x = va_arg(ap, md5uint *); - *x = 0xffffffffUL & getlong(sc, 16, &ch); - if (!ch) return 0; - break; - } - case '*': { - if ((fmt_len = va_arg(ap, int)) <= 0) return 0; - goto getformat; - } - default: - A(0 /* unknown format */); - break; - } - break; - default: - if (isspace(c) || c == '(' || c == ')') - eat_blanks(sc); - if (!isspace(c) && (ch = GETCHR(sc)) != c) { - UNGETCHR(sc, ch); - return 0; - } - break; - } - } - return 1; -} - -static int scan(scanner *sc, const char *format, ...) -{ - int ret; - va_list ap; - va_start(ap, format); - ret = vscan(sc, format, ap); - va_end(ap); - return ret; -} - -scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc)) -{ - scanner *s = (scanner *)MALLOC(size, OTHER); - s->scan = scan; - s->vscan = vscan; - s->getchr = getchr; - s->ungotc = EOF; - return s; -} - -void X(scanner_destroy)(scanner *sc) -{ - X(ifree)(sc); -} diff --git a/src/fftw3/kernel/solver.c b/src/fftw3/kernel/solver.c deleted file mode 100644 index 4bfb899..0000000 --- a/src/fftw3/kernel/solver.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: solver.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -solver *X(mksolver)(size_t size, const solver_adt *adt) -{ - solver *s = (solver *)MALLOC(size, SOLVERS); - - s->adt = adt; - s->refcnt = 0; - return s; -} - -void X(solver_use)(solver *ego) -{ - ++ego->refcnt; -} - -void X(solver_destroy)(solver *ego) -{ - if ((--ego->refcnt) == 0) - X(ifree)(ego); -} - -void X(solver_register)(planner *plnr, solver *s) -{ - plnr->adt->register_solver(plnr, s); -} diff --git a/src/fftw3/kernel/solvtab.c b/src/fftw3/kernel/solvtab.c deleted file mode 100644 index 496915e..0000000 --- a/src/fftw3/kernel/solvtab.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: solvtab.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -void X(solvtab_exec)(solvtab tbl, planner *p) -{ - for (; tbl->reg_nam; ++tbl) { - p->cur_reg_nam = tbl->reg_nam; - p->cur_reg_id = 0; - tbl->reg(p); - } - p->cur_reg_nam = 0; -} diff --git a/src/fftw3/kernel/square.c b/src/fftw3/kernel/square.c deleted file mode 100644 index 4b5afab..0000000 --- a/src/fftw3/kernel/square.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: square.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -int X(square)(int x) -{ - return x * x; -} diff --git a/src/fftw3/kernel/stride.c b/src/fftw3/kernel/stride.c deleted file mode 100644 index fda049f..0000000 --- a/src/fftw3/kernel/stride.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: stride.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ -#include "ifftw.h" - -#ifdef PRECOMPUTE_ARRAY_INDICES -stride X(mkstride)(int n, int s) -{ - int i; - int *p = (int *) MALLOC(n * sizeof(int), STRIDES); - - for (i = 0; i < n; ++i) - p[i] = s * i; - - return p; -} - -void X(stride_destroy)(stride p) -{ - X(ifree0)(p); -} - -#endif diff --git a/src/fftw3/kernel/tensor.c b/src/fftw3/kernel/tensor.c deleted file mode 100644 index 1161963..0000000 --- a/src/fftw3/kernel/tensor.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: tensor.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -tensor *X(mktensor)(int rnk) -{ - tensor *x; - - A(rnk >= 0); - -#if defined(STRUCT_HACK_KR) - if (FINITE_RNK(rnk) && rnk > 1) - x = (tensor *)MALLOC(sizeof(tensor) + (rnk - 1) * sizeof(iodim), - TENSORS); - else - x = (tensor *)MALLOC(sizeof(tensor), TENSORS); -#elif defined(STRUCT_HACK_C99) - if (FINITE_RNK(rnk)) - x = (tensor *)MALLOC(sizeof(tensor) + rnk * sizeof(iodim), - TENSORS); - else - x = (tensor *)MALLOC(sizeof(tensor), TENSORS); -#else - x = (tensor *)MALLOC(sizeof(tensor), TENSORS); - if (FINITE_RNK(rnk) && rnk > 0) - x->dims = (iodim *)MALLOC(sizeof(iodim) * rnk, TENSORS); - else - x->dims = 0; -#endif - - x->rnk = rnk; - return x; -} - -void X(tensor_destroy)(tensor *sz) -{ -#if !defined(STRUCT_HACK_C99) && !defined(STRUCT_HACK_KR) - X(ifree0)(sz->dims); -#endif - X(ifree)(sz); -} - -int X(tensor_sz)(const tensor *sz) -{ - int i, n = 1; - - if (!FINITE_RNK(sz->rnk)) - return 0; - - for (i = 0; i < sz->rnk; ++i) - n *= sz->dims[i].n; - return n; -} - -void X(tensor_md5)(md5 *p, const tensor *t) -{ - int i; - X(md5int)(p, t->rnk); - if (FINITE_RNK(t->rnk)) { - for (i = 0; i < t->rnk; ++i) { - const iodim *q = t->dims + i; - X(md5int)(p, q->n); - X(md5int)(p, q->is); - X(md5int)(p, q->os); - } - } -} - -/* treat a (rank <= 1)-tensor as a rank-1 tensor, extracting - appropriate n, is, and os components */ -int X(tensor_tornk1)(const tensor *t, int *n, int *is, int *os) -{ - A(t->rnk <= 1); - if (t->rnk == 1) { - const iodim *vd = t->dims; - *n = vd[0].n; - *is = vd[0].is; - *os = vd[0].os; - } else { - *n = 1; - *is = *os = 0; - } - return 1; -} - -void X(tensor_print)(const tensor *x, printer *p) -{ - if (FINITE_RNK(x->rnk)) { - int i; - int first = 1; - p->print(p, "("); - for (i = 0; i < x->rnk; ++i) { - const iodim *d = x->dims + i; - p->print(p, "%s(%d %d %d)", - first ? "" : " ", - d->n, d->is, d->os); - first = 0; - } - p->print(p, ")"); - } else { - p->print(p, "rank-minfty"); - } -} diff --git a/src/fftw3/kernel/tensor1.c b/src/fftw3/kernel/tensor1.c deleted file mode 100644 index 06ad4dc..0000000 --- a/src/fftw3/kernel/tensor1.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: tensor1.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -tensor *X(mktensor_0d)(void) -{ - return X(mktensor(0)); -} - -tensor *X(mktensor_1d)(int n, int is, int os) -{ - tensor *x = X(mktensor)(1); - x->dims[0].n = n; - x->dims[0].is = is; - x->dims[0].os = os; - return x; -} diff --git a/src/fftw3/kernel/tensor2.c b/src/fftw3/kernel/tensor2.c deleted file mode 100644 index a45e164..0000000 --- a/src/fftw3/kernel/tensor2.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: tensor2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -tensor *X(mktensor_2d)(int n0, int is0, int os0, - int n1, int is1, int os1) -{ - tensor *x = X(mktensor)(2); - x->dims[0].n = n0; - x->dims[0].is = is0; - x->dims[0].os = os0; - x->dims[1].n = n1; - x->dims[1].is = is1; - x->dims[1].os = os1; - return x; -} - diff --git a/src/fftw3/kernel/tensor4.c b/src/fftw3/kernel/tensor4.c deleted file mode 100644 index 9b6cd28..0000000 --- a/src/fftw3/kernel/tensor4.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: tensor4.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -int X(tensor_max_index)(const tensor *sz) -{ - int i; - int n = 0; - - A(FINITE_RNK(sz->rnk)); - for (i = 0; i < sz->rnk; ++i) { - const iodim *p = sz->dims + i; - n += (p->n - 1) * X(imax)(X(iabs)(p->is), X(iabs)(p->os)); - } - return n; -} - -#define tensor_min_xstride(sz, xs) { \ - A(FINITE_RNK(sz->rnk)); \ - if (sz->rnk == 0) return 0; \ - else { \ - int i; \ - int s = X(iabs)(sz->dims[0].xs); \ - for (i = 1; i < sz->rnk; ++i) \ - s = X(imin)(s, X(iabs)(sz->dims[i].xs)); \ - return s; \ - } \ -} - -int X(tensor_min_istride)(const tensor *sz) tensor_min_xstride(sz, is) -int X(tensor_min_ostride)(const tensor *sz) tensor_min_xstride(sz, os) - -int X(tensor_min_stride)(const tensor *sz) -{ - return X(imin)(X(tensor_min_istride)(sz), X(tensor_min_ostride)(sz)); -} - -int X(tensor_inplace_strides)(const tensor *sz) -{ - int i; - A(FINITE_RNK(sz->rnk)); - for (i = 0; i < sz->rnk; ++i) { - const iodim *p = sz->dims + i; - if (p->is != p->os) - return 0; - } - return 1; -} - -int X(tensor_inplace_strides2)(const tensor *a, const tensor *b) -{ - return X(tensor_inplace_strides(a)) && X(tensor_inplace_strides(b)); -} diff --git a/src/fftw3/kernel/tensor5.c b/src/fftw3/kernel/tensor5.c deleted file mode 100644 index 8144930..0000000 --- a/src/fftw3/kernel/tensor5.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: tensor5.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -static void dimcpy(iodim *dst, const iodim *src, int rnk) -{ - int i; - if (FINITE_RNK(rnk)) - for (i = 0; i < rnk; ++i) - dst[i] = src[i]; -} - -tensor *X(tensor_copy)(const tensor *sz) -{ - tensor *x = X(mktensor)(sz->rnk); - dimcpy(x->dims, sz->dims, sz->rnk); - return x; -} - -/* like X(tensor_copy), but makes strides in-place by - setting os = is if k == INPLACE_IS or is = os if k == INPLACE_OS. */ -tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k) -{ - tensor *x = X(tensor_copy)(sz); - if (FINITE_RNK(x->rnk)) { - int i; - if (k == INPLACE_OS) - for (i = 0; i < x->rnk; ++i) - x->dims[i].is = x->dims[i].os; - else - for (i = 0; i < x->rnk; ++i) - x->dims[i].os = x->dims[i].is; - } - return x; -} - -/* Like X(tensor_copy), but copy all of the dimensions *except* - except_dim. */ -tensor *X(tensor_copy_except)(const tensor *sz, int except_dim) -{ - tensor *x; - - A(FINITE_RNK(sz->rnk) && sz->rnk >= 1 && except_dim < sz->rnk); - x = X(mktensor)(sz->rnk - 1); - dimcpy(x->dims, sz->dims, except_dim); - dimcpy(x->dims + except_dim, sz->dims + except_dim + 1, - x->rnk - except_dim); - return x; -} - -/* Like X(tensor_copy), but copy only rnk dimensions starting - with start_dim. */ -tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk) -{ - tensor *x; - - A(FINITE_RNK(sz->rnk) && start_dim + rnk <= sz->rnk); - x = X(mktensor)(rnk); - dimcpy(x->dims, sz->dims + start_dim, rnk); - return x; -} - -tensor *X(tensor_append)(const tensor *a, const tensor *b) -{ - if (!FINITE_RNK(a->rnk) || !FINITE_RNK(b->rnk)) { - return X(mktensor)(RNK_MINFTY); - } else { - tensor *x = X(mktensor)(a->rnk + b->rnk); - dimcpy(x->dims, a->dims, a->rnk); - dimcpy(x->dims + a->rnk, b->dims, b->rnk); - return x; - } -} diff --git a/src/fftw3/kernel/tensor7.c b/src/fftw3/kernel/tensor7.c deleted file mode 100644 index ceae2c4..0000000 --- a/src/fftw3/kernel/tensor7.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: tensor7.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -/* total order among iodim's */ -int X(dimcmp)(const iodim *a, const iodim *b) -{ - if (b->is != a->is) - return (b->is - a->is); /* shorter strides go later */ - if (b->os != a->os) - return (b->os - a->os); /* shorter strides go later */ - return (int)(a->n - b->n); /* larger n's go later */ -} - -/* Like tensor_copy, but eliminate n == 1 dimensions, which - never affect any transform or transform vector. - - Also, we sort the tensor into a canonical order of decreasing - is. In general, processing a loop/array in order of - decreasing stride will improve locality; sorting also makes the - analysis in fftw_tensor_contiguous (below) easier. The choice - of is over os is mostly arbitrary, and hopefully - shouldn't affect things much. Normally, either the os will be - in the same order as is (for e.g. multi-dimensional - transforms) or will be in opposite order (e.g. for Cooley-Tukey - recursion). (Both forward and backwards traversal of the tensor - are considered e.g. by vrank-geq1, so sorting in increasing - vs. decreasing order is not really important.) */ -tensor *X(tensor_compress)(const tensor *sz) -{ - int i, rnk; - tensor *x; - - A(FINITE_RNK(sz->rnk)); - for (i = rnk = 0; i < sz->rnk; ++i) { - A(sz->dims[i].n > 0); - if (sz->dims[i].n != 1) - ++rnk; - } - - x = X(mktensor)(rnk); - for (i = rnk = 0; i < sz->rnk; ++i) { - if (sz->dims[i].n != 1) - x->dims[rnk++] = sz->dims[i]; - } - - if (rnk) { - /* God knows how qsort() behaves if n==0 */ - qsort(x->dims, (size_t)x->rnk, sizeof(iodim), - (int (*)(const void *, const void *))X(dimcmp)); - } - - return x; -} - -/* Return whether the strides of a and b are such that they form an - effective contiguous 1d array. Assumes that a.is >= b.is. */ -static int strides_contig(iodim *a, iodim *b) -{ - return (a->is == b->is * (int)b->n && - a->os == b->os * (int)b->n); -} - -/* Like tensor_compress, but also compress into one dimension any - group of dimensions that form a contiguous block of indices with - some stride. (This can safely be done for transform vector sizes.) */ -tensor *X(tensor_compress_contiguous)(const tensor *sz) -{ - int i, rnk; - tensor *sz2, *x; - - if (X(tensor_sz)(sz) == 0) - return X(mktensor)(RNK_MINFTY); - - sz2 = X(tensor_compress)(sz); - A(FINITE_RNK(sz2->rnk)); - - if (sz2->rnk < 2) /* nothing to compress */ - return sz2; - - for (i = rnk = 1; i < sz2->rnk; ++i) - if (!strides_contig(sz2->dims + i - 1, sz2->dims + i)) - ++rnk; - - x = X(mktensor)(rnk); - x->dims[0] = sz2->dims[0]; - for (i = rnk = 1; i < sz2->rnk; ++i) { - if (strides_contig(sz2->dims + i - 1, sz2->dims + i)) { - x->dims[rnk - 1].n *= sz2->dims[i].n; - x->dims[rnk - 1].is = sz2->dims[i].is; - x->dims[rnk - 1].os = sz2->dims[i].os; - } else { - A(rnk < x->rnk); - x->dims[rnk++] = sz2->dims[i]; - } - } - - X(tensor_destroy)(sz2); - return x; -} - -/* The inverse of X(tensor_append): splits the sz tensor into - tensor a followed by tensor b, where a's rank is arnk. */ -void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b) -{ - A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk)); - - *a = X(tensor_copy_sub)(sz, 0, arnk); - *b = X(tensor_copy_sub)(sz, arnk, sz->rnk - arnk); -} diff --git a/src/fftw3/kernel/tensor8.c b/src/fftw3/kernel/tensor8.c deleted file mode 100644 index 05e9b47..0000000 --- a/src/fftw3/kernel/tensor8.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: tensor8.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -void X(tensor_destroy2)(tensor *a, tensor *b) -{ - X(tensor_destroy)(a); - X(tensor_destroy)(b); -} - -void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d) -{ - X(tensor_destroy2)(a, b); - X(tensor_destroy2)(c, d); -} diff --git a/src/fftw3/kernel/tensor9.c b/src/fftw3/kernel/tensor9.c deleted file mode 100644 index 33ddf45..0000000 --- a/src/fftw3/kernel/tensor9.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: tensor9.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -int X(tensor_kosherp)(const tensor *x) -{ - int i; - - if (x->rnk < 0) return 0; - - if (FINITE_RNK(x->rnk)) { - for (i = 0; i < x->rnk; ++i) - if (x->dims[i].n < 0) - return 0; - } - return 1; -} diff --git a/src/fftw3/kernel/timer.c b/src/fftw3/kernel/timer.c deleted file mode 100644 index 72969ad..0000000 --- a/src/fftw3/kernel/timer.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: timer.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -#include "ifftw.h" - -#ifdef HAVE_UNISTD_H -# include <unistd.h> -#endif - -#ifndef WITH_SLOW_TIMER -# include "cycle.h" -#else -# if TIME_WITH_SYS_TIME -# include <sys/time.h> -# include <time.h> -# else -# if HAVE_SYS_TIME_H -# include <sys/time.h> -# else -# include <time.h> -# endif -# endif -#endif - -#ifndef FFTW_TIME_LIMIT -#define FFTW_TIME_LIMIT 2.0 /* don't run for more than two seconds */ -#endif - -#ifdef HAVE_BSDGETTIMEOFDAY -#ifndef HAVE_GETTIMEOFDAY -#define gettimeofday BSDgettimeofday -#define HAVE_GETTIMEOFDAY 1 -#endif -#endif - -#if defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_SECONDS_TIMER) -typedef struct timeval seconds; - -static seconds getseconds(void) -{ - struct timeval tv; - gettimeofday(&tv, 0); - return tv; -} - -static double elapsed_sec(seconds t1, seconds t0) -{ - return (double)(t1.tv_sec - t0.tv_sec) + - (double)(t1.tv_usec - t0.tv_usec) * 1.0E-6; -} - -# define TIME_MIN_SEC 1.0e-2 /* from fftw2 */ -# define HAVE_SECONDS_TIMER -#endif - -#ifndef HAVE_SECONDS_TIMER -# include <time.h> - -typedef clock_t seconds; - -static seconds getseconds(void) { return clock(); } - -static double elapsed_sec(seconds t1, seconds t0) -{ - return ((double) (t1 - t0)) / CLOCKS_PER_SEC; -} - -# define TIME_MIN_SEC 2.0e-1 /* from fftw2 */ -# define HAVE_SECONDS_TIMER -#endif - -#ifdef WITH_SLOW_TIMER -/* excruciatingly slow; only use this if there is no choice! */ -typedef seconds ticks; -# define getticks getseconds -# define elapsed elapsed_sec -# define TIME_MIN TIME_MIN_SEC -# define TIME_REPEAT 4 /* from fftw2 */ -# define HAVE_TICK_COUNTER -#endif - -#ifdef HAVE_TICK_COUNTER - -# ifndef TIME_MIN -# define TIME_MIN 100.0 -# endif - -# ifndef TIME_REPEAT -# define TIME_REPEAT 8 -# endif - - static double measure(plan *pln, const problem *p, int iter) - { - ticks t0, t1; - int i; - - t0 = getticks(); - for (i = 0; i < iter; ++i) - pln->adt->solve(pln, p); - t1 = getticks(); - return elapsed(t1, t0); - } - - - double X(measure_execution_time)(plan *pln, const problem *p) - { - seconds begin, now; - double t, tmax, tmin; - int iter; - int repeat; - - AWAKE(pln, 1); - p->adt->zero(p); - - start_over: - for (iter = 1; iter; iter *= 2) { - tmin = 1.0E10; - tmax = -1.0E10; - - begin = getseconds(); - /* repeat the measurement TIME_REPEAT times */ - for (repeat = 0; repeat < TIME_REPEAT; ++repeat) { - t = measure(pln, p, iter); - - if (t < 0) - goto start_over; - - if (t < tmin) - tmin = t; - if (t > tmax) - tmax = t; - - /* do not run for too long */ - now = getseconds(); - t = elapsed_sec(now, begin); - - if (t > FFTW_TIME_LIMIT) - break; - } - - if (tmin >= TIME_MIN) { - tmin /= (double) iter; - tmax /= (double) iter; - AWAKE(pln, 0); - return tmin; - } - } - goto start_over; /* may happen if timer is screwed up */ - } - -#else /* no cycle counter */ - - double X(measure_execution_time)(plan *pln, const problem *p) - { - UNUSED(p); - UNUSED(pln); - return -1.0; - } - -#endif diff --git a/src/fftw3/kernel/transpose.c b/src/fftw3/kernel/transpose.c deleted file mode 100644 index fb489bd..0000000 --- a/src/fftw3/kernel/transpose.c +++ /dev/null @@ -1,430 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* transposes of unit-stride arrays, including arrays of N-tuples and - non-square matrices, using cache-oblivious recursive algorithms */ - -#include "ifftw.h" -#include <string.h> /* memcpy */ - -#define CUTOFF 8 /* size below which we do a naive transpose */ - -/*************************************************************************/ -/* some utilities for the solvers */ - -static int Ntuple_transposable(const iodim *a, const iodim *b, - int vl, int s, R *ri, R *ii) -{ - return(2 == s && (ii == ri + 1 || ri == ii + 1) - && - ((a->is == b->os && a->is == (vl*2) - && a->os == b->n * (vl*2) && b->is == a->n * (vl*2)) - || - (a->os == b->is && a->os == (vl*2) - && a->is == b->n * (vl*2) && b->os == a->n * (vl*2)))); -} - - -/* our solvers' transpose routines work for square matrices of arbitrary - stride, or for non-square matrices of a given vl*vl2 corresponding - to the N of the Ntuple with vl2 == s. */ -int X(transposable)(const iodim *a, const iodim *b, - int vl, int s, R *ri, R *ii) -{ - return ((a->n == b->n && a->os == b->is && a->is == b->os) - || Ntuple_transposable(a, b, vl, s, ri, ii)); -} - -static int gcd(int a, int b) -{ - int r; - do { - r = a % b; - a = b; - b = r; - } while (r != 0); - - return a; -} - -/* all of the solvers need to extract n, m, d, n/d, and m/d from the - two iodims, so we put it here to save code space */ -void X(transpose_dims)(const iodim *a, const iodim *b, - int *n, int *m, int *d, int *nd, int *md) -{ - int n0, m0, d0; - /* matrix should be n x m, row-major */ - if (a->is < b->is) { - *n = n0 = b->n; - *m = m0 = a->n; - } - else { - *n = n0 = a->n; - *m = m0 = b->n; - } - *d = d0 = gcd(n0, m0); - *nd = n0 / d0; - *md = m0 / d0; -} - -/* use the simple square transpose in the solver for square matrices - that aren't too big or which have the wrong stride */ -int X(transpose_simplep)(const iodim *a, const iodim *b, int vl, int s, - R *ri, R *ii) -{ - return (a->n == b->n && - (a->n*(vl*2) < CUTOFF - || !Ntuple_transposable(a, b, vl, s, ri, ii))); -} - -/* use the slow general transpose if the buffer would be more than 1/8 - the whole transpose and the transpose is fairly big. - (FIXME: use the CONSERVE_MEMORY flag?) */ -int X(transpose_slowp)(const iodim *a, const iodim *b, int N) -{ - int d = gcd(a->n, b->n); - return (d < 8 && (a->n * b->n * N) / d > 65536); -} - -/*************************************************************************/ -/* Out-of-place transposes: */ - -/* Transpose A (n x m) to B (m x n), where A and B are stored - as n x fda and m x fda arrays, respectively, operating on N-tuples: */ -static void rec_transpose_Ntuple(R *A, R *B, int n, int m, int fda, int fdb, - int N) -{ - if (n == 1 || m == 1 || (n + m) * N < CUTOFF*2) { - int i, j, k; - for (i = 0; i < n; ++i) { - for (j = 0; j < m; ++j) { - for (k = 0; k < N; ++k) { /* FIXME: unroll */ - B[(j*fdb + i) * N + k] = A[(i*fda + j) * N + k]; - } - } - } - } - else if (n > m) { - int n2 = n / 2; - rec_transpose_Ntuple(A, B, n2, m, fda, fdb, N); - rec_transpose_Ntuple(A + n2*N*fda, B + n2*N, n - n2, m, fda, fdb, N); - } - else { - int m2 = m / 2; - rec_transpose_Ntuple(A, B, n, m2, fda, fdb, N); - rec_transpose_Ntuple(A + m2*N, B + m2*N*fdb, n, m - m2, fda, fdb, N); - } -} - -/*************************************************************************/ -/* In-place transposes of square matrices of N-tuples: */ - -/* Transpose both A and B, where A is n x m and B is m x n, storing - the transpose of A in B and the transpose of B in A. A and B - are actually stored as n x fda and m x fda arrays. */ -static void rec_transpose_swap_Ntuple(R *A, R *B, int n, int m, int fda, int N) -{ - if (n == 1 || m == 1 || (n + m) * N <= CUTOFF*2) { - switch (N) { - case 1: { - int i, j; - for (i = 0; i < n; ++i) { - for (j = 0; j < m; ++j) { - R a = A[(i*fda + j)]; - A[(i*fda + j)] = B[(j*fda + i)]; - B[(j*fda + i)] = a; - } - } - break; - } - case 2: { - int i, j; - for (i = 0; i < n; ++i) { - for (j = 0; j < m; ++j) { - R a0 = A[(i*fda + j) * 2 + 0]; - R a1 = A[(i*fda + j) * 2 + 1]; - A[(i*fda + j) * 2 + 0] = B[(j*fda + i) * 2 + 0]; - A[(i*fda + j) * 2 + 1] = B[(j*fda + i) * 2 + 1]; - B[(j*fda + i) * 2 + 0] = a0; - B[(j*fda + i) * 2 + 1] = a1; - } - } - break; - } - default: { - int i, j, k; - for (i = 0; i < n; ++i) { - for (j = 0; j < m; ++j) { - for (k = 0; k < N; ++k) { - R a = A[(i*fda + j) * N + k]; - A[(i*fda + j) * N + k] = - B[(j*fda + i) * N + k]; - B[(j*fda + i) * N + k] = a; - } - } - } - } - } - } else if (n > m) { - int n2 = n / 2; - rec_transpose_swap_Ntuple(A, B, n2, m, fda, N); - rec_transpose_swap_Ntuple(A + n2*N*fda, B + n2*N, n - n2, m, fda, N); - } - else { - int m2 = m / 2; - rec_transpose_swap_Ntuple(A, B, n, m2, fda, N); - rec_transpose_swap_Ntuple(A + m2*N, B + m2*N*fda, n, m - m2, fda, N); - } -} - -/* Transpose A, an n x n matrix (stored as n x fda), in-place. */ -static void rec_transpose_sq_ip_Ntuple(R *A, int n, int fda, int N) -{ - if (n == 1) - return; - else if (n*N <= CUTOFF) { - switch (N) { - case 1: { - int i, j; - for (i = 0; i < n; ++i) { - for (j = i + 1; j < n; ++j) { - R a = A[(i*fda + j)]; - A[(i*fda + j)] = A[(j*fda + i)]; - A[(j*fda + i)] = a; - } - } - break; - } - case 2: { - int i, j; - for (i = 0; i < n; ++i) { - for (j = i + 1; j < n; ++j) { - R a0 = A[(i*fda + j) * 2 + 0]; - R a1 = A[(i*fda + j) * 2 + 1]; - A[(i*fda + j) * 2 + 0] = A[(j*fda + i) * 2 + 0]; - A[(i*fda + j) * 2 + 1] = A[(j*fda + i) * 2 + 1]; - A[(j*fda + i) * 2 + 0] = a0; - A[(j*fda + i) * 2 + 1] = a1; - } - } - break; - } - default: { - int i, j, k; - for (i = 0; i < n; ++i) { - for (j = i + 1; j < n; ++j) { - for (k = 0; k < N; ++k) { - R a = A[(i*fda + j) * N + k]; - A[(i*fda + j) * N + k] = - A[(j*fda + i) * N + k]; - A[(j*fda + i) * N + k] = a; - } - } - } - } - } - } else { - int n2 = n / 2; - rec_transpose_sq_ip_Ntuple(A, n2, fda, N); - rec_transpose_sq_ip_Ntuple((A + n2*N) + n2*N*fda, n - n2, fda, N); - rec_transpose_swap_Ntuple(A + n2*N, A + n2*N*fda, n2, n - n2, fda,N); - } -} - -/*************************************************************************/ -/* In-place transposes of non-square matrices: */ - -/* Transpose the matrix A in-place, where A is an (n*d) x (m*d) matrix - of N-tuples and buf contains at least n*m*d*N elements. In - general, to transpose a p x q matrix, you should call this routine - with d = gcd(p, q), n = p/d, and m = q/d. */ -void X(transpose)(R *A, int n, int m, int d, int N, R *buf) -{ - A(n > 0 && m > 0 && N > 0 && d > 0); - if (d == 1) { - rec_transpose_Ntuple(A, buf, n,m, m,n, N); - memcpy(A, buf, m*n*N*sizeof(R)); - } - else if (n*m == 1) { - rec_transpose_sq_ip_Ntuple(A, d, d, N); - } - else { - int i, num_el = n*m*d*N; - - /* treat as (d x n) x (d' x m) matrix. (d' = d) */ - - /* First, transpose d x (n x d') x m to d x (d' x n) x m, - using the buf matrix. This consists of d transposes - of contiguous n x d' matrices of m-tuples. */ - if (n > 1) { - for (i = 0; i < d; ++i) { - rec_transpose_Ntuple(A + i*num_el, buf, - n,d, d,n, m*N); - memcpy(A + i*num_el, buf, num_el*sizeof(R)); - } - } - - /* Now, transpose (d x d') x (n x m) to (d' x d) x (n x m), which - is a square in-place transpose of n*m-tuples: */ - rec_transpose_sq_ip_Ntuple(A, d, d, n*m*N); - - /* Finally, transpose d' x ((d x n) x m) to d' x (m x (d x n)), - using the buf matrix. This consists of d' transposes - of contiguous d*n x m matrices. */ - if (m > 1) { - for (i = 0; i < d; ++i) { - rec_transpose_Ntuple(A + i*num_el, buf, - d*n,m, m,d*n, N); - memcpy(A + i*num_el, buf, num_el*sizeof(R)); - } - } - } -} - -/*************************************************************************/ -/* In-place transpose routine from TOMS. This routine is much slower - than the cache-oblivious algorithm above, but is has the advantage - of requiring less buffer space for the case of gcd(nx,ny) small. */ - -/* - * TOMS Transpose. Revised version of algorithm 380. - * - * These routines do in-place transposes of arrays. - * - * [ Cate, E.G. and Twigg, D.W., ACM Transactions on Mathematical Software, - * vol. 3, no. 1, 104-110 (1977) ] - * - * C version by Steven G. Johnson. February 1997. - */ - -/* - * "a" is a 1D array of length ny*nx*N which constains the nx x ny - * matrix of N-tuples to be transposed. "a" is stored in row-major - * order (last index varies fastest). move is a 1D array of length - * move_size used to store information to speed up the process. The - * value move_size=(ny+nx)/2 is recommended. buf should be an array - * of length 2*N. - * - */ - -void X(transpose_slow)(R *a, int nx, int ny, int N, - char *move, int move_size, R *buf) -{ - int i, j, im, mn; - R *b, *c, *d; - int ncount; - int k; - - /* check arguments and initialize: */ - A(ny > 0 && nx > 0 && N > 0 && move_size > 0); - - b = buf; - - if (ny == nx) { - /* - * if matrix is square, exchange elements a(i,j) and a(j,i): - */ - for (i = 0; i < nx; ++i) - for (j = i + 1; j < nx; ++j) { - memcpy(b, &a[N * (i + j * nx)], N * sizeof(R)); - memcpy(&a[N * (i + j * nx)], &a[N * (j + i * nx)], N * sizeof(R)); - memcpy(&a[N * (j + i * nx)], b, N * sizeof(R)); - } - return; - } - c = buf + N; - ncount = 2; /* always at least 2 fixed points */ - k = (mn = ny * nx) - 1; - - for (i = 0; i < move_size; ++i) - move[i] = 0; - - if (ny >= 3 && nx >= 3) - ncount += gcd(ny - 1, nx - 1) - 1; /* # fixed points */ - - i = 1; - im = ny; - - while (1) { - int i1, i2, i1c, i2c; - int kmi; - - /** Rearrange the elements of a loop - and its companion loop: **/ - - i1 = i; - kmi = k - i; - memcpy(b, &a[N * i1], N * sizeof(R)); - i1c = kmi; - memcpy(c, &a[N * i1c], N * sizeof(R)); - - while (1) { - i2 = ny * i1 - k * (i1 / nx); - i2c = k - i2; - if (i1 < move_size) - move[i1] = 1; - if (i1c < move_size) - move[i1c] = 1; - ncount += 2; - if (i2 == i) - break; - if (i2 == kmi) { - d = b; - b = c; - c = d; - break; - } - memcpy(&a[N * i1], &a[N * i2], - N * sizeof(R)); - memcpy(&a[N * i1c], &a[N * i2c], - N * sizeof(R)); - i1 = i2; - i1c = i2c; - } - memcpy(&a[N * i1], b, N * sizeof(R)); - memcpy(&a[N * i1c], c, N * sizeof(R)); - - if (ncount >= mn) - break; /* we've moved all elements */ - - /** Search for loops to rearrange: **/ - - while (1) { - int max = k - i; - ++i; - A(i <= max); - im += ny; - if (im > k) - im -= k; - i2 = im; - if (i == i2) - continue; - if (i >= move_size) { - while (i2 > i && i2 < max) { - i1 = i2; - i2 = ny * i1 - k * (i1 / nx); - } - if (i2 == i) - break; - } else if (!move[i]) - break; - } - } -} diff --git a/src/fftw3/kernel/trig.c b/src/fftw3/kernel/trig.c deleted file mode 100644 index 0d4c3b2..0000000 --- a/src/fftw3/kernel/trig.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: trig.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -/* trigonometric functions */ -#include "ifftw.h" -#include <math.h> - -trigreal X(cos2pi)(int m, int n) -{ - return X(sincos)((trigreal)m, (trigreal)n, 0); -} - -trigreal X(sin2pi)(int m, int n) -{ - return X(sincos)((trigreal)m, (trigreal)n, 1); -} - -trigreal X(tan2pi)(int m, int n) -{ -#if 0 /* unimplemented, unused */ - trigreal dm = m, dn = n; - return TAN(by2pi(dm, dn)); -#endif - UNUSED(m); UNUSED(n); - return 0.0; -} diff --git a/src/fftw3/kernel/trig1.c b/src/fftw3/kernel/trig1.c deleted file mode 100644 index b86362c..0000000 --- a/src/fftw3/kernel/trig1.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: trig1.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -/* trigonometric functions */ -#include "ifftw.h" -#include <math.h> - -#ifdef FFTW_LDOUBLE -# define COS cosl -# define SIN sinl -# define TAN tanl -# define KTRIG(x) (x##L) -#else -# define COS cos -# define SIN sin -# define TAN tan -# define KTRIG(x) (x) -#endif - -static const trigreal K2PI = - KTRIG(6.2831853071795864769252867665590057683943388); -#define by2pi(m, n) ((K2PI * (m)) / (n)) - -/* - * Improve accuracy by reducing x to range [0..1/8] - * before multiplication by 2 * PI. - */ - -trigreal X(sincos)(trigreal m, trigreal n, int sinp) -{ - /* waiting for C to get tail recursion... */ - trigreal half_n = n * KTRIG(0.5); - trigreal quarter_n = half_n * KTRIG(0.5); - trigreal eighth_n = quarter_n * KTRIG(0.5); - trigreal sgn = KTRIG(1.0); - - if (sinp) goto sin; - cos: - if (m < 0) { m = -m; /* goto cos; */ } - if (m > half_n) { m = n - m; goto cos; } - if (m > eighth_n) { m = quarter_n - m; goto sin; } - return sgn * COS(by2pi(m, n)); - - msin: - sgn = -sgn; - sin: - if (m < 0) { m = -m; goto msin; } - if (m > half_n) { m = n - m; goto msin; } - if (m > eighth_n) { m = quarter_n - m; goto cos; } - return sgn * SIN(by2pi(m, n)); -} diff --git a/src/fftw3/kernel/twiddle.c b/src/fftw3/kernel/twiddle.c deleted file mode 100644 index 785bdd7..0000000 --- a/src/fftw3/kernel/twiddle.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: twiddle.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -/* Twiddle manipulation */ - -#include "ifftw.h" -#include <math.h> - -/* table of known twiddle factors */ -static twid *twlist = (twid *) 0; - -static int equal_instr(const tw_instr *p, const tw_instr *q) -{ - if (p == q) - return 1; - - for (;; ++p, ++q) { - if (p->op != q->op || p->v != q->v || p->i != q->i) - return 0; - if (p->op == TW_NEXT) /* == q->op */ - return 1; - } - A(0 /* can't happen */); -} - -static int ok_twid(const twid *t, const tw_instr *q, int n, int r, int m) -{ - return (n == t->n && r == t->r && m <= t->m && equal_instr(t->instr, q)); -} - -static twid *lookup(const tw_instr *q, int n, int r, int m) -{ - twid *p; - - for (p = twlist; p && !ok_twid(p, q, n, r, m); p = p->cdr) - ; - return p; -} - -static int twlen0(int r, const tw_instr **pp) -{ - int ntwiddle = 0; - const tw_instr *p = *pp; - - /* compute length of bytecode program */ - A(r > 0); - for ( ; p->op != TW_NEXT; ++p) { - switch (p->op) { - case TW_FULL: - ntwiddle += (r - 1) * 2; - break; - case TW_GENERIC: - ntwiddle += r * 2; - break; - default: - ++ntwiddle; - } - } - - *pp = p; - return ntwiddle; -} - -int X(twiddle_length)(int r, const tw_instr *p) -{ - return twlen0(r, &p); -} - -static R *compute(const tw_instr *instr, int n, int r, int m) -{ - int ntwiddle, j; - R *W, *W0; - const tw_instr *p; - - static trigreal (*const f[])(int, int) = { - X(cos2pi), X(sin2pi), X(tan2pi) - }; - - p = instr; - ntwiddle = twlen0(r, &p); - - W0 = W = (R *)MALLOC(ntwiddle * (m / p->v) * sizeof(R), TWIDDLES); - - for (j = 0; j < m; j += p->v) { - for (p = instr; p->op != TW_NEXT; ++p) { - switch (p->op) { - case TW_FULL: - { - int i; - A((int)p->i == r); /* consistency check */ - for (i = 1; i < r; ++i) { - *W++ = f[TW_COS]((j + p->v) * i, n); - *W++ = f[TW_SIN]((j + p->v) * i, n); - } - break; - } - - case TW_GENERIC: - { - int i; - A(p->v == 0); /* unused */ - A(p->i == 0); /* unused */ - for (i = 0; i < r; ++i) { - int k = j * r + i; - *W++ = f[TW_COS](k, n); - *W++ = FFT_SIGN * f[TW_SIN](k, n); - } - break; - } - - default: - *W++ = f[p->op](((signed int)(j + p->v)) * p->i, n); - break; - } - } - A(m % p->v == 0); - } - - return W0; -} - -void X(mktwiddle)(twid **pp, const tw_instr *instr, int n, int r, int m) -{ - twid *p; - - if (*pp) return; /* already created */ - - if ((p = lookup(instr, n, r, m))) { - ++p->refcnt; - goto done; - } - - p = (twid *) MALLOC(sizeof(twid), TWIDDLES); - p->n = n; - p->r = r; - p->m = m; - p->instr = instr; - p->refcnt = 1; - p->W = compute(instr, n, r, m); - - /* cons! onto twlist */ - p->cdr = twlist; - twlist = p; - - done: - *pp = p; - return; -} - -void X(twiddle_destroy)(twid **pp) -{ - twid *p = *pp; - if (p) { - twid **q; - if ((--p->refcnt) == 0) { - /* remove p from twiddle list */ - for (q = &twlist; *q; q = &((*q)->cdr)) { - if (*q == p) { - *q = p->cdr; - X(ifree)(p->W); - X(ifree)(p); - goto done; - } - } - A(0 /* can't happen */ ); - } - } - done: - *pp = 0; /* destroy pointer */ - return; -} - - -void X(twiddle_awake)(int flg, twid **pp, - const tw_instr *instr, int n, int r, int m) -{ - if (flg) - X(mktwiddle)(pp, instr, n, r, m); - else - X(twiddle_destroy)(pp); -} |