summaryrefslogtreecommitdiff
path: root/src/fftw3/kernel/ifftw.h
diff options
context:
space:
mode:
authorscuri <scuri>2008-10-17 06:10:15 +0000
committerscuri <scuri>2008-10-17 06:10:15 +0000
commit5a422aba704c375a307a902bafe658342e209906 (patch)
tree5005011e086bb863d8fb587ad3319bbec59b2447 /src/fftw3/kernel/ifftw.h
First commit - moving from LuaForge to SourceForge
Diffstat (limited to 'src/fftw3/kernel/ifftw.h')
-rw-r--r--src/fftw3/kernel/ifftw.h848
1 files changed, 848 insertions, 0 deletions
diff --git a/src/fftw3/kernel/ifftw.h b/src/fftw3/kernel/ifftw.h
new file mode 100644
index 0000000..0269e18
--- /dev/null
+++ b/src/fftw3/kernel/ifftw.h
@@ -0,0 +1,848 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/* $Id: ifftw.h,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* FFTW internal header file */
+#ifndef __IFFTW_H__
+#define __IFFTW_H__
+
+#include "config.h"
+
+#include <stdlib.h> /* size_t */
+#include <stdarg.h> /* va_list */
+#include <stddef.h> /* ptrdiff_t */
+
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+
+#if HAVE_STDINT_H
+# include <stdint.h> /* uintptr_t, maybe */
+#endif
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h> /* uintptr_t, maybe */
+#endif
+
+/* determine precision and name-mangling scheme: R is the real type, X(name) pastes the matching prefix onto name */
+#define CONCAT(prefix, name) prefix ## name
+#if defined(FFTW_SINGLE) /* single precision: float, fftwf_ prefix */
+typedef float R;
+#define X(name) CONCAT(fftwf_, name)
+#elif defined(FFTW_LDOUBLE) /* long double precision: fftwl_ prefix */
+typedef long double R;
+#define X(name) CONCAT(fftwl_, name)
+#else /* neither FFTW_SINGLE nor FFTW_LDOUBLE: double, fftw_ prefix */
+typedef double R;
+#define X(name) CONCAT(fftw_, name)
+#endif
+
+/* dummy use of unused parameters to silence compiler warnings; x is parenthesized so that any expression may be passed */
+#define UNUSED(x) (void)(x)
+
+#define FFT_SIGN (-1) /* sign convention for forward transforms */
+
+/* get rid of that object-oriented stink: REGISTER_SOLVER(p, s) is a plain call to X(solver_register) */
+#define REGISTER_SOLVER(p, s) X(solver_register)(p, s)
+
+#define STRINGIZEx(x) #x /* stringizes x as written, without macro-expanding it */
+#define STRINGIZE(x) STRINGIZEx(x) /* extra level so that x is macro-expanded before being stringized */
+
+#ifndef HAVE_K7 /* give HAVE_K7 a value so it can be used in #if expressions */
+#define HAVE_K7 0
+#endif
+
+#if defined(HAVE_SSE) || defined(HAVE_SSE2) || defined(HAVE_ALTIVEC) || defined(HAVE_3DNOW)
+#define HAVE_SIMD 1 /* note: the test is defined(), so a HAVE_xxx macro defined as 0 still selects this branch */
+#else
+#define HAVE_SIMD 0
+#endif
+
+/* forward declarations of the core object types; each struct is completed later in this header except where noted */
+typedef struct problem_s problem; /* struct problem_s: see problem.c section */
+typedef struct plan_s plan; /* struct plan_s: see plan.c section */
+typedef struct solver_s solver; /* struct solver_s: see solver.c section */
+typedef struct planner_s planner; /* struct planner_s: see planner.c section */
+typedef struct printer_s printer; /* struct printer_s: see print.c section */
+typedef struct scanner_s scanner; /* struct scanner_s: see scan.c section */
+
+/*-----------------------------------------------------------------------*/
+/* alloca: STACK_MALLOC(T, p, x) assigns to p a buffer of x bytes cast to T; STACK_FREE(p) releases it where needed */
+#if HAVE_SIMD
+#define MIN_ALIGNMENT 16 /* only defined for SIMD builds; enables the round-up in STACK_MALLOC below */
+#endif
+
+#ifdef HAVE_ALLOCA
+ /* use alloca if available */
+
+#ifndef alloca
+#ifdef __GNUC__
+# define alloca __builtin_alloca
+#else
+# ifdef _MSC_VER
+# include <malloc.h>
+# define alloca _alloca
+# else
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# ifdef _AIX
+ #pragma alloca
+# else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+void *alloca(size_t);
+# endif
+# endif
+# endif
+# endif
+#endif
+#endif
+
+# ifdef MIN_ALIGNMENT
+# define STACK_MALLOC(T, p, x) \
+ { /* over-allocate by MIN_ALIGNMENT bytes, then round p up to a multiple of MIN_ALIGNMENT */ \
+ p = (T)alloca((x) + MIN_ALIGNMENT); \
+ p = (T)(((uintptr_t)p + (MIN_ALIGNMENT - 1)) & \
+ (~(uintptr_t)(MIN_ALIGNMENT - 1))); \
+ }
+# define STACK_FREE(x) /* expands to nothing in the alloca case */
+# else /* HAVE_ALLOCA && !defined(MIN_ALIGNMENT) */
+# define STACK_MALLOC(T, p, x) p = (T)alloca(x)
+# define STACK_FREE(x) /* expands to nothing in the alloca case */
+# endif
+
+#else /* ! HAVE_ALLOCA */
+ /* use malloc instead of alloca; no MIN_ALIGNMENT round-up is applied by this macro */
+# define STACK_MALLOC(T, p, x) p = (T)MALLOC(x, OTHER)
+# define STACK_FREE(x) X(ifree)(x)
+#endif /* ! HAVE_ALLOCA */
+
+/*-----------------------------------------------------------------------*/
+/* define uintptr_t if it is not already defined: pick the first unsigned type whose configured size equals SIZEOF_VOID_P */
+
+#ifndef HAVE_UINTPTR_T
+# if SIZEOF_VOID_P == 0 /* also taken when SIZEOF_VOID_P is undefined, since #if treats it as 0 */
+# error sizeof void* is unknown!
+# elif SIZEOF_UNSIGNED_INT == SIZEOF_VOID_P
+ typedef unsigned int uintptr_t;
+# elif SIZEOF_UNSIGNED_LONG == SIZEOF_VOID_P
+ typedef unsigned long uintptr_t;
+# elif SIZEOF_UNSIGNED_LONG_LONG == SIZEOF_VOID_P
+ typedef unsigned long long uintptr_t;
+# else
+# error no unsigned integer type matches void* sizeof!
+# endif
+#endif
+
+/*-----------------------------------------------------------------------*/
+/* assert.c: assertion macros; on failure they call X(assertion_failed) with the stringized expression, line and file */
+extern void X(assertion_failed)(const char *s, int line, const char *file);
+
+/* always check: ex is evaluated in every build */
+#define CK(ex) \
+ (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0))
+
+#ifdef FFTW_DEBUG
+/* check only if debug enabled; same expansion as CK */
+#define A(ex) \
+ (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0))
+#else
+#define A(ex) /* nothing: ex is not evaluated, so it must not carry needed side effects */
+#endif
+
+extern void X(debug)(const char *format, ...);
+#define D X(debug) /* shorthand for the variadic debug function declared above */
+
+/*-----------------------------------------------------------------------*/
+/* alloc.c: allocation wrappers; MALLOC(n, what) takes a byte count and a malloc_tag */
+
+/* objects allocated by malloc, for statistical purposes */
+enum malloc_tag {
+ EVERYTHING,
+ PLANS,
+ SOLVERS,
+ PROBLEMS,
+ BUFFERS,
+ HASHT,
+ TENSORS,
+ PLANNERS,
+ SLVDESCS,
+ TWIDDLES,
+ STRIDES,
+ OTHER,
+ MALLOC_WHAT_LAST /* must be last */
+};
+
+extern void X(ifree)(void *ptr);
+extern void X(ifree0)(void *ptr); /* NOTE(review): presumably tolerates a null ptr -- confirm in alloc.c */
+
+#ifdef FFTW_DEBUG_MALLOC
+
+extern void *X(malloc_debug)(size_t n, enum malloc_tag what,
+ const char *file, int line);
+#define MALLOC(n, what) X(malloc_debug)(n, what, __FILE__, __LINE__) /* passes the tag and the call site */
+#define NATIVE_MALLOC(n, what) MALLOC(n, what)
+void X(malloc_print_minfo)(int vrbose);
+
+#else /* ! FFTW_DEBUG_MALLOC */
+
+extern void *X(malloc_plain)(size_t sz);
+#define MALLOC(n, what) X(malloc_plain)(n) /* the tag argument is dropped in this configuration */
+#define NATIVE_MALLOC(n, what) malloc(n) /* goes straight to the C library malloc */
+
+#endif
+
+#if defined(FFTW_DEBUG) && defined(FFTW_DEBUG_MALLOC) && defined(HAVE_THREADS)
+extern int X(in_thread);
+# define IN_THREAD X(in_thread)
+# define THREAD_ON { int in_thread_save = X(in_thread); X(in_thread) = 1 /* opens a block that THREAD_OFF must close */
+# define THREAD_OFF X(in_thread) = in_thread_save; } /* restores the saved value and closes the THREAD_ON block */
+#else /* otherwise IN_THREAD is the constant 0 and the bracket macros expand to nothing */
+# define IN_THREAD 0
+# define THREAD_ON
+# define THREAD_OFF
+#endif
+
+/*-----------------------------------------------------------------------*/
+/* ops.c: operation counts attached to plans */
+/*
+ * ops counter. The total number of additions is add + fma
+ * and the total number of multiplications is mul + fma.
+ * Total flops = add + mul + 2 * fma
+ */
+typedef struct {
+ double add; /* additions that are not part of an fma */
+ double mul; /* multiplications that are not part of an fma */
+ double fma; /* fused multiply-adds; each counts as one add and one mul */
+ double other; /* not included in the flop total above; NOTE(review): exact meaning is defined in ops.c */
+} opcnt;
+
+void X(ops_zero)(opcnt *dst);
+void X(ops_other)(int o, opcnt *dst);
+void X(ops_cpy)(const opcnt *src, opcnt *dst);
+
+void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst);
+void X(ops_add2)(const opcnt *a, opcnt *dst);
+
+/* dst = m * a + b */
+void X(ops_madd)(int m, const opcnt *a, const opcnt *b, opcnt *dst);
+
+/* dst += m * a */
+void X(ops_madd2)(int m, const opcnt *a, opcnt *dst);
+
+
+/*-----------------------------------------------------------------------*/
+/* minmax.c: */
+int X(imax)(int a, int b);
+int X(imin)(int a, int b);
+
+/*-----------------------------------------------------------------------*/
+/* iabs.c: */
+int X(iabs)(int a);
+
+/*-----------------------------------------------------------------------*/
+/* md5.c: types for the MD5 state; md5uint is an unsigned type chosen to hold at least 32 bits */
+
+#if SIZEOF_UNSIGNED_INT >= 4
+typedef unsigned int md5uint;
+#else
+typedef unsigned long md5uint; /* at least 32 bits as per C standard */
+#endif
+
+typedef md5uint md5sig[4]; /* four md5uint words */
+
+typedef struct {
+ md5sig s; /* state and signature */
+
+ /* fields not meant to be used outside md5.c: */
+ unsigned char c[64]; /* stuff not yet processed */
+ unsigned l; /* total length. Should be 64 bits long, but this is
+ good enough for us */
+} md5;
+
+void X(md5begin)(md5 *p);
+void X(md5putb)(md5 *p, const void *d_, int len);
+void X(md5puts)(md5 *p, const char *s);
+void X(md5putc)(md5 *p, unsigned char c);
+void X(md5int)(md5 *p, int i);
+void X(md5unsigned)(md5 *p, unsigned i);
+void X(md5ptrdiff)(md5 *p, ptrdiff_t d);
+void X(md5end)(md5 *p);
+
+/*-----------------------------------------------------------------------*/
+/* tensor.c: a tensor is a rank plus an array of iodim entries */
+#define STRUCT_HACK_KR /* selects the dims[1] layout below */
+#undef STRUCT_HACK_C99 /* the C99 flexible-array layout is therefore not used */
+
+typedef struct {
+ int n; /* NOTE(review): presumably the extent of this dimension -- confirm in tensor.c */
+ int is; /* input stride */
+ int os; /* output stride */
+} iodim;
+
+typedef struct {
+ int rnk; /* rank; may also hold the special value RNK_MINFTY */
+#if defined(STRUCT_HACK_KR)
+ iodim dims[1]; /* active branch; NOTE(review): presumably over-allocated to hold rnk entries -- confirm in tensor.c */
+#elif defined(STRUCT_HACK_C99)
+ iodim dims[];
+#else
+ iodim *dims;
+#endif
+} tensor;
+
+/*
+ Definition of rank -infinity.
+ This definition has the property that if you want rank 0 or 1,
+ you can simply test for rank <= 1. This is a common case.
+ (The value is ((unsigned) -1) >> 1, a large positive int, so rnk <= 1 is false for it.)
+ A tensor of rank -infinity has size 0.
+*/
+#define RNK_MINFTY ((int)(((unsigned) -1) >> 1))
+#define FINITE_RNK(rnk) ((rnk) != RNK_MINFTY) /* nonzero for every rank other than RNK_MINFTY */
+
+typedef enum { INPLACE_IS, INPLACE_OS } inplace_kind; /* selector passed to X(tensor_copy_inplace) */
+
+tensor *X(mktensor)(int rnk);
+tensor *X(mktensor_0d)(void);
+tensor *X(mktensor_1d)(int n, int is, int os);
+tensor *X(mktensor_2d)(int n0, int is0, int os0,
+ int n1, int is1, int os1);
+int X(tensor_sz)(const tensor *sz);
+void X(tensor_md5)(md5 *p, const tensor *t);
+int X(tensor_max_index)(const tensor *sz);
+int X(tensor_min_istride)(const tensor *sz);
+int X(tensor_min_ostride)(const tensor *sz);
+int X(tensor_min_stride)(const tensor *sz);
+int X(tensor_inplace_strides)(const tensor *sz);
+int X(tensor_inplace_strides2)(const tensor *a, const tensor *b);
+tensor *X(tensor_copy)(const tensor *sz);
+int X(tensor_kosherp)(const tensor *x);
+
+tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k);
+tensor *X(tensor_copy_except)(const tensor *sz, int except_dim);
+tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk);
+tensor *X(tensor_compress)(const tensor *sz);
+tensor *X(tensor_compress_contiguous)(const tensor *sz);
+tensor *X(tensor_append)(const tensor *a, const tensor *b);
+void X(tensor_split)(const tensor *sz, tensor **a, int a_rnk, tensor **b);
+int X(tensor_tornk1)(const tensor *t, int *n, int *is, int *os);
+void X(tensor_destroy)(tensor *sz);
+void X(tensor_destroy2)(tensor *a, tensor *b);
+void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d);
+void X(tensor_print)(const tensor *sz, printer *p);
+int X(dimcmp)(const iodim *a, const iodim *b);
+
+/*-----------------------------------------------------------------------*/
+/* problem.c: a problem is an object whose first member points to a table of operations */
+typedef struct {
+ void (*hash) (const problem *ego, md5 *p); /* receives the md5 state to update */
+ void (*zero) (const problem *ego); /* NOTE(review): presumably zeroes the problem's data arrays -- confirm */
+ void (*print) (problem *ego, printer *p);
+ void (*destroy) (problem *ego);
+} problem_adt;
+
+struct problem_s {
+ const problem_adt *adt; /* operation table shared by problems of one kind */
+};
+
+problem *X(mkproblem)(size_t sz, const problem_adt *adt); /* sz: NOTE(review): presumably the size of the concrete problem struct */
+void X(problem_destroy)(problem *ego);
+
+/*-----------------------------------------------------------------------*/
+/* print.c: output object built around a per-character putchr callback */
+struct printer_s {
+ void (*print)(printer *p, const char *format, ...); /* variadic formatted output */
+ void (*vprint)(printer *p, const char *format, va_list ap); /* va_list form of print */
+ void (*putchr)(printer *p, char c); /* character sink supplied to X(mkprinter) */
+ void (*cleanup)(printer *p); /* cleanup callback supplied to X(mkprinter) */
+ int indent;
+ int indent_incr;
+};
+
+printer *X(mkprinter)(size_t size,
+ void (*putchr)(printer *p, char c),
+ void (*cleanup)(printer *p));
+void X(printer_destroy)(printer *p);
+
+/*-----------------------------------------------------------------------*/
+/* scan.c: input object built around a per-character getchr callback */
+struct scanner_s {
+ int (*scan)(scanner *sc, const char *format, ...); /* variadic formatted input */
+ int (*vscan)(scanner *sc, const char *format, va_list ap); /* va_list form of scan */
+ int (*getchr)(scanner *sc); /* character source supplied to X(mkscanner) */
+ int ungotc; /* NOTE(review): presumably a one-character pushback slot -- confirm in scan.c */
+};
+
+scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc));
+void X(scanner_destroy)(scanner *sc);
+
+/*-----------------------------------------------------------------------*/
+/* plan.c: a plan carries an operation table, an awake reference count and cost information */
+typedef struct {
+ void (*solve)(const plan *ego, const problem *p);
+ void (*awake)(plan *ego, int flag);
+ void (*print)(const plan *ego, printer *p);
+ void (*destroy)(plan *ego);
+} plan_adt;
+
+struct plan_s {
+ const plan_adt *adt; /* operation table */
+ int awake_refcnt; /* NOTE(review): presumably counts nested X(plan_awake) requests -- confirm in plan.c */
+ opcnt ops; /* operation counts for this plan */
+ double pcost; /* NOTE(review): presumably the plan's cost as used by the planner -- confirm */
+};
+
+plan *X(mkplan)(size_t size, const plan_adt *adt);
+void X(plan_destroy_internal)(plan *ego);
+void X(plan_awake)(plan *ego, int flag);
+#define AWAKE(plan, flag) X(plan_awake)(plan, flag) /* shorthand for X(plan_awake) */
+void X(plan_null_destroy)(plan *ego);
+
+/*-----------------------------------------------------------------------*/
+/* solver.c: a solver's single operation builds a plan for a problem */
+typedef struct {
+ plan *(*mkplan)(const solver *ego, const problem *p, planner *plnr);
+} solver_adt;
+
+struct solver_s {
+ const solver_adt *adt; /* operation table */
+ int refcnt; /* NOTE(review): presumably managed by X(solver_use) / X(solver_destroy) -- confirm */
+};
+
+solver *X(mksolver)(size_t size, const solver_adt *adt);
+void X(solver_use)(solver *ego);
+void X(solver_destroy)(solver *ego);
+void X(solver_register)(planner *plnr, solver *s);
+
+/* shorthand: allocate a solver of sizeof(type) bytes via X(mksolver) and cast the result to type * */
+#define MKSOLVER(type, adt) (type *)X(mksolver)(sizeof(type), adt)
+
+/*-----------------------------------------------------------------------*/
+/* planner.c: solver descriptors as stored in planner_s.slvdescs */
+
+typedef struct slvdesc_s {
+ solver *slv; /* the registered solver; FORALL_SOLVERS reads this member */
+ const char *reg_nam; /* NOTE(review): presumably the registration name, cf. planner_s.cur_reg_nam -- confirm */
+ unsigned nam_hash; /* NOTE(review): presumably a hash of reg_nam -- confirm in planner.c */
+ int reg_id; /* NOTE(review): cf. planner_s.cur_reg_id -- confirm */
+} slvdesc;
+
+typedef struct solution_s solution; /* opaque */
+
+/* values for problem_flags: distinct bits, tested through the xxxP(plnr) macros below */
+enum {
+ DESTROY_INPUT = 0x1,
+ NO_SIMD = 0x2,
+ CONSERVE_MEMORY = 0x4,
+ NO_DHT_R2HC = 0x8
+};
+
+#define DESTROY_INPUTP(plnr) ((plnr)->problem_flags & DESTROY_INPUT) /* nonzero iff the bit is set */
+#define NO_SIMDP(plnr) ((plnr)->problem_flags & NO_SIMD)
+#define CONSERVE_MEMORYP(plnr) ((plnr)->problem_flags & CONSERVE_MEMORY)
+#define NO_DHT_R2HCP(plnr) ((plnr)->problem_flags & NO_DHT_R2HC)
+
+/* values for planner_flags: distinct bits, all of which fit in 16 bits */
+enum {
+ /* impatience flags */
+
+ BELIEVE_PCOST = 0x1,
+ DFT_R2HC_ICKY = 0x2,
+ NONTHREADED_ICKY = 0x4,
+ NO_BUFFERING = 0x8,
+ NO_EXHAUSTIVE = 0x10,
+ NO_INDIRECT_OP = 0x20,
+ NO_LARGE_GENERIC = 0x40,
+ NO_RANK_SPLITS = 0x80,
+ NO_VRANK_SPLITS = 0x100,
+ NO_VRECURSE = 0x200,
+
+ /* flags that control the search */
+ NO_UGLY = 0x400, /* avoid plans we are 99% sure are suboptimal */
+ NO_SEARCH = 0x800, /* avoid searching altogether---use wisdom entries
+ only */
+
+ ESTIMATE = 0x1000,
+ IMPATIENCE_FLAGS = (ESTIMATE | (ESTIMATE - 1)), /* ESTIMATE and every lower bit, i.e. 0x1fff */
+
+ BLESSING = 0x4000, /* save this entry */
+ H_VALID = 0x8000, /* valid hash table entry */
+ NONIMPATIENCE_FLAGS = BLESSING
+};
+
+#define BELIEVE_PCOSTP(plnr) ((plnr)->planner_flags & BELIEVE_PCOST) /* each xxxP macro is nonzero iff its bit is set */
+#define DFT_R2HC_ICKYP(plnr) ((plnr)->planner_flags & DFT_R2HC_ICKY)
+#define ESTIMATEP(plnr) ((plnr)->planner_flags & ESTIMATE)
+#define NONTHREADED_ICKYP(plnr) (((plnr)->planner_flags & NONTHREADED_ICKY) \
+ && (plnr)->nthr > 1) /* additionally requires more than one thread */
+#define NO_BUFFERINGP(plnr) ((plnr)->planner_flags & NO_BUFFERING)
+#define NO_EXHAUSTIVEP(plnr) ((plnr)->planner_flags & NO_EXHAUSTIVE)
+#define NO_INDIRECT_OP_P(plnr) ((plnr)->planner_flags & NO_INDIRECT_OP)
+#define NO_LARGE_GENERICP(plnr) ((plnr)->planner_flags & NO_LARGE_GENERIC)
+#define NO_RANK_SPLITSP(plnr) ((plnr)->planner_flags & NO_RANK_SPLITS)
+#define NO_UGLYP(plnr) ((plnr)->planner_flags & NO_UGLY)
+#define NO_SEARCHP(plnr) ((plnr)->planner_flags & NO_SEARCH)
+#define NO_VRANK_SPLITSP(plnr) ((plnr)->planner_flags & NO_VRANK_SPLITS)
+#define NO_VRECURSEP(plnr) ((plnr)->planner_flags & NO_VRECURSE)
+
+typedef enum { FORGET_ACCURSED, FORGET_EVERYTHING } amnesia; /* argument of planner_adt.forget */
+
+typedef struct {
+ void (*register_solver)(planner *ego, solver *s);
+ plan *(*mkplan)(planner *ego, problem *p);
+ void (*forget)(planner *ego, amnesia a);
+ void (*exprt)(planner *ego, printer *p); /* ``export'' is a reserved
+ word in C++. */
+ int (*imprt)(planner *ego, scanner *sc); /* counterpart of exprt, reading from a scanner */
+} planner_adt;
+
+struct planner_s {
+ const planner_adt *adt; /* operation table */
+ void (*hook)(plan *pln, const problem *p, int optimalp); /* NOTE(review): invocation point is defined in planner.c -- confirm */
+
+ /* solver descriptors */
+ slvdesc *slvdescs; /* array walked by FORALL_SOLVERS */
+ unsigned nslvdesc, slvdescsiz; /* nslvdesc bounds the FORALL_SOLVERS loop; slvdescsiz presumably is the allocated capacity */
+ const char *cur_reg_nam;
+ int cur_reg_id;
+
+ /* hash table of solutions */
+ solution *solutions;
+ unsigned hashsiz, nelem;
+
+ int nthr; /* thread count tested by NONTHREADED_ICKYP */
+ unsigned problem_flags; /* bits DESTROY_INPUT .. NO_DHT_R2HC */
+ unsigned short planner_flags; /* matches type of solution.flags in
+ planner.c */
+ /* various statistics */
+ int nplan; /* number of plans evaluated */
+ double pcost, epcost; /* total pcost of measured/estimated plans */
+ int nprob; /* number of problems evaluated */
+ int lookup, succ_lookup, lookup_iter;
+ int insert, insert_iter, insert_unknown;
+ int nrehash;
+};
+
+planner *X(mkplanner)(void);
+void X(planner_destroy)(planner *ego);
+
+#ifdef FFTW_DEBUG
+void X(planner_dump)(planner *ego, int vrbose);
+#endif
+
+/*
+ Iterate over all solvers of planner EGO: for each of the nslvdesc descriptors, P names the slvdesc, S its solver, and WHAT is executed. EGO may be any planner-pointer expression. Read:
+
+ @article{ baker93iterators,
+ author = "Henry G. Baker, Jr.",
+ title = "Iterators: Signs of Weakness in Object-Oriented Languages",
+ journal = "{ACM} {OOPS} Messenger",
+ volume = "4",
+ number = "3",
+ pages = "18--25"
+ }
+*/
+#define FORALL_SOLVERS(ego, s, p, what) \
+{ \
+ unsigned _cnt; \
+ for (_cnt = 0; _cnt < (ego)->nslvdesc; ++_cnt) { \
+ slvdesc *p = (ego)->slvdescs + _cnt; \
+ solver *s = p->slv; \
+ what; \
+ } \
+}
+
+/* make plan, destroy problem */
+plan *X(mkplan_d)(planner *ego, problem *p);
+
+/*-----------------------------------------------------------------------*/
+/* stride.c: WS(stride, i) is the offset of element i: a table lookup when precomputed, a multiplication otherwise */
+
+/* If PRECOMPUTE_ARRAY_INDICES is defined, precompute all strides. */
+#if (defined(__i386__) || _M_IX86 >= 500) && !HAVE_K7 && !defined(FFTW_LDOUBLE)
+#define PRECOMPUTE_ARRAY_INDICES
+#endif
+
+#ifdef PRECOMPUTE_ARRAY_INDICES
+typedef int *stride; /* table of precomputed offsets */
+#define WS(stride, i) ((stride)[i])
+extern stride X(mkstride)(int n, int s);
+void X(stride_destroy)(stride p);
+
+#else
+
+typedef int stride; /* plain integer stride; arguments below are parenthesized so WS(s, a + b) is s * (a + b) */
+#define WS(stride, i) ((stride) * (i))
+#define fftwf_mkstride(n, stride) (stride)
+#define fftw_mkstride(n, stride) (stride)
+#define fftwl_mkstride(n, stride) (stride)
+#define fftwf_stride_destroy(p) ((void) (p))
+#define fftw_stride_destroy(p) ((void) (p))
+#define fftwl_stride_destroy(p) ((void) (p))
+
+#endif /* PRECOMPUTE_ARRAY_INDICES */
+
+/*-----------------------------------------------------------------------*/
+/* solvtab.c: tables of { registration function, name } pairs, terminated by SOLVTAB_END */
+
+struct solvtab_s { void (*reg)(planner *); const char *reg_nam; };
+typedef struct solvtab_s solvtab[]; /* incomplete array type: the length comes from the initializer */
+void X(solvtab_exec)(solvtab tbl, planner *p);
+#define SOLVTAB(s) { s, STRINGIZE(s) } /* entry whose name is the stringized (macro-expanded) argument */
+#define SOLVTAB_END { 0, 0 } /* all-zero terminator entry */
+
+/*-----------------------------------------------------------------------*/
+/* pickdim.c */
+int X(pickdim)(int which_dim, const int *buddies, int nbuddies,
+ const tensor *sz, int oop, int *dp);
+
+/*-----------------------------------------------------------------------*/
+/* twiddle.c: twiddle-factor tables and the instruction encoding that describes them */
+/* little language to express twiddle factors computation */
+enum { TW_COS = 0, TW_SIN = 1, TW_TAN = 2, TW_NEXT = 3,
+ TW_FULL = 4, TW_GENERIC = 5 }; /* NOTE(review): presumably the values stored in tw_instr.op -- confirm */
+
+typedef struct {
+ unsigned char op; /* NOTE(review): presumably one of the TW_xxx codes above */
+ unsigned char v; /* NOTE(review): operand; meaning is defined in twiddle.c */
+ short i; /* NOTE(review): operand; meaning is defined in twiddle.c */
+} tw_instr;
+
+typedef struct twid_s {
+ R *W; /* array of twiddle factors */
+ int n, r, m; /* transform order, radix, # twiddle rows */
+ int refcnt;
+ const tw_instr *instr; /* instruction sequence passed to X(mktwiddle) */
+ struct twid_s *cdr; /* link to another twid, forming a singly linked list */
+} twid;
+
+void X(mktwiddle)(twid **pp, const tw_instr *instr, int n, int r, int m);
+void X(twiddle_destroy)(twid **pp);
+int X(twiddle_length)(int r, const tw_instr *p);
+void X(twiddle_awake)(int flg, twid **pp,
+ const tw_instr *instr, int n, int r, int m);
+
+/*-----------------------------------------------------------------------*/
+/* trig.c: trigreal is the type returned by the trig helpers declared after it */
+#ifdef FFTW_LDOUBLE
+typedef long double trigreal;
+#else /* single and double builds both use double here */
+typedef double trigreal;
+#endif
+
+extern trigreal X(cos2pi)(int, int);
+extern trigreal X(sin2pi)(int, int);
+extern trigreal X(tan2pi)(int, int);
+extern trigreal X(sincos)(trigreal m, trigreal n, int sinp);
+
+/*-----------------------------------------------------------------------*/
+/* primes.c: MULMOD(x,y,p) is (x * y) % p, with the product formed in a wider type when one is configured */
+
+#if defined(FFTW_ENABLE_UNSAFE_MULMOD) /* product formed in the operands' own type; no widening */
+# define MULMOD(x,y,p) (((x) * (y)) % (p))
+#elif ((SIZEOF_INT != 0) && (SIZEOF_LONG >= 2 * SIZEOF_INT)) /* long is at least twice as wide as int */
+# define MULMOD(x,y,p) ((int) ((((long) (x)) * ((long) (y))) % ((long) (p))))
+#elif ((SIZEOF_INT != 0) && (SIZEOF_LONG_LONG >= 2 * SIZEOF_INT)) /* long long is at least twice as wide as int */
+# define MULMOD(x,y,p) ((int) ((((long long) (x)) * ((long long) (y))) \
+ % ((long long) (p))))
+#elif defined(_MSC_VER) /* use the MSVC 64-bit integer type */
+# define MULMOD(x,y,p) ((int) ((((__int64) (x)) * ((__int64) (y))) \
+ % ((__int64) (p))))
+#else /* 'long long' unavailable */
+# define SAFE_MULMOD 1 /* signals that the X(safe_mulmod) function is in use */
+int X(safe_mulmod)(int x, int y, int p);
+# define MULMOD(x,y,p) X(safe_mulmod)(x,y,p)
+#endif
+
+int X(power_mod)(int n, int m, int p);
+int X(find_generator)(int p);
+int X(first_divisor)(int n);
+int X(is_prime)(int n);
+int X(next_prime)(int n);
+
+#define GENERIC_MIN_BAD 71 /* min prime for which generic becomes bad */
+
+/*-----------------------------------------------------------------------*/
+/* rader.c: */
+typedef struct rader_tls rader_tl;
+
+void X(rader_tl_insert)(int k1, int k2, int k3, R *W, rader_tl **tl);
+R *X(rader_tl_find)(int k1, int k2, int k3, rader_tl *t);
+void X(rader_tl_delete)(R *W, rader_tl **tl);
+
+/*-----------------------------------------------------------------------*/
+/* transpose.c: */
+
+void X(transpose)(R *A, int n, int m, int d, int N, R *buf);
+void X(transpose_slow)(R *a, int nx, int ny, int N,
+ char *move, int move_size, R *buf);
+int X(transposable)(const iodim *a, const iodim *b,
+ int vl, int s, R *ri, R *ii);
+void X(transpose_dims)(const iodim *a, const iodim *b,
+ int *n, int *m, int *d, int *nd, int *md);
+int X(transpose_simplep)(const iodim *a, const iodim *b, int vl, int s,
+ R *ri, R *ii);
+int X(transpose_slowp)(const iodim *a, const iodim *b, int N);
+
+/*-----------------------------------------------------------------------*/
+/* misc stuff */
+void X(null_awake)(plan *ego, int awake);
+int X(square)(int x);
+double X(measure_execution_time)(plan *pln, const problem *p);
+int X(alignment_of)(R *p);
+unsigned X(hash)(const char *s);
+int X(compute_nbuf)(int n, int vl, int nbuf, int maxbufsz);
+int X(ct_uglyp)(int min_n, int n, int r);
+
+#if HAVE_SIMD /* SIMD builds: the two low bits of an R pointer carry a "taint" value, per the masks below */
+R *X(taint)(R *p, int s);
+R *X(join_taint)(R *p1, R *p2);
+#define TAINT(p, s) X(taint)(p, s)
+#define UNTAINT(p) ((R *) (((uintptr_t) (p)) & ~(uintptr_t)3)) /* clears the two low bits */
+#define TAINTOF(p) (((uintptr_t)(p)) & 3) /* extracts the two low bits */
+#define JOIN_TAINT(p1, p2) X(join_taint)(p1, p2)
+#else /* non-SIMD builds: pointers are passed through unchanged and the taint is always 0 */
+#define TAINT(p, s) (p)
+#define UNTAINT(p) (p)
+#define TAINTOF(p) 0
+#define JOIN_TAINT(p1, p2) (p1) /* parenthesized so an expression argument keeps its grouping */
+#endif
+
+#ifdef FFTW_DEBUG_ALIGNMENT /* debug aid: check that a local double lands on an 8-byte boundary */
+# define ASSERT_ALIGNED_DOUBLE { \
+ double __foo; \
+ CK(!(((uintptr_t) &__foo) & 0x7)); /* fails via CK if any of the low three address bits is set */ \
+}
+#else
+# define ASSERT_ALIGNED_DOUBLE /* expands to nothing when alignment debugging is off */
+#endif /* FFTW_DEBUG_ALIGNMENT */
+
+
+
+/*-----------------------------------------------------------------------*/
+/* macros used in codelets to reduce source code size */
+
+typedef R E; /* internal precision of codelets. */
+
+#ifdef FFTW_LDOUBLE
+# define K(x) ((E) x##L) /* pastes an L suffix so the constant is a long double literal; x must end in a numeric literal token */
+#else
+# define K(x) ((E) x) /* casts the constant to E */
+#endif
+#define DK(name, value) const E name = K(value) /* declares a const E named name initialized to K(value) */
+
+/* FMA macros: FMA = a*b + c, FMS = a*b - c, FNMA = -(a*b + c), FNMS = c - a*b */
+
+#if defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))
+/* this peculiar coding seems to do the right thing on all of
+ gcc-2.95, gcc-3.1, and gcc-3.2.
+
+ The obvious expression a * b + c does not work. If both x = a * b
+ + c and y = a * b - c appear in the source, gcc computes t = a * b,
+ x = t + c, y = t - c, thus destroying the fma.
+*/
+static __inline__ E FMA(E a, E b, E c) /* a * b + c */
+{
+ E x = a * b;
+ x = x + c;
+ return x;
+}
+
+static __inline__ E FMS(E a, E b, E c) /* a * b - c */
+{
+ E x = a * b;
+ x = x - c;
+ return x;
+}
+
+static __inline__ E FNMA(E a, E b, E c) /* -(a * b + c) */
+{
+ E x = a * b;
+ x = - (x + c);
+ return x;
+}
+
+static __inline__ E FNMS(E a, E b, E c) /* -(a * b - c), written c - a * b in the macro form below */
+{
+ E x = a * b;
+ x = - (x - c);
+ return x;
+}
+#else /* all other compilers/targets: plain expression macros with fully parenthesized arguments */
+#define FMA(a, b, c) (((a) * (b)) + (c))
+#define FMS(a, b, c) (((a) * (b)) - (c))
+#define FNMA(a, b, c) (- (((a) * (b)) + (c)))
+#define FNMS(a, b, c) ((c) - ((a) * (b)))
+#endif
+
+
+/* stack-alignment hackery: WITH_ALIGNED_STACK(what) runs the statements WHAT, after aligning the stack where a hack is known */
+#if defined(__GNUC__) && defined(__i386__) && !defined(__ICC)
+/*
+ * horrible hack to align the stack to a 16-byte boundary.
+ *
+ * We assume a gcc version >= 2.95 so that
+ * -mpreferred-stack-boundary works. Otherwise, all bets are
+ * off. However, -mpreferred-stack-boundary does not create a
+ * stack alignment, but it only preserves it. Unfortunately,
+ * many versions of libc on linux call main() with the wrong
+ * initial stack alignment, with the result that the code is now
+ * pessimally aligned instead of having a 50% chance of being
+ * correct.
+ */
+
+#define WITH_ALIGNED_STACK(what) \
+{ \
+ /* \
+ * Use alloca to allocate some memory on the stack. \
+ * This alerts gcc that something funny is going \
+ * on, so that it does not omit the frame pointer \
+ * etc. \
+ */ \
+ (void)__builtin_alloca(16); \
+ \
+ /* \
+ * Now align the stack pointer \
+ */ \
+ __asm__ __volatile__ ("andl $-16, %esp"); \
+ \
+ what \
+}
+#endif
+
+#ifdef __ICC /* Intel's compiler for ia32; excluded from the gcc branch above so the macro is defined only once */
+#define WITH_ALIGNED_STACK(what) \
+{ \
+ /* \
+ * Simply calling alloca seems to do the right thing. \
+ * The size of the allocated block seems to be irrelevant. \
+ */ \
+ _alloca(16); \
+ what \
+}
+#endif
+
+#ifndef WITH_ALIGNED_STACK /* no known hack for this compiler/target: just run the statements */
+#define WITH_ALIGNED_STACK(what) what
+#endif
+
+#endif /* __IFFTW_H__ */