diff options
author | scuri <scuri> | 2009-08-20 12:35:06 +0000 |
---|---|---|
committer | scuri <scuri> | 2009-08-20 12:35:06 +0000 |
commit | 5d735255ddd3cb2f547abd3d03969af3fb7eb04e (patch) | |
tree | 8fb66510bc625bb1b08ccb41f1b83fb0f7cb8f19 /src/fftw3/kernel/ifftw.h | |
parent | 35733b87eed86e5228f12fa10c98a3d9d22a6073 (diff) |
*** empty log message ***
Diffstat (limited to 'src/fftw3/kernel/ifftw.h')
-rw-r--r-- | src/fftw3/kernel/ifftw.h | 848 |
1 files changed, 0 insertions, 848 deletions
diff --git a/src/fftw3/kernel/ifftw.h b/src/fftw3/kernel/ifftw.h deleted file mode 100644 index 0269e18..0000000 --- a/src/fftw3/kernel/ifftw.h +++ /dev/null @@ -1,848 +0,0 @@ -/* - * Copyright (c) 2003 Matteo Frigo - * Copyright (c) 2003 Massachusetts Institute of Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* $Id: ifftw.h,v 1.1 2008/10/17 06:11:29 scuri Exp $ */ - -/* FFTW internal header file */ -#ifndef __IFFTW_H__ -#define __IFFTW_H__ - -#include "config.h" - -#include <stdlib.h> /* size_t */ -#include <stdarg.h> /* va_list */ -#include <stddef.h> /* ptrdiff_t */ - -#if HAVE_SYS_TYPES_H -# include <sys/types.h> -#endif - -#if HAVE_STDINT_H -# include <stdint.h> /* uintptr_t, maybe */ -#endif - -#if HAVE_INTTYPES_H -# include <inttypes.h> /* uintptr_t, maybe */ -#endif - -/* determine precision and name-mangling scheme */ -#define CONCAT(prefix, name) prefix ## name -#if defined(FFTW_SINGLE) -typedef float R; -#define X(name) CONCAT(fftwf_, name) -#elif defined(FFTW_LDOUBLE) -typedef long double R; -#define X(name) CONCAT(fftwl_, name) -#else -typedef double R; -#define X(name) CONCAT(fftw_, name) -#endif - -/* dummy use of unused parameters to silence compiler warnings */ -#define UNUSED(x) (void)x - -#define FFT_SIGN (-1) /* sign convention for forward transforms */ - -/* get rid of that object-oriented stink: */ -#define REGISTER_SOLVER(p, s) X(solver_register)(p, s) - -#define STRINGIZEx(x) #x -#define STRINGIZE(x) STRINGIZEx(x) - -#ifndef HAVE_K7 -#define HAVE_K7 0 -#endif - -#if defined(HAVE_SSE) || defined(HAVE_SSE2) || defined(HAVE_ALTIVEC) || defined(HAVE_3DNOW) -#define HAVE_SIMD 1 -#else -#define HAVE_SIMD 0 -#endif - -/* forward declarations */ -typedef struct problem_s problem; -typedef struct plan_s plan; -typedef struct solver_s solver; -typedef struct planner_s planner; -typedef struct printer_s printer; -typedef struct scanner_s scanner; - -/*-----------------------------------------------------------------------*/ -/* alloca: */ -#if HAVE_SIMD -#define MIN_ALIGNMENT 16 -#endif - -#ifdef HAVE_ALLOCA - /* use alloca if available */ - -#ifndef alloca -#ifdef __GNUC__ -# define alloca __builtin_alloca -#else -# ifdef _MSC_VER -# include <malloc.h> -# define alloca _alloca -# else -# if HAVE_ALLOCA_H -# include <alloca.h> -# else -# ifdef _AIX - #pragma alloca -# else -# ifndef alloca /* predefined by HP cc +Olibcalls */ -void *alloca(size_t); -# endif -# endif -# endif -# endif -#endif -#endif - -# ifdef MIN_ALIGNMENT -# define STACK_MALLOC(T, p, x) \ - { \ - p = (T)alloca((x) + MIN_ALIGNMENT); \ - p = (T)(((uintptr_t)p + (MIN_ALIGNMENT - 1)) & \ - (~(uintptr_t)(MIN_ALIGNMENT - 1))); \ - } -# define STACK_FREE(x) -# else /* HAVE_ALLOCA && !defined(MIN_ALIGNMENT) */ -# define STACK_MALLOC(T, p, x) p = (T)alloca(x) -# define STACK_FREE(x) -# endif - -#else /* ! HAVE_ALLOCA */ - /* use malloc instead of alloca */ -# define STACK_MALLOC(T, p, x) p = (T)MALLOC(x, OTHER) -# define STACK_FREE(x) X(ifree)(x) -#endif /* ! HAVE_ALLOCA */ - -/*-----------------------------------------------------------------------*/ -/* define uintptr_t if it is not already defined */ - -#ifndef HAVE_UINTPTR_T -# if SIZEOF_VOID_P == 0 -# error sizeof void* is unknown! -# elif SIZEOF_UNSIGNED_INT == SIZEOF_VOID_P - typedef unsigned int uintptr_t; -# elif SIZEOF_UNSIGNED_LONG == SIZEOF_VOID_P - typedef unsigned long uintptr_t; -# elif SIZEOF_UNSIGNED_LONG_LONG == SIZEOF_VOID_P - typedef unsigned long long uintptr_t; -# else -# error no unsigned integer type matches void* sizeof! -# endif -#endif - -/*-----------------------------------------------------------------------*/ -/* assert.c: */ -extern void X(assertion_failed)(const char *s, int line, const char *file); - -/* always check */ -#define CK(ex) \ - (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0)) - -#ifdef FFTW_DEBUG -/* check only if debug enabled */ -#define A(ex) \ - (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0)) -#else -#define A(ex) /* nothing */ -#endif - -extern void X(debug)(const char *format, ...); -#define D X(debug) - -/*-----------------------------------------------------------------------*/ -/* alloc.c: */ - -/* objects allocated by malloc, for statistical purposes */ -enum malloc_tag { - EVERYTHING, - PLANS, - SOLVERS, - PROBLEMS, - BUFFERS, - HASHT, - TENSORS, - PLANNERS, - SLVDESCS, - TWIDDLES, - STRIDES, - OTHER, - MALLOC_WHAT_LAST /* must be last */ -}; - -extern void X(ifree)(void *ptr); -extern void X(ifree0)(void *ptr); - -#ifdef FFTW_DEBUG_MALLOC - -extern void *X(malloc_debug)(size_t n, enum malloc_tag what, - const char *file, int line); -#define MALLOC(n, what) X(malloc_debug)(n, what, __FILE__, __LINE__) -#define NATIVE_MALLOC(n, what) MALLOC(n, what) -void X(malloc_print_minfo)(int vrbose); - -#else /* ! FFTW_DEBUG_MALLOC */ - -extern void *X(malloc_plain)(size_t sz); -#define MALLOC(n, what) X(malloc_plain)(n) -#define NATIVE_MALLOC(n, what) malloc(n) - -#endif - -#if defined(FFTW_DEBUG) && defined(FFTW_DEBUG_MALLOC) && defined(HAVE_THREADS) -extern int X(in_thread); -# define IN_THREAD X(in_thread) -# define THREAD_ON { int in_thread_save = X(in_thread); X(in_thread) = 1 -# define THREAD_OFF X(in_thread) = in_thread_save; } -#else -# define IN_THREAD 0 -# define THREAD_ON -# define THREAD_OFF -#endif - -/*-----------------------------------------------------------------------*/ -/* ops.c: */ -/* - * ops counter. The total number of additions is add + fma - * and the total number of multiplications is mul + fma. - * Total flops = add + mul + 2 * fma - */ -typedef struct { - double add; - double mul; - double fma; - double other; -} opcnt; - -void X(ops_zero)(opcnt *dst); -void X(ops_other)(int o, opcnt *dst); -void X(ops_cpy)(const opcnt *src, opcnt *dst); - -void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst); -void X(ops_add2)(const opcnt *a, opcnt *dst); - -/* dst = m * a + b */ -void X(ops_madd)(int m, const opcnt *a, const opcnt *b, opcnt *dst); - -/* dst += m * a */ -void X(ops_madd2)(int m, const opcnt *a, opcnt *dst); - - -/*-----------------------------------------------------------------------*/ -/* minmax.c: */ -int X(imax)(int a, int b); -int X(imin)(int a, int b); - -/*-----------------------------------------------------------------------*/ -/* iabs.c: */ -int X(iabs)(int a); - -/*-----------------------------------------------------------------------*/ -/* md5.c */ - -#if SIZEOF_UNSIGNED_INT >= 4 -typedef unsigned int md5uint; -#else -typedef unsigned long md5uint; /* at least 32 bits as per C standard */ -#endif - -typedef md5uint md5sig[4]; - -typedef struct { - md5sig s; /* state and signature */ - - /* fields not meant to be used outside md5.c: */ - unsigned char c[64]; /* stuff not yet processed */ - unsigned l; /* total length. Should be 64 bits long, but this is - good enough for us */ -} md5; - -void X(md5begin)(md5 *p); -void X(md5putb)(md5 *p, const void *d_, int len); -void X(md5puts)(md5 *p, const char *s); -void X(md5putc)(md5 *p, unsigned char c); -void X(md5int)(md5 *p, int i); -void X(md5unsigned)(md5 *p, unsigned i); -void X(md5ptrdiff)(md5 *p, ptrdiff_t d); -void X(md5end)(md5 *p); - -/*-----------------------------------------------------------------------*/ -/* tensor.c: */ -#define STRUCT_HACK_KR -#undef STRUCT_HACK_C99 - -typedef struct { - int n; - int is; /* input stride */ - int os; /* output stride */ -} iodim; - -typedef struct { - int rnk; -#if defined(STRUCT_HACK_KR) - iodim dims[1]; -#elif defined(STRUCT_HACK_C99) - iodim dims[]; -#else - iodim *dims; -#endif -} tensor; - -/* - Definition of rank -infinity. - This definition has the property that if you want rank 0 or 1, - you can simply test for rank <= 1. This is a common case. - - A tensor of rank -infinity has size 0. -*/ -#define RNK_MINFTY ((int)(((unsigned) -1) >> 1)) -#define FINITE_RNK(rnk) ((rnk) != RNK_MINFTY) - -typedef enum { INPLACE_IS, INPLACE_OS } inplace_kind; - -tensor *X(mktensor)(int rnk); -tensor *X(mktensor_0d)(void); -tensor *X(mktensor_1d)(int n, int is, int os); -tensor *X(mktensor_2d)(int n0, int is0, int os0, - int n1, int is1, int os1); -int X(tensor_sz)(const tensor *sz); -void X(tensor_md5)(md5 *p, const tensor *t); -int X(tensor_max_index)(const tensor *sz); -int X(tensor_min_istride)(const tensor *sz); -int X(tensor_min_ostride)(const tensor *sz); -int X(tensor_min_stride)(const tensor *sz); -int X(tensor_inplace_strides)(const tensor *sz); -int X(tensor_inplace_strides2)(const tensor *a, const tensor *b); -tensor *X(tensor_copy)(const tensor *sz); -int X(tensor_kosherp)(const tensor *x); - -tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k); -tensor *X(tensor_copy_except)(const tensor *sz, int except_dim); -tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk); -tensor *X(tensor_compress)(const tensor *sz); -tensor *X(tensor_compress_contiguous)(const tensor *sz); -tensor *X(tensor_append)(const tensor *a, const tensor *b); -void X(tensor_split)(const tensor *sz, tensor **a, int a_rnk, tensor **b); -int X(tensor_tornk1)(const tensor *t, int *n, int *is, int *os); -void X(tensor_destroy)(tensor *sz); -void X(tensor_destroy2)(tensor *a, tensor *b); -void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d); -void X(tensor_print)(const tensor *sz, printer *p); -int X(dimcmp)(const iodim *a, const iodim *b); - -/*-----------------------------------------------------------------------*/ -/* problem.c: */ -typedef struct { - void (*hash) (const problem *ego, md5 *p); - void (*zero) (const problem *ego); - void (*print) (problem *ego, printer *p); - void (*destroy) (problem *ego); -} problem_adt; - -struct problem_s { - const problem_adt *adt; -}; - -problem *X(mkproblem)(size_t sz, const problem_adt *adt); -void X(problem_destroy)(problem *ego); - -/*-----------------------------------------------------------------------*/ -/* print.c */ -struct printer_s { - void (*print)(printer *p, const char *format, ...); - void (*vprint)(printer *p, const char *format, va_list ap); - void (*putchr)(printer *p, char c); - void (*cleanup)(printer *p); - int indent; - int indent_incr; -}; - -printer *X(mkprinter)(size_t size, - void (*putchr)(printer *p, char c), - void (*cleanup)(printer *p)); -void X(printer_destroy)(printer *p); - -/*-----------------------------------------------------------------------*/ -/* scan.c */ -struct scanner_s { - int (*scan)(scanner *sc, const char *format, ...); - int (*vscan)(scanner *sc, const char *format, va_list ap); - int (*getchr)(scanner *sc); - int ungotc; -}; - -scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc)); -void X(scanner_destroy)(scanner *sc); - -/*-----------------------------------------------------------------------*/ -/* plan.c: */ -typedef struct { - void (*solve)(const plan *ego, const problem *p); - void (*awake)(plan *ego, int flag); - void (*print)(const plan *ego, printer *p); - void (*destroy)(plan *ego); -} plan_adt; - -struct plan_s { - const plan_adt *adt; - int awake_refcnt; - opcnt ops; - double pcost; -}; - -plan *X(mkplan)(size_t size, const plan_adt *adt); -void X(plan_destroy_internal)(plan *ego); -void X(plan_awake)(plan *ego, int flag); -#define AWAKE(plan, flag) X(plan_awake)(plan, flag) -void X(plan_null_destroy)(plan *ego); - -/*-----------------------------------------------------------------------*/ -/* solver.c: */ -typedef struct { - plan *(*mkplan)(const solver *ego, const problem *p, planner *plnr); -} solver_adt; - -struct solver_s { - const solver_adt *adt; - int refcnt; -}; - -solver *X(mksolver)(size_t size, const solver_adt *adt); -void X(solver_use)(solver *ego); -void X(solver_destroy)(solver *ego); -void X(solver_register)(planner *plnr, solver *s); - -/* shorthand */ -#define MKSOLVER(type, adt) (type *)X(mksolver)(sizeof(type), adt) - -/*-----------------------------------------------------------------------*/ -/* planner.c */ - -typedef struct slvdesc_s { - solver *slv; - const char *reg_nam; - unsigned nam_hash; - int reg_id; -} slvdesc; - -typedef struct solution_s solution; /* opaque */ - -/* values for problem_flags: */ -enum { - DESTROY_INPUT = 0x1, - NO_SIMD = 0x2, - CONSERVE_MEMORY = 0x4, - NO_DHT_R2HC = 0x8 -}; - -#define DESTROY_INPUTP(plnr) ((plnr)->problem_flags & DESTROY_INPUT) -#define NO_SIMDP(plnr) ((plnr)->problem_flags & NO_SIMD) -#define CONSERVE_MEMORYP(plnr) ((plnr)->problem_flags & CONSERVE_MEMORY) -#define NO_DHT_R2HCP(plnr) ((plnr)->problem_flags & NO_DHT_R2HC) - -/* values for planner_flags: */ -enum { - /* impatience flags */ - - BELIEVE_PCOST = 0x1, - DFT_R2HC_ICKY = 0x2, - NONTHREADED_ICKY = 0x4, - NO_BUFFERING = 0x8, - NO_EXHAUSTIVE = 0x10, - NO_INDIRECT_OP = 0x20, - NO_LARGE_GENERIC = 0x40, - NO_RANK_SPLITS = 0x80, - NO_VRANK_SPLITS = 0x100, - NO_VRECURSE = 0x200, - - /* flags that control the search */ - NO_UGLY = 0x400, /* avoid plans we are 99% sure are suboptimal */ - NO_SEARCH = 0x800, /* avoid searching altogether---use wisdom entries - only */ - - ESTIMATE = 0x1000, - IMPATIENCE_FLAGS = (ESTIMATE | (ESTIMATE - 1)), - - BLESSING = 0x4000, /* save this entry */ - H_VALID = 0x8000, /* valid hastable entry */ - NONIMPATIENCE_FLAGS = BLESSING -}; - -#define BELIEVE_PCOSTP(plnr) ((plnr)->planner_flags & BELIEVE_PCOST) -#define DFT_R2HC_ICKYP(plnr) ((plnr)->planner_flags & DFT_R2HC_ICKY) -#define ESTIMATEP(plnr) ((plnr)->planner_flags & ESTIMATE) -#define NONTHREADED_ICKYP(plnr) (((plnr)->planner_flags & NONTHREADED_ICKY) \ - && (plnr)->nthr > 1) -#define NO_BUFFERINGP(plnr) ((plnr)->planner_flags & NO_BUFFERING) -#define NO_EXHAUSTIVEP(plnr) ((plnr)->planner_flags & NO_EXHAUSTIVE) -#define NO_INDIRECT_OP_P(plnr) ((plnr)->planner_flags & NO_INDIRECT_OP) -#define NO_LARGE_GENERICP(plnr) ((plnr)->planner_flags & NO_LARGE_GENERIC) -#define NO_RANK_SPLITSP(plnr) ((plnr)->planner_flags & NO_RANK_SPLITS) -#define NO_UGLYP(plnr) ((plnr)->planner_flags & NO_UGLY) -#define NO_SEARCHP(plnr) ((plnr)->planner_flags & NO_SEARCH) -#define NO_VRANK_SPLITSP(plnr) ((plnr)->planner_flags & NO_VRANK_SPLITS) -#define NO_VRECURSEP(plnr) ((plnr)->planner_flags & NO_VRECURSE) - -typedef enum { FORGET_ACCURSED, FORGET_EVERYTHING } amnesia; - -typedef struct { - void (*register_solver)(planner *ego, solver *s); - plan *(*mkplan)(planner *ego, problem *p); - void (*forget)(planner *ego, amnesia a); - void (*exprt)(planner *ego, printer *p); /* ``export'' is a reserved - word in C++. */ - int (*imprt)(planner *ego, scanner *sc); -} planner_adt; - -struct planner_s { - const planner_adt *adt; - void (*hook)(plan *pln, const problem *p, int optimalp); - - /* solver descriptors */ - slvdesc *slvdescs; - unsigned nslvdesc, slvdescsiz; - const char *cur_reg_nam; - int cur_reg_id; - - /* hash table of solutions */ - solution *solutions; - unsigned hashsiz, nelem; - - int nthr; - unsigned problem_flags; - unsigned short planner_flags; /* matches type of solution.flags in - planner.c */ - /* various statistics */ - int nplan; /* number of plans evaluated */ - double pcost, epcost; /* total pcost of measured/estimated plans */ - int nprob; /* number of problems evaluated */ - int lookup, succ_lookup, lookup_iter; - int insert, insert_iter, insert_unknown; - int nrehash; -}; - -planner *X(mkplanner)(void); -void X(planner_destroy)(planner *ego); - -#ifdef FFTW_DEBUG -void X(planner_dump)(planner *ego, int vrbose); -#endif - -/* - Iterate over all solvers. Read: - - @article{ baker93iterators, - author = "Henry G. Baker, Jr.", - title = "Iterators: Signs of Weakness in Object-Oriented Languages", - journal = "{ACM} {OOPS} Messenger", - volume = "4", - number = "3", - pages = "18--25" - } -*/ -#define FORALL_SOLVERS(ego, s, p, what) \ -{ \ - unsigned _cnt; \ - for (_cnt = 0; _cnt < ego->nslvdesc; ++_cnt) { \ - slvdesc *p = ego->slvdescs + _cnt; \ - solver *s = p->slv; \ - what; \ - } \ -} - -/* make plan, destroy problem */ -plan *X(mkplan_d)(planner *ego, problem *p); - -/*-----------------------------------------------------------------------*/ -/* stride.c: */ - -/* If PRECOMPUTE_ARRAY_INDICES is defined, precompute all strides. */ -#if (defined(__i386__) || _M_IX86 >= 500) && !HAVE_K7 && !defined(FFTW_LDOUBLE) -#define PRECOMPUTE_ARRAY_INDICES -#endif - -#ifdef PRECOMPUTE_ARRAY_INDICES -typedef int *stride; -#define WS(stride, i) (stride[i]) -extern stride X(mkstride)(int n, int s); -void X(stride_destroy)(stride p); - -#else - -typedef int stride; -#define WS(stride, i) (stride * i) -#define fftwf_mkstride(n, stride) stride -#define fftw_mkstride(n, stride) stride -#define fftwl_mkstride(n, stride) stride -#define fftwf_stride_destroy(p) ((void) p) -#define fftw_stride_destroy(p) ((void) p) -#define fftwl_stride_destroy(p) ((void) p) - -#endif /* PRECOMPUTE_ARRAY_INDICES */ - -/*-----------------------------------------------------------------------*/ -/* solvtab.c */ - -struct solvtab_s { void (*reg)(planner *); const char *reg_nam; }; -typedef struct solvtab_s solvtab[]; -void X(solvtab_exec)(solvtab tbl, planner *p); -#define SOLVTAB(s) { s, STRINGIZE(s) } -#define SOLVTAB_END { 0, 0 } - -/*-----------------------------------------------------------------------*/ -/* pickdim.c */ -int X(pickdim)(int which_dim, const int *buddies, int nbuddies, - const tensor *sz, int oop, int *dp); - -/*-----------------------------------------------------------------------*/ -/* twiddle.c */ -/* little language to express twiddle factors computation */ -enum { TW_COS = 0, TW_SIN = 1, TW_TAN = 2, TW_NEXT = 3, - TW_FULL = 4, TW_GENERIC = 5 }; - -typedef struct { - unsigned char op; - unsigned char v; - short i; -} tw_instr; - -typedef struct twid_s { - R *W; /* array of twiddle factors */ - int n, r, m; /* transform order, radix, # twiddle rows */ - int refcnt; - const tw_instr *instr; - struct twid_s *cdr; -} twid; - -void X(mktwiddle)(twid **pp, const tw_instr *instr, int n, int r, int m); -void X(twiddle_destroy)(twid **pp); -int X(twiddle_length)(int r, const tw_instr *p); -void X(twiddle_awake)(int flg, twid **pp, - const tw_instr *instr, int n, int r, int m); - -/*-----------------------------------------------------------------------*/ -/* trig.c */ -#ifdef FFTW_LDOUBLE -typedef long double trigreal; -#else -typedef double trigreal; -#endif - -extern trigreal X(cos2pi)(int, int); -extern trigreal X(sin2pi)(int, int); -extern trigreal X(tan2pi)(int, int); -extern trigreal X(sincos)(trigreal m, trigreal n, int sinp); - -/*-----------------------------------------------------------------------*/ -/* primes.c: */ - -#if defined(FFTW_ENABLE_UNSAFE_MULMOD) -# define MULMOD(x,y,p) (((x) * (y)) % (p)) -#elif ((SIZEOF_INT != 0) && (SIZEOF_LONG >= 2 * SIZEOF_INT)) -# define MULMOD(x,y,p) ((int) ((((long) (x)) * ((long) (y))) % ((long) (p)))) -#elif ((SIZEOF_INT != 0) && (SIZEOF_LONG_LONG >= 2 * SIZEOF_INT)) -# define MULMOD(x,y,p) ((int) ((((long long) (x)) * ((long long) (y))) \ - % ((long long) (p)))) -#elif defined(_MSC_VER) -# define MULMOD(x,y,p) ((int) ((((__int64) (x)) * ((__int64) (y))) \ - % ((__int64) (p)))) -#else /* 'long long' unavailable */ -# define SAFE_MULMOD 1 -int X(safe_mulmod)(int x, int y, int p); -# define MULMOD(x,y,p) X(safe_mulmod)(x,y,p) -#endif - -int X(power_mod)(int n, int m, int p); -int X(find_generator)(int p); -int X(first_divisor)(int n); -int X(is_prime)(int n); -int X(next_prime)(int n); - -#define GENERIC_MIN_BAD 71 /* min prime for which generic becomes bad */ - -/*-----------------------------------------------------------------------*/ -/* rader.c: */ -typedef struct rader_tls rader_tl; - -void X(rader_tl_insert)(int k1, int k2, int k3, R *W, rader_tl **tl); -R *X(rader_tl_find)(int k1, int k2, int k3, rader_tl *t); -void X(rader_tl_delete)(R *W, rader_tl **tl); - -/*-----------------------------------------------------------------------*/ -/* transpose.c: */ - -void X(transpose)(R *A, int n, int m, int d, int N, R *buf); -void X(transpose_slow)(R *a, int nx, int ny, int N, - char *move, int move_size, R *buf); -int X(transposable)(const iodim *a, const iodim *b, - int vl, int s, R *ri, R *ii); -void X(transpose_dims)(const iodim *a, const iodim *b, - int *n, int *m, int *d, int *nd, int *md); -int X(transpose_simplep)(const iodim *a, const iodim *b, int vl, int s, - R *ri, R *ii); -int X(transpose_slowp)(const iodim *a, const iodim *b, int N); - -/*-----------------------------------------------------------------------*/ -/* misc stuff */ -void X(null_awake)(plan *ego, int awake); -int X(square)(int x); -double X(measure_execution_time)(plan *pln, const problem *p); -int X(alignment_of)(R *p); -unsigned X(hash)(const char *s); -int X(compute_nbuf)(int n, int vl, int nbuf, int maxbufsz); -int X(ct_uglyp)(int min_n, int n, int r); - -#if HAVE_SIMD -R *X(taint)(R *p, int s); -R *X(join_taint)(R *p1, R *p2); -#define TAINT(p, s) X(taint)(p, s) -#define UNTAINT(p) ((R *) (((uintptr_t) (p)) & ~(uintptr_t)3)) -#define TAINTOF(p) (((uintptr_t)(p)) & 3) -#define JOIN_TAINT(p1, p2) X(join_taint)(p1, p2) -#else -#define TAINT(p, s) (p) -#define UNTAINT(p) (p) -#define TAINTOF(p) 0 -#define JOIN_TAINT(p1, p2) p1 -#endif - -#ifdef FFTW_DEBUG_ALIGNMENT -# define ASSERT_ALIGNED_DOUBLE { \ - double __foo; \ - CK(!(((uintptr_t) &__foo) & 0x7)); \ -} -#else -# define ASSERT_ALIGNED_DOUBLE -#endif /* FFTW_DEBUG_ALIGNMENT */ - - - -/*-----------------------------------------------------------------------*/ -/* macros used in codelets to reduce source code size */ - -typedef R E; /* internal precision of codelets. */ - -#ifdef FFTW_LDOUBLE -# define K(x) ((E) x##L) -#else -# define K(x) ((E) x) -#endif -#define DK(name, value) const E name = K(value) - -/* FMA macros */ - -#if defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__)) -/* this peculiar coding seems to do the right thing on all of - gcc-2.95, gcc-3.1, and gcc-3.2. - - The obvious expression a * b + c does not work. If both x = a * b - + c and y = a * b - c appear in the source, gcc computes t = a * b, - x = t + c, y = t - c, thus destroying the fma. -*/ -static __inline__ E FMA(E a, E b, E c) -{ - E x = a * b; - x = x + c; - return x; -} - -static __inline__ E FMS(E a, E b, E c) -{ - E x = a * b; - x = x - c; - return x; -} - -static __inline__ E FNMA(E a, E b, E c) -{ - E x = a * b; - x = - (x + c); - return x; -} - -static __inline__ E FNMS(E a, E b, E c) -{ - E x = a * b; - x = - (x - c); - return x; -} -#else -#define FMA(a, b, c) (((a) * (b)) + (c)) -#define FMS(a, b, c) (((a) * (b)) - (c)) -#define FNMA(a, b, c) (- (((a) * (b)) + (c))) -#define FNMS(a, b, c) ((c) - ((a) * (b))) -#endif - - -/* stack-alignment hackery */ -#if defined(__GNUC__) && defined(__i386__) -/* - * horrible hack to align the stack to a 16-byte boundary. - * - * We assume a gcc version >= 2.95 so that - * -mpreferred-stack-boundary works. Otherwise, all bets are - * off. However, -mpreferred-stack-boundary does not create a - * stack alignment, but it only preserves it. Unfortunately, - * many versions of libc on linux call main() with the wrong - * initial stack alignment, with the result that the code is now - * pessimally aligned instead of having a 50% chance of being - * correct. - */ - -#define WITH_ALIGNED_STACK(what) \ -{ \ - /* \ - * Use alloca to allocate some memory on the stack. \ - * This alerts gcc that something funny is going \ - * on, so that it does not omit the frame pointer \ - * etc. \ - */ \ - (void)__builtin_alloca(16); \ - \ - /* \ - * Now align the stack pointer \ - */ \ - __asm__ __volatile__ ("andl $-16, %esp"); \ - \ - what \ -} -#endif - -#ifdef __ICC /* Intel's compiler for ia32 */ -#define WITH_ALIGNED_STACK(what) \ -{ \ - /* \ - * Simply calling alloca seems to do the right thing. \ - * The size of the allocated block seems to be irrelevant. \ - */ \ - _alloca(16); \ - what \ -} -#endif - -#ifndef WITH_ALIGNED_STACK -#define WITH_ALIGNED_STACK(what) what -#endif - -#endif /* __IFFTW_H__ */ |