summaryrefslogtreecommitdiff
path: root/src/fftw/fftw-int.h
blob: 2c363fc640b5427b0292dd8e6a28b93547cc54be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
/*
 * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

/* fftw-int.h -- internal (library-private) definitions */
/* $Id: fftw-int.h,v 1.1 2008/10/17 06:13:18 scuri Exp $ */

#ifndef FFTW_INT_H
#define FFTW_INT_H
#include "config.h"
#include "fftw.h"

#ifdef __cplusplus
extern "C" {
#endif				/* __cplusplus */

/****************************************************************************/
/*                            Private Functions                             */
/****************************************************************************/

/*
 * Twiddle objects.  fftw_create_twiddle() returns a fftw_twiddle pointer
 * for size n and codelet descriptor d; fftw_destroy_twiddle() takes such
 * a pointer back.  NOTE(review): sharing/ownership rules are not visible
 * in this header -- confirm against the definitions.
 */
extern fftw_twiddle *fftw_create_twiddle(int n, const fftw_codelet_desc *d);
extern void fftw_destroy_twiddle(fftw_twiddle *tw);

/*
 * The parameters of the next two prototypes are mostly unnamed here; see
 * the function definitions for the meaning of each int/pointer argument.
 */
extern void fftw_strided_copy(int, fftw_complex *, int, fftw_complex *);
extern void fftw_executor_simple(int, const fftw_complex *, fftw_complex *,
				 fftw_plan_node *, int, int,
				 fftw_recurse_kind recurse_kind);

/*
 * Private helpers for the fftwnd_* API.  The plan-creation helpers take
 * a rank together with an int array n.
 * NOTE(review): presumably n holds one size per dimension (rank entries);
 * confirm against the definitions.
 */
extern fftwnd_plan fftwnd_create_plan_aux(int rank, const int *n,
					  fftw_direction dir, int flags);
extern fftw_plan *fftwnd_new_plan_array(int rank);
/* Both plan-array builders receive a fftw_plan array and return one. */
extern fftw_plan *fftwnd_create_plans_generic(fftw_plan *plans,
					      int rank, const int *n,
					      fftw_direction dir, int flags);
/* Like the generic builder, but additionally given concrete in/out arrays
   and their strides, plus an n_after array. */
extern fftw_plan *fftwnd_create_plans_specific(fftw_plan *plans,
					       int rank, const int *n,
					       const int *n_after,
					       fftw_direction dir, int flags,
					       fftw_complex *in, int istride,
					       fftw_complex *out, int ostride);
extern int fftwnd_work_size(int rank, const int *n, int flags, int ncopies);

/*
 * Execution helpers.  Both take the plan, a current-dimension index,
 * strided input and output arrays and a caller-supplied work array; the
 * howmany variant adds a transform count and per-transform distances
 * (idist/odist).
 */
extern void fftwnd_aux(fftwnd_plan p, int cur_dim,
		       fftw_complex *in, int istride,
		       fftw_complex *out, int ostride,
		       fftw_complex *work);
extern void fftwnd_aux_howmany(fftwnd_plan p, int cur_dim,
			       int howmany,
			       fftw_complex *in, int istride, int idist,
			       fftw_complex *out, int ostride, int odist,
			       fftw_complex *work);

/* wisdom prototypes */

/* Selects which family a wisdom entry belongs to: FFTW or RFFTW. */
enum fftw_wisdom_category {
     FFTW_WISDOM, RFFTW_WISDOM
};

/*
 * fftw_wisdom_lookup() and fftw_wisdom_add() share the same key
 * parameters (n, flags, dir, category, istride, ostride).  The lookup
 * takes type, signature and recurse_kind by pointer, while the add takes
 * the same three by value.
 * NOTE(review): presumably the pointers are outputs filled in on a hit and
 * the int result reports whether an entry was found -- confirm against the
 * wisdom implementation.
 */
extern int fftw_wisdom_lookup(int n, int flags, fftw_direction dir,
			      enum fftw_wisdom_category category,
			      int istride, int ostride,
			      enum fftw_node_type *type,
			      int *signature,
			      fftw_recurse_kind *recurse_kind, int replace_p);
extern void fftw_wisdom_add(int n, int flags, fftw_direction dir,
			    enum fftw_wisdom_category cat,
			    int istride, int ostride,
			    enum fftw_node_type type,
			    int signature,
			    fftw_recurse_kind recurse_kind);

/* Private planner functions: */
extern double fftw_estimate_node(fftw_plan_node *p);

/*
 * Plan-node constructors.  Each returns a fftw_plan_node pointer.  The
 * codelet-based ones take a codelet descriptor (config); the ones with a
 * `recurse' parameter additionally receive another plan node.
 * NOTE(review): presumably `recurse' is the sub-plan the new node
 * delegates to -- confirm against the planner source.
 */
extern fftw_plan_node *fftw_make_node_notw(int size,
					const fftw_codelet_desc *config);
extern fftw_plan_node *fftw_make_node_real2hc(int size,
					const fftw_codelet_desc *config);
extern fftw_plan_node *fftw_make_node_hc2real(int size,
					const fftw_codelet_desc *config);
extern fftw_plan_node *fftw_make_node_twiddle(int n,
					 const fftw_codelet_desc *config,
					      fftw_plan_node *recurse,
					      int flags);
extern fftw_plan_node *fftw_make_node_hc2hc(int n,
					    fftw_direction dir,
					 const fftw_codelet_desc *config,
					    fftw_plan_node *recurse,
					    int flags);
extern fftw_plan_node *fftw_make_node_generic(int n, int size,
					      fftw_generic_codelet *codelet,
					      fftw_plan_node *recurse,
					      int flags);
extern fftw_plan_node *fftw_make_node_rgeneric(int n, int size,
					       fftw_direction dir,
					       fftw_rgeneric_codelet * codelet,
					       fftw_plan_node *recurse,
					       int flags);
extern int fftw_factor(int n);
extern fftw_plan_node *fftw_make_node(void);

/* Plan construction and bookkeeping. */
extern fftw_plan fftw_make_plan(int n, fftw_direction dir,
				fftw_plan_node *root, int flags,
				enum fftw_node_type wisdom_type,
				int wisdom_signature,
				fftw_recurse_kind recurse_kind,
				int vector_size);
extern void fftw_use_plan(fftw_plan p);
extern void fftw_use_node(fftw_plan_node *p);
extern void fftw_destroy_plan_internal(fftw_plan p);
extern fftw_plan fftw_pick_better(fftw_plan p1, fftw_plan p2);

/* Plan tables: every helper below operates on a caller-provided
   fftw_plan *table. */
extern fftw_plan fftw_lookup(fftw_plan *table, int n, int flags,
			     int vector_size);
extern void fftw_insert(fftw_plan *table, fftw_plan this_plan);
extern void fftw_make_empty_table(fftw_plan *table);
extern void fftw_destroy_table(fftw_plan *table);
extern void fftw_complete_twiddle(fftw_plan_node *p, int n);

/* Rader node constructor and the global fftw_rader_top pointer; both are
   only declared here and defined elsewhere. */
extern fftw_plan_node *fftw_make_node_rader(int n, int size,
					    fftw_direction dir,
					    fftw_plan_node *recurse,
					    int flags);
extern fftw_rader_data *fftw_rader_top;

/*
 * undocumented debugging hook
 *
 * fftw_plan_hook_ptr is a pointer to a function that takes a fftw_plan
 * and returns nothing.  Two global hook variables of that type are
 * declared, one with the fftw_ prefix and one with the rfftw_ prefix.
 * DL_IMPORT is not defined in this header.
 * NOTE(review): presumably it adds shared-library import decoration --
 * confirm where it is defined.
 */
typedef void (*fftw_plan_hook_ptr) (fftw_plan plan);
extern DL_IMPORT(fftw_plan_hook_ptr) fftw_plan_hook;
extern DL_IMPORT(fftw_plan_hook_ptr) rfftw_plan_hook;

/****************************************************************************/
/*                          Overflow-safe multiply                          */
/****************************************************************************/

/* The Rader routines do a lot of operations of the form (x * y) % p, which
   are vulnerable to overflow problems for large p.  To get around this,
   we either use "long long" arithmetic (if it is available and double
   the size of int), or default to a subroutine defined in twiddle.c. */

/*
 * MULMOD(x,y,p) is selected from three implementations:
 *  1. FFTW_ENABLE_UNSAFE_MULMOD: plain (x * y) % p in the operands' own
 *     type, with no protection against overflow of the product.
 *  2. LONGLONG_IS_TWOINTS: all three operands are widened to long long
 *     before the multiply and modulo, and the result is cast back to int.
 *  3. Otherwise: call the out-of-line fftw_safe_mulmod(), and define
 *     USE_FFTW_SAFE_MULMOD so other files can tell it is in use.
 */
#if defined(FFTW_ENABLE_UNSAFE_MULMOD)
#  define MULMOD(x,y,p) (((x) * (y)) % (p))
#elif defined(LONGLONG_IS_TWOINTS)
#  define MULMOD(x,y,p) ((int) ((((long long) (x)) * ((long long) (y))) \
				% ((long long) (p))))
#else
#  define USE_FFTW_SAFE_MULMOD
#  define MULMOD(x,y,p) fftw_safe_mulmod(x,y,p)
extern int fftw_safe_mulmod(int x, int y, int p);
#endif

/****************************************************************************/
/*                           Floating Point Types                           */
/****************************************************************************/

/*
 * We use these definitions to make it easier for people to change
 * FFTW to use long double and similar types. You shouldn't have to
 * change this just to use float or double. 
 */

/*
 * FFTW_KONST(x) converts the floating-point constant x to fftw_real.
 * The argument is parenthesized so that the cast applies to the whole
 * argument expression rather than only to its first operand.
 *
 * Change this if your floating-point constants need to be expressed
 * in a special way.  For example, if fftw_real is long double, you
 * will need to append L to your fp constants to make them of the
 * same precision.  Do this by changing "(x)" below to "(x##L)".
 */
#define FFTW_KONST(x) ((fftw_real) (x))

/*
 * Ordinarily, we use the standard sin/cos functions to compute trig.
 * constants.  You'll need to change these if fftw_real has more
 * than double precision.
 */
/*
 * FFTW_TRIG_SIN / FFTW_TRIG_COS name the functions used for trig
 * constants, and FFTW_TRIG_REAL is the type of their argument.  This
 * header does not itself declare sin and cos.
 */
#define FFTW_TRIG_SIN sin
#define FFTW_TRIG_COS cos
typedef double FFTW_TRIG_REAL;	/* the argument type for sin and cos */

/* 2*pi, converted to fftw_real through FFTW_KONST */
#define FFTW_K2PI FFTW_KONST(6.2831853071795864769252867665590057683943388)

/****************************************************************************/
/*                               gcc/x86 hacks                              */
/****************************************************************************/

/*
 * gcc 2.[78].x and x86 specific hacks.  These macros align the stack
 * pointer so that the double precision temporary variables in the
 * codelets will be aligned to a multiple of 8 bytes (*way* faster on
 * pentium and pentiumpro)
 */
/*
 * The real definitions below exist only for gcc on i386.  The stack
 * hacks additionally require FFTW_ENABLE_I386_HACKS and are skipped when
 * FFTW_GCC_ALIGNS_STACK or FFTW_ENABLE_FLOAT is defined.  The alignment
 * assertion requires FFTW_DEBUG_ALIGNMENT.
 */
#ifdef __GNUC__
#  ifdef __i386__
#    ifdef FFTW_ENABLE_I386_HACKS
#      ifndef FFTW_GCC_ALIGNS_STACK
#      ifndef FFTW_ENABLE_FLOAT
#        define FFTW_USING_I386_HACKS
/* EVEN: if the address returned by __builtin_alloca(0) is NOT a multiple
   of 8, allocate 4 more bytes. */
#        define HACK_ALIGN_STACK_EVEN {                                    \
           if ((((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4);  \
         }

/* ODD: the opposite test -- allocate 4 more bytes when the address IS a
   multiple of 8. */
#        define HACK_ALIGN_STACK_ODD {                                     \
           if (!(((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4); \
         }

#      endif /* ! FFTW_ENABLE_FLOAT */
#      endif /* ! FFTW_GCC_ALIGNS_STACK */
#    endif /* FFTW_ENABLE_I386_HACKS */

/* Debug aid: abort() if a local double is not at an address that is a
   multiple of 8.  abort() must be declared by whoever uses this macro;
   this header does not include <stdlib.h> itself. */
#    ifdef FFTW_DEBUG_ALIGNMENT
#      define ASSERT_ALIGNED_DOUBLE {                                      \
         double __foo;                                                       \
         if ((((long) &__foo) & 0x7)) abort();                               \
       }
#    endif /* FFTW_DEBUG_ALIGNMENT */

#  endif /* __i386__ */
#endif /* __GNUC__ */

/* Fallbacks: anything not defined above expands to an empty block, so the
   macros can be used unconditionally. */
#ifndef HACK_ALIGN_STACK_EVEN
#  define HACK_ALIGN_STACK_EVEN {}
#endif
#ifndef HACK_ALIGN_STACK_ODD
#  define HACK_ALIGN_STACK_ODD {}
#endif
#ifndef ASSERT_ALIGNED_DOUBLE
#  define ASSERT_ALIGNED_DOUBLE {}
#endif

/****************************************************************************/
/*                                  Timers                                  */
/****************************************************************************/

/*
 * Here, you can use all the nice timers available in your machine.
 */

/*
 *
 Things you should define to include your own clock:
 
 fftw_time -- the data type used to store a time
 
 extern fftw_time fftw_get_time(void); 
 -- a function returning the current time.  (We have
 implemented this as a macro in most cases.)
 
 extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2);
 -- returns the time difference (t1 - t2).
 If t1 < t2, it may simply return zero (although this
 is not required).  (We have implemented this as a macro
 in most cases.)
 
 extern double fftw_time_to_sec(fftw_time t);
 -- returns the time t expressed in seconds, as a double.
 (Implemented as a macro in most cases.)
 
 FFTW_TIME_MIN -- a double-precision macro holding the minimum
 time interval (in seconds) for accurate time measurements.
 This should probably be at least 100 times the precision of
 your clock (we use even longer intervals, to be conservative).
 This will determine how long the planner takes to measure
 the speeds of different possible plans.
 
 Bracket all of your definitions with an appropriate #ifdef so that
 they will be enabled on your machine.  If you do add your own
 high-precision timer code, let us know (at fftw@fftw.org).
 
 Only declarations should go in this file.  Any function definitions
 that you need should go into timer.c.
 */

/*
 * define a symbol so that we know that we have the fftw_time_diff
 * function/macro (it did not exist prior to FFTW 1.2) 
 */
#define FFTW_HAS_TIME_DIFF

/**********************************************
 *              SOLARIS
 **********************************************/
#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T)

/* we use the nanosecond virtual timer */
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif

typedef hrtime_t fftw_time;

/*
 * Timer interface on top of gethrtime().  Times are nanosecond counts,
 * so dividing by 1e9 yields seconds.  The argument of fftw_time_to_sec()
 * is parenthesized so that an expression argument (for example a
 * difference of two times) is converted as a whole.
 */
#define fftw_get_time() gethrtime()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
#define fftw_time_to_sec(t) ((double) (t) / 1.0e9)

/*
 * a measurement is valid if it runs for at least
 * FFTW_TIME_MIN seconds.
 */
#define FFTW_TIME_MIN (1.0e-4)	/* for Solaris nanosecond timer */
#define FFTW_TIME_REPEAT 8

/**********************************************
 *        Pentium time stamp counter
 **********************************************/
#elif defined(__GNUC__) && defined(__i386__) && defined(FFTW_ENABLE_PENTIUM_TIMER)

/*
 * Use internal Pentium register (time stamp counter). Resolution
 * is 1/FFTW_CYCLES_PER_SEC seconds (e.g. 5 ns for Pentium 200 MHz).
 * (This code was contributed by Wolfgang Reimer)
 */

#ifndef FFTW_CYCLES_PER_SEC
#error "Must define FFTW_CYCLES_PER_SEC in fftw/config.h to use the Pentium cycle counter"
#endif

typedef unsigned long long fftw_time;

/*
 * Read the processor's time stamp counter with the rdtsc instruction and
 * return it as a fftw_time.  On i386 the "=A" output constraint binds the
 * 64-bit result to the edx:eax register pair, which is where rdtsc leaves
 * its value.  Declared with (void) so the declaration is a real prototype.
 */
static __inline__ fftw_time read_tsc(void)
{
     fftw_time ret;

     __asm__ __volatile__("rdtsc": "=A" (ret)); 
     /* no input, nothing else clobbered */
     return ret;
}

/*
 * Timer interface on top of read_tsc(): times are raw counter values, and
 * fftw_time_to_sec() divides them by the user-supplied
 * FFTW_CYCLES_PER_SEC to obtain seconds.
 */
#define fftw_get_time()  read_tsc()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
#define fftw_time_to_sec(t) (((double) (t)) / FFTW_CYCLES_PER_SEC)
#define FFTW_TIME_MIN (1.0e-4)	/* for Pentium TSC register */

/************* generic systems having gettimeofday ************/
#elif defined(HAVE_GETTIMEOFDAY) || defined(HAVE_BSDGETTIMEOFDAY)
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#define FFTW_USE_GETTIMEOFDAY

/* A time is a struct timeval (seconds in tv_sec, microseconds in tv_usec). */
typedef struct timeval fftw_time;

/*
 * Reading the clock and subtracting two times are out-of-line functions,
 * declared here and defined elsewhere; the macros below map the generic
 * timer interface onto them.
 */
extern fftw_time fftw_gettimeofday_get_time(void);
extern fftw_time fftw_gettimeofday_time_diff(fftw_time t1, fftw_time t2);
#define fftw_get_time() fftw_gettimeofday_get_time()
#define fftw_time_diff(t1, t2) fftw_gettimeofday_time_diff(t1, t2)
/* seconds plus microseconds scaled by 1e-6, as a double */
#define fftw_time_to_sec(t) ((double)(t).tv_sec + (double)(t).tv_usec * 1.0E-6)

#ifndef FFTW_TIME_MIN
/* this should be fine on any system claiming a microsecond timer */
#define FFTW_TIME_MIN (1.0e-2)
#endif

/**********************************************
 *              MACINTOSH
 **********************************************/
#elif defined(HAVE_MAC_TIMER)

/*
 * By default, use the microsecond-timer in the Mac Time Manager.
 * Alternatively, by defining HAVE_MAC_PCI_TIMER, you can use the
 * nanosecond timer available *only* on PCI PowerMacs. 
 * WARNING: the nanosecond timer was just a little experiment;
 * I haven't gotten it to work reliably.  Tips/patches are welcome.
 */
#ifndef HAVE_MAC_PCI_TIMER	/* use time manager */

/*
 * Use Macintosh Time Manager routines (maximum resolution is about 20
 * microseconds). 
 */
/*
 * A time is a microsecond count split into a high word (hi) and a low
 * word (lo).
 */
typedef struct fftw_time_struct {
     unsigned long hi, lo;
} fftw_time;

extern fftw_time get_Mac_microseconds(void);

#define fftw_get_time() get_Mac_microseconds()

/* define as a function instead of a macro: */
extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2);

/*
 * Convert to seconds.  One unit of hi is worth 2^32 = 4294967296
 * microseconds (the low word carries into the high word after
 * 4294967295), so that is the weight applied to hi.
 */
#define fftw_time_to_sec(t) ((t).lo * 1.0e-6 + 4294967296.0e-6 * (t).hi)

/* very conservative, since timer should be accurate to 20e-6: */
/* (although this seems not to be the case in practice) */
#define FFTW_TIME_MIN (5.0e-2)	/* for MacOS Time Manager timer */

#else				/* use nanosecond timer */

/* Use the nanosecond timer available on PCI PowerMacs. */

#include <DriverServices.h>

/* A time is an AbsoluteTime as returned by UpTime(). */
typedef AbsoluteTime fftw_time;
#define fftw_get_time() UpTime()
#define fftw_time_diff(t1,t2) SubAbsoluteFromAbsolute(t1,t2)
/*
 * Only the .lo member of the nanosecond value is used; any .hi part is
 * ignored.
 * NOTE(review): if .lo is a 32-bit word this wraps for intervals longer
 * than about 4.29 seconds -- confirm against the DriverServices types.
 */
#define fftw_time_to_sec(t) (AbsoluteToNanoseconds(t).lo * 1.0e-9)

/* Extremely conservative minimum time: */
/* for MacOS PCI PowerMac nanosecond timer */
#define FFTW_TIME_MIN (5.0e-3)	

#endif				/* use nanosecond timer */

/**********************************************
 *              WINDOWS
 **********************************************/
#elif defined(HAVE_WIN32_TIMER)

#include <time.h>

/*
 * A time is an unsigned long tick count obtained from GetPerfTime();
 * GetPerfSec() converts a tick count to seconds.  Both are declared here
 * and defined elsewhere.  Note that GetPerfSec() takes a double, so the
 * unsigned long passed by fftw_time_to_sec() is converted implicitly
 * through the prototype.
 */
typedef unsigned long fftw_time;
extern unsigned long GetPerfTime(void);
extern double GetPerfSec(double ticks);

#define fftw_get_time() GetPerfTime()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
#define fftw_time_to_sec(t) GetPerfSec(t)

#define FFTW_TIME_MIN (5.0e-2)	/* for Win32 timer */

/**********************************************
 *              CRAY
 **********************************************/
#elif defined(_CRAYMPP)		/* Cray MPP system */

double SECONDR(void);		/* 
				 * I think you have to link with -lsci to
				 * get this 
				 */

/*
 * A time is the double returned by SECONDR().  fftw_time_to_sec() passes
 * the value through unchanged, i.e. the value is treated as already
 * being in seconds.
 */
typedef double fftw_time;
#define fftw_get_time() SECONDR()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
#define fftw_time_to_sec(t) (t)

#define FFTW_TIME_MIN (1.0e-1)	/* for Cray MPP SECONDR timer */

/**********************************************
 *          VANILLA UNIX/ISO C SYSTEMS
 **********************************************/
/* last resort: use good old Unix clock() */
#else

#include <time.h>

/* A time is a clock_t as returned by clock(). */
typedef clock_t fftw_time;

/*
 * If <time.h> did not provide CLOCKS_PER_SEC, supply a value (and a
 * clock() prototype) when `sun' is defined; on any other system stop the
 * build and ask the user to define it.
 */
#ifndef CLOCKS_PER_SEC
#ifdef sun
/* stupid sunos4 prototypes */
#define CLOCKS_PER_SEC 1000000
extern long clock(void);
#else				/* not sun, we don't know CLOCKS_PER_SEC */
#error Please define CLOCKS_PER_SEC
#endif
#endif

/* Times are tick counts; dividing by CLOCKS_PER_SEC yields seconds. */
#define fftw_get_time() clock()
#define fftw_time_diff(t1,t2) ((t1) - (t2))
#define fftw_time_to_sec(t) (((double) (t)) / CLOCKS_PER_SEC)

/*
 * ***VERY*** conservative constant: this says that a
 * measurement must run for 200ms in order to be valid.
 * You had better check the manual of your machine
 * to discover if it can do better than this
 */
#define FFTW_TIME_MIN (2.0e-1)	/* for default clock() timer */

#endif				/* UNIX clock() */

/*
 * Take FFTW_TIME_REPEAT measurements.  The default of 4 applies only if
 * nothing earlier (a timer section of this header or the build
 * configuration) has already defined it.
 */
#ifndef FFTW_TIME_REPEAT
#define FFTW_TIME_REPEAT 4
#endif

/*
 * ...but do not run for more than FFTW_TIME_LIMIT seconds while
 * measuring one FFT.  The default of 2.0 can likewise be overridden by
 * defining the macro beforehand.
 */
#ifndef FFTW_TIME_LIMIT
#define FFTW_TIME_LIMIT 2.0
#endif

#ifdef __cplusplus
}				/* extern "C" */

#endif				/* __cplusplus */

#endif				/* FFTW_INT_H */